2017-04-27 14:42:22 +02:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
class Sanitize
|
|
|
|
module Config
|
2020-02-08 21:22:38 +01:00
|
|
|
# URL schemes accepted for the `src` attribute of embedded media
# (referenced by the `protocols` section of MASTODON_OEMBED).
HTTP_PROTOCOLS = %w(
  http
  https
).freeze
|
|
|
|
|
|
|
|
# URL schemes a link's `href` may use; UNSUPPORTED_HREF_TRANSFORMER replaces
# `<a>` elements whose scheme is not in this list with their plain text.
LINK_PROTOCOLS = %w(
  http
  https
  dat
  dweb
  ipfs
  ipns
  ssb
  gopher
  xmpp
  magnet
  gemini
).freeze
|
2017-04-27 14:42:22 +02:00
|
|
|
|
2024-10-04 10:29:23 +02:00
|
|
|
# Sanitize transformer that trims a node's `class` attribute down to an
# allowlist of class names:
#
# * microformats classes (prefixes `h-`, `p-`, `u-`, `dt-`, `e-`)
# * semantic classes (`mention`, `hashtag`)
# * link formatting classes (`ellipsis`, `invisible`)
#
# Nodes without a `class` attribute are left untouched. When no class name
# survives, the attribute is set to an empty string.
#
# Only `env[:node]` is read from the transformer environment.
ALLOWED_CLASS_TRANSFORMER = lambda do |env|
  node = env[:node]

  # Split on each single ASCII whitespace character; the empty tokens
  # produced by consecutive separators never match the allowlist below.
  class_list = node['class']&.split(/[\t\n\f\r ]/)

  return unless class_list

  # Tokens cannot contain line breaks (they were split on them), so the
  # whole-string anchors \A and \z are used to make the intent explicit.
  class_list.keep_if do |e|
    next true if /\A(h|p|u|dt|e)-/.match?(e) # microformats classes
    next true if /\A(mention|hashtag)\z/.match?(e) # semantic classes
    next true if /\A(ellipsis|invisible)\z/.match?(e) # link formatting classes
  end

  node['class'] = class_list.join(' ')
end
|
|
|
|
|
2023-06-20 18:10:19 +02:00
|
|
|
# Sanitize transformer that strips the `translate` attribute from a node
# unless its value is exactly `no`, so `translate="no"` is the only form
# that is passed through.
#
# Only `env[:node]` is read from the transformer environment.
TRANSLATE_TRANSFORMER = lambda do |env|
  node = env[:node]
  node.remove_attribute('translate') unless node['translate'] == 'no'
end
|
|
|
|
|
2020-02-08 21:22:38 +01:00
|
|
|
# Sanitize transformer that turns `<a>` elements with an unsupported link
# scheme into plain text.
#
# The scheme is the lowercased first capture group of
# Sanitize::REGEX_PROTOCOL when the `href` matches it; otherwise the link is
# classified as `:relative`. Any scheme that is not listed in LINK_PROTOCOLS
# (which includes `:relative`, since the list only holds strings) causes the
# element to be replaced by a text node containing the link's text.
#
# Reads `env[:node_name]` and `env[:node]`; nodes other than `a` are ignored.
UNSUPPORTED_HREF_TRANSFORMER = lambda do |env|
  return unless env[:node_name] == 'a'

  current_node = env[:node]

  scheme = if current_node['href'] =~ Sanitize::REGEX_PROTOCOL
             Regexp.last_match(1).downcase
           else
             :relative
           end

  current_node.replace(current_node.document.create_text_node(current_node.text)) unless LINK_PROTOCOLS.include?(scheme)
end
|
|
|
|
|
2019-06-16 21:46:36 +02:00
|
|
|
# Sanitize transformer that rewrites heading elements (`h1`..`h6`) into a
# `<strong>` element wrapped in a `<p>`, i.e. `<h1>x</h1>` becomes
# `<p><strong>x</strong></p>`.
#
# Reads `env[:node_name]` and `env[:node]`; all other nodes are ignored.
UNSUPPORTED_ELEMENTS_TRANSFORMER = lambda do |env|
  return unless %w(h1 h2 h3 h4 h5 h6).include?(env[:node_name])

  current_node = env[:node]

  current_node.name = 'strong'
  current_node.wrap('<p></p>')
end
|
|
|
|
|
2024-11-28 13:02:34 +01:00
|
|
|
# We assume that incoming <math> nodes are of the form
# <math><semantics>...<annotation>...</annotation></semantics></math>
# according to the [FEP]. We try to grab the most relevant plain-text
# annotation from the semantics node, and use it to display a representation
# of the mathematics.
#
# The first `application/x-tex` annotation is preferred and is wrapped in
# `$$...$$` when the <math> element has `display="block"`, or `$...$`
# otherwise. Failing that, the first `text/plain` annotation is used as-is.
# If neither exists, or the node does not have the expected shape, the node
# is left unchanged. Nodes flagged with `env[:is_allowlisted]` are skipped.
#
# FEP: https://codeberg.org/fediverse/fep/src/branch/main/fep/dc88/fep-dc88.md
MATH_TRANSFORMER = lambda do |env|
  math = env[:node]
  return if env[:is_allowlisted]
  return unless math.element? && env[:node_name] == 'math'

  semantics = math.element_children[0]
  return if semantics.nil? || semantics.name != 'semantics'

  # next, we find the plain-text description
  is_annotation_with_encoding = lambda do |encoding, node|
    return false unless node.name == 'annotation'

    # An <annotation> may lack the `encoding` attribute entirely; treat that
    # as "no match" instead of raising on nil.
    node.attributes['encoding']&.value == encoding
  end

  annotation = semantics.children.find(&is_annotation_with_encoding.curry['application/x-tex'])
  if annotation
    text = if math.attributes['display']&.value == 'block'
             "$$#{annotation.text}$$"
           else
             "$#{annotation.text}$"
           end
    math.replace(math.document.create_text_node(text))
    return
  end

  # Don't bother surrounding 'text/plain' annotations with dollar signs,
  # since it isn't LaTeX
  annotation = semantics.children.find(&is_annotation_with_encoding.curry['text/plain'])
  math.replace(math.document.create_text_node(annotation.text)) unless annotation.nil?
end
|
|
|
|
|
2023-11-07 10:11:50 +01:00
|
|
|
# Sanitize configuration built with `freeze_config`:
#
# * `elements`       - the element names listed below
# * `attributes`     - per-element attribute allowlist
# * `add_attributes` - `rel` and `target` values added to every `<a>`
# * `protocols`      - intentionally empty here; link schemes are handled by
#                      UNSUPPORTED_HREF_TRANSFORMER in the transformer list
#                      (NOTE(review): presumably so unsupported links degrade
#                      to text instead of losing only their href - confirm)
# * `transformers`   - listed in the order given below
MASTODON_STRICT = freeze_config(
  elements: %w(p br span a del s pre blockquote code b strong u i em ul ol li ruby rt rp),

  attributes: {
    'a' => %w(href rel class translate),
    'span' => %w(class translate),
    'ol' => %w(start reversed),
    'li' => %w(value),
  },

  add_attributes: {
    'a' => {
      'rel' => 'nofollow noopener',
      'target' => '_blank',
    },
  },

  protocols: {},

  transformers: [
    ALLOWED_CLASS_TRANSFORMER,
    TRANSLATE_TRANSFORMER,
    MATH_TRANSFORMER,
    UNSUPPORTED_ELEMENTS_TRANSFORMER,
    UNSUPPORTED_HREF_TRANSFORMER,
  ]
)
|
|
|
|
|
2023-11-07 10:11:50 +01:00
|
|
|
# Sanitize configuration built with `freeze_config` for embedded media
# markup:
#
# * `elements`       - audio, embed, iframe, source and video only
# * `attributes`     - per-element attribute allowlist
# * `protocols`      - `src` of embed/iframe/source is restricted to
#                      HTTP_PROTOCOLS
# * `add_attributes` - a fixed `sandbox` attribute value added to every
#                      `<iframe>`
MASTODON_OEMBED = freeze_config(
  elements: %w(audio embed iframe source video),

  attributes: {
    'audio' => %w(controls),
    'embed' => %w(height src type width),
    'iframe' => %w(allowfullscreen frameborder height scrolling src width),
    'source' => %w(src type),
    'video' => %w(controls height loop width),
  },

  protocols: {
    'embed' => { 'src' => HTTP_PROTOCOLS },
    'iframe' => { 'src' => HTTP_PROTOCOLS },
    'source' => { 'src' => HTTP_PROTOCOLS },
  },

  add_attributes: {
    'iframe' => { 'sandbox' => 'allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox allow-forms' },
  }
)
|
|
|
|
end
|
|
|
|
end
|