Change some instances of Nokogiri HTML4 parsing to HTML5 (#31812)

This commit is contained in:
Mike Dalessio 2024-09-08 14:41:37 -04:00 committed by GitHub
parent b716248fc5
commit 10143d053a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 11 additions and 11 deletions

View File

@@ -5,7 +5,7 @@ module Admin::Trends::StatusesHelper
text = if status.local? text = if status.local?
status.text.split("\n").first status.text.split("\n").first
else else
Nokogiri::HTML(status.text).css('html > body > *').first&.text Nokogiri::HTML5(status.text).css('html > body > *').first&.text
end end
return '' if text.blank? return '' if text.blank?

View File

@@ -24,7 +24,7 @@ class EmojiFormatter
def to_s def to_s
return html if custom_emojis.empty? || html.blank? return html if custom_emojis.empty? || html.blank?
tree = Nokogiri::HTML.fragment(html) tree = Nokogiri::HTML5.fragment(html)
tree.xpath('./text()|.//text()[not(ancestor[@class="invisible"])]').to_a.each do |node| tree.xpath('./text()|.//text()[not(ancestor[@class="invisible"])]').to_a.each do |node|
i = -1 i = -1
inside_shortname = false inside_shortname = false
@@ -43,8 +43,8 @@ class EmojiFormatter
next unless (char_after.nil? || !DISALLOWED_BOUNDING_REGEX.match?(char_after)) && (emoji = emoji_map[shortcode]) next unless (char_after.nil? || !DISALLOWED_BOUNDING_REGEX.match?(char_after)) && (emoji = emoji_map[shortcode])
result << Nokogiri::XML::Text.new(text[last_index..shortname_start_index - 1], tree.document) if shortname_start_index.positive? result << tree.document.create_text_node(text[last_index..shortname_start_index - 1]) if shortname_start_index.positive?
result << Nokogiri::HTML.fragment(tag_for_emoji(shortcode, emoji)) result << tree.document.fragment(tag_for_emoji(shortcode, emoji))
last_index = i + 1 last_index = i + 1
elsif text[i] == ':' && (i.zero? || !DISALLOWED_BOUNDING_REGEX.match?(text[i - 1])) elsif text[i] == ':' && (i.zero? || !DISALLOWED_BOUNDING_REGEX.match?(text[i - 1]))
@@ -53,7 +53,7 @@ class EmojiFormatter
end end
end end
result << Nokogiri::XML::Text.new(text[last_index..], tree.document) result << tree.document.create_text_node(text[last_index..])
node.replace(result) node.replace(result)
end end

View File

@@ -16,7 +16,7 @@ class PlainTextFormatter
if local? if local?
text text
else else
node = Nokogiri::HTML.fragment(insert_newlines) node = Nokogiri::HTML5.fragment(insert_newlines)
# Elements that are entirely removed with our Sanitize config # Elements that are entirely removed with our Sanitize config
node.xpath('.//iframe|.//math|.//noembed|.//noframes|.//noscript|.//plaintext|.//script|.//style|.//svg|.//xmp').remove node.xpath('.//iframe|.//math|.//noembed|.//noframes|.//noscript|.//plaintext|.//script|.//style|.//svg|.//xmp').remove
node.text.chomp node.text.chomp

View File

@@ -25,7 +25,7 @@ class FetchOEmbedService
return if html.nil? return if html.nil?
@format = @options[:format] @format = @options[:format]
page = Nokogiri::HTML(html) page = Nokogiri::HTML5(html)
if @format.nil? || @format == :json if @format.nil? || @format == :json
@endpoint_url ||= page.at_xpath('//link[@type="application/json+oembed"]|//link[@type="text/json+oembed"]')&.attribute('href')&.value @endpoint_url ||= page.at_xpath('//link[@type="application/json+oembed"]|//link[@type="text/json+oembed"]')&.attribute('href')&.value

View File

@@ -73,7 +73,7 @@ class FetchResourceService < BaseService
end end
def process_html(response) def process_html(response)
page = Nokogiri::HTML(response.body_with_limit) page = Nokogiri::HTML5(response.body_with_limit)
json_link = page.xpath('//link[@rel="alternate"]').find { |link| ACTIVITY_STREAM_LINK_TYPES.include?(link['type']) } json_link = page.xpath('//link[@rel="alternate"]').find { |link| ACTIVITY_STREAM_LINK_TYPES.include?(link['type']) }
process(json_link['href'], terminal: true) unless json_link.nil? process(json_link['href'], terminal: true) unless json_link.nil?

View File

@@ -100,7 +100,7 @@ class TranslateStatusService < BaseService
end end
def unwrap_emoji_shortcodes(html) def unwrap_emoji_shortcodes(html)
fragment = Nokogiri::HTML.fragment(html) fragment = Nokogiri::HTML5.fragment(html)
fragment.css('span[translate="no"]').each do |element| fragment.css('span[translate="no"]').each do |element|
element.remove_attribute('translate') element.remove_attribute('translate')
element.replace(element.children) if element.attributes.empty? element.replace(element.children) if element.attributes.empty?

View File

@@ -52,7 +52,7 @@ class Sanitize
:relative :relative
end end
current_node.replace(Nokogiri::XML::Text.new(current_node.text, current_node.document)) unless LINK_PROTOCOLS.include?(scheme) current_node.replace(current_node.document.create_text_node(current_node.text)) unless LINK_PROTOCOLS.include?(scheme)
end end
UNSUPPORTED_ELEMENTS_TRANSFORMER = lambda do |env| UNSUPPORTED_ELEMENTS_TRANSFORMER = lambda do |env|

View File

@@ -13,7 +13,7 @@ def gen_border(codepoint, color)
view_box[3] += 4 view_box[3] += 4
svg['viewBox'] = view_box.join(' ') svg['viewBox'] = view_box.join(' ')
end end
g = Nokogiri::XML::Node.new 'g', doc g = doc.create_element('g')
doc.css('svg > *').each do |elem| doc.css('svg > *').each do |elem|
border_elem = elem.dup border_elem = elem.dup