Change some instances of Nokogiri HTML4 parsing to HTML5 (#31812)

This commit is contained in:
Mike Dalessio 2024-09-08 14:41:37 -04:00 committed by GitHub
parent b716248fc5
commit 10143d053a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 11 additions and 11 deletions

View file

@@ -5,7 +5,7 @@ module Admin::Trends::StatusesHelper
text = if status.local?
status.text.split("\n").first
else
Nokogiri::HTML(status.text).css('html > body > *').first&.text
Nokogiri::HTML5(status.text).css('html > body > *').first&.text
end
return '' if text.blank?

View file

@@ -24,7 +24,7 @@ class EmojiFormatter
def to_s
return html if custom_emojis.empty? || html.blank?
tree = Nokogiri::HTML.fragment(html)
tree = Nokogiri::HTML5.fragment(html)
tree.xpath('./text()|.//text()[not(ancestor[@class="invisible"])]').to_a.each do |node|
i = -1
inside_shortname = false
@@ -43,8 +43,8 @@ class EmojiFormatter
next unless (char_after.nil? || !DISALLOWED_BOUNDING_REGEX.match?(char_after)) && (emoji = emoji_map[shortcode])
result << Nokogiri::XML::Text.new(text[last_index..shortname_start_index - 1], tree.document) if shortname_start_index.positive?
result << Nokogiri::HTML.fragment(tag_for_emoji(shortcode, emoji))
result << tree.document.create_text_node(text[last_index..shortname_start_index - 1]) if shortname_start_index.positive?
result << tree.document.fragment(tag_for_emoji(shortcode, emoji))
last_index = i + 1
elsif text[i] == ':' && (i.zero? || !DISALLOWED_BOUNDING_REGEX.match?(text[i - 1]))
@@ -53,7 +53,7 @@ class EmojiFormatter
end
end
result << Nokogiri::XML::Text.new(text[last_index..], tree.document)
result << tree.document.create_text_node(text[last_index..])
node.replace(result)
end

View file

@@ -16,7 +16,7 @@ class PlainTextFormatter
if local?
text
else
node = Nokogiri::HTML.fragment(insert_newlines)
node = Nokogiri::HTML5.fragment(insert_newlines)
# Elements that are entirely removed with our Sanitize config
node.xpath('.//iframe|.//math|.//noembed|.//noframes|.//noscript|.//plaintext|.//script|.//style|.//svg|.//xmp').remove
node.text.chomp

View file

@@ -25,7 +25,7 @@ class FetchOEmbedService
return if html.nil?
@format = @options[:format]
page = Nokogiri::HTML(html)
page = Nokogiri::HTML5(html)
if @format.nil? || @format == :json
@endpoint_url ||= page.at_xpath('//link[@type="application/json+oembed"]|//link[@type="text/json+oembed"]')&.attribute('href')&.value

View file

@@ -73,7 +73,7 @@ class FetchResourceService < BaseService
end
def process_html(response)
page = Nokogiri::HTML(response.body_with_limit)
page = Nokogiri::HTML5(response.body_with_limit)
json_link = page.xpath('//link[@rel="alternate"]').find { |link| ACTIVITY_STREAM_LINK_TYPES.include?(link['type']) }
process(json_link['href'], terminal: true) unless json_link.nil?

View file

@@ -100,7 +100,7 @@ class TranslateStatusService < BaseService
end
def unwrap_emoji_shortcodes(html)
fragment = Nokogiri::HTML.fragment(html)
fragment = Nokogiri::HTML5.fragment(html)
fragment.css('span[translate="no"]').each do |element|
element.remove_attribute('translate')
element.replace(element.children) if element.attributes.empty?

View file

@@ -52,7 +52,7 @@ class Sanitize
:relative
end
current_node.replace(Nokogiri::XML::Text.new(current_node.text, current_node.document)) unless LINK_PROTOCOLS.include?(scheme)
current_node.replace(current_node.document.create_text_node(current_node.text)) unless LINK_PROTOCOLS.include?(scheme)
end
UNSUPPORTED_ELEMENTS_TRANSFORMER = lambda do |env|

View file

@@ -13,7 +13,7 @@ def gen_border(codepoint, color)
view_box[3] += 4
svg['viewBox'] = view_box.join(' ')
end
g = Nokogiri::XML::Node.new 'g', doc
g = doc.create_element('g')
doc.css('svg > *').each do |elem|
border_elem = elem.dup