1
0
Fork 0
mirror of https://github.com/mastodon/mastodon.git synced 2024-08-20 21:08:15 -07:00

Handle link rel attribute as case-insensitive token list

This commit is contained in:
Christian Schmidt 2024-08-09 16:45:50 +02:00
parent 1701575704
commit 6e134e78af
2 changed files with 14 additions and 6 deletions

View file

@ -157,7 +157,7 @@ class LinkDetailsExtractor
end
# Returns the page title: the structured-data headline, else the og:title
# tag, else the document's <title> content. The chosen value is passed
# through html_entities_decode and stripped; a nil result stays nil (&.strip).
#
# NOTE(review): this appears to be a rendered diff with +/- markers removed,
# so the two expressions below look like the old and the new version of the
# same line. Read literally as Ruby, both run and only the second supplies
# the return value.
def title
# Looks up every <title> anywhere in the document and takes the first one's content.
html_entities_decode(structured_data&.headline || opengraph_tag('og:title') || document.xpath('//title').map(&:content).first)&.strip
# Looks up only a `title` child of the node returned by `head`.
html_entities_decode(structured_data&.headline || opengraph_tag('og:title') || head.at_xpath('title')&.content)&.strip
end
def description
@ -205,11 +205,11 @@ class LinkDetailsExtractor
end
# Returns the page language: the structured-data language, else the
# og:locale tag, else the `lang` attribute of the root <html> element.
# The candidate is filtered through valid_locale_or_nil.
#
# NOTE(review): this appears to be a rendered diff with +/- markers removed,
# so the two expressions below look like the old and the new version of the
# same line.
def language
# Reads `lang` from the elements matched by //html via `pick`.
valid_locale_or_nil(structured_data&.language || opengraph_tag('og:locale') || document.xpath('//html').pick('lang'))
# Reads `lang` from the document's root node.
# NOTE(review): `document.root` is called without safe navigation; if the
# parsed document can have no root element this raises NoMethodError,
# whereas the xpath form above would not. Confirm against real inputs.
valid_locale_or_nil(structured_data&.language || opengraph_tag('og:locale') || document.root.attr('lang'))
end
# Returns the site icon URL: the structured-data publisher icon, else the
# href of an apple-touch-icon link, else the href of a favicon link.
# The candidate is filtered through valid_url_or_nil.
#
# NOTE(review): this appears to be a rendered diff with +/- markers removed,
# so the two expressions below look like the old and the new version of the
# same line.
def icon
# Final fallback asks link_tag for the literal rel value 'shortcut icon'.
valid_url_or_nil(structured_data&.publisher_icon || link_tag('apple-touch-icon') || link_tag('shortcut icon'))
# Final fallback asks link_tag for the single token 'icon'. Presumably this
# relies on link_tag matching individual rel tokens, so rel="shortcut icon"
# would still match; verify against link_tag.
valid_url_or_nil(structured_data&.publisher_icon || link_tag('apple-touch-icon') || link_tag('icon'))
end
private
@ -237,11 +237,11 @@ class LinkDetailsExtractor
end
# Returns the href of a <link> element whose rel matches `name`, or nil.
#
# NOTE(review): this appears to be a rendered diff with +/- markers removed,
# so the two expressions below look like the old and the new version of the
# same line.
def link_tag(name)
# Matches only when the whole rel attribute equals `name` exactly
# (case-sensitive string comparison inside the XPath predicate).
document.xpath("//link[@rel=\"#{name}\"]").pick('href')
# Considers only <link> elements that carry both rel and href, then picks the
# first one where any value returned by kwattr_values('rel') equals `name`
# ignoring case (casecmp?), and returns its href.
# NOTE(review): an XPath expression starting with `//` is evaluated from the
# document root, so calling it on `head` does not limit the search to the
# head element. Use './/link' or 'link' if head-only matching is intended.
head.xpath('//link[@rel][@href]').find { |el| el.kwattr_values('rel').any? { |v| name.casecmp?(v) } }&.attr('href')
end
# Returns the content of a <meta> element whose `property` or `name`
# attribute equals `name`, or nil when none is found.
#
# NOTE(review): this appears to be a rendered diff with +/- markers removed,
# so the two expressions below look like the old and the new version of the
# same line.
def opengraph_tag(name)
# Collects all matching <meta> elements and reads `content` via `pick`.
document.xpath("//meta[@property=\"#{name}\" or @name=\"#{name}\"]").pick('content')
# Takes the first matching <meta> that also has a content attribute, so a
# matching tag without content is skipped rather than selected.
# NOTE(review): the leading `//` makes this path absolute, so it searches the
# whole document even though it is called on `head`. Confirm intent.
head.at_xpath("//meta[@property='#{name}' or @name='#{name}'][@content]")&.attr('content')
end
def meta_tag(name)
@ -249,6 +249,8 @@ class LinkDetailsExtractor
end
def structured_data
return @structured_data if defined?(@structured_data)
# Some publications have more than one JSON-LD definition on the page,
# and some of those definitions aren't valid JSON either, so we have
# to loop through here until we find something that is the right type
@ -273,6 +275,10 @@ class LinkDetailsExtractor
@document ||= detect_encoding_and_parse_document
end
# Returns the node that the head-scoped lookups run against, memoized in @head.
#
# at_xpath returns nil when the parsed document has no /html/head element.
# Callers invoke `head.at_xpath` / `head.xpath` without safe navigation, so a
# nil here would raise NoMethodError. In that case a detached, empty `head`
# node belonging to the same document is returned instead, so those lookups
# run without raising.
#
# @return [Nokogiri::XML::Node] the document's head element, or an empty
#   stand-in when the document has none
def head
@head ||= document.at_xpath('/html/head') || Nokogiri::XML::Node.new('head', document)
end
def detect_encoding_and_parse_document
[detect_encoding, nil, header_encoding].uniq.each do |encoding|
document = Nokogiri::HTML(@html, nil, encoding)

View file

@ -42,6 +42,7 @@ RSpec.describe LinkDetailsExtractor do
<head>
<title>Man bites dog</title>
<meta name="description" content="A dog&#39;s tale">
<link rel="pretty IcoN" href="/favicon.ico">
</head>
</html>
HTML
@ -51,7 +52,8 @@ RSpec.describe LinkDetailsExtractor do
.to have_attributes(
title: eq('Man bites dog'),
description: eq("A dog's tale"),
language: eq('en')
language: eq('en'),
icon: eq('https://example.com/favicon.ico')
)
end
end