Fix details extraction when no title exists. (#30933)

This commit is contained in:
David Roetzel 2024-07-05 15:28:52 +02:00 committed by GitHub
parent 016c1e4e78
commit 97eddb5906
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 27 additions and 1 deletion

View file

@@ -156,7 +156,7 @@ class LinkDetailsExtractor
end
# Returns the page title, passed through `html_entities_decode` and stripped of
# surrounding whitespace. Candidates are tried in order: the structured-data
# headline, the `og:title` OpenGraph tag, then the content of the first
# `<title>` element in the document.
#
# NOTE(review): this is a diff hunk, so the two statements below are the
# before/after versions of the same line, not two consecutive statements.
def title
# Before: `.strip` is called unconditionally, so a nil result from
# `html_entities_decode` raises NoMethodError (presumably what happens when
# none of the candidates exists -- confirm against the helper).
html_entities_decode(structured_data&.headline || opengraph_tag('og:title') || document.xpath('//title').map(&:content).first).strip
# After: `&.strip` skips the call on nil, so the method returns nil instead.
html_entities_decode(structured_data&.headline || opengraph_tag('og:title') || document.xpath('//title').map(&:content).first)&.strip
end
def description

View file

@@ -0,0 +1,17 @@
HTTP/1.1 200 OK
server: nginx
date: Thu, 13 Jun 2024 14:33:13 GMT
content-type: text/html; charset=utf-8
content-length: 171
accept-ranges: bytes
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
</head>
<body>
<h2>I am not a valid page</h2>
<p>Thankfully, browsers do not care</p>
</body>
</html>

View file

@@ -30,6 +30,7 @@ RSpec.describe FetchLinkCardService do
stub_request(:get, 'http://example.com/latin1_posing_as_utf8_broken').to_return(request_fixture('latin1_posing_as_utf8_broken.txt'))
stub_request(:get, 'http://example.com/latin1_posing_as_utf8_recoverable').to_return(request_fixture('latin1_posing_as_utf8_recoverable.txt'))
stub_request(:get, 'http://example.com/aergerliche-umlaute').to_return(request_fixture('redirect_with_utf8_url.txt'))
stub_request(:get, 'http://example.com/page_without_title').to_return(request_fixture('page_without_title.txt'))
Rails.cache.write('oembed_endpoint:example.com', oembed_cache) if oembed_cache
@@ -112,6 +113,14 @@ RSpec.describe FetchLinkCardService do
end
end
# Regression case for a linked page that offers no title. The status text is
# the URL stubbed earlier in this spec with the `page_without_title.txt`
# fixture.
context 'with a page that has no title' do
let(:status) { Fabricate(:status, text: 'http://example.com/page_without_title') }
it 'does not create a preview card' do
# The status must end up with no preview card; the step that processes the
# status is not visible in this hunk (presumably a shared setup hook).
expect(status.preview_card).to be_nil
end
end
context 'with a 404 URL' do
let(:status) { Fabricate(:status, text: 'http://example.com/not-found') }