From d14e74eff52352f1a2fb4bc2053bbb28c1aa29e0 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Sun, 17 Nov 2019 18:40:33 +0100 Subject: [PATCH] Add cache for OEmbed endpoints to avoid extra HTTP requests (#12403) * add youtube oembed endpoint * add check for oembed endpoint * change unless for a more readable if * clear blank lines * endpoint via https * Fix string literal in condition * use cache for endpoints * use cache for endpoints * clean up and adding check * clean up and remove redundant return * add html check * add false to return * use double quotes * use double quotes * Clean up --- app/services/fetch_link_card_service.rb | 23 +++++++++++----- app/services/fetch_oembed_service.rb | 31 +++++++++++++++++++++- spec/services/fetch_oembed_service_spec.rb | 18 +++++++++++++ 3 files changed, 64 insertions(+), 8 deletions(-) diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb index 640c60fd4..29880e8d8 100644 --- a/app/services/fetch_link_card_service.rb +++ b/app/services/fetch_link_card_service.rb @@ -39,6 +39,12 @@ class FetchLinkCardService < BaseService def process_url @card ||= PreviewCard.new(url: @url) + attempt_oembed || attempt_opengraph + end + + def html + return @html if defined?(@html) + Request.new(:get, @url).perform do |res| if res.code == 200 && res.mime_type == 'text/html' @html = res.body_with_limit @@ -48,10 +54,6 @@ class FetchLinkCardService < BaseService @html_charset = nil end end - - return if @html.nil? - - attempt_oembed || attempt_opengraph end def attach_card @@ -88,12 +90,17 @@ class FetchLinkCardService < BaseService end def attempt_oembed - service = FetchOEmbedService.new - embed = service.call(@url, html: @html) - url = Addressable::URI.parse(service.endpoint_url) + service = FetchOEmbedService.new + url_domain = Addressable::URI.parse(@url).normalized_host + cached_endpoint = Rails.cache.read("oembed_endpoint:#{url_domain}") + + embed = service.call(@url, cached_endpoint: cached_endpoint) unless cached_endpoint.nil? + embed ||= service.call(@url, html: html) unless html.nil? return false if embed.nil? + url = Addressable::URI.parse(service.endpoint_url) + @card.type = embed[:type] @card.title = embed[:title] || '' @card.author_name = embed[:author_name] || '' @@ -127,6 +134,8 @@ class FetchLinkCardService < BaseService end def attempt_opengraph + return if html.nil? + detector = CharlockHolmes::EncodingDetector.new detector.strip_tags = true diff --git a/app/services/fetch_oembed_service.rb b/app/services/fetch_oembed_service.rb index 10176cfb9..4f8498c62 100644 --- a/app/services/fetch_oembed_service.rb +++ b/app/services/fetch_oembed_service.rb @@ -1,13 +1,20 @@ # frozen_string_literal: true class FetchOEmbedService + ENDPOINT_CACHE_EXPIRES_IN = 24.hours.freeze + attr_reader :url, :options, :format, :endpoint_url def call(url, options = {}) @url = url @options = options - discover_endpoint! + if @options[:cached_endpoint] + parse_cached_endpoint! + else + discover_endpoint! + end + fetch! end @@ -32,10 +39,32 @@ class FetchOEmbedService return if @endpoint_url.blank? @endpoint_url = (Addressable::URI.parse(@url) + @endpoint_url).to_s + + cache_endpoint! rescue Addressable::URI::InvalidURIError @endpoint_url = nil end + def parse_cached_endpoint! + cached = @options[:cached_endpoint] + + return if cached[:endpoint].nil? || cached[:format].nil? + + @endpoint_url = Addressable::Template.new(cached[:endpoint]).expand(url: @url).to_s + @format = cached[:format] + end + + def cache_endpoint! + url_domain = Addressable::URI.parse(@url).normalized_host + + endpoint_hash = { + endpoint: @endpoint_url.gsub(URI.encode_www_form_component(@url), '{url}'), + format: @format, + } + + Rails.cache.write("oembed_endpoint:#{url_domain}", endpoint_hash, expires_in: ENDPOINT_CACHE_EXPIRES_IN) + end + def fetch! return if @endpoint_url.blank? diff --git a/spec/services/fetch_oembed_service_spec.rb b/spec/services/fetch_oembed_service_spec.rb index 5789fb53b..a4262b040 100644 --- a/spec/services/fetch_oembed_service_spec.rb +++ b/spec/services/fetch_oembed_service_spec.rb @@ -113,6 +113,24 @@ describe FetchOEmbedService, type: :service do end + context 'when endpoint is cached' do + before do + stub_request(:get, 'http://www.youtube.com/oembed?format=json&url=https://www.youtube.com/watch?v=dqwpQarrDwk').to_return( + status: 200, + headers: { 'Content-Type': 'text/html' }, + body: request_fixture('oembed_json_empty.html') + ) + end + + it 'returns new provider without fetching original URL first' do + subject.call('https://www.youtube.com/watch?v=dqwpQarrDwk', cached_endpoint: { endpoint: 'http://www.youtube.com/oembed?format=json&url={url}', format: :json }) + expect(a_request(:get, 'https://www.youtube.com/watch?v=dqwpQarrDwk')).to_not have_been_made + expect(subject.endpoint_url).to eq 'http://www.youtube.com/oembed?format=json&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DdqwpQarrDwk' + expect(subject.format).to eq :json + expect(a_request(:get, 'http://www.youtube.com/oembed?format=json&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DdqwpQarrDwk')).to have_been_made + end + end + context 'when status code is not 200' do before do stub_request(:get, 'https://host.test/oembed.html').to_return(