From 41118205bec707ed5bebd96aa08028b0744d67dd Mon Sep 17 00:00:00 2001 From: Christian Schmidt Date: Wed, 11 Sep 2024 16:59:05 +0200 Subject: [PATCH] Retry oEmbed cache after redirect --- app/services/fetch_link_card_service.rb | 7 ++-- app/services/fetch_oembed_service.rb | 14 ++++---- spec/services/fetch_link_card_service_spec.rb | 34 +++++++++++++++++++ spec/services/fetch_oembed_service_spec.rb | 4 ++- 4 files changed, 48 insertions(+), 11 deletions(-) diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb index 7662fc1f29..1f2f021497 100644 --- a/app/services/fetch_link_card_service.rb +++ b/app/services/fetch_link_card_service.rb @@ -106,12 +106,11 @@ class FetchLinkCardService < BaseService end def attempt_oembed - service = FetchOEmbedService.new - url_domain = Addressable::URI.parse(@url).normalized_host - cached_endpoint = Rails.cache.read("oembed_endpoint:#{url_domain}") + service = FetchOEmbedService.new - embed = service.call(@url, cached_endpoint: cached_endpoint) unless cached_endpoint.nil? + embed = service.call(@url, use_cached_endpoint: true) embed ||= service.call(@url, html: html) unless html.nil? + embed ||= service.call(@url, use_cached_endpoint: true) if @url != @original_url.to_s return false if embed.nil? diff --git a/app/services/fetch_oembed_service.rb b/app/services/fetch_oembed_service.rb index c7d4f7e292..c36dce730f 100644 --- a/app/services/fetch_oembed_service.rb +++ b/app/services/fetch_oembed_service.rb @@ -10,7 +10,7 @@ class FetchOEmbedService @url = url @options = options - if @options[:cached_endpoint] + if @options[:use_cached_endpoint] parse_cached_endpoint! else discover_endpoint! @@ -57,9 +57,9 @@ class FetchOEmbedService end def parse_cached_endpoint! - cached = @options[:cached_endpoint] + cached = Rails.cache.read(cache_key) - return if cached[:endpoint].nil? || cached[:format].nil? + return if cached.nil? || cached[:endpoint].nil? || cached[:format].nil? @endpoint_url = Addressable::Template.new(cached[:endpoint]).expand(url: @url).to_s @format = cached[:format] @@ -68,14 +68,16 @@ class FetchOEmbedService def cache_endpoint! return unless URL_REGEX.match?(@endpoint_url) - url_domain = Addressable::URI.parse(@url).normalized_host - endpoint_hash = { endpoint: @endpoint_url.gsub(URL_REGEX, '={url}'), format: @format, } - Rails.cache.write("oembed_endpoint:#{url_domain}", endpoint_hash, expires_in: ENDPOINT_CACHE_EXPIRES_IN) + Rails.cache.write(cache_key, endpoint_hash, expires_in: ENDPOINT_CACHE_EXPIRES_IN) + end + + def cache_key + "oembed_endpoint:#{Addressable::URI.parse(@url).normalized_host}" end def fetch! diff --git a/spec/services/fetch_link_card_service_spec.rb b/spec/services/fetch_link_card_service_spec.rb index 1d61e33c0b..18477b6fc4 100644 --- a/spec/services/fetch_link_card_service_spec.rb +++ b/spec/services/fetch_link_card_service_spec.rb @@ -13,6 +13,7 @@ RSpec.describe FetchLinkCardService do stub_request(:get, 'http://example.com/not-found').to_return(status: 404, headers: { 'Content-Type' => 'text/html' }, body: html) stub_request(:get, 'http://example.com/text').to_return(status: 404, headers: { 'Content-Type' => 'text/plain' }, body: 'Hello') stub_request(:get, 'http://example.com/redirect').to_return(status: 302, headers: { 'Location' => 'http://example.com/html' }) + stub_request(:get, 'http://example.net/redirect-to-other-domain').to_return(status: 302, headers: { 'Location' => 'http://example.com/html' }) stub_request(:get, 'http://example.com/redirect-to-404').to_return(status: 302, headers: { 'Location' => 'http://example.com/not-found' }) stub_request(:get, 'http://example.com/oembed?url=http://example.com/html').to_return(headers: { 'Content-Type' => 'application/json' }, body: '{ "version": "1.0", "type": "link", "title": "oEmbed title" }') stub_request(:get, 'http://example.com/oembed?format=json&url=http://example.com/html').to_return(headers: { 'Content-Type' => 'application/json' }, body: '{ "version": "1.0", "type": "link", "title": "oEmbed title" }') @@ -264,6 +265,39 @@ RSpec.describe FetchLinkCardService do end end + context 'when oEmbed endpoint cache populated with redirect target' do + let(:status) { Fabricate(:status, text: 'http://example.net/redirect-to-other-domain') } + let(:oembed_cache) { { endpoint: 'http://example.com/oembed?format=json&url={url}', format: :json } } + + it 'uses the cached oEmbed response' do + expect(a_request(:get, 'http://example.net/redirect-to-other-domain')).to have_been_made + expect(a_request(:get, 'http://example.com/oembed?url=http://example.com/html')).to have_been_made + end + + it 'creates preview card' do + expect(status.preview_card).to_not be_nil + expect(status.preview_card.url).to eq 'http://example.com/html' + expect(status.preview_card.title).to eq 'oEmbed title' + end + end + + context 'when oEmbed endpoint cache populated with redirect target but page contains no oEmbed tags' do + let(:status) { Fabricate(:status, text: 'http://example.net/redirect-to-other-domain') } + let(:html) { 'Please fill out CAPTCHA' } + let(:oembed_cache) { { endpoint: 'http://example.com/oembed?format=json&url={url}', format: :json } } + + it 'uses the cached oEmbed response' do + expect(a_request(:get, 'http://example.net/redirect-to-other-domain')).to have_been_made + expect(a_request(:get, 'http://example.com/oembed?format=json&url=http://example.com/html')).to have_been_made + end + + it 'creates preview card' do + expect(status.preview_card).to_not be_nil + expect(status.preview_card.url).to eq 'http://example.com/html' + expect(status.preview_card.title).to eq 'oEmbed title' + end + end + # If the original HTML URL for whatever reason (e.g. DOS protection) redirects to # an error page, we can still use the cached oEmbed but should not use the # redirect URL on the card. diff --git a/spec/services/fetch_oembed_service_spec.rb b/spec/services/fetch_oembed_service_spec.rb index 52d2b9cf22..99e3e7f984 100644 --- a/spec/services/fetch_oembed_service_spec.rb +++ b/spec/services/fetch_oembed_service_spec.rb @@ -160,10 +160,12 @@ RSpec.describe FetchOEmbedService do headers: { 'Content-Type': 'text/html' }, body: request_fixture('oembed_json_empty.html') ) + + Rails.cache.write('oembed_endpoint:www.youtube.com', { endpoint: 'http://www.youtube.com/oembed?format=json&url={url}', format: :json }) end it 'returns new provider without fetching original URL first' do - subject.call('https://www.youtube.com/watch?v=dqwpQarrDwk', cached_endpoint: { endpoint: 'http://www.youtube.com/oembed?format=json&url={url}', format: :json }) + subject.call('https://www.youtube.com/watch?v=dqwpQarrDwk', use_cached_endpoint: true) expect(a_request(:get, 'https://www.youtube.com/watch?v=dqwpQarrDwk')).to_not have_been_made expect(subject.endpoint_url).to eq 'http://www.youtube.com/oembed?format=json&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DdqwpQarrDwk' expect(subject.format).to eq :json