diff --git a/app/lib/link_details_extractor.rb b/app/lib/link_details_extractor.rb index fe7f23f481..a8004f2925 100644 --- a/app/lib/link_details_extractor.rb +++ b/app/lib/link_details_extractor.rb @@ -46,7 +46,7 @@ class LinkDetailsExtractor end def image - obj = first_of_value(json['image']) + obj = first_of_hash(json['image']) return obj['url'] if obj.is_a?(Hash) @@ -85,15 +85,15 @@ class LinkDetailsExtractor private def author - first_of_value(json['author']) || {} + first_of_hash(json['author']) || {} end def publisher - first_of_value(json['publisher']) || {} + first_of_hash(json['publisher']) || {} end - def first_of_value(arr) - arr.is_a?(Array) ? arr.first : arr + def first_of_hash(arr) + arr.is_a?(Array) ? arr.flatten.find { |item| item.is_a?(Hash) } : arr end def root_array(root) diff --git a/spec/lib/link_details_extractor_spec.rb b/spec/lib/link_details_extractor_spec.rb index 36d6f22b00..cb072c4870 100644 --- a/spec/lib/link_details_extractor_spec.rb +++ b/spec/lib/link_details_extractor_spec.rb @@ -249,6 +249,44 @@ RSpec.describe LinkDetailsExtractor do expect(subject.author_name).to eq 'Author 1, Author 2' end end + + context 'with embedded arrays' do + let(:ld_json) do + { + '@context' => 'https://schema.org', + '@type' => 'NewsArticle', + 'headline' => 'A lot of authors', + 'description' => 'But we decided to cram them into one', + 'author' => [[{ + '@type' => 'Person', + 'name' => ['Author 1'], + }]], + 'publisher' => [[{ + '@type' => 'NewsMediaOrganization', + 'name' => 'Pet News', + 'url' => 'https://example.com', + }]], + }.to_json + end + let(:html) { <<~HTML } + + + + + + + HTML + + it 'gives correct author_name' do + expect(subject.author_name).to eq 'Author 1' + end + + it 'gives provider_name' do + expect(subject.provider_name).to eq 'Pet News' + end + end end context 'when Open Graph protocol data is present' do