add maximum page limit

This commit is contained in:
sneakers-the-rat 2024-11-15 02:26:17 -08:00 committed by Jonny Saunders
parent 5dbfebf756
commit 19b67e66bb
5 changed files with 30 additions and 18 deletions

View File

@ -104,3 +104,6 @@ FETCH_REPLIES_MAX_GLOBAL=1000
# Max number of replies to fetch - for a single post
FETCH_REPLIES_MAX_SINGLE=500
# Max number of replies Collection pages to fetch - total
FETCH_REPLIES_MAX_PAGES=500

View File

@ -6,18 +6,18 @@ class ActivityPub::FetchAllRepliesService < ActivityPub::FetchRepliesService
# Limit of replies to fetch per status
MAX_REPLIES = (ENV['FETCH_REPLIES_MAX_SINGLE'] || 500).to_i
def call(collection_or_uri, allow_synchronous_requests: true, request_id: nil)
def call(collection_or_uri, max_pages = nil, allow_synchronous_requests: true, request_id: nil)
@allow_synchronous_requests = allow_synchronous_requests
@filter_by_host = false
@collection_or_uri = collection_or_uri
@items = collection_items(collection_or_uri)
@items, n_pages = collection_items(collection_or_uri, max_pages)
@items = filtered_replies
return if @items.nil?
FetchReplyWorker.push_bulk(@items) { |reply_uri| [reply_uri, { 'request_id' => request_id }] }
@items
[@items, n_pages]
end
private

View File

@ -11,7 +11,7 @@ class ActivityPub::FetchRepliesService < BaseService
@allow_synchronous_requests = allow_synchronous_requests
@filter_by_host = filter_by_host
@items = collection_items(collection_or_uri)
@items, = collection_items(collection_or_uri)
return if @items.nil?
FetchReplyWorker.push_bulk(filtered_replies) { |reply_uri| [reply_uri, { 'request_id' => request_id }] }
@ -21,7 +21,7 @@ class ActivityPub::FetchRepliesService < BaseService
private
def collection_items(collection_or_uri)
def collection_items(collection_or_uri, max_pages = nil)
collection = fetch_collection(collection_or_uri)
return unless collection.is_a?(Hash)
@ -29,6 +29,7 @@ class ActivityPub::FetchRepliesService < BaseService
return unless collection.is_a?(Hash)
all_items = []
n_pages = 1
while collection.is_a?(Hash)
items = case collection['type']
when 'Collection', 'CollectionPage'
@ -39,12 +40,14 @@ class ActivityPub::FetchRepliesService < BaseService
all_items.concat(as_array(items))
break if all_items.size > MAX_REPLIES
break if all_items.size >= MAX_REPLIES
break if !max_pages.nil? && n_pages >= max_pages
collection = collection['next'].present? ? fetch_collection(collection['next']) : nil
n_pages += 1
end
all_items
[all_items, n_pages]
end
def fetch_collection(collection_or_uri)

View File

@ -13,28 +13,30 @@ class ActivityPub::FetchAllRepliesWorker
# Global max replies to fetch per request (all replies, recursively)
MAX_REPLIES = (ENV['FETCH_REPLIES_MAX_GLOBAL'] || 1000).to_i
MAX_PAGES = (ENV['FETCH_REPLIES_MAX_PAGES'] || 500).to_i
def perform(parent_status_id, options = {})
@parent_status = Status.find(parent_status_id)
Rails.logger.debug { "FetchAllRepliesWorker - #{@parent_status.uri}: Fetching all replies for status: #{@parent_status}" }
uris_to_fetch = get_replies(@parent_status.uri, options)
uris_to_fetch, n_pages = get_replies(@parent_status.uri, MAX_PAGES, options)
return if uris_to_fetch.nil?
@parent_status.touch(:fetched_replies_at)
fetched_uris = uris_to_fetch.clone.to_set
until uris_to_fetch.empty? || fetched_uris.length >= MAX_REPLIES
until uris_to_fetch.empty? || fetched_uris.length >= MAX_REPLIES || n_pages >= MAX_PAGES
next_reply = uris_to_fetch.pop
next if next_reply.nil?
new_reply_uris = get_replies(next_reply, options)
new_reply_uris, new_n_pages = get_replies(next_reply, MAX_PAGES - n_pages, options)
next if new_reply_uris.nil?
new_reply_uris = new_reply_uris.reject { |uri| fetched_uris.include?(uri) }
uris_to_fetch.concat(new_reply_uris)
fetched_uris = fetched_uris.merge(new_reply_uris)
n_pages += new_n_pages
end
Rails.logger.debug { "FetchAllRepliesWorker - #{parent_status_id}: fetched #{fetched_uris.length} replies" }
@ -43,11 +45,11 @@ class ActivityPub::FetchAllRepliesWorker
private
def get_replies(status_uri, options = {})
def get_replies(status_uri, max_pages, options = {})
replies_collection_or_uri = get_replies_uri(status_uri)
return if replies_collection_or_uri.nil?
ActivityPub::FetchAllRepliesService.new.call(replies_collection_or_uri, **options.deep_symbolize_keys)
ActivityPub::FetchAllRepliesService.new.call(replies_collection_or_uri, max_pages, **options.deep_symbolize_keys)
end
def get_replies_uri(parent_status_uri)

View File

@ -122,20 +122,18 @@ RSpec.describe ActivityPub::FetchAllRepliesWorker do
stub_request(:get, item).to_return(status: 200, body: Oj.dump(empty_object), headers: { 'Content-Type': 'application/activity+json' })
end
stub_request(:get, top_note_uri).to_return(status: 200, body: Oj.dump(top_object), headers: { 'Content-Type': 'application/activity+json' })
stub_request(:get, reply_note_uri).to_return(status: 200, body: Oj.dump(reply_object), headers: { 'Content-Type': 'application/activity+json' })
end
shared_examples 'fetches all replies' do
before do
stub_request(:get, top_note_uri).to_return(status: 200, body: Oj.dump(top_object), headers: { 'Content-Type': 'application/activity+json' })
stub_request(:get, reply_note_uri).to_return(status: 200, body: Oj.dump(reply_object), headers: { 'Content-Type': 'application/activity+json' })
end
it 'fetches statuses recursively' do
got_uris = subject.perform(status.id)
expect(got_uris).to match_array(all_items)
end
it 'respects the maxium limits set by not recursing after the max is reached' do
it 'respects the maximum limits set by not recursing after the max is reached' do
stub_const('ActivityPub::FetchAllRepliesWorker::MAX_REPLIES', 5)
got_uris = subject.perform(status.id)
expect(got_uris).to match_array(top_items + top_items_paged)
@ -249,6 +247,12 @@ RSpec.describe ActivityPub::FetchAllRepliesWorker do
end
it_behaves_like 'fetches all replies'
it 'limits by max pages' do
stub_const('ActivityPub::FetchAllRepliesWorker::MAX_PAGES', 3)
got_uris = subject.perform(status.id)
expect(got_uris).to match_array(top_items + top_items_paged + nested_items)
end
end
end
end