From cc706b1191df5f272ee5e53e69860062d8a2b989 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Tue, 22 Mar 2022 09:51:46 +0100 Subject: [PATCH] Fix being able to post URLs longer than 4096 characters --- app/lib/extractor.rb | 7 +++ app/validators/status_length_validator.rb | 50 +++++++++++++------ .../status_length_validator_spec.rb | 15 ++++++ 3 files changed, 58 insertions(+), 14 deletions(-) diff --git a/app/lib/extractor.rb b/app/lib/extractor.rb index 8020aa9166..a8fb77e947 100644 --- a/app/lib/extractor.rb +++ b/app/lib/extractor.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true module Extractor + MAX_DOMAIN_LENGTH = 253 + extend Twitter::TwitterText::Extractor module_function @@ -14,7 +16,12 @@ module Extractor text.to_s.scan(Account::MENTION_RE) do |screen_name, _| match_data = $LAST_MATCH_INFO after = $' + unless Twitter::TwitterText::Regex[:end_mention_match].match?(after) + username, domain = screen_name.split('@') + + next if domain.present? && domain.length > MAX_DOMAIN_LENGTH + start_position = match_data.char_begin(1) - 1 end_position = match_data.char_end(1) possible_entries << { diff --git a/app/validators/status_length_validator.rb b/app/validators/status_length_validator.rb index 4c258ec16e..e107912b77 100644 --- a/app/validators/status_length_validator.rb +++ b/app/validators/status_length_validator.rb @@ -3,35 +3,57 @@ class StatusLengthValidator < ActiveModel::Validator MAX_CHARS = 500 URL_PLACEHOLDER_CHARS = 23 - URL_PLACEHOLDER = "\1#{'x' * URL_PLACEHOLDER_CHARS}" + URL_PLACEHOLDER = 'x' * 23 def validate(status) return unless status.local? && !status.reblog? - @status = status - status.errors.add(:text, I18n.t('statuses.over_character_limit', max: MAX_CHARS)) if too_long? + status.errors.add(:text, I18n.t('statuses.over_character_limit', max: MAX_CHARS)) if too_long?(status) end private - def too_long? - countable_length > MAX_CHARS + def too_long?(status) + countable_length(combined_text(status)) > MAX_CHARS end - def countable_length - total_text.mb_chars.grapheme_length + def countable_length(str) + str.mb_chars.grapheme_length end - def total_text - [@status.spoiler_text, countable_text].join + def combined_text(status) + [status.spoiler_text, countable_text(status.text)].join end - def countable_text - return '' if @status.text.nil? + def countable_text(str) + return '' if str.blank? - @status.text.dup.tap do |new_text| - new_text.gsub!(FetchLinkCardService::URL_PATTERN, URL_PLACEHOLDER) - new_text.gsub!(Account::MENTION_RE, '@\2') + # To ensure that we only give length concessions to entities that + # will be correctly parsed during formatting, we go through full + # entity extraction + + entities = Extractor.remove_overlapping_entities(Extractor.extract_urls_with_indices(str, extract_url_without_protocol: false) + Extractor.extract_mentions_or_lists_with_indices(str)) + + rewrite_entities(str, entities) do |entity| + if entity[:url] + URL_PLACEHOLDER + elsif entity[:screen_name] + "@#{entity[:screen_name].split('@').first}" + end end end + + def rewrite_entities(str, entities) + entities.sort_by! { |entity| entity[:indices].first } + result = ''.dup + + last_index = entities.reduce(0) do |index, entity| + result << str[index...entity[:indices].first] + result << yield(entity) + entity[:indices].last + end + + result << str[last_index..-1] + result + end end diff --git a/spec/validators/status_length_validator_spec.rb b/spec/validators/status_length_validator_spec.rb index bef3f29f50..db9c728a8b 100644 --- a/spec/validators/status_length_validator_spec.rb +++ b/spec/validators/status_length_validator_spec.rb @@ -50,6 +50,13 @@ describe StatusLengthValidator do expect(status.errors).to have_received(:add) end + it 'does not count overly long URLs as 23 characters flat' do + text = "http://example.com/valid?#{'#foo?' * 1000}" + status = double(spoiler_text: '', text: text, errors: double(add: nil), local?: true, reblog?: false) + subject.validate(status) + expect(status.errors).to have_received(:add) + end + it 'counts only the front part of remote usernames' do text = ('a' * 475) + " @alice@#{'b' * 30}.com" status = double(spoiler_text: '', text: text, errors: double(add: nil), local?: true, reblog?: false) @@ -57,5 +64,13 @@ describe StatusLengthValidator do subject.validate(status) expect(status.errors).to_not have_received(:add) end + + it 'does count both parts of remote usernames for overly long domains' do + text = "@alice@#{'b' * 500}.com" + status = double(spoiler_text: '', text: text, errors: double(add: nil), local?: true, reblog?: false) + + subject.validate(status) + expect(status.errors).to have_received(:add) + end end end