From c3ca3801f2b8a44db09b83da2e64130eb2c41ef1 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Sun, 26 Apr 2020 23:29:08 +0200 Subject: [PATCH] Add separate cache directory for non-local uploads (#12821) --- app/models/account.rb | 90 +++++------ app/models/custom_emoji.rb | 29 ++-- app/models/media_attachment.rb | 35 +++-- app/models/preview_card.rb | 43 ++--- config/initializers/paperclip.rb | 22 ++- ...200417125749_add_storage_schema_version.rb | 9 ++ db/schema.rb | 7 +- lib/cli.rb | 4 + lib/mastodon/cli_helper.rb | 4 + lib/mastodon/media_cli.rb | 24 ++- lib/mastodon/upgrade_cli.rb | 148 ++++++++++++++++++ lib/paperclip/attachment_extensions.rb | 9 ++ 12 files changed, 319 insertions(+), 105 deletions(-) create mode 100644 db/migrate/20200417125749_add_storage_schema_version.rb create mode 100644 lib/mastodon/upgrade_cli.rb diff --git a/app/models/account.rb b/app/models/account.rb index dc14e85386d..ff7386aaff9 100644 --- a/app/models/account.rb +++ b/app/models/account.rb @@ -3,50 +3,52 @@ # # Table name: accounts # -# id :bigint(8) not null, primary key -# username :string default(""), not null -# domain :string -# secret :string default(""), not null -# private_key :text -# public_key :text default(""), not null -# remote_url :string default(""), not null -# salmon_url :string default(""), not null -# hub_url :string default(""), not null -# created_at :datetime not null -# updated_at :datetime not null -# note :text default(""), not null -# display_name :string default(""), not null -# uri :string default(""), not null -# url :string -# avatar_file_name :string -# avatar_content_type :string -# avatar_file_size :integer -# avatar_updated_at :datetime -# header_file_name :string -# header_content_type :string -# header_file_size :integer -# header_updated_at :datetime -# avatar_remote_url :string -# subscription_expires_at :datetime -# locked :boolean default(FALSE), not null -# header_remote_url :string default(""), not null -# last_webfingered_at 
:datetime -# inbox_url :string default(""), not null -# outbox_url :string default(""), not null -# shared_inbox_url :string default(""), not null -# followers_url :string default(""), not null -# protocol :integer default("ostatus"), not null -# memorial :boolean default(FALSE), not null -# moved_to_account_id :bigint(8) -# featured_collection_url :string -# fields :jsonb -# actor_type :string -# discoverable :boolean -# also_known_as :string is an Array -# silenced_at :datetime -# suspended_at :datetime -# trust_level :integer -# hide_collections :boolean +# id :bigint(8) not null, primary key +# username :string default(""), not null +# domain :string +# secret :string default(""), not null +# private_key :text +# public_key :text default(""), not null +# remote_url :string default(""), not null +# salmon_url :string default(""), not null +# hub_url :string default(""), not null +# created_at :datetime not null +# updated_at :datetime not null +# note :text default(""), not null +# display_name :string default(""), not null +# uri :string default(""), not null +# url :string +# avatar_file_name :string +# avatar_content_type :string +# avatar_file_size :integer +# avatar_updated_at :datetime +# header_file_name :string +# header_content_type :string +# header_file_size :integer +# header_updated_at :datetime +# avatar_remote_url :string +# subscription_expires_at :datetime +# locked :boolean default(FALSE), not null +# header_remote_url :string default(""), not null +# last_webfingered_at :datetime +# inbox_url :string default(""), not null +# outbox_url :string default(""), not null +# shared_inbox_url :string default(""), not null +# followers_url :string default(""), not null +# protocol :integer default("ostatus"), not null +# memorial :boolean default(FALSE), not null +# moved_to_account_id :bigint(8) +# featured_collection_url :string +# fields :jsonb +# actor_type :string +# discoverable :boolean +# also_known_as :string is an Array +# silenced_at 
:datetime +# suspended_at :datetime +# trust_level :integer +# hide_collections :boolean +# avatar_storage_schema_version :integer +# header_storage_schema_version :integer # class Account < ApplicationRecord diff --git a/app/models/custom_emoji.rb b/app/models/custom_emoji.rb index d177cf2815d..7cb03b8199b 100644 --- a/app/models/custom_emoji.rb +++ b/app/models/custom_emoji.rb @@ -3,20 +3,21 @@ # # Table name: custom_emojis # -# id :bigint(8) not null, primary key -# shortcode :string default(""), not null -# domain :string -# image_file_name :string -# image_content_type :string -# image_file_size :integer -# image_updated_at :datetime -# created_at :datetime not null -# updated_at :datetime not null -# disabled :boolean default(FALSE), not null -# uri :string -# image_remote_url :string -# visible_in_picker :boolean default(TRUE), not null -# category_id :bigint(8) +# id :bigint(8) not null, primary key +# shortcode :string default(""), not null +# domain :string +# image_file_name :string +# image_content_type :string +# image_file_size :integer +# image_updated_at :datetime +# created_at :datetime not null +# updated_at :datetime not null +# disabled :boolean default(FALSE), not null +# uri :string +# image_remote_url :string +# visible_in_picker :boolean default(TRUE), not null +# category_id :bigint(8) +# image_storage_schema_version :integer # class CustomEmoji < ApplicationRecord diff --git a/app/models/media_attachment.rb b/app/models/media_attachment.rb index f45e2c9f722..75ce9fc4f8d 100644 --- a/app/models/media_attachment.rb +++ b/app/models/media_attachment.rb @@ -3,23 +3,24 @@ # # Table name: media_attachments # -# id :bigint(8) not null, primary key -# status_id :bigint(8) -# file_file_name :string -# file_content_type :string -# file_file_size :integer -# file_updated_at :datetime -# remote_url :string default(""), not null -# created_at :datetime not null -# updated_at :datetime not null -# shortcode :string -# type :integer default("image"), not 
null -# file_meta :json -# account_id :bigint(8) -# description :text -# scheduled_status_id :bigint(8) -# blurhash :string -# processing :integer +# id :bigint(8) not null, primary key +# status_id :bigint(8) +# file_file_name :string +# file_content_type :string +# file_file_size :integer +# file_updated_at :datetime +# remote_url :string default(""), not null +# created_at :datetime not null +# updated_at :datetime not null +# shortcode :string +# type :integer default("image"), not null +# file_meta :json +# account_id :bigint(8) +# description :text +# scheduled_status_id :bigint(8) +# blurhash :string +# processing :integer +# file_storage_schema_version :integer # class MediaAttachment < ApplicationRecord diff --git a/app/models/preview_card.rb b/app/models/preview_card.rb index 4e89fbf8510..2802f4667b2 100644 --- a/app/models/preview_card.rb +++ b/app/models/preview_card.rb @@ -3,25 +3,26 @@ # # Table name: preview_cards # -# id :bigint(8) not null, primary key -# url :string default(""), not null -# title :string default(""), not null -# description :string default(""), not null -# image_file_name :string -# image_content_type :string -# image_file_size :integer -# image_updated_at :datetime -# type :integer default("link"), not null -# html :text default(""), not null -# author_name :string default(""), not null -# author_url :string default(""), not null -# provider_name :string default(""), not null -# provider_url :string default(""), not null -# width :integer default(0), not null -# height :integer default(0), not null -# created_at :datetime not null -# updated_at :datetime not null -# embed_url :string default(""), not null +# id :bigint(8) not null, primary key +# url :string default(""), not null +# title :string default(""), not null +# description :string default(""), not null +# image_file_name :string +# image_content_type :string +# image_file_size :integer +# image_updated_at :datetime +# type :integer default("link"), not null +# html 
:text default(""), not null +# author_name :string default(""), not null +# author_url :string default(""), not null +# provider_name :string default(""), not null +# provider_url :string default(""), not null +# width :integer default(0), not null +# height :integer default(0), not null +# created_at :datetime not null +# updated_at :datetime not null +# embed_url :string default(""), not null +# image_storage_schema_version :integer # class PreviewCard < ApplicationRecord @@ -47,6 +48,10 @@ class PreviewCard < ApplicationRecord before_save :extract_dimensions, if: :link? + def local? + false + end + def missing_image? width.present? && height.present? && image_file_name.blank? end diff --git a/config/initializers/paperclip.rb b/config/initializers/paperclip.rb index 8909678d653..43449eb4fd2 100644 --- a/config/initializers/paperclip.rb +++ b/config/initializers/paperclip.rb @@ -10,9 +10,25 @@ Paperclip.interpolates :filename do |attachment, style| end end +Paperclip.interpolates :path_prefix do |attachment, style| + if attachment.storage_schema_version >= 1 && attachment.instance.respond_to?(:local?) && !attachment.instance.local? + 'cache' + File::SEPARATOR + else + '' + end +end + +Paperclip.interpolates :url_prefix do |attachment, style| + if attachment.storage_schema_version >= 1 && attachment.instance.respond_to?(:local?) && !attachment.instance.local? 
+ 'cache/' + else + '' + end +end + Paperclip::Attachment.default_options.merge!( use_timestamp: false, - path: ':class/:attachment/:id_partition/:style/:filename', + path: ':url_prefix:class/:attachment/:id_partition/:style/:filename', storage: :fog ) @@ -91,7 +107,7 @@ else Paperclip::Attachment.default_options.merge!( storage: :filesystem, use_timestamp: true, - path: File.join(ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')), ':class', ':attachment', ':id_partition', ':style', ':filename'), - url: ENV.fetch('PAPERCLIP_ROOT_URL', '/system') + '/:class/:attachment/:id_partition/:style/:filename', + path: File.join(ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')), ':path_prefix:class', ':attachment', ':id_partition', ':style', ':filename'), + url: ENV.fetch('PAPERCLIP_ROOT_URL', '/system') + '/:url_prefix:class/:attachment/:id_partition/:style/:filename', ) end diff --git a/db/migrate/20200417125749_add_storage_schema_version.rb b/db/migrate/20200417125749_add_storage_schema_version.rb new file mode 100644 index 00000000000..7438f97bae8 --- /dev/null +++ b/db/migrate/20200417125749_add_storage_schema_version.rb @@ -0,0 +1,9 @@ +class AddStorageSchemaVersion < ActiveRecord::Migration[5.2] + def change + add_column :preview_cards, :image_storage_schema_version, :integer + add_column :accounts, :avatar_storage_schema_version, :integer + add_column :accounts, :header_storage_schema_version, :integer + add_column :media_attachments, :file_storage_schema_version, :integer + add_column :custom_emojis, :image_storage_schema_version, :integer + end +end diff --git a/db/schema.rb b/db/schema.rb index 54e81bd3ff3..7cbfebb0094 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. 
-ActiveRecord::Schema.define(version: 2020_04_07_202420) do +ActiveRecord::Schema.define(version: 2020_04_17_125749) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -172,6 +172,8 @@ ActiveRecord::Schema.define(version: 2020_04_07_202420) do t.datetime "suspended_at" t.integer "trust_level" t.boolean "hide_collections" + t.integer "avatar_storage_schema_version" + t.integer "header_storage_schema_version" t.index "(((setweight(to_tsvector('simple'::regconfig, (display_name)::text), 'A'::\"char\") || setweight(to_tsvector('simple'::regconfig, (username)::text), 'B'::\"char\")) || setweight(to_tsvector('simple'::regconfig, (COALESCE(domain, ''::character varying))::text), 'C'::\"char\")))", name: "search_index", using: :gin t.index "lower((username)::text), lower((domain)::text)", name: "index_accounts_on_username_and_domain_lower", unique: true t.index ["moved_to_account_id"], name: "index_accounts_on_moved_to_account_id" @@ -299,6 +301,7 @@ ActiveRecord::Schema.define(version: 2020_04_07_202420) do t.string "image_remote_url" t.boolean "visible_in_picker", default: true, null: false t.bigint "category_id" + t.integer "image_storage_schema_version" t.index ["shortcode", "domain"], name: "index_custom_emojis_on_shortcode_and_domain", unique: true end @@ -464,6 +467,7 @@ ActiveRecord::Schema.define(version: 2020_04_07_202420) do t.bigint "scheduled_status_id" t.string "blurhash" t.integer "processing" + t.integer "file_storage_schema_version" t.index ["account_id"], name: "index_media_attachments_on_account_id" t.index ["scheduled_status_id"], name: "index_media_attachments_on_scheduled_status_id" t.index ["shortcode"], name: "index_media_attachments_on_shortcode", unique: true @@ -604,6 +608,7 @@ ActiveRecord::Schema.define(version: 2020_04_07_202420) do t.datetime "created_at", null: false t.datetime "updated_at", null: false t.string "embed_url", default: "", null: false + t.integer 
"image_storage_schema_version" t.index ["url"], name: "index_preview_cards_on_url", unique: true end diff --git a/lib/cli.rb b/lib/cli.rb index 19cc5d6b519..313a36a3d98 100644 --- a/lib/cli.rb +++ b/lib/cli.rb @@ -11,6 +11,7 @@ require_relative 'mastodon/statuses_cli' require_relative 'mastodon/domains_cli' require_relative 'mastodon/preview_cards_cli' require_relative 'mastodon/cache_cli' +require_relative 'mastodon/upgrade_cli' require_relative 'mastodon/version' module Mastodon @@ -49,6 +50,9 @@ module Mastodon desc 'cache SUBCOMMAND ...ARGS', 'Manage cache' subcommand 'cache', Mastodon::CacheCLI + desc 'upgrade SUBCOMMAND ...ARGS', 'Various version upgrade utilities' + subcommand 'upgrade', Mastodon::UpgradeCLI + option :dry_run, type: :boolean desc 'self-destruct', 'Erase the server from the federation' long_desc <<~LONG_DESC diff --git a/lib/mastodon/cli_helper.rb b/lib/mastodon/cli_helper.rb index ec4d9a81e5f..4a20fa8d64b 100644 --- a/lib/mastodon/cli_helper.rb +++ b/lib/mastodon/cli_helper.rb @@ -10,6 +10,10 @@ Paperclip.options[:log] = false module Mastodon module CLIHelper + def dry_run? 
+ options[:dry_run] + end + def create_progress_bar(total = nil) ProgressBar.create(total: total, format: '%c/%u |%b%i| %e') end diff --git a/lib/mastodon/media_cli.rb b/lib/mastodon/media_cli.rb index 0f211f2726a..424d65a5f4e 100644 --- a/lib/mastodon/media_cli.rb +++ b/lib/mastodon/media_cli.rb @@ -85,7 +85,9 @@ module Mastodon record_map = preload_records_from_mixed_objects(objects) objects.each do |object| - path_segments = object.key.split('/') + path_segments = object.key.split('/') + path_segments.delete('cache') + model_name = path_segments.first.classify attachment_name = path_segments[1].singularize record_id = path_segments[2..-2].join.to_i @@ -120,8 +122,11 @@ module Mastodon Find.find(File.join(*[root_path, prefix].compact)) do |path| next if File.directory?(path) - key = path.gsub("#{root_path}#{File::SEPARATOR}", '') - path_segments = key.split(File::SEPARATOR) + key = path.gsub("#{root_path}#{File::SEPARATOR}", '') + + path_segments = key.split(File::SEPARATOR) + path_segments.delete('cache') + model_name = path_segments.first.classify record_id = path_segments[2..-2].join.to_i attachment_name = path_segments[1].singularize @@ -229,10 +234,13 @@ module Mastodon desc 'lookup URL', 'Lookup where media is displayed by passing a media URL' def lookup(url) - path = Addressable::URI.parse(url).path + path = Addressable::URI.parse(url).path + path_segments = path.split('/')[2..-1] - model_name = path_segments.first.classify - record_id = path_segments[2..-2].join.to_i + path_segments.delete('cache') + + model_name = path_segments.first.classify + record_id = path_segments[2..-2].join.to_i unless PRELOAD_MODEL_WHITELIST.include?(model_name) say("Cannot find corresponding model: #{model_name}", :red) @@ -276,7 +284,9 @@ module Mastodon preload_map = Hash.new { |hash, key| hash[key] = [] } objects.map do |object| - segments = object.key.split('/') + segments = object.key.split('/') + segments.delete('cache') + model_name = segments.first.classify record_id = 
segments[2..-2].join.to_i

diff --git a/lib/mastodon/upgrade_cli.rb b/lib/mastodon/upgrade_cli.rb
new file mode 100644
index 00000000000..74d13f62d9b
--- /dev/null
+++ b/lib/mastodon/upgrade_cli.rb
@@ -0,0 +1,148 @@
+# frozen_string_literal: true
+
+require_relative '../../config/boot'
+require_relative '../../config/environment'
+require_relative 'cli_helper'
+
+module Mastodon
+  class UpgradeCLI < Thor
+    include CLIHelper
+
+    def self.exit_on_failure?
+      true
+    end
+
+    CURRENT_STORAGE_SCHEMA_VERSION = 1
+
+    option :dry_run, type: :boolean, default: false
+    option :verbose, type: :boolean, default: false, aliases: [:v]
+    desc 'storage-schema', 'Upgrade storage schema of various file attachments to the latest version'
+    long_desc <<~LONG_DESC
+      Iterates over every file attachment of every record and, if its storage schema is outdated, performs the
+      necessary upgrade to the latest one. In practice this means e.g. moving files to different directories.
+
+      Will most likely take a long time.
+    LONG_DESC
+    def storage_schema
+      progress = create_progress_bar(nil)
+      dry_run  = dry_run? ? ' (DRY RUN)' : ''
+      records  = 0
+
+      klasses = [
+        Account,
+        CustomEmoji,
+        MediaAttachment,
+        PreviewCard,
+      ]
+
+      klasses.each do |klass|
+        attachment_names = klass.attachment_definitions.keys
+
+        klass.find_each do |record|
+          attachment_names.each do |attachment_name|
+            attachment = record.public_send(attachment_name)
+
+            next if attachment.blank? || attachment.storage_schema_version >= CURRENT_STORAGE_SCHEMA_VERSION
+            # Paperclip leaves :original out of #styles unless a model declares it, so always include it
+            (attachment.styles.keys | [:original]).each do |style|
+              case Paperclip::Attachment.default_options[:storage]
+              when :s3
+                upgrade_storage_s3(progress, attachment, style)
+              when :fog
+                upgrade_storage_fog(progress, attachment, style)
+              when :filesystem
+                upgrade_storage_filesystem(progress, attachment, style)
+              end
+
+              progress.increment
+            end
+
+            attachment.instance_write(:storage_schema_version, CURRENT_STORAGE_SCHEMA_VERSION)
+          end
+
+          if record.changed?
+            record.save unless dry_run?
+            records += 1
+          end
+        end
+      end
+
+      progress.total = progress.progress
+      progress.finish
+
+      say("Upgraded storage schema of #{records} records#{dry_run}", :green, true)
+    end
+
+    private
+
+    def upgrade_storage_s3(progress, attachment, style)
+      previous_storage_schema_version = attachment.storage_schema_version
+      object                          = attachment.s3_object(style)
+
+      attachment.instance_write(:storage_schema_version, CURRENT_STORAGE_SCHEMA_VERSION)
+
+      upgraded_path = attachment.path(style)
+
+      if upgraded_path != object.key && object.exists?
+        progress.log("Moving #{object.key} to #{upgraded_path}") if options[:verbose]
+        # An S3 copy does not inherit the source ACL, so re-apply the configured permissions
+        begin
+          object.move_to(upgraded_path, acl: attachment.s3_permissions(style)) unless dry_run?
+        rescue => e
+          progress.log(pastel.red("Error processing #{object.key}: #{e}"))
+        end
+      end
+
+      # Because we move files style-by-style, it's important to restore
+      # previous version at the end. The upgrade will be recorded after
+      # all styles are updated
+      attachment.instance_write(:storage_schema_version, previous_storage_schema_version)
+    end
+
+    def upgrade_storage_fog(_progress, _attachment, _style)
+      say('The fog storage driver is not supported for this operation at this time', :red)
+      exit(1)
+    end
+
+    def upgrade_storage_filesystem(progress, attachment, style)
+      previous_storage_schema_version = attachment.storage_schema_version
+      previous_path                   = attachment.path(style)
+
+      attachment.instance_write(:storage_schema_version, CURRENT_STORAGE_SCHEMA_VERSION)
+
+      upgraded_path = attachment.path(style)
+
+      if upgraded_path != previous_path && File.exist?(previous_path)
+        progress.log("Moving #{previous_path} to #{upgraded_path}") if options[:verbose]
+
+        begin
+          unless dry_run?
+            FileUtils.mkdir_p(File.dirname(upgraded_path))
+            FileUtils.mv(previous_path, upgraded_path)
+
+            begin
+              FileUtils.rmdir(File.dirname(previous_path), parents: true)
+            rescue Errno::ENOTEMPTY
+              # Other files still live in the old directory, so it has to stay
+            end
+          end
+        rescue => e
+          progress.log(pastel.red("Error processing #{previous_path}: #{e}"))
+
+          unless dry_run?
+            begin
+              FileUtils.rmdir(File.dirname(upgraded_path), parents: true)
+            rescue Errno::ENOTEMPTY, Errno::ENOENT
+              # Target directory is shared with other files or was never created
+            end
+          end
+        end
+      end
+
+      # Because we move files style-by-style, it's important to restore
+      # previous version at the end. The upgrade will be recorded after
+      # all styles are updated
+      attachment.instance_write(:storage_schema_version, previous_storage_schema_version)
+    end
+  end
+end
diff --git a/lib/paperclip/attachment_extensions.rb b/lib/paperclip/attachment_extensions.rb
index ce578055792..f3e51dbd38c 100644
--- a/lib/paperclip/attachment_extensions.rb
+++ b/lib/paperclip/attachment_extensions.rb
@@ -14,6 +14,15 @@ module Paperclip
       end
     end

+    def storage_schema_version
+      instance_read(:storage_schema_version) || 0
+    end
+
+    def assign_attributes
+      super
+      instance_write(:storage_schema_version, 1)
+    end
+
     def variant?(other_filename)
       return true if original_filename == other_filename
       return false if original_filename.nil?