From 031ca25014e0ba88d3dcc3086947b41449a672e2 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Tue, 10 Sep 2019 15:29:12 +0200 Subject: [PATCH] Add retry for failed media downloads and `tootctl media refresh` (#11775) --- app/lib/activitypub/activity/create.rb | 21 +++++----- app/models/concerns/remotable.rb | 12 +++--- app/models/media_attachment.rb | 2 +- app/workers/redownload_media_worker.rb | 19 +++++++++ lib/mastodon/media_cli.rb | 54 ++++++++++++++++++++++++++ 5 files changed, 92 insertions(+), 16 deletions(-) create mode 100644 app/workers/redownload_media_worker.rb diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb index 000b77df5f..dea7fd43c6 100644 --- a/app/lib/activitypub/activity/create.rb +++ b/app/lib/activitypub/activity/create.rb @@ -189,22 +189,25 @@ class ActivityPub::Activity::Create < ActivityPub::Activity media_attachments = [] as_array(@object['attachment']).each do |attachment| - next if attachment['url'].blank? + next if attachment['url'].blank? || media_attachments.size >= 4 - href = Addressable::URI.parse(attachment['url']).normalize.to_s - media_attachment = MediaAttachment.create(account: @account, remote_url: href, description: attachment['name'].presence, focus: attachment['focalPoint'], blurhash: supported_blurhash?(attachment['blurhash']) ? attachment['blurhash'] : nil) - media_attachments << media_attachment + begin + href = Addressable::URI.parse(attachment['url']).normalize.to_s + media_attachment = MediaAttachment.create(account: @account, remote_url: href, description: attachment['name'].presence, focus: attachment['focalPoint'], blurhash: supported_blurhash?(attachment['blurhash']) ? attachment['blurhash'] : nil) + media_attachments << media_attachment - next if unsupported_media_type?(attachment['mediaType']) || skip_download? + next if unsupported_media_type?(attachment['mediaType']) || skip_download? - media_attachment.file_remote_url = href - media_attachment.save + media_attachment.file_remote_url = href + media_attachment.save + rescue Mastodon::UnexpectedResponseError, HTTP::TimeoutError, HTTP::ConnectionError, OpenSSL::SSL::SSLError + RedownloadMediaWorker.perform_in(rand(30..600).seconds, media_attachment.id) + end end media_attachments rescue Addressable::URI::InvalidURIError => e - Rails.logger.debug e - + Rails.logger.debug "Invalid URL in attachment: #{e}" media_attachments end diff --git a/app/models/concerns/remotable.rb b/app/models/concerns/remotable.rb index 9372a963bf..0823026193 100644 --- a/app/models/concerns/remotable.rb +++ b/app/models/concerns/remotable.rb @@ -4,7 +4,7 @@ module Remotable extend ActiveSupport::Concern class_methods do - def remotable_attachment(attachment_name, limit) + def remotable_attachment(attachment_name, limit, suppress_errors: true) attribute_name = "#{attachment_name}_remote_url".to_sym method_name = "#{attribute_name}=".to_sym alt_method_name = "reset_#{attachment_name}!".to_sym @@ -22,7 +22,7 @@ module Remotable begin Request.new(:get, url).perform do |response| - next if response.code != 200 + raise Mastodon::UnexpectedResponseError, response unless (200...300).cover?(response.code) content_type = parse_content_type(response.headers.get('content-type').last) extname = detect_extname_from_content_type(content_type) @@ -41,11 +41,11 @@ module Remotable self[attribute_name] = url if has_attribute?(attribute_name) end - rescue HTTP::TimeoutError, HTTP::ConnectionError, OpenSSL::SSL::SSLError, Paperclip::Errors::NotIdentifiedByImageMagickError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError => e + rescue Mastodon::UnexpectedResponseError, HTTP::TimeoutError, HTTP::ConnectionError, OpenSSL::SSL::SSLError => e + Rails.logger.debug "Error fetching remote #{attachment_name}: #{e}" + raise e unless suppress_errors + rescue Paperclip::Errors::NotIdentifiedByImageMagickError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError, Paperclip::Error, Mastodon::DimensionsValidationError => e Rails.logger.debug "Error fetching remote #{attachment_name}: #{e}" - nil - rescue Paperclip::Error, Mastodon::DimensionsValidationError => e - Rails.logger.debug "Error processing remote #{attachment_name}: #{e}" nil end end diff --git a/app/models/media_attachment.rb b/app/models/media_attachment.rb index b580250155..a2b73f1508 100644 --- a/app/models/media_attachment.rb +++ b/app/models/media_attachment.rb @@ -118,7 +118,7 @@ class MediaAttachment < ApplicationRecord validates_attachment_content_type :file, content_type: IMAGE_MIME_TYPES + VIDEO_MIME_TYPES + AUDIO_MIME_TYPES validates_attachment_size :file, less_than: IMAGE_LIMIT, unless: :larger_media_format? validates_attachment_size :file, less_than: VIDEO_LIMIT, if: :larger_media_format? - remotable_attachment :file, VIDEO_LIMIT + remotable_attachment :file, VIDEO_LIMIT, suppress_errors: false include Attachmentable diff --git a/app/workers/redownload_media_worker.rb b/app/workers/redownload_media_worker.rb new file mode 100644 index 0000000000..98e9959189 --- /dev/null +++ b/app/workers/redownload_media_worker.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +class RedownloadMediaWorker + include Sidekiq::Worker + include ExponentialBackoff + + sidekiq_options queue: 'pull', retry: 3 + + def perform(id) + media_attachment = MediaAttachment.find(id) + + return if media_attachment.remote_url.blank? + + media_attachment.reset_file! + media_attachment.save + rescue ActiveRecord::RecordNotFound + true + end +end diff --git a/lib/mastodon/media_cli.rb b/lib/mastodon/media_cli.rb index 0659b6b65c..ec2f36c302 100644 --- a/lib/mastodon/media_cli.rb +++ b/lib/mastodon/media_cli.rb @@ -43,5 +43,59 @@ module Mastodon say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)}) #{dry_run}", :green, true) end + + option :account, type: :string + option :domain, type: :string + option :status, type: :numeric + option :concurrency, type: :numeric, default: 5, aliases: [:c] + option :verbose, type: :boolean, default: false, aliases: [:v] + option :dry_run, type: :boolean, default: false + desc 'refresh', 'Fetch remote media files' + long_desc <<-DESC + Re-downloads media attachments from other servers. You must specify the + source of media attachments with one of the following options: + + Use the --status option to download attachments from a specific status, + using the status local numeric ID. + + Use the --account option to download attachments from a specific account, + using username@domain handle of the account. + + Use the --domain option to download attachments from a specific domain. + DESC + def refresh + dry_run = options[:dry_run] ? ' (DRY RUN)' : '' + + if options[:status] + scope = MediaAttachment.where(status_id: options[:status]) + elsif options[:account] + username, domain = username.split('@') + account = Account.find_remote(username, domain) + + if account.nil? + say('No such account', :red) + exit(1) + end + + scope = MediaAttachment.where(account_id: account.id) + elsif options[:domain] + scope = MediaAttachment.joins(:account).merge(Account.by_domain_and_subdomains(options[:domain])) + else + exit(1) + end + + processed, aggregate = parallelize_with_progress(scope) do |media_attachment| + next if media_attachment.remote_url.blank? + + unless options[:dry_run] + media_attachment.reset_file! + media_attachment.save + end + + media_attachment.file_file_size + end + + say("Downloaded #{processed} media attachments (approx. #{number_to_human_size(aggregate)})#{dry_run}", :green, true) + end end end