From 7aaf2b44ec698fd4f20b927fcac7edc0394a2647 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Tue, 30 Jun 2020 23:58:02 +0200 Subject: [PATCH] Fix remote files not using Content-Type header, streaming (#14184) --- app/lib/response_with_limit.rb | 10 ++++ app/models/concerns/attachmentable.rb | 17 ++++-- app/models/concerns/remotable.rb | 40 +------------- config/application.rb | 2 + config/initializers/paperclip.rb | 1 + lib/paperclip/image_extractor.rb | 45 ++++++++------- .../media_type_spoof_detector_extensions.rb | 27 +++++++++ lib/paperclip/response_with_limit_adapter.rb | 55 +++++++++++++++++++ spec/models/concerns/remotable_spec.rb | 10 +--- 9 files changed, 139 insertions(+), 68 deletions(-) create mode 100644 app/lib/response_with_limit.rb create mode 100644 lib/paperclip/media_type_spoof_detector_extensions.rb create mode 100644 lib/paperclip/response_with_limit_adapter.rb diff --git a/app/lib/response_with_limit.rb b/app/lib/response_with_limit.rb new file mode 100644 index 0000000000..2cc17bc5fe --- /dev/null +++ b/app/lib/response_with_limit.rb @@ -0,0 +1,10 @@ +# frozen_string_literal: true + +class ResponseWithLimit + def initialize(response, limit) + @response = response + @limit = limit + end + + attr_reader :response, :limit +end diff --git a/app/models/concerns/attachmentable.rb b/app/models/concerns/attachmentable.rb index 18b872c1e6..c5febb8286 100644 --- a/app/models/concerns/attachmentable.rb +++ b/app/models/concerns/attachmentable.rb @@ -8,6 +8,17 @@ module Attachmentable MAX_MATRIX_LIMIT = 16_777_216 # 4096x4096px or approx. 16MB GIF_MATRIX_LIMIT = 921_600 # 1280x720px + # For some file extensions, there exist different content + # type variants, and browsers often send the wrong one, + # for example, sending an audio .ogg file as video/ogg, + # likewise, MimeMagic also misreports them as such. For + # those files, it is necessary to use the output of the + # `file` utility instead + INCORRECT_CONTENT_TYPES = %w( + video/ogg + video/webm + ).freeze + included do before_post_process :obfuscate_file_name before_post_process :set_file_extensions @@ -21,7 +32,7 @@ module Attachmentable self.class.attachment_definitions.each_key do |attachment_name| attachment = send(attachment_name) - next if attachment.blank? || attachment.queued_for_write[:original].blank? + next if attachment.blank? || attachment.queued_for_write[:original].blank? || !INCORRECT_CONTENT_TYPES.include?(attachment.instance_read(:content_type)) attachment.instance_write :content_type, calculated_content_type(attachment) end @@ -63,9 +74,7 @@ module Attachmentable end def calculated_content_type(attachment) - content_type = Paperclip.run('file', '-b --mime :file', file: attachment.queued_for_write[:original].path).split(/[:;\s]+/).first.chomp - content_type = 'video/mp4' if content_type == 'video/x-m4v' - content_type + Paperclip.run('file', '-b --mime :file', file: attachment.queued_for_write[:original].path).split(/[:;\s]+/).first.chomp rescue Terrapin::CommandLineError '' end diff --git a/app/models/concerns/remotable.rb b/app/models/concerns/remotable.rb index 53ebc08357..c6d0c7f1f4 100644 --- a/app/models/concerns/remotable.rb +++ b/app/models/concerns/remotable.rb @@ -24,28 +24,16 @@ module Remotable Request.new(:get, url).perform do |response| raise Mastodon::UnexpectedResponseError, response unless (200...300).cover?(response.code) - content_type = parse_content_type(response.headers.get('content-type').last) - extname = detect_extname_from_content_type(content_type) - - if extname.nil? - disposition = response.headers.get('content-disposition').last - matches = disposition&.match(/filename="([^"]*)"/) - filename = matches.nil? ? parsed_url.path.split('/').last : matches[1] - extname = filename.nil? ? '' : File.extname(filename) - end - - basename = SecureRandom.hex(8) - - public_send("#{attachment_name}_file_name=", basename + extname) - public_send("#{attachment_name}=", StringIO.new(response.body_with_limit(limit))) + public_send("#{attachment_name}=", ResponseWithLimit.new(response, limit)) end rescue Mastodon::UnexpectedResponseError, HTTP::TimeoutError, HTTP::ConnectionError, OpenSSL::SSL::SSLError => e Rails.logger.debug "Error fetching remote #{attachment_name}: #{e}" raise e unless suppress_errors rescue Paperclip::Errors::NotIdentifiedByImageMagickError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError, Paperclip::Error, Mastodon::DimensionsValidationError => e Rails.logger.debug "Error fetching remote #{attachment_name}: #{e}" - nil end + + nil end define_method("#{attribute_name}=") do |url| @@ -59,26 +47,4 @@ module Remotable alias_method("reset_#{attachment_name}!", "download_#{attachment_name}!") end end - - private - - def detect_extname_from_content_type(content_type) - return if content_type.nil? - - type = MIME::Types[content_type].first - - return if type.nil? - - extname = type.extensions.first - - return if extname.nil? - - ".#{extname}" - end - - def parse_content_type(content_type) - return if content_type.nil? - - content_type.split(/\s*;\s*/).first - end end diff --git a/config/application.rb b/config/application.rb index 8348649df4..a1da9d61f6 100644 --- a/config/application.rb +++ b/config/application.rb @@ -9,10 +9,12 @@ Bundler.require(*Rails.groups) require_relative '../app/lib/exceptions' require_relative '../lib/paperclip/url_generator_extensions' require_relative '../lib/paperclip/attachment_extensions' +require_relative '../lib/paperclip/media_type_spoof_detector_extensions' require_relative '../lib/paperclip/lazy_thumbnail' require_relative '../lib/paperclip/gif_transcoder' require_relative '../lib/paperclip/video_transcoder' require_relative '../lib/paperclip/type_corrector' +require_relative '../lib/paperclip/response_with_limit_adapter' require_relative '../lib/mastodon/snowflake' require_relative '../lib/mastodon/version' require_relative '../lib/devise/two_factor_ldap_authenticatable' diff --git a/config/initializers/paperclip.rb b/config/initializers/paperclip.rb index ebb009065b..b4849370db 100644 --- a/config/initializers/paperclip.rb +++ b/config/initializers/paperclip.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true Paperclip::DataUriAdapter.register +Paperclip::ResponseWithLimitAdapter.register Paperclip.interpolates :filename do |attachment, style| if style == :original diff --git a/lib/paperclip/image_extractor.rb b/lib/paperclip/image_extractor.rb index 114852e8b9..f5a54d1a5c 100644 --- a/lib/paperclip/image_extractor.rb +++ b/lib/paperclip/image_extractor.rb @@ -4,28 +4,10 @@ require 'mime/types/columnar' module Paperclip class ImageExtractor < Paperclip::Processor - IMAGE_EXTRACTION_OPTIONS = { - convert_options: { - output: { - 'loglevel' => 'fatal', - vf: 'scale=\'min(400\, iw):min(400\, ih)\':force_original_aspect_ratio=decrease', - }.freeze, - }.freeze, - format: 'png', - time: -1, - file_geometry_parser: FastGeometryParser, - }.freeze - def make return @file unless options[:style] == :original - image = begin - begin - Paperclip::Transcoder.make(file, IMAGE_EXTRACTION_OPTIONS.dup, attachment) - rescue Paperclip::Error, ::Av::CommandError - nil - end - end + image = extract_image_from_file! unless image.nil? begin @@ -36,7 +18,7 @@ module Paperclip # to make sure it's cleaned up begin - FileUtils.rm(image) + image.close(true) rescue Errno::ENOENT nil end @@ -45,5 +27,28 @@ module Paperclip @file end + + private + + def extract_image_from_file! + ::Av.logger = Paperclip.logger + + cli = ::Av.cli + dst = Tempfile.new([File.basename(@file.path, '.*'), '.png']) + dst.binmode + + cli.add_source(@file.path) + cli.add_destination(dst.path) + cli.add_output_param loglevel: 'fatal' + + begin + cli.run + rescue Cocaine::ExitStatusError + dst.close(true) + return nil + end + + dst + end end end diff --git a/lib/paperclip/media_type_spoof_detector_extensions.rb b/lib/paperclip/media_type_spoof_detector_extensions.rb new file mode 100644 index 0000000000..9c05573564 --- /dev/null +++ b/lib/paperclip/media_type_spoof_detector_extensions.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +module Paperclip + module MediaTypeSpoofDetectorExtensions + def calculated_content_type + @calculated_content_type ||= type_from_mime_magic || type_from_file_command + end + + def type_from_mime_magic + @type_from_mime_magic ||= begin + begin + File.open(@file.path) do |file| + MimeMagic.by_magic(file)&.type + end + rescue Errno::ENOENT + '' + end + end + end + + def type_from_file_command + @type_from_file_command ||= FileCommandContentTypeDetector.new(@file.path).detect + end + end +end + +Paperclip::MediaTypeSpoofDetector.prepend(Paperclip::MediaTypeSpoofDetectorExtensions) diff --git a/lib/paperclip/response_with_limit_adapter.rb b/lib/paperclip/response_with_limit_adapter.rb new file mode 100644 index 0000000000..7d897b8d67 --- /dev/null +++ b/lib/paperclip/response_with_limit_adapter.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +module Paperclip + class ResponseWithLimitAdapter < AbstractAdapter + def self.register + Paperclip.io_adapters.register self do |target| + target.is_a?(ResponseWithLimit) + end + end + + def initialize(target, options = {}) + super + cache_current_values + end + + private + + def cache_current_values + @original_filename = filename_from_content_disposition || filename_from_path || 'data' + @size = @target.response.content_length + @tempfile = copy_to_tempfile(@target) + @content_type = @target.response.mime_type || ContentTypeDetector.new(@tempfile.path).detect + end + + def copy_to_tempfile(source) + bytes_read = 0 + + source.response.body.each do |chunk| + bytes_read += chunk.bytesize + + destination.write(chunk) + chunk.clear + + raise Mastodon::LengthValidationError if bytes_read > source.limit + end + + destination.rewind + destination + rescue Mastodon::LengthValidationError + destination.close(true) + raise + ensure + source.response.connection.close + end + + def filename_from_content_disposition + disposition = @target.response.headers['content-disposition'] + disposition&.match(/filename="([^"]*)"/)&.captures&.first + end + + def filename_from_path + @target.response.uri.path.split('/').last + end + end +end diff --git a/spec/models/concerns/remotable_spec.rb b/spec/models/concerns/remotable_spec.rb index 2e6c8a9c6d..9cc849dedb 100644 --- a/spec/models/concerns/remotable_spec.rb +++ b/spec/models/concerns/remotable_spec.rb @@ -162,19 +162,15 @@ RSpec.describe Remotable do let(:headers) { { 'content-disposition' => file } } it 'assigns file' do - string_io = StringIO.new('') - extname = '.txt' - basename = '0123456789abcdef' + response_with_limit = ResponseWithLimit.new(nil, 0) - allow(SecureRandom).to receive(:hex).and_return(basename) - allow(StringIO).to receive(:new).with(anything).and_return(string_io) + allow(ResponseWithLimit).to receive(:new).with(anything, anything).and_return(response_with_limit) expect(foo).to receive(:public_send).with("download_#{hoge}!", url) foo.hoge_remote_url = url - expect(foo).to receive(:public_send).with("#{hoge}=", string_io) - expect(foo).to receive(:public_send).with("#{hoge}_file_name=", basename + extname) + expect(foo).to receive(:public_send).with("#{hoge}=", response_with_limit) foo.download_hoge!(url) end