From b233da5996c7bfcf7a9373bdc7029feb04c704b8 Mon Sep 17 00:00:00 2001 From: Claire Date: Tue, 2 May 2023 15:09:43 +0200 Subject: [PATCH] Optimize archive export service and export zip files instead of gzipped tar files (#23360) --- Gemfile | 1 + Gemfile.lock | 2 + app/services/backup_service.rb | 150 ++++++++++++++++----------- spec/services/backup_service_spec.rb | 67 ++++++++++++ 4 files changed, 157 insertions(+), 63 deletions(-) create mode 100644 spec/services/backup_service_spec.rb diff --git a/Gemfile b/Gemfile index 3a591e1b5d..456ba3d168 100644 --- a/Gemfile +++ b/Gemfile @@ -162,3 +162,4 @@ gem 'xorcist', '~> 1.1' gem 'cocoon', '~> 1.2' gem 'net-http', '~> 0.3.2' +gem 'rubyzip', '~> 2.3' diff --git a/Gemfile.lock b/Gemfile.lock index 3fe83a169e..65df9a2260 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -636,6 +636,7 @@ GEM nokogiri (>= 1.10.5) rexml ruby2_keywords (0.0.5) + rubyzip (2.3.2) rufus-scheduler (3.8.2) fugit (~> 1.1, >= 1.1.6) safety_net_attestation (0.4.0) @@ -876,6 +877,7 @@ DEPENDENCIES rubocop-rails rubocop-rspec ruby-progressbar (~> 1.13) + rubyzip (~> 2.3) sanitize (~> 6.0) scenic (~> 1.7) sidekiq (~> 6.5) diff --git a/app/services/backup_service.rb b/app/services/backup_service.rb index 5498cdd455..670b34ea86 100644 --- a/app/services/backup_service.rb +++ b/app/services/backup_service.rb @@ -1,59 +1,67 @@ # frozen_string_literal: true -require 'rubygems/package' +require 'zip' class BackupService < BaseService include Payloadable + include ContextHelper - attr_reader :account, :backup, :collection + attr_reader :account, :backup def call(backup) @backup = backup @account = backup.user.account - build_json! build_archive! end private - def build_json! 
- @collection = serialize(collection_presenter, ActivityPub::CollectionSerializer) + def build_outbox_json!(file) + skeleton = serialize(collection_presenter, ActivityPub::CollectionSerializer) + skeleton[:@context] = full_context + skeleton[:orderedItems] = ['!PLACEHOLDER!'] + skeleton = Oj.dump(skeleton) + prepend, append = skeleton.split('"!PLACEHOLDER!"') + add_comma = false + + file.write(prepend) account.statuses.with_includes.reorder(nil).find_in_batches do |statuses| - statuses.each do |status| - item = serialize_payload(ActivityPub::ActivityPresenter.from_status(status), ActivityPub::ActivitySerializer, signer: @account) - item.delete(:@context) + file.write(',') if add_comma + add_comma = true + + file.write(statuses.map do |status| + item = serialize_payload(ActivityPub::ActivityPresenter.from_status(status), ActivityPub::ActivitySerializer) + item.delete('@context') unless item[:type] == 'Announce' || item[:object][:attachment].blank? item[:object][:attachment].each do |attachment| - attachment[:url] = Addressable::URI.parse(attachment[:url]).path.gsub(/\A\/system\//, '') + attachment[:url] = Addressable::URI.parse(attachment[:url]).path.delete_prefix('/system/') end end - @collection[:orderedItems] << item - end + Oj.dump(item) + end.join(',')) GC.start end + + file.write(append) end def build_archive! 
- tmp_file = Tempfile.new(%w(archive .tar.gz)) + tmp_file = Tempfile.new(%w(archive .zip)) - File.open(tmp_file, 'wb') do |file| - Zlib::GzipWriter.wrap(file) do |gz| - Gem::Package::TarWriter.new(gz) do |tar| - dump_media_attachments!(tar) - dump_outbox!(tar) - dump_likes!(tar) - dump_bookmarks!(tar) - dump_actor!(tar) - end - end + Zip::File.open(tmp_file, create: true) do |zipfile| + dump_outbox!(zipfile) + dump_media_attachments!(zipfile) + dump_likes!(zipfile) + dump_bookmarks!(zipfile) + dump_actor!(zipfile) end - archive_filename = "#{['archive', Time.now.utc.strftime('%Y%m%d%H%M%S'), SecureRandom.hex(16)].join('-')}.tar.gz" + archive_filename = "#{['archive', Time.now.utc.strftime('%Y%m%d%H%M%S'), SecureRandom.hex(16)].join('-')}.zip" @backup.dump = ActionDispatch::Http::UploadedFile.new(tempfile: tmp_file, filename: archive_filename) @backup.processed = true @@ -63,27 +71,28 @@ class BackupService < BaseService tmp_file.unlink end - def dump_media_attachments!(tar) + def dump_media_attachments!(zipfile) MediaAttachment.attached.where(account: account).reorder(nil).find_in_batches do |media_attachments| media_attachments.each do |m| - next unless m.file&.path + path = m.file&.path + next unless path - download_to_tar(tar, m.file, m.file.path) + path = path.gsub(/\A.*\/system\//, '') + path = path.gsub(/\A\/+/, '') + download_to_zip(zipfile, m.file, path) end GC.start end end - def dump_outbox!(tar) - json = Oj.dump(collection) - - tar.add_file_simple('outbox.json', 0o444, json.bytesize) do |io| - io.write(json) + def dump_outbox!(zipfile) + zipfile.get_output_stream('outbox.json') do |io| + build_outbox_json!(io) end end - def dump_actor!(tar) + def dump_actor!(zipfile) actor = serialize(account, ActivityPub::ActorSerializer) actor[:icon][:url] = "avatar#{File.extname(actor[:icon][:url])}" if actor[:icon] @@ -92,51 +101,66 @@ class BackupService < BaseService actor[:likes] = 'likes.json' actor[:bookmarks] = 'bookmarks.json' - download_to_tar(tar, 
account.avatar, "avatar#{File.extname(account.avatar.path)}") if account.avatar.exists? - download_to_tar(tar, account.header, "header#{File.extname(account.header.path)}") if account.header.exists? + download_to_zip(zipfile, account.avatar, "avatar#{File.extname(account.avatar.path)}") if account.avatar.exists? + download_to_zip(zipfile, account.header, "header#{File.extname(account.header.path)}") if account.header.exists? json = Oj.dump(actor) - tar.add_file_simple('actor.json', 0o444, json.bytesize) do |io| + zipfile.get_output_stream('actor.json') do |io| io.write(json) end end - def dump_likes!(tar) - collection = serialize(ActivityPub::CollectionPresenter.new(id: 'likes.json', type: :ordered, size: 0, items: []), ActivityPub::CollectionSerializer) + def dump_likes!(zipfile) + skeleton = serialize(ActivityPub::CollectionPresenter.new(id: 'likes.json', type: :ordered, size: 0, items: []), ActivityPub::CollectionSerializer) + skeleton.delete(:totalItems) + skeleton[:orderedItems] = ['!PLACEHOLDER!'] + skeleton = Oj.dump(skeleton) + prepend, append = skeleton.split('"!PLACEHOLDER!"') - Status.reorder(nil).joins(:favourites).includes(:account).merge(account.favourites).find_in_batches do |statuses| - statuses.each do |status| - collection[:totalItems] += 1 - collection[:orderedItems] << ActivityPub::TagManager.instance.uri_for(status) + zipfile.get_output_stream('likes.json') do |io| + io.write(prepend) + + add_comma = false + + Status.reorder(nil).joins(:favourites).includes(:account).merge(account.favourites).find_in_batches do |statuses| + io.write(',') if add_comma + add_comma = true + + io.write(statuses.map do |status| + Oj.dump(ActivityPub::TagManager.instance.uri_for(status)) + end.join(',')) + + GC.start end - GC.start - end - - json = Oj.dump(collection) - - tar.add_file_simple('likes.json', 0o444, json.bytesize) do |io| - io.write(json) + io.write(append) end end - def dump_bookmarks!(tar) - collection = serialize(ActivityPub::CollectionPresenter.new(id: 
'bookmarks.json', type: :ordered, size: 0, items: []), ActivityPub::CollectionSerializer) + def dump_bookmarks!(zipfile) + skeleton = serialize(ActivityPub::CollectionPresenter.new(id: 'bookmarks.json', type: :ordered, size: 0, items: []), ActivityPub::CollectionSerializer) + skeleton.delete(:totalItems) + skeleton[:orderedItems] = ['!PLACEHOLDER!'] + skeleton = Oj.dump(skeleton) + prepend, append = skeleton.split('"!PLACEHOLDER!"') - Status.reorder(nil).joins(:bookmarks).includes(:account).merge(account.bookmarks).find_in_batches do |statuses| - statuses.each do |status| - collection[:totalItems] += 1 - collection[:orderedItems] << ActivityPub::TagManager.instance.uri_for(status) + zipfile.get_output_stream('bookmarks.json') do |io| + io.write(prepend) + + add_comma = false + Status.reorder(nil).joins(:bookmarks).includes(:account).merge(account.bookmarks).find_in_batches do |statuses| + io.write(',') if add_comma + add_comma = true + + io.write(statuses.map do |status| + Oj.dump(ActivityPub::TagManager.instance.uri_for(status)) + end.join(',')) + + GC.start end - GC.start - end - - json = Oj.dump(collection) - - tar.add_file_simple('bookmarks.json', 0o444, json.bytesize) do |io| - io.write(json) + io.write(append) end end @@ -159,10 +183,10 @@ class BackupService < BaseService CHUNK_SIZE = 1.megabyte - def download_to_tar(tar, attachment, filename) + def download_to_zip(zipfile, attachment, filename) adapter = Paperclip.io_adapters.for(attachment) - tar.add_file_simple(filename, 0o444, adapter.size) do |io| + zipfile.get_output_stream(filename) do |io| while (buffer = adapter.read(CHUNK_SIZE)) io.write(buffer) end diff --git a/spec/services/backup_service_spec.rb b/spec/services/backup_service_spec.rb new file mode 100644 index 0000000000..b961b7f675 --- /dev/null +++ b/spec/services/backup_service_spec.rb @@ -0,0 +1,67 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe BackupService, type: :service do + subject(:service_call) { 
described_class.new.call(backup) } + + let!(:user) { Fabricate(:user) } + let!(:attachment) { Fabricate(:media_attachment, account: user.account) } + let!(:status) { Fabricate(:status, account: user.account, text: 'Hello', visibility: :public, media_attachments: [attachment]) } + let!(:private_status) { Fabricate(:status, account: user.account, text: 'secret', visibility: :private) } + let!(:favourite) { Fabricate(:favourite, account: user.account) } + let!(:bookmark) { Fabricate(:bookmark, account: user.account) } + let!(:backup) { Fabricate(:backup, user: user) } + + def read_zip_file(backup, filename) + file = Paperclip.io_adapters.for(backup.dump) + Zip::File.open(file) do |zipfile| + entry = zipfile.glob(filename).first + return entry.get_input_stream.read + end + end + + it 'marks the backup as processed' do + expect { service_call }.to change(backup, :processed).from(false).to(true) + end + + it 'exports outbox.json as expected' do + service_call + + json = Oj.load(read_zip_file(backup, 'outbox.json')) + expect(json['@context']).to_not be_nil + expect(json['type']).to eq 'OrderedCollection' + expect(json['totalItems']).to eq 2 + expect(json['orderedItems'][0]['@context']).to be_nil + expect(json['orderedItems'][0]).to include({ + 'type' => 'Create', + 'object' => include({ + 'id' => ActivityPub::TagManager.instance.uri_for(status), + 'content' => '

<p>Hello</p>

', + }), + }) + expect(json['orderedItems'][1]).to include({ + 'type' => 'Create', + 'object' => include({ + 'id' => ActivityPub::TagManager.instance.uri_for(private_status), + 'content' => '

<p>secret</p>

', + }), + }) + end + + it 'exports likes.json as expected' do + service_call + + json = Oj.load(read_zip_file(backup, 'likes.json')) + expect(json['type']).to eq 'OrderedCollection' + expect(json['orderedItems']).to eq [ActivityPub::TagManager.instance.uri_for(favourite.status)] + end + + it 'exports bookmarks.json as expected' do + service_call + + json = Oj.load(read_zip_file(backup, 'bookmarks.json')) + expect(json['type']).to eq 'OrderedCollection' + expect(json['orderedItems']).to eq [ActivityPub::TagManager.instance.uri_for(bookmark.status)] + end +end