Add reusable duplicate ID finder methods in maintenance CLI (#28910)

This commit is contained in:
Matt Jankowski 2024-04-17 05:00:08 -04:00 committed by GitHub
parent 03abff3b30
commit 9ae2594726
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 38 additions and 23 deletions

View File

@ -254,7 +254,7 @@ module Mastodon::CLI
say 'Deduplicating accounts… for local accounts, you will be asked to chose which account to keep unchanged.' say 'Deduplicating accounts… for local accounts, you will be asked to chose which account to keep unchanged.'
find_duplicate_accounts.each do |row| duplicate_record_ids(:accounts, "lower(username), COALESCE(lower(domain), '')").each do |row|
accounts = Account.where(id: row['ids'].split(',')) accounts = Account.where(id: row['ids'].split(','))
if accounts.first.local? if accounts.first.local?
@ -306,7 +306,7 @@ module Mastodon::CLI
end end
def deduplicate_users_process_email def deduplicate_users_process_email
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM users GROUP BY email HAVING count(*) > 1").each do |row| duplicate_record_ids(:users, 'email').each do |row|
users = User.where(id: row['ids'].split(',')).order(updated_at: :desc).includes(:account).to_a users = User.where(id: row['ids'].split(',')).order(updated_at: :desc).includes(:account).to_a
ref_user = users.shift ref_user = users.shift
say "Multiple users registered with e-mail address #{ref_user.email}.", :yellow say "Multiple users registered with e-mail address #{ref_user.email}.", :yellow
@ -320,7 +320,7 @@ module Mastodon::CLI
end end
def deduplicate_users_process_confirmation_token def deduplicate_users_process_confirmation_token
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM users WHERE confirmation_token IS NOT NULL GROUP BY confirmation_token HAVING count(*) > 1").each do |row| duplicate_record_ids_without_nulls(:users, 'confirmation_token').each do |row|
users = User.where(id: row['ids'].split(',')).order(created_at: :desc).includes(:account).to_a.drop(1) users = User.where(id: row['ids'].split(',')).order(created_at: :desc).includes(:account).to_a.drop(1)
say "Unsetting confirmation token for those accounts: #{users.map { |user| user.account.acct }.join(', ')}", :yellow say "Unsetting confirmation token for those accounts: #{users.map { |user| user.account.acct }.join(', ')}", :yellow
@ -332,7 +332,7 @@ module Mastodon::CLI
def deduplicate_users_process_remember_token def deduplicate_users_process_remember_token
if migrator_version < 2022_01_18_183010 if migrator_version < 2022_01_18_183010
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM users WHERE remember_token IS NOT NULL GROUP BY remember_token HAVING count(*) > 1").each do |row| duplicate_record_ids_without_nulls(:users, 'remember_token').each do |row|
users = User.where(id: row['ids'].split(',')).order(updated_at: :desc).to_a.drop(1) users = User.where(id: row['ids'].split(',')).order(updated_at: :desc).to_a.drop(1)
say "Unsetting remember token for those accounts: #{users.map { |user| user.account.acct }.join(', ')}", :yellow say "Unsetting remember token for those accounts: #{users.map { |user| user.account.acct }.join(', ')}", :yellow
@ -344,7 +344,7 @@ module Mastodon::CLI
end end
def deduplicate_users_process_password_token def deduplicate_users_process_password_token
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM users WHERE reset_password_token IS NOT NULL GROUP BY reset_password_token HAVING count(*) > 1").each do |row| duplicate_record_ids_without_nulls(:users, 'reset_password_token').each do |row|
users = User.where(id: row['ids'].split(',')).order(updated_at: :desc).includes(:account).to_a.drop(1) users = User.where(id: row['ids'].split(',')).order(updated_at: :desc).includes(:account).to_a.drop(1)
say "Unsetting password reset token for those accounts: #{users.map { |user| user.account.acct }.join(', ')}", :yellow say "Unsetting password reset token for those accounts: #{users.map { |user| user.account.acct }.join(', ')}", :yellow
@ -358,7 +358,7 @@ module Mastodon::CLI
remove_index_if_exists!(:account_domain_blocks, 'index_account_domain_blocks_on_account_id_and_domain') remove_index_if_exists!(:account_domain_blocks, 'index_account_domain_blocks_on_account_id_and_domain')
say 'Removing duplicate account domain blocks…' say 'Removing duplicate account domain blocks…'
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM account_domain_blocks GROUP BY account_id, domain HAVING count(*) > 1").each do |row| duplicate_record_ids(:account_domain_blocks, 'account_id, domain').each do |row|
AccountDomainBlock.where(id: row['ids'].split(',').drop(1)).delete_all AccountDomainBlock.where(id: row['ids'].split(',').drop(1)).delete_all
end end
@ -372,7 +372,7 @@ module Mastodon::CLI
remove_index_if_exists!(:account_identity_proofs, 'index_account_proofs_on_account_and_provider_and_username') remove_index_if_exists!(:account_identity_proofs, 'index_account_proofs_on_account_and_provider_and_username')
say 'Removing duplicate account identity proofs…' say 'Removing duplicate account identity proofs…'
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM account_identity_proofs GROUP BY account_id, provider, provider_username HAVING count(*) > 1").each do |row| duplicate_record_ids(:account_identity_proofs, 'account_id, provider, provider_username').each do |row|
AccountIdentityProof.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy) AccountIdentityProof.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy)
end end
@ -386,7 +386,7 @@ module Mastodon::CLI
remove_index_if_exists!(:announcement_reactions, 'index_announcement_reactions_on_account_id_and_announcement_id') remove_index_if_exists!(:announcement_reactions, 'index_announcement_reactions_on_account_id_and_announcement_id')
say 'Removing duplicate announcement reactions…' say 'Removing duplicate announcement reactions…'
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM announcement_reactions GROUP BY account_id, announcement_id, name HAVING count(*) > 1").each do |row| duplicate_record_ids(:announcement_reactions, 'account_id, announcement_id, name').each do |row|
AnnouncementReaction.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy) AnnouncementReaction.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy)
end end
@ -398,7 +398,7 @@ module Mastodon::CLI
remove_index_if_exists!(:conversations, 'index_conversations_on_uri') remove_index_if_exists!(:conversations, 'index_conversations_on_uri')
say 'Deduplicating conversations…' say 'Deduplicating conversations…'
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM conversations WHERE uri IS NOT NULL GROUP BY uri HAVING count(*) > 1").each do |row| duplicate_record_ids_without_nulls(:conversations, 'uri').each do |row|
conversations = Conversation.where(id: row['ids'].split(',')).order(id: :desc).to_a conversations = Conversation.where(id: row['ids'].split(',')).order(id: :desc).to_a
ref_conversation = conversations.shift ref_conversation = conversations.shift
@ -421,7 +421,7 @@ module Mastodon::CLI
remove_index_if_exists!(:custom_emojis, 'index_custom_emojis_on_shortcode_and_domain') remove_index_if_exists!(:custom_emojis, 'index_custom_emojis_on_shortcode_and_domain')
say 'Deduplicating custom_emojis…' say 'Deduplicating custom_emojis…'
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM custom_emojis GROUP BY shortcode, domain HAVING count(*) > 1").each do |row| duplicate_record_ids(:custom_emojis, 'shortcode, domain').each do |row|
emojis = CustomEmoji.where(id: row['ids'].split(',')).order(id: :desc).to_a emojis = CustomEmoji.where(id: row['ids'].split(',')).order(id: :desc).to_a
ref_emoji = emojis.shift ref_emoji = emojis.shift
@ -440,7 +440,7 @@ module Mastodon::CLI
remove_index_if_exists!(:custom_emoji_categories, 'index_custom_emoji_categories_on_name') remove_index_if_exists!(:custom_emoji_categories, 'index_custom_emoji_categories_on_name')
say 'Deduplicating custom_emoji_categories…' say 'Deduplicating custom_emoji_categories…'
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM custom_emoji_categories GROUP BY name HAVING count(*) > 1").each do |row| duplicate_record_ids(:custom_emoji_categories, 'name').each do |row|
categories = CustomEmojiCategory.where(id: row['ids'].split(',')).order(id: :desc).to_a categories = CustomEmojiCategory.where(id: row['ids'].split(',')).order(id: :desc).to_a
ref_category = categories.shift ref_category = categories.shift
@ -459,7 +459,7 @@ module Mastodon::CLI
remove_index_if_exists!(:domain_allows, 'index_domain_allows_on_domain') remove_index_if_exists!(:domain_allows, 'index_domain_allows_on_domain')
say 'Deduplicating domain_allows…' say 'Deduplicating domain_allows…'
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM domain_allows GROUP BY domain HAVING count(*) > 1").each do |row| duplicate_record_ids(:domain_allows, 'domain').each do |row|
DomainAllow.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy) DomainAllow.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy)
end end
@ -471,7 +471,7 @@ module Mastodon::CLI
remove_index_if_exists!(:domain_blocks, 'index_domain_blocks_on_domain') remove_index_if_exists!(:domain_blocks, 'index_domain_blocks_on_domain')
say 'Deduplicating domain_blocks…' say 'Deduplicating domain_blocks…'
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM domain_blocks GROUP BY domain HAVING count(*) > 1").each do |row| duplicate_record_ids(:domain_blocks, 'domain').each do |row|
domain_blocks = DomainBlock.where(id: row['ids'].split(',')).by_severity.reverse.to_a domain_blocks = DomainBlock.where(id: row['ids'].split(',')).by_severity.reverse.to_a
reject_media = domain_blocks.any?(&:reject_media?) reject_media = domain_blocks.any?(&:reject_media?)
@ -497,7 +497,7 @@ module Mastodon::CLI
remove_index_if_exists!(:unavailable_domains, 'index_unavailable_domains_on_domain') remove_index_if_exists!(:unavailable_domains, 'index_unavailable_domains_on_domain')
say 'Deduplicating unavailable_domains…' say 'Deduplicating unavailable_domains…'
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM unavailable_domains GROUP BY domain HAVING count(*) > 1").each do |row| duplicate_record_ids(:unavailable_domains, 'domain').each do |row|
UnavailableDomain.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy) UnavailableDomain.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy)
end end
@ -509,7 +509,7 @@ module Mastodon::CLI
remove_index_if_exists!(:email_domain_blocks, 'index_email_domain_blocks_on_domain') remove_index_if_exists!(:email_domain_blocks, 'index_email_domain_blocks_on_domain')
say 'Deduplicating email_domain_blocks…' say 'Deduplicating email_domain_blocks…'
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM email_domain_blocks GROUP BY domain HAVING count(*) > 1").each do |row| duplicate_record_ids(:email_domain_blocks, 'domain').each do |row|
domain_blocks = EmailDomainBlock.where(id: row['ids'].split(',')).order(EmailDomainBlock.arel_table[:parent_id].asc.nulls_first).to_a domain_blocks = EmailDomainBlock.where(id: row['ids'].split(',')).order(EmailDomainBlock.arel_table[:parent_id].asc.nulls_first).to_a
domain_blocks.drop(1).each(&:destroy) domain_blocks.drop(1).each(&:destroy)
end end
@ -522,7 +522,7 @@ module Mastodon::CLI
remove_index_if_exists!(:media_attachments, 'index_media_attachments_on_shortcode') remove_index_if_exists!(:media_attachments, 'index_media_attachments_on_shortcode')
say 'Deduplicating media_attachments…' say 'Deduplicating media_attachments…'
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM media_attachments WHERE shortcode IS NOT NULL GROUP BY shortcode HAVING count(*) > 1").each do |row| duplicate_record_ids_without_nulls(:media_attachments, 'shortcode').each do |row|
MediaAttachment.where(id: row['ids'].split(',').drop(1)).update_all(shortcode: nil) MediaAttachment.where(id: row['ids'].split(',').drop(1)).update_all(shortcode: nil)
end end
@ -538,7 +538,7 @@ module Mastodon::CLI
remove_index_if_exists!(:preview_cards, 'index_preview_cards_on_url') remove_index_if_exists!(:preview_cards, 'index_preview_cards_on_url')
say 'Deduplicating preview_cards…' say 'Deduplicating preview_cards…'
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM preview_cards GROUP BY url HAVING count(*) > 1").each do |row| duplicate_record_ids(:preview_cards, 'url').each do |row|
PreviewCard.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy) PreviewCard.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy)
end end
@ -550,7 +550,7 @@ module Mastodon::CLI
remove_index_if_exists!(:statuses, 'index_statuses_on_uri') remove_index_if_exists!(:statuses, 'index_statuses_on_uri')
say 'Deduplicating statuses…' say 'Deduplicating statuses…'
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM statuses WHERE uri IS NOT NULL GROUP BY uri HAVING count(*) > 1").each do |row| duplicate_record_ids_without_nulls(:statuses, 'uri').each do |row|
statuses = Status.where(id: row['ids'].split(',')).order(id: :asc).to_a statuses = Status.where(id: row['ids'].split(',')).order(id: :asc).to_a
ref_status = statuses.shift ref_status = statuses.shift
statuses.each do |status| statuses.each do |status|
@ -572,7 +572,7 @@ module Mastodon::CLI
remove_index_if_exists!(:tags, 'index_tags_on_name_lower_btree') remove_index_if_exists!(:tags, 'index_tags_on_name_lower_btree')
say 'Deduplicating tags…' say 'Deduplicating tags…'
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM tags GROUP BY lower((name)::text) HAVING count(*) > 1").each do |row| duplicate_record_ids(:tags, 'lower((name)::text)').each do |row|
tags = Tag.where(id: row['ids'].split(',')).order(Arel.sql('(usable::int + trendable::int + listable::int) desc')).to_a tags = Tag.where(id: row['ids'].split(',')).order(Arel.sql('(usable::int + trendable::int + listable::int) desc')).to_a
ref_tag = tags.shift ref_tag = tags.shift
tags.each do |tag| tags.each do |tag|
@ -595,7 +595,7 @@ module Mastodon::CLI
remove_index_if_exists!(:webauthn_credentials, 'index_webauthn_credentials_on_external_id') remove_index_if_exists!(:webauthn_credentials, 'index_webauthn_credentials_on_external_id')
say 'Deduplicating webauthn_credentials…' say 'Deduplicating webauthn_credentials…'
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM webauthn_credentials GROUP BY external_id HAVING count(*) > 1").each do |row| duplicate_record_ids(:webauthn_credentials, 'external_id').each do |row|
WebauthnCredential.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy) WebauthnCredential.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy)
end end
@ -609,7 +609,7 @@ module Mastodon::CLI
remove_index_if_exists!(:webhooks, 'index_webhooks_on_url') remove_index_if_exists!(:webhooks, 'index_webhooks_on_url')
say 'Deduplicating webhooks…' say 'Deduplicating webhooks…'
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM webhooks GROUP BY url HAVING count(*) > 1").each do |row| duplicate_record_ids(:webhooks, 'url').each do |row|
Webhook.where(id: row['ids'].split(',')).order(id: :desc).drop(1).each(&:destroy) Webhook.where(id: row['ids'].split(',')).order(id: :desc).drop(1).each(&:destroy)
end end
@ -746,8 +746,23 @@ module Mastodon::CLI
ActiveRecord::Migrator.current_version ActiveRecord::Migrator.current_version
end end
def find_duplicate_accounts def duplicate_record_ids_without_nulls(table, group_by)
database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM accounts GROUP BY lower(username), COALESCE(lower(domain), '') HAVING count(*) > 1") database_connection.select_all(<<~SQL.squish)
SELECT string_agg(id::text, ',') AS ids
FROM #{table}
WHERE #{group_by} IS NOT NULL
GROUP BY #{group_by}
HAVING COUNT(*) > 1
SQL
end
def duplicate_record_ids(table, group_by)
database_connection.select_all(<<~SQL.squish)
SELECT string_agg(id::text, ',') AS ids
FROM #{table}
GROUP BY #{group_by}
HAVING COUNT(*) > 1
SQL
end end
def remove_index_if_exists!(table, name) def remove_index_if_exists!(table, name)