Improve performance of follow recommendation scheduler (#16159)

Express follow_recommendations in terms of account_summaries rather than
accounts, integrate filters that are unconditionally used, and materialize
the resulting view.

This should result in the bulk of the computation being performed only once
instead of **once per recommendation language**.
This commit is contained in:
Claire 2021-05-05 22:04:52 +02:00 committed by GitHub
parent 351c744590
commit d9ae3db8d5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 75 additions and 16 deletions

View File

@ -14,9 +14,11 @@ class FollowRecommendation < ApplicationRecord
belongs_to :account_summary, foreign_key: :account_id belongs_to :account_summary, foreign_key: :account_id
belongs_to :account, foreign_key: :account_id belongs_to :account, foreign_key: :account_id
scope :safe, -> { joins(:account_summary).merge(AccountSummary.safe) }
scope :localized, ->(locale) { joins(:account_summary).merge(AccountSummary.localized(locale)) } scope :localized, ->(locale) { joins(:account_summary).merge(AccountSummary.localized(locale)) }
scope :filtered, -> { joins(:account_summary).merge(AccountSummary.filtered) }
def self.refresh
Scenic.database.refresh_materialized_view(table_name, concurrently: true, cascade: false)
end
def readonly? def readonly?
true true

View File

@ -14,13 +14,14 @@ class Scheduler::FollowRecommendationsScheduler
def perform def perform
# Maintaining a materialized view speeds-up subsequent queries significantly # Maintaining a materialized view speeds-up subsequent queries significantly
AccountSummary.refresh AccountSummary.refresh
FollowRecommendation.refresh
fallback_recommendations = FollowRecommendation.safe.filtered.limit(SET_SIZE).index_by(&:account_id) fallback_recommendations = FollowRecommendation.limit(SET_SIZE).index_by(&:account_id)
I18n.available_locales.each do |locale| I18n.available_locales.each do |locale|
recommendations = begin recommendations = begin
if AccountSummary.safe.filtered.localized(locale).exists? # We can skip the work if no accounts with that language exist if AccountSummary.safe.filtered.localized(locale).exists? # We can skip the work if no accounts with that language exist
FollowRecommendation.safe.filtered.localized(locale).limit(SET_SIZE).index_by(&:account_id) FollowRecommendation.localized(locale).limit(SET_SIZE).index_by(&:account_id)
else else
{} {}
end end

View File

@ -0,0 +1,18 @@
class UpdateFollowRecommendationsToVersion2 < ActiveRecord::Migration[6.1]
# We're switching from a normal to a materialized view so we need
# custom `up` and `down` paths.
def up
drop_view :follow_recommendations
create_view :follow_recommendations, version: 2, materialized: true
# To be able to refresh the view concurrently,
# at least one unique index is required
safety_assured { add_index :follow_recommendations, :account_id, unique: true }
end
def down
drop_view :follow_recommendations, materialized: true
create_view :follow_recommendations, version: 1
end
end

View File

@ -10,7 +10,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 2021_04_25_135952) do ActiveRecord::Schema.define(version: 2021_05_05_174616) do
# These are extensions that must be enabled in order to support this database # These are extensions that must be enabled in order to support this database
enable_extension "plpgsql" enable_extension "plpgsql"
@ -1114,30 +1114,34 @@ ActiveRecord::Schema.define(version: 2021_04_25_135952) do
SQL SQL
add_index "account_summaries", ["account_id"], name: "index_account_summaries_on_account_id", unique: true add_index "account_summaries", ["account_id"], name: "index_account_summaries_on_account_id", unique: true
create_view "follow_recommendations", sql_definition: <<-SQL create_view "follow_recommendations", materialized: true, sql_definition: <<-SQL
SELECT t0.account_id, SELECT t0.account_id,
sum(t0.rank) AS rank, sum(t0.rank) AS rank,
array_agg(t0.reason) AS reason array_agg(t0.reason) AS reason
FROM ( SELECT accounts.id AS account_id, FROM ( SELECT account_summaries.account_id,
((count(follows.id))::numeric / (1.0 + (count(follows.id))::numeric)) AS rank, ((count(follows.id))::numeric / (1.0 + (count(follows.id))::numeric)) AS rank,
'most_followed'::text AS reason 'most_followed'::text AS reason
FROM ((follows FROM (((follows
JOIN accounts ON ((accounts.id = follows.target_account_id))) JOIN account_summaries ON ((account_summaries.account_id = follows.target_account_id)))
JOIN users ON ((users.account_id = follows.account_id))) JOIN users ON ((users.account_id = follows.account_id)))
WHERE ((users.current_sign_in_at >= (now() - 'P30D'::interval)) AND (accounts.suspended_at IS NULL) AND (accounts.moved_to_account_id IS NULL) AND (accounts.silenced_at IS NULL) AND (accounts.locked = false) AND (accounts.discoverable = true)) LEFT JOIN follow_recommendation_suppressions ON ((follow_recommendation_suppressions.account_id = follows.target_account_id)))
GROUP BY accounts.id WHERE ((users.current_sign_in_at >= (now() - 'P30D'::interval)) AND (account_summaries.sensitive = false) AND (follow_recommendation_suppressions.id IS NULL))
GROUP BY account_summaries.account_id
HAVING (count(follows.id) >= 5) HAVING (count(follows.id) >= 5)
UNION ALL UNION ALL
SELECT accounts.id AS account_id, SELECT account_summaries.account_id,
(sum((status_stats.reblogs_count + status_stats.favourites_count)) / (1.0 + sum((status_stats.reblogs_count + status_stats.favourites_count)))) AS rank, (sum((status_stats.reblogs_count + status_stats.favourites_count)) / (1.0 + sum((status_stats.reblogs_count + status_stats.favourites_count)))) AS rank,
'most_interactions'::text AS reason 'most_interactions'::text AS reason
FROM ((status_stats FROM (((status_stats
JOIN statuses ON ((statuses.id = status_stats.status_id))) JOIN statuses ON ((statuses.id = status_stats.status_id)))
JOIN accounts ON ((accounts.id = statuses.account_id))) JOIN account_summaries ON ((account_summaries.account_id = statuses.account_id)))
WHERE ((statuses.id >= (((date_part('epoch'::text, (now() - 'P30D'::interval)) * (1000)::double precision))::bigint << 16)) AND (accounts.suspended_at IS NULL) AND (accounts.moved_to_account_id IS NULL) AND (accounts.silenced_at IS NULL) AND (accounts.locked = false) AND (accounts.discoverable = true)) LEFT JOIN follow_recommendation_suppressions ON ((follow_recommendation_suppressions.account_id = statuses.account_id)))
GROUP BY accounts.id WHERE ((statuses.id >= (((date_part('epoch'::text, (now() - 'P30D'::interval)) * (1000)::double precision))::bigint << 16)) AND (account_summaries.sensitive = false) AND (follow_recommendation_suppressions.id IS NULL))
GROUP BY account_summaries.account_id
HAVING (sum((status_stats.reblogs_count + status_stats.favourites_count)) >= (5)::numeric)) t0 HAVING (sum((status_stats.reblogs_count + status_stats.favourites_count)) >= (5)::numeric)) t0
GROUP BY t0.account_id GROUP BY t0.account_id
ORDER BY (sum(t0.rank)) DESC; ORDER BY (sum(t0.rank)) DESC;
SQL SQL
add_index "follow_recommendations", ["account_id"], name: "index_follow_recommendations_on_account_id", unique: true
end end

View File

@ -0,0 +1,34 @@
SELECT
account_id,
sum(rank) AS rank,
array_agg(reason) AS reason
FROM (
SELECT
account_summaries.account_id AS account_id,
count(follows.id) / (1.0 + count(follows.id)) AS rank,
'most_followed' AS reason
FROM follows
INNER JOIN account_summaries ON account_summaries.account_id = follows.target_account_id
INNER JOIN users ON users.account_id = follows.account_id
LEFT OUTER JOIN follow_recommendation_suppressions ON follow_recommendation_suppressions.account_id = follows.target_account_id
WHERE users.current_sign_in_at >= (now() - interval '30 days')
AND account_summaries.sensitive = 'f'
AND follow_recommendation_suppressions.id IS NULL
GROUP BY account_summaries.account_id
HAVING count(follows.id) >= 5
UNION ALL
SELECT account_summaries.account_id AS account_id,
sum(reblogs_count + favourites_count) / (1.0 + sum(reblogs_count + favourites_count)) AS rank,
'most_interactions' AS reason
FROM status_stats
INNER JOIN statuses ON statuses.id = status_stats.status_id
INNER JOIN account_summaries ON account_summaries.account_id = statuses.account_id
LEFT OUTER JOIN follow_recommendation_suppressions ON follow_recommendation_suppressions.account_id = statuses.account_id
WHERE statuses.id >= ((date_part('epoch', now() - interval '30 days') * 1000)::bigint << 16)
AND account_summaries.sensitive = 'f'
AND follow_recommendation_suppressions.id IS NULL
GROUP BY account_summaries.account_id
HAVING sum(reblogs_count + favourites_count) >= 5
) t0
GROUP BY account_id
ORDER BY rank DESC