Fix unnecessary queries when batch-removing statuses, 100x faster (#15387)
This commit is contained in:
parent
67ebd61f11
commit
9915d11c0d
7 changed files with 168 additions and 100 deletions
|
@ -36,7 +36,7 @@ class Favourite < ApplicationRecord
|
|||
end
|
||||
|
||||
def decrement_cache_counters
|
||||
return if association(:status).loaded? && (status.marked_for_destruction? || status.marked_for_mass_destruction?)
|
||||
return if association(:status).loaded? && status.marked_for_destruction?
|
||||
status&.decrement_count!(:favourites_count)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -228,14 +228,6 @@ class Status < ApplicationRecord
|
|||
@emojis = CustomEmoji.from_text(fields.join(' '), account.domain)
|
||||
end
|
||||
|
||||
def mark_for_mass_destruction!
|
||||
@marked_for_mass_destruction = true
|
||||
end
|
||||
|
||||
def marked_for_mass_destruction?
|
||||
@marked_for_mass_destruction
|
||||
end
|
||||
|
||||
def replies_count
|
||||
status_stat&.replies_count || 0
|
||||
end
|
||||
|
@ -430,7 +422,7 @@ class Status < ApplicationRecord
|
|||
end
|
||||
|
||||
def decrement_counter_caches
|
||||
return if direct_visibility? || marked_for_mass_destruction?
|
||||
return if direct_visibility?
|
||||
|
||||
account&.decrement_count!(:statuses_count)
|
||||
reblog&.decrement_count!(:reblogs_count) if reblog?
|
||||
|
@ -440,7 +432,7 @@ class Status < ApplicationRecord
|
|||
def unlink_from_conversations
|
||||
return unless direct_visibility?
|
||||
|
||||
mentioned_accounts = mentions.includes(:account).map(&:account)
|
||||
mentioned_accounts = (association(:mentions).loaded? ? mentions : mentions.includes(:account)).map(&:account)
|
||||
inbox_owners = mentioned_accounts.select(&:local?) + (account.local? ? [account] : [])
|
||||
|
||||
inbox_owners.each do |inbox_owner|
|
||||
|
|
|
@ -3,29 +3,45 @@
|
|||
class BatchedRemoveStatusService < BaseService
|
||||
include Redisable
|
||||
|
||||
# Delete given statuses and reblogs of them
|
||||
# Remove statuses from home feeds
|
||||
# Push delete events to streaming API for home feeds and public feeds
|
||||
# @param [Enumerable<Status>] statuses A preferably batched array of statuses
|
||||
# Delete multiple statuses and reblogs of them as efficiently as possible
|
||||
# @param [Enumerable<Status>] statuses An array of statuses
|
||||
# @param [Hash] options
|
||||
# @option [Boolean] :skip_side_effects
|
||||
# @option options [Boolean] :skip_side_effects Do not modify feeds or send updates to the streaming API
|
||||
def call(statuses, **options)
|
||||
statuses = Status.where(id: statuses.map(&:id)).includes(:account).flat_map { |status| [status] + status.reblogs.includes(:account).to_a }
|
||||
ActiveRecord::Associations::Preloader.new.preload(statuses, options[:skip_side_effects] ? :reblogs : [:account, reblogs: :account])
|
||||
|
||||
@mentions = statuses.each_with_object({}) { |s, h| h[s.id] = s.active_mentions.includes(:account).to_a }
|
||||
@tags = statuses.each_with_object({}) { |s, h| h[s.id] = s.tags.pluck(:name) }
|
||||
statuses_and_reblogs = statuses.flat_map { |status| [status] + status.reblogs }
|
||||
|
||||
@json_payloads = statuses.each_with_object({}) { |s, h| h[s.id] = Oj.dump(event: :delete, payload: s.id.to_s) }
|
||||
# The conversations for direct visibility statuses also need
|
||||
# to be manually updated. This part is not efficient but we
|
||||
# rely on direct visibility statuses being relatively rare.
|
||||
statuses_with_account_conversations = statuses.select(&:direct_visibility?)
|
||||
|
||||
statuses.each do |status|
|
||||
status.mark_for_mass_destruction!
|
||||
status.destroy
|
||||
ActiveRecord::Associations::Preloader.new.preload(statuses_with_account_conversations, [mentions: :account])
|
||||
|
||||
statuses_with_account_conversations.each do |status|
|
||||
status.send(:unlink_from_conversations)
|
||||
end
|
||||
|
||||
# We do not batch all deletes into one to avoid having a long-running
|
||||
# transaction lock the database, but we use the delete method instead
|
||||
# of destroy to avoid all callbacks. We rely on foreign keys to
|
||||
# cascade the delete faster without loading the associations.
|
||||
statuses_and_reblogs.each(&:delete)
|
||||
|
||||
# Since we skipped all callbacks, we also need to manually
|
||||
# deindex the statuses
|
||||
Chewy.strategy.current.update(StatusesIndex, statuses_and_reblogs)
|
||||
|
||||
return if options[:skip_side_effects]
|
||||
|
||||
ActiveRecord::Associations::Preloader.new.preload(statuses_and_reblogs, :tags)
|
||||
|
||||
@tags = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = s.tags.map { |tag| tag.name.mb_chars.downcase } }
|
||||
@json_payloads = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = Oj.dump(event: :delete, payload: s.id.to_s) }
|
||||
|
||||
# Batch by source account
|
||||
statuses.group_by(&:account_id).each_value do |account_statuses|
|
||||
statuses_and_reblogs.group_by(&:account_id).each_value do |account_statuses|
|
||||
account = account_statuses.first.account
|
||||
|
||||
next unless account
|
||||
|
@ -35,27 +51,31 @@ class BatchedRemoveStatusService < BaseService
|
|||
end
|
||||
|
||||
# Cannot be batched
|
||||
statuses.each do |status|
|
||||
redis.pipelined do
|
||||
statuses_and_reblogs.each do |status|
|
||||
unpush_from_public_timelines(status)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def unpush_from_home_timelines(account, statuses)
|
||||
recipients = account.followers_for_local_distribution.to_a
|
||||
|
||||
recipients << account if account.local?
|
||||
|
||||
recipients.each do |follower|
|
||||
account.followers_for_local_distribution.includes(:user).find_each do |follower|
|
||||
statuses.each do |status|
|
||||
FeedManager.instance.unpush_from_home(follower, status)
|
||||
end
|
||||
end
|
||||
|
||||
return unless account.local?
|
||||
|
||||
statuses.each do |status|
|
||||
FeedManager.instance.unpush_from_home(account, status)
|
||||
end
|
||||
end
|
||||
|
||||
def unpush_from_list_timelines(account, statuses)
|
||||
account.lists_for_local_distribution.select(:id, :account_id).each do |list|
|
||||
account.lists_for_local_distribution.select(:id, :account_id).includes(account: :user).find_each do |list|
|
||||
statuses.each do |status|
|
||||
FeedManager.instance.unpush_from_list(list, status)
|
||||
end
|
||||
|
@ -67,26 +87,17 @@ class BatchedRemoveStatusService < BaseService
|
|||
|
||||
payload = @json_payloads[status.id]
|
||||
|
||||
redis.pipelined do
|
||||
redis.publish('timeline:public', payload)
|
||||
if status.local?
|
||||
redis.publish('timeline:public:local', payload)
|
||||
else
|
||||
redis.publish('timeline:public:remote', payload)
|
||||
end
|
||||
redis.publish(status.local? ? 'timeline:public:local' : 'timeline:public:remote', payload)
|
||||
|
||||
if status.media_attachments.any?
|
||||
redis.publish('timeline:public:media', payload)
|
||||
if status.local?
|
||||
redis.publish('timeline:public:local:media', payload)
|
||||
else
|
||||
redis.publish('timeline:public:remote:media', payload)
|
||||
end
|
||||
redis.publish(status.local? ? 'timeline:public:local:media' : 'timeline:public:remote:media', payload)
|
||||
end
|
||||
|
||||
@tags[status.id].each do |hashtag|
|
||||
redis.publish("timeline:hashtag:#{hashtag.mb_chars.downcase}", payload)
|
||||
redis.publish("timeline:hashtag:#{hashtag.mb_chars.downcase}:local", payload) if status.local?
|
||||
end
|
||||
redis.publish("timeline:hashtag:#{hashtag}", payload)
|
||||
redis.publish("timeline:hashtag:#{hashtag}:local", payload) if status.local?
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -6,15 +6,21 @@ class DeleteAccountService < BaseService
|
|||
ASSOCIATIONS_ON_SUSPEND = %w(
|
||||
account_pins
|
||||
active_relationships
|
||||
aliases
|
||||
block_relationships
|
||||
blocked_by_relationships
|
||||
bookmarks
|
||||
conversation_mutes
|
||||
conversations
|
||||
custom_filters
|
||||
devices
|
||||
domain_blocks
|
||||
favourites
|
||||
featured_tags
|
||||
follow_requests
|
||||
identity_proofs
|
||||
list_accounts
|
||||
migrations
|
||||
mute_relationships
|
||||
muted_by_relationships
|
||||
notifications
|
||||
|
@ -25,6 +31,29 @@ class DeleteAccountService < BaseService
|
|||
status_pins
|
||||
).freeze
|
||||
|
||||
# The following associations have no important side-effects
|
||||
# in callbacks and all of their own associations are secured
|
||||
# by foreign keys, making them safe to delete without loading
|
||||
# into memory
|
||||
ASSOCIATIONS_WITHOUT_SIDE_EFFECTS = %w(
|
||||
account_pins
|
||||
aliases
|
||||
conversation_mutes
|
||||
conversations
|
||||
custom_filters
|
||||
devices
|
||||
domain_blocks
|
||||
featured_tags
|
||||
follow_requests
|
||||
identity_proofs
|
||||
migrations
|
||||
mute_relationships
|
||||
muted_by_relationships
|
||||
notifications
|
||||
scheduled_statuses
|
||||
status_pins
|
||||
)
|
||||
|
||||
ASSOCIATIONS_ON_DESTROY = %w(
|
||||
reports
|
||||
targeted_moderation_notes
|
||||
|
@ -55,19 +84,25 @@ class DeleteAccountService < BaseService
|
|||
|
||||
@options[:skip_activitypub] = true if @options[:skip_side_effects]
|
||||
|
||||
reject_follows!
|
||||
undo_follows!
|
||||
purge_user!
|
||||
purge_profile!
|
||||
distribute_activities!
|
||||
purge_content!
|
||||
fulfill_deletion_request!
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def reject_follows!
|
||||
return if @account.local? || !@account.activitypub? || @options[:skip_activitypub]
|
||||
def distribute_activities!
|
||||
return if skip_activitypub?
|
||||
|
||||
if @account.local?
|
||||
delete_actor!
|
||||
elsif @account.activitypub?
|
||||
reject_follows!
|
||||
undo_follows!
|
||||
end
|
||||
end
|
||||
|
||||
def reject_follows!
|
||||
# When deleting a remote account, the account obviously doesn't
|
||||
# actually become deleted on its origin server, i.e. unlike a
|
||||
# locally deleted account it continues to have access to its home
|
||||
|
@ -81,8 +116,6 @@ class DeleteAccountService < BaseService
|
|||
end
|
||||
|
||||
def undo_follows!
|
||||
return if @account.local? || !@account.activitypub? || @options[:skip_activitypub]
|
||||
|
||||
# When deleting a remote account, the account obviously doesn't
|
||||
# actually become deleted on its origin server, but following relationships
|
||||
# are severed on our end. Therefore, make the remote server aware that the
|
||||
|
@ -97,7 +130,7 @@ class DeleteAccountService < BaseService
|
|||
def purge_user!
|
||||
return if !@account.local? || @account.user.nil?
|
||||
|
||||
if @options[:reserve_email]
|
||||
if keep_user_record?
|
||||
@account.user.disable!
|
||||
@account.user.invites.where(uses: 0).destroy_all
|
||||
else
|
||||
|
@ -106,34 +139,52 @@ class DeleteAccountService < BaseService
|
|||
end
|
||||
|
||||
def purge_content!
|
||||
distribute_delete_actor! if @account.local? && !@options[:skip_side_effects]
|
||||
purge_user!
|
||||
purge_profile!
|
||||
purge_statuses!
|
||||
purge_media_attachments!
|
||||
purge_polls!
|
||||
purge_generated_notifications!
|
||||
purge_other_associations!
|
||||
|
||||
@account.statuses.reorder(nil).find_in_batches do |statuses|
|
||||
statuses.reject! { |status| reported_status_ids.include?(status.id) } if @options[:reserve_username]
|
||||
BatchedRemoveStatusService.new.call(statuses, skip_side_effects: @options[:skip_side_effects])
|
||||
@account.destroy unless keep_account_record?
|
||||
end
|
||||
|
||||
def purge_statuses!
|
||||
@account.statuses.reorder(nil).find_in_batches do |statuses|
|
||||
statuses.reject! { |status| reported_status_ids.include?(status.id) } if keep_account_record?
|
||||
|
||||
BatchedRemoveStatusService.new.call(statuses, skip_side_effects: skip_side_effects?)
|
||||
end
|
||||
end
|
||||
|
||||
def purge_media_attachments!
|
||||
@account.media_attachments.reorder(nil).find_each do |media_attachment|
|
||||
next if @options[:reserve_username] && reported_status_ids.include?(media_attachment.status_id)
|
||||
next if keep_account_record? && reported_status_ids.include?(media_attachment.status_id)
|
||||
|
||||
media_attachment.destroy
|
||||
end
|
||||
end
|
||||
|
||||
def purge_polls!
|
||||
@account.polls.reorder(nil).find_each do |poll|
|
||||
next if @options[:reserve_username] && reported_status_ids.include?(poll.status_id)
|
||||
next if keep_account_record? && reported_status_ids.include?(poll.status_id)
|
||||
|
||||
# We can safely delete the poll rather than destroy it, as any non-reported
|
||||
# status should have been deleted already, as long as we take care of
|
||||
# notifications.
|
||||
Notification.where(poll: poll).delete_all
|
||||
poll.delete
|
||||
end
|
||||
|
||||
associations_for_destruction.each do |association_name|
|
||||
destroy_all(@account.public_send(association_name))
|
||||
end
|
||||
|
||||
@account.destroy unless @options[:reserve_username]
|
||||
def purge_generated_notifications!
|
||||
# By deleting polls and statuses without callbacks, we've left behind
|
||||
# polymorphically associated notifications generated by this account
|
||||
|
||||
Notification.where(from_account: @account).in_batches.delete_all
|
||||
end
|
||||
|
||||
def purge_other_associations!
|
||||
associations_for_destruction.each do |association_name|
|
||||
purge_association(association_name)
|
||||
end
|
||||
end
|
||||
|
||||
def purge_profile!
|
||||
|
@ -141,7 +192,7 @@ class DeleteAccountService < BaseService
|
|||
# there is no point wasting time updating
|
||||
# its values first
|
||||
|
||||
return unless @options[:reserve_username]
|
||||
return unless keep_account_record?
|
||||
|
||||
@account.silenced_at = nil
|
||||
@account.suspended_at = @options[:suspended_at] || Time.now.utc
|
||||
|
@ -156,6 +207,7 @@ class DeleteAccountService < BaseService
|
|||
@account.followers_count = 0
|
||||
@account.following_count = 0
|
||||
@account.moved_to_account = nil
|
||||
@account.also_known_as = []
|
||||
@account.trust_level = :untrusted
|
||||
@account.avatar.destroy
|
||||
@account.header.destroy
|
||||
|
@ -166,11 +218,17 @@ class DeleteAccountService < BaseService
|
|||
@account.deletion_request&.destroy
|
||||
end
|
||||
|
||||
def destroy_all(association)
|
||||
def purge_association(association_name)
|
||||
association = @account.public_send(association_name)
|
||||
|
||||
if ASSOCIATIONS_WITHOUT_SIDE_EFFECTS.include?(association_name)
|
||||
association.in_batches.delete_all
|
||||
else
|
||||
association.in_batches.destroy_all
|
||||
end
|
||||
end
|
||||
|
||||
def distribute_delete_actor!
|
||||
def delete_actor!
|
||||
ActivityPub::DeliveryWorker.push_bulk(delivery_inboxes) do |inbox_url|
|
||||
[delete_actor_json, @account.id, inbox_url]
|
||||
end
|
||||
|
@ -197,10 +255,26 @@ class DeleteAccountService < BaseService
|
|||
end
|
||||
|
||||
def associations_for_destruction
|
||||
if @options[:reserve_username]
|
||||
if keep_account_record?
|
||||
ASSOCIATIONS_ON_SUSPEND
|
||||
else
|
||||
ASSOCIATIONS_ON_SUSPEND + ASSOCIATIONS_ON_DESTROY
|
||||
end
|
||||
end
|
||||
|
||||
def keep_user_record?
|
||||
@options[:reserve_email]
|
||||
end
|
||||
|
||||
def keep_account_record?
|
||||
@options[:reserve_username]
|
||||
end
|
||||
|
||||
def skip_side_effects?
|
||||
@options[:skip_side_effects]
|
||||
end
|
||||
|
||||
def skip_activitypub?
|
||||
@options[:skip_activitypub]
|
||||
end
|
||||
end
|
||||
|
|
|
@ -12,6 +12,10 @@ Chewy.settings = {
|
|||
sidekiq: { queue: 'pull' },
|
||||
}
|
||||
|
||||
# We use our own async strategy even outside the request-response
|
||||
# cycle, which takes care of checking if Elasticsearch is enabled
|
||||
# or not. However, mind that for the Rails console, the :urgent
|
||||
# strategy is set automatically with no way to override it.
|
||||
Chewy.root_strategy = :custom_sidekiq
|
||||
Chewy.request_strategy = :custom_sidekiq
|
||||
Chewy.use_after_commit_callbacks = false
|
||||
|
@ -37,6 +41,7 @@ Elasticsearch::Transport::Client.prepend Module.new {
|
|||
super arguments
|
||||
end
|
||||
}
|
||||
|
||||
Elasticsearch::API::Indices::IndicesClient.prepend Module.new {
|
||||
def create(arguments = {})
|
||||
arguments[:include_type_name] = true
|
||||
|
|
|
@ -2,29 +2,10 @@
|
|||
|
||||
module Chewy
|
||||
class Strategy
|
||||
class CustomSidekiq < Base
|
||||
class Worker
|
||||
include ::Sidekiq::Worker
|
||||
|
||||
sidekiq_options queue: 'pull'
|
||||
|
||||
def perform(type, ids, options = {})
|
||||
options[:refresh] = !Chewy.disable_refresh_async if Chewy.disable_refresh_async
|
||||
type.constantize.import!(ids, options)
|
||||
class CustomSidekiq < Sidekiq
|
||||
def update(_type, _objects, _options = {})
|
||||
super if Chewy.enabled?
|
||||
end
|
||||
end
|
||||
|
||||
def update(type, objects, _options = {})
|
||||
return unless Chewy.enabled?
|
||||
|
||||
ids = type.root.id ? Array.wrap(objects) : type.adapter.identify(objects)
|
||||
|
||||
return if ids.empty?
|
||||
|
||||
Worker.perform_async(type.name, ids)
|
||||
end
|
||||
|
||||
def leave; end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -26,6 +26,11 @@ RSpec.describe BatchedRemoveStatusService, type: :service do
|
|||
subject.call([status1, status2])
|
||||
end
|
||||
|
||||
it 'removes statuses' do
|
||||
expect { Status.find(status1.id) }.to raise_error ActiveRecord::RecordNotFound
|
||||
expect { Status.find(status2.id) }.to raise_error ActiveRecord::RecordNotFound
|
||||
end
|
||||
|
||||
it 'removes statuses from author\'s home feed' do
|
||||
expect(HomeFeed.new(alice).get(10)).to_not include([status1.id, status2.id])
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue