From 679b7158e3cd3881e8cbaf2d2c0c97725b3b5fd9 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Wed, 18 May 2022 23:29:14 +0200 Subject: [PATCH] Change search indexing to use batches to minimize resource usage (#18451) --- Gemfile | 2 +- app/chewy/accounts_index.rb | 2 +- app/chewy/statuses_index.rb | 2 +- app/chewy/tags_index.rb | 2 +- app/workers/scheduler/indexing_scheduler.rb | 26 ++++++++++++++++++++ config/application.rb | 2 +- config/initializers/chewy.rb | 5 ++-- config/sidekiq.yml | 4 +++ lib/chewy/strategy/custom_sidekiq.rb | 11 --------- lib/chewy/strategy/mastodon.rb | 27 +++++++++++++++++++++ 10 files changed, 64 insertions(+), 19 deletions(-) create mode 100644 app/workers/scheduler/indexing_scheduler.rb delete mode 100644 lib/chewy/strategy/custom_sidekiq.rb create mode 100644 lib/chewy/strategy/mastodon.rb diff --git a/Gemfile b/Gemfile index 445b10496a7..2e77fb42a69 100644 --- a/Gemfile +++ b/Gemfile @@ -81,7 +81,7 @@ gem 'scenic', '~> 1.6' gem 'sidekiq', '~> 6.4' gem 'sidekiq-scheduler', '~> 4.0' gem 'sidekiq-unique-jobs', '~> 7.1' -gem 'sidekiq-bulk', '~>0.2.0' +gem 'sidekiq-bulk', '~> 0.2.0' gem 'simple-navigation', '~> 4.3' gem 'simple_form', '~> 5.1' gem 'sprockets-rails', '~> 3.4', require: 'sprockets/railtie' diff --git a/app/chewy/accounts_index.rb b/app/chewy/accounts_index.rb index 6f9ea76e9a9..763958a3f95 100644 --- a/app/chewy/accounts_index.rb +++ b/app/chewy/accounts_index.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true class AccountsIndex < Chewy::Index - settings index: { refresh_interval: '5m' }, analysis: { + settings index: { refresh_interval: '30s' }, analysis: { analyzer: { content: { tokenizer: 'whitespace', diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index 1304aeedbdd..c2000987997 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -3,7 +3,7 @@ class StatusesIndex < Chewy::Index include FormattingHelper - settings index: { refresh_interval: '15m' }, analysis: { + settings index: { refresh_interval: '30s' }, analysis: { filter: { english_stop: { type: 'stop', diff --git a/app/chewy/tags_index.rb b/app/chewy/tags_index.rb index f9db2b03af1..a5b139bcaac 100644 --- a/app/chewy/tags_index.rb +++ b/app/chewy/tags_index.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true class TagsIndex < Chewy::Index - settings index: { refresh_interval: '15m' }, analysis: { + settings index: { refresh_interval: '30s' }, analysis: { analyzer: { content: { tokenizer: 'keyword', diff --git a/app/workers/scheduler/indexing_scheduler.rb b/app/workers/scheduler/indexing_scheduler.rb new file mode 100644 index 00000000000..3a6f47a29a4 --- /dev/null +++ b/app/workers/scheduler/indexing_scheduler.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +class Scheduler::IndexingScheduler + include Sidekiq::Worker + include Redisable + + sidekiq_options retry: 0 + + def perform + indexes.each do |type| + with_redis do |redis| + ids = redis.smembers("chewy:queue:#{type.name}") + + type.import!(ids) + + redis.pipelined do |pipeline| + ids.each { |id| pipeline.srem("chewy:queue:#{type.name}", id) } + end + end + end + end + + def indexes + [AccountsIndex, TagsIndex, StatusesIndex] + end +end diff --git a/config/application.rb b/config/application.rb index 64987cfe7b1..24fa2a97819 100644 --- a/config/application.rb +++ b/config/application.rb @@ -38,7 +38,7 @@ require_relative '../lib/mastodon/version' require_relative '../lib/mastodon/rack_middleware' require_relative '../lib/devise/two_factor_ldap_authenticatable' require_relative '../lib/devise/two_factor_pam_authenticatable' -require_relative '../lib/chewy/strategy/custom_sidekiq' +require_relative '../lib/chewy/strategy/mastodon' require_relative '../lib/webpacker/manifest_extensions' require_relative '../lib/webpacker/helper_extensions' require_relative '../lib/rails/engine_extensions' diff --git a/config/initializers/chewy.rb b/config/initializers/chewy.rb index f303fc54d3a..752fc3c6dfe 100644 --- a/config/initializers/chewy.rb +++ b/config/initializers/chewy.rb @@ -13,15 +13,14 @@ Chewy.settings = { journal: false, user: user, password: password, - sidekiq: { queue: 'pull' }, } # We use our own async strategy even outside the request-response # cycle, which takes care of checking if Elasticsearch is enabled # or not. However, mind that for the Rails console, the :urgent # strategy is set automatically with no way to override it. -Chewy.root_strategy = :custom_sidekiq -Chewy.request_strategy = :custom_sidekiq +Chewy.root_strategy = :mastodon +Chewy.request_strategy = :mastodon Chewy.use_after_commit_callbacks = false module Chewy diff --git a/config/sidekiq.yml b/config/sidekiq.yml index 26be2632659..2a387146846 100644 --- a/config/sidekiq.yml +++ b/config/sidekiq.yml @@ -21,6 +21,10 @@ every: '6h' class: Scheduler::Trends::ReviewNotificationsScheduler queue: scheduler + indexing_scheduler: + every: '5m' + class: Scheduler::IndexingScheduler + queue: scheduler media_cleanup_scheduler: cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *' class: Scheduler::MediaCleanupScheduler diff --git a/lib/chewy/strategy/custom_sidekiq.rb b/lib/chewy/strategy/custom_sidekiq.rb deleted file mode 100644 index 794ae4ed44d..00000000000 --- a/lib/chewy/strategy/custom_sidekiq.rb +++ /dev/null @@ -1,11 +0,0 @@ -# frozen_string_literal: true - -module Chewy - class Strategy - class CustomSidekiq < Sidekiq - def update(_type, _objects, _options = {}) - super if Chewy.enabled? - end - end - end -end diff --git a/lib/chewy/strategy/mastodon.rb b/lib/chewy/strategy/mastodon.rb new file mode 100644 index 00000000000..ee8b921865e --- /dev/null +++ b/lib/chewy/strategy/mastodon.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +module Chewy + class Strategy + class Mastodon < Base + def initialize + super + + @stash = Hash.new { |hash, key| hash[key] = [] } + end + + def update(type, objects, _options = {}) + @stash[type].concat(type.root.id ? Array.wrap(objects) : type.adapter.identify(objects)) if Chewy.enabled? + end + + def leave + RedisConfiguration.with do |redis| + redis.pipelined do |pipeline| + @stash.each do |type, ids| + pipeline.sadd("chewy:queue:#{type.name}", ids) + end + end + end + end + end + end +end