Merge branch 'main' into glitch-soc/merge-upstream
Conflicts:
- `.env.production.sample`:
  Copied upstream changes.
- `app/controllers/settings/identity_proofs_controller.rb`:
  Minor conflict due to glitch-soc's extra “enable_keybase” setting.
  Upstream removed keybase support altogether, so did the same.
- `app/controllers/well_known/keybase_proof_config_controller.rb`:
  Minor conflict due to glitch-soc's extra “enable_keybase” setting.
  Upstream removed keybase support altogether, so did the same.
- `lib/mastodon/statuses_cli.rb`:
  Minor conflict due to an optimization that wasn't shared between the two versions.
  Copied upstream's version.
This commit is contained in:
@ -17,10 +17,11 @@ module Mastodon
|
||||
].freeze
|
||||
|
||||
option :concurrency, type: :numeric, default: 2, aliases: [:c], desc: 'Workload will be split between this number of threads'
|
||||
option :batch_size, type: :numeric, default: 1_000, aliases: [:b], desc: 'Number of records in each batch'
|
||||
option :only, type: :array, enum: %w(accounts tags statuses), desc: 'Only process these indices'
|
||||
desc 'deploy', 'Create or upgrade ElasticSearch indices and populate them'
|
||||
desc 'deploy', 'Create or upgrade Elasticsearch indices and populate them'
|
||||
long_desc <<~LONG_DESC
|
||||
If ElasticSearch is empty, this command will create the necessary indices
|
||||
If Elasticsearch is empty, this command will create the necessary indices
|
||||
and then import data from the database into those indices.
|
||||
|
||||
This command will also upgrade indices if the underlying schema has been
|
||||
@ -35,6 +36,11 @@ module Mastodon
|
||||
exit(1)
|
||||
end
|
||||
|
||||
if options[:batch_size] < 1
|
||||
say('Cannot run with this batch_size setting, must be at least 1', :red)
|
||||
exit(1)
|
||||
end
|
||||
|
||||
indices = begin
|
||||
if options[:only]
|
||||
options[:only].map { |str| "#{str.camelize}Index".constantize }
|
||||
@ -73,7 +79,7 @@ module Mastodon
|
||||
# is uneconomical. So we only ever add.
|
||||
indices.each do |index|
|
||||
progress.title = "Importing #{index} "
|
||||
batch_size = 1_000
|
||||
batch_size = options[:batch_size]
|
||||
slice_size = (batch_size / options[:concurrency]).ceil
|
||||
|
||||
index.adapter.default_scope.reorder(nil).find_in_batches(batch_size: batch_size) do |batch|
|
||||
|
@ -6,6 +6,7 @@ require_relative 'cli_helper'
|
||||
|
||||
module Mastodon
|
||||
class StatusesCLI < Thor
|
||||
include CLIHelper
|
||||
include ActionView::Helpers::NumberHelper
|
||||
|
||||
def self.exit_on_failure?
|
||||
@ -15,6 +16,8 @@ module Mastodon
|
||||
option :days, type: :numeric, default: 90
|
||||
option :clean_followed, type: :boolean
|
||||
option :skip_media_remove, type: :boolean
|
||||
option :vacuum, type: :boolean, default: false, desc: 'Reduce the file size and update the statistics. This option locks the table for a long time, so run it offline'
|
||||
option :batch_size, type: :numeric, default: 1_000, aliases: [:b], desc: 'Number of records in each batch'
|
||||
desc 'remove', 'Remove unreferenced statuses'
|
||||
long_desc <<~LONG_DESC
|
||||
Remove statuses that are not referenced by local user activity, such as
|
||||
@ -25,52 +28,89 @@ module Mastodon
|
||||
indices before commencing, and removes them afterward.
|
||||
LONG_DESC
|
||||
def remove
|
||||
if options[:batch_size] < 1
|
||||
say('Cannot run with this batch_size setting, must be at least 1', :red)
|
||||
exit(1)
|
||||
end
|
||||
|
||||
say('Creating temporary database indices...')
|
||||
|
||||
ActiveRecord::Base.connection.add_index(:accounts, :id, name: :index_accounts_local, where: 'domain is null', algorithm: :concurrently) unless ActiveRecord::Base.connection.index_name_exists?(:accounts, :index_accounts_local)
|
||||
ActiveRecord::Base.connection.add_index(:status_pins, :status_id, name: :index_status_pins_status_id, algorithm: :concurrently) unless ActiveRecord::Base.connection.index_name_exists?(:status_pins, :index_status_pins_status_id)
|
||||
ActiveRecord::Base.connection.add_index(:media_attachments, :remote_url, name: :index_media_attachments_remote_url, where: 'remote_url is not null', algorithm: :concurrently) unless ActiveRecord::Base.connection.index_name_exists?(:media_attachments, :index_media_attachments_remote_url)
|
||||
ActiveRecord::Base.connection.add_index(:accounts, :id, name: :index_accounts_local, where: 'domain is null', algorithm: :concurrently, if_not_exists: true)
|
||||
ActiveRecord::Base.connection.add_index(:status_pins, :status_id, name: :index_status_pins_status_id, algorithm: :concurrently, if_not_exists: true)
|
||||
ActiveRecord::Base.connection.add_index(:media_attachments, :remote_url, name: :index_media_attachments_remote_url, where: 'remote_url is not null', algorithm: :concurrently, if_not_exists: true)
|
||||
|
||||
max_id = Mastodon::Snowflake.id_at(options[:days].days.ago)
|
||||
start_at = Time.now.to_f
|
||||
|
||||
say('Extract the deletion target... This might take a while...')
|
||||
|
||||
ActiveRecord::Base.connection.create_table('statuses_to_be_deleted', temporary: true)
|
||||
|
||||
# Skip accounts followed by local accounts
|
||||
clean_followed_sql = 'AND NOT EXISTS (SELECT 1 FROM follows WHERE statuses.account_id = follows.target_account_id)' unless options[:clean_followed]
|
||||
|
||||
ActiveRecord::Base.connection.exec_insert(<<-SQL.squish, 'SQL', [[nil, max_id]])
|
||||
INSERT INTO statuses_to_be_deleted (id)
|
||||
SELECT statuses.id FROM statuses WHERE deleted_at IS NULL AND NOT local AND uri IS NOT NULL AND (id < $1)
|
||||
AND NOT EXISTS (SELECT 1 FROM statuses AS statuses1 WHERE statuses.id = statuses1.in_reply_to_id)
|
||||
AND NOT EXISTS (SELECT 1 FROM statuses AS statuses1 WHERE statuses1.id = statuses.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local))
|
||||
AND NOT EXISTS (SELECT 1 FROM statuses AS statuses1 WHERE statuses.id = statuses1.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local OR statuses1.id >= $1))
|
||||
AND NOT EXISTS (SELECT 1 FROM status_pins WHERE statuses.id = status_id)
|
||||
AND NOT EXISTS (SELECT 1 FROM mentions WHERE statuses.id = mentions.status_id AND mentions.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))
|
||||
AND NOT EXISTS (SELECT 1 FROM favourites WHERE statuses.id = favourites.status_id AND favourites.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))
|
||||
AND NOT EXISTS (SELECT 1 FROM bookmarks WHERE statuses.id = bookmarks.status_id AND bookmarks.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))
|
||||
#{clean_followed_sql}
|
||||
SQL
|
||||
|
||||
say('Removing temporary database indices to restore write performance...')
|
||||
|
||||
ActiveRecord::Base.connection.remove_index(:accounts, name: :index_accounts_local, if_exists: true)
|
||||
ActiveRecord::Base.connection.remove_index(:status_pins, name: :index_status_pins_status_id, if_exists: true)
|
||||
|
||||
say('Beginning removal... This might take a while...')
|
||||
|
||||
scope = Status.remote.where('id < ?', max_id)
|
||||
# Skip reblogs of local statuses
|
||||
scope = scope.where('reblog_of_id NOT IN (SELECT statuses1.id FROM statuses AS statuses1 WHERE statuses1.id = statuses.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local))')
|
||||
# Skip statuses that are pinned on profiles
|
||||
scope = scope.where('id NOT IN (SELECT status_pins.status_id FROM status_pins WHERE statuses.id = status_id)')
|
||||
# Skip statuses that mention local accounts
|
||||
scope = scope.where('id NOT IN (SELECT mentions.status_id FROM mentions WHERE statuses.id = mentions.status_id AND mentions.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))')
|
||||
# Skip statuses which have replies
|
||||
scope = scope.where('id NOT IN (SELECT statuses1.in_reply_to_id FROM statuses AS statuses1 WHERE statuses.id = statuses1.in_reply_to_id)')
|
||||
# Skip statuses reblogged by local accounts or with recent boosts
|
||||
scope = scope.where('id NOT IN (SELECT statuses1.reblog_of_id FROM statuses AS statuses1 WHERE statuses.id = statuses1.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local OR statuses1.id >= ?))', max_id)
|
||||
# Skip statuses favourited by local users
|
||||
scope = scope.where('id NOT IN (SELECT favourites.status_id FROM favourites WHERE statuses.id = favourites.status_id AND favourites.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))')
|
||||
# Skip statuses bookmarked by local users
|
||||
scope = scope.where('id NOT IN (SELECT bookmarks.status_id FROM bookmarks WHERE statuses.id = bookmarks.status_id)')
|
||||
|
||||
unless options[:clean_followed]
|
||||
# Skip accounts followed by local accounts
|
||||
scope = scope.where('account_id NOT IN (SELECT follows.target_account_id FROM follows WHERE statuses.account_id = follows.target_account_id)')
|
||||
klass = Class.new(ApplicationRecord) do |c|
|
||||
c.table_name = 'statuses_to_be_deleted'
|
||||
end
|
||||
|
||||
scope.in_batches.delete_all
|
||||
Object.const_set('StatusToBeDeleted', klass)
|
||||
|
||||
scope = StatusToBeDeleted
|
||||
processed = 0
|
||||
removed = 0
|
||||
progress = create_progress_bar(scope.count.fdiv(options[:batch_size]).ceil)
|
||||
|
||||
scope.reorder(nil).in_batches(of: options[:batch_size]) do |relation|
|
||||
ids = relation.pluck(:id)
|
||||
processed += ids.count
|
||||
removed += Status.unscoped.where(id: ids).delete_all
|
||||
progress.increment
|
||||
end
|
||||
|
||||
progress.stop
|
||||
|
||||
if options[:vacuum]
|
||||
say('Run VACUUM and ANALYZE to statuses...')
|
||||
|
||||
ActiveRecord::Base.connection.execute('VACUUM FULL ANALYZE statuses')
|
||||
else
|
||||
say('Run ANALYZE to statuses...')
|
||||
|
||||
ActiveRecord::Base.connection.execute('ANALYZE statuses')
|
||||
end
|
||||
|
||||
unless options[:skip_media_remove]
|
||||
say('Beginning removal of now-orphaned media attachments to free up disk space...')
|
||||
Scheduler::MediaCleanupScheduler.new.perform
|
||||
end
|
||||
|
||||
say("Done after #{Time.now.to_f - start_at}s", :green)
|
||||
say("Done after #{Time.now.to_f - start_at}s, removed #{removed} out of #{processed} statuses.", :green)
|
||||
ensure
|
||||
say('Removing temporary database indices to restore write performance...')
|
||||
|
||||
ActiveRecord::Base.connection.remove_index(:accounts, name: :index_accounts_local) if ActiveRecord::Base.connection.index_name_exists?(:accounts, :index_accounts_local)
|
||||
ActiveRecord::Base.connection.remove_index(:status_pins, name: :index_status_pins_status_id) if ActiveRecord::Base.connection.index_name_exists?(:status_pins, :index_status_pins_status_id)
|
||||
ActiveRecord::Base.connection.remove_index(:media_attachments, name: :index_media_attachments_remote_url) if ActiveRecord::Base.connection.index_name_exists?(:media_attachments, :index_media_attachments_remote_url)
|
||||
ActiveRecord::Base.connection.remove_index(:accounts, name: :index_accounts_local, if_exists: true)
|
||||
ActiveRecord::Base.connection.remove_index(:status_pins, name: :index_status_pins_status_id, if_exists: true)
|
||||
ActiveRecord::Base.connection.remove_index(:media_attachments, name: :index_media_attachments_remote_url, if_exists: true)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
Reference in New Issue
Block a user