Add feature to automatically delete old toots (#16529)
* Add account statuses cleanup policy model * Record last inspected toot to delete to speed up successive calls to statuses_to_delete * Add service to cleanup a given account's statuses within a budget * Add worker to go through account policies and delete old toots * Fix last inspected status id logic All existing statuses older or equal to last inspected status id must be kept by the current policy. This is an invariant that must be kept so that resuming deletion from the last inspected status remains sound. * Add tests * Refactor scheduler and add tests * Add user interface * Add support for discriminating based on boosts/favs * Add UI support for min_reblogs and min_favs, rework UI * Address first round of review comments * Replace Snowflake#id_at_start with with_random parameter * Add tests * Add tests for StatusesCleanupController * Rework settings page * Adjust load-avoiding mechanisms * Please CodeClimate
This commit is contained in:
35
app/controllers/statuses_cleanup_controller.rb
Normal file
35
app/controllers/statuses_cleanup_controller.rb
Normal file
@ -0,0 +1,35 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
class StatusesCleanupController < ApplicationController
|
||||
layout 'admin'
|
||||
|
||||
before_action :authenticate_user!
|
||||
before_action :set_policy
|
||||
before_action :set_body_classes
|
||||
|
||||
def show; end
|
||||
|
||||
def update
|
||||
if @policy.update(resource_params)
|
||||
redirect_to statuses_cleanup_path, notice: I18n.t('generic.changes_saved_msg')
|
||||
else
|
||||
render action: :show
|
||||
end
|
||||
rescue ActionController::ParameterMissing
|
||||
# Do nothing
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def set_policy
|
||||
@policy = current_account.statuses_cleanup_policy || current_account.build_statuses_cleanup_policy(enabled: false)
|
||||
end
|
||||
|
||||
def resource_params
|
||||
params.require(:account_statuses_cleanup_policy).permit(:enabled, :min_status_age, :keep_direct, :keep_pinned, :keep_polls, :keep_media, :keep_self_fav, :keep_self_bookmark, :min_favs, :min_reblogs)
|
||||
end
|
||||
|
||||
def set_body_classes
|
||||
@body_classes = 'admin'
|
||||
end
|
||||
end
|
171
app/models/account_statuses_cleanup_policy.rb
Normal file
171
app/models/account_statuses_cleanup_policy.rb
Normal file
@ -0,0 +1,171 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# == Schema Information
|
||||
#
|
||||
# Table name: account_statuses_cleanup_policies
|
||||
#
|
||||
# id :bigint not null, primary key
|
||||
# account_id :bigint not null
|
||||
# enabled :boolean default(TRUE), not null
|
||||
# min_status_age :integer default(1209600), not null
|
||||
# keep_direct :boolean default(TRUE), not null
|
||||
# keep_pinned :boolean default(TRUE), not null
|
||||
# keep_polls :boolean default(FALSE), not null
|
||||
# keep_media :boolean default(FALSE), not null
|
||||
# keep_self_fav :boolean default(TRUE), not null
|
||||
# keep_self_bookmark :boolean default(TRUE), not null
|
||||
# min_favs :integer
|
||||
# min_reblogs :integer
|
||||
# created_at :datetime not null
|
||||
# updated_at :datetime not null
|
||||
#
|
||||
class AccountStatusesCleanupPolicy < ApplicationRecord
|
||||
include Redisable
|
||||
|
||||
ALLOWED_MIN_STATUS_AGE = [
|
||||
2.weeks.seconds,
|
||||
1.month.seconds,
|
||||
2.months.seconds,
|
||||
3.months.seconds,
|
||||
6.months.seconds,
|
||||
1.year.seconds,
|
||||
2.years.seconds,
|
||||
].freeze
|
||||
|
||||
EXCEPTION_BOOLS = %w(keep_direct keep_pinned keep_polls keep_media keep_self_fav keep_self_bookmark).freeze
|
||||
EXCEPTION_THRESHOLDS = %w(min_favs min_reblogs).freeze
|
||||
|
||||
# Depending on the cleanup policy, the query to discover the next
|
||||
# statuses to delete my get expensive if the account has a lot of old
|
||||
# statuses otherwise excluded from deletion by the other exceptions.
|
||||
#
|
||||
# Therefore, `EARLY_SEARCH_CUTOFF` is meant to be the maximum number of
|
||||
# old statuses to be considered for deletion prior to checking exceptions.
|
||||
#
|
||||
# This is used in `compute_cutoff_id` to provide a `max_id` to
|
||||
# `statuses_to_delete`.
|
||||
EARLY_SEARCH_CUTOFF = 5_000
|
||||
|
||||
belongs_to :account
|
||||
|
||||
validates :min_status_age, inclusion: { in: ALLOWED_MIN_STATUS_AGE }
|
||||
validates :min_favs, numericality: { greater_than_or_equal_to: 1, allow_nil: true }
|
||||
validates :min_reblogs, numericality: { greater_than_or_equal_to: 1, allow_nil: true }
|
||||
validate :validate_local_account
|
||||
|
||||
before_save :update_last_inspected
|
||||
|
||||
def statuses_to_delete(limit = 50, max_id = nil, min_id = nil)
|
||||
scope = account.statuses
|
||||
scope.merge!(old_enough_scope(max_id))
|
||||
scope = scope.where(Status.arel_table[:id].gteq(min_id)) if min_id.present?
|
||||
scope.merge!(without_popular_scope) unless min_favs.nil? && min_reblogs.nil?
|
||||
scope.merge!(without_direct_scope) if keep_direct?
|
||||
scope.merge!(without_pinned_scope) if keep_pinned?
|
||||
scope.merge!(without_poll_scope) if keep_polls?
|
||||
scope.merge!(without_media_scope) if keep_media?
|
||||
scope.merge!(without_self_fav_scope) if keep_self_fav?
|
||||
scope.merge!(without_self_bookmark_scope) if keep_self_bookmark?
|
||||
|
||||
scope.reorder(id: :asc).limit(limit)
|
||||
end
|
||||
|
||||
# This computes a toot id such that:
|
||||
# - the toot would be old enough to be candidate for deletion
|
||||
# - there are at most EARLY_SEARCH_CUTOFF toots between the last inspected toot and this one
|
||||
#
|
||||
# The idea is to limit expensive SQL queries when an account has lots of toots excluded from
|
||||
# deletion, while not starting anew on each run.
|
||||
def compute_cutoff_id
|
||||
min_id = last_inspected || 0
|
||||
max_id = Mastodon::Snowflake.id_at(min_status_age.seconds.ago, with_random: false)
|
||||
subquery = account.statuses.where(Status.arel_table[:id].gteq(min_id)).where(Status.arel_table[:id].lteq(max_id))
|
||||
subquery = subquery.select(:id).reorder(id: :asc).limit(EARLY_SEARCH_CUTOFF)
|
||||
|
||||
# We're textually interpolating a subquery here as ActiveRecord seem to not provide
|
||||
# a way to apply the limit to the subquery
|
||||
Status.connection.execute("SELECT MAX(id) FROM (#{subquery.to_sql}) t").values.first.first
|
||||
end
|
||||
|
||||
# The most important thing about `last_inspected` is that any toot older than it is guaranteed
|
||||
# not to be kept by the policy regardless of its age.
|
||||
def record_last_inspected(last_id)
|
||||
redis.set("account_cleanup:#{account.id}", last_id, ex: 1.week.seconds)
|
||||
end
|
||||
|
||||
def last_inspected
|
||||
redis.get("account_cleanup:#{account.id}")&.to_i
|
||||
end
|
||||
|
||||
def invalidate_last_inspected(status, action)
|
||||
last_value = last_inspected
|
||||
return if last_value.nil? || status.id > last_value || status.account_id != account_id
|
||||
|
||||
case action
|
||||
when :unbookmark
|
||||
return unless keep_self_bookmark?
|
||||
when :unfav
|
||||
return unless keep_self_fav?
|
||||
when :unpin
|
||||
return unless keep_pinned?
|
||||
end
|
||||
|
||||
record_last_inspected(status.id)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def update_last_inspected
|
||||
if EXCEPTION_BOOLS.map { |name| attribute_change_to_be_saved(name) }.compact.include?([true, false])
|
||||
# Policy has been widened in such a way that any previously-inspected status
|
||||
# may need to be deleted, so we'll have to start again.
|
||||
redis.del("account_cleanup:#{account.id}")
|
||||
end
|
||||
if EXCEPTION_THRESHOLDS.map { |name| attribute_change_to_be_saved(name) }.compact.any? { |old, new| old.present? && (new.nil? || new > old) }
|
||||
redis.del("account_cleanup:#{account.id}")
|
||||
end
|
||||
end
|
||||
|
||||
def validate_local_account
|
||||
errors.add(:account, :invalid) unless account&.local?
|
||||
end
|
||||
|
||||
def without_direct_scope
|
||||
Status.where.not(visibility: :direct)
|
||||
end
|
||||
|
||||
def old_enough_scope(max_id = nil)
|
||||
# Filtering on `id` rather than `min_status_age` ago will treat
|
||||
# non-snowflake statuses as older than they really are, but Mastodon
|
||||
# has switched to snowflake IDs significantly over 2 years ago anyway.
|
||||
max_id = [max_id, Mastodon::Snowflake.id_at(min_status_age.seconds.ago, with_random: false)].compact.min
|
||||
Status.where(Status.arel_table[:id].lteq(max_id))
|
||||
end
|
||||
|
||||
def without_self_fav_scope
|
||||
Status.where('NOT EXISTS (SELECT * FROM favourites fav WHERE fav.account_id = statuses.account_id AND fav.status_id = statuses.id)')
|
||||
end
|
||||
|
||||
def without_self_bookmark_scope
|
||||
Status.where('NOT EXISTS (SELECT * FROM bookmarks bookmark WHERE bookmark.account_id = statuses.account_id AND bookmark.status_id = statuses.id)')
|
||||
end
|
||||
|
||||
def without_pinned_scope
|
||||
Status.where('NOT EXISTS (SELECT * FROM status_pins pin WHERE pin.account_id = statuses.account_id AND pin.status_id = statuses.id)')
|
||||
end
|
||||
|
||||
def without_media_scope
|
||||
Status.where('NOT EXISTS (SELECT * FROM media_attachments media WHERE media.status_id = statuses.id)')
|
||||
end
|
||||
|
||||
def without_poll_scope
|
||||
Status.where(poll_id: nil)
|
||||
end
|
||||
|
||||
def without_popular_scope
|
||||
scope = Status.left_joins(:status_stat)
|
||||
scope = scope.where('COALESCE(status_stats.reblogs_count, 0) <= ?', min_reblogs) unless min_reblogs.nil?
|
||||
scope = scope.where('COALESCE(status_stats.favourites_count, 0) <= ?', min_favs) unless min_favs.nil?
|
||||
scope
|
||||
end
|
||||
end
|
@ -23,4 +23,12 @@ class Bookmark < ApplicationRecord
|
||||
before_validation do
|
||||
self.status = status.reblog if status&.reblog?
|
||||
end
|
||||
|
||||
after_destroy :invalidate_cleanup_info
|
||||
|
||||
def invalidate_cleanup_info
|
||||
return unless status&.account_id == account_id && account.local?
|
||||
|
||||
account.statuses_cleanup_policy&.invalidate_last_inspected(status, :unbookmark)
|
||||
end
|
||||
end
|
||||
|
@ -66,5 +66,8 @@ module AccountAssociations
|
||||
|
||||
# Follow recommendations
|
||||
has_one :follow_recommendation_suppression, inverse_of: :account, dependent: :destroy
|
||||
|
||||
# Account statuses cleanup policy
|
||||
has_one :statuses_cleanup_policy, class_name: 'AccountStatusesCleanupPolicy', inverse_of: :account, dependent: :destroy
|
||||
end
|
||||
end
|
||||
|
@ -28,6 +28,7 @@ class Favourite < ApplicationRecord
|
||||
|
||||
after_create :increment_cache_counters
|
||||
after_destroy :decrement_cache_counters
|
||||
after_destroy :invalidate_cleanup_info
|
||||
|
||||
private
|
||||
|
||||
@ -39,4 +40,10 @@ class Favourite < ApplicationRecord
|
||||
return if association(:status).loaded? && status.marked_for_destruction?
|
||||
status&.decrement_count!(:favourites_count)
|
||||
end
|
||||
|
||||
def invalidate_cleanup_info
|
||||
return unless status&.account_id == account_id && account.local?
|
||||
|
||||
account.statuses_cleanup_policy&.invalidate_last_inspected(status, :unfav)
|
||||
end
|
||||
end
|
||||
|
@ -15,4 +15,12 @@ class StatusPin < ApplicationRecord
|
||||
belongs_to :status
|
||||
|
||||
validates_with StatusPinValidator
|
||||
|
||||
after_destroy :invalidate_cleanup_info
|
||||
|
||||
def invalidate_cleanup_info
|
||||
return unless status&.account_id == account_id && account.local?
|
||||
|
||||
account.statuses_cleanup_policy&.invalidate_last_inspected(status, :unpin)
|
||||
end
|
||||
end
|
||||
|
27
app/services/account_statuses_cleanup_service.rb
Normal file
27
app/services/account_statuses_cleanup_service.rb
Normal file
@ -0,0 +1,27 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
class AccountStatusesCleanupService < BaseService
|
||||
# @param [AccountStatusesCleanupPolicy] account_policy
|
||||
# @param [Integer] budget
|
||||
# @return [Integer]
|
||||
def call(account_policy, budget = 50)
|
||||
return 0 unless account_policy.enabled?
|
||||
|
||||
cutoff_id = account_policy.compute_cutoff_id
|
||||
return 0 if cutoff_id.blank?
|
||||
|
||||
num_deleted = 0
|
||||
last_deleted = nil
|
||||
|
||||
account_policy.statuses_to_delete(budget, cutoff_id, account_policy.last_inspected).reorder(nil).find_each(order: :asc) do |status|
|
||||
status.discard
|
||||
RemovalWorker.perform_async(status.id, redraft: false)
|
||||
num_deleted += 1
|
||||
last_deleted = status.id
|
||||
end
|
||||
|
||||
account_policy.record_last_inspected(last_deleted.presence || cutoff_id)
|
||||
|
||||
num_deleted
|
||||
end
|
||||
end
|
45
app/views/statuses_cleanup/show.html.haml
Normal file
45
app/views/statuses_cleanup/show.html.haml
Normal file
@ -0,0 +1,45 @@
|
||||
- content_for :page_title do
|
||||
= t('settings.statuses_cleanup')
|
||||
|
||||
- content_for :heading_actions do
|
||||
= button_tag t('generic.save_changes'), class: 'button', form: 'edit_policy'
|
||||
|
||||
= simple_form_for @policy, url: statuses_cleanup_path, method: :put, html: { id: 'edit_policy' } do |f|
|
||||
|
||||
.fields-row
|
||||
.fields-row__column.fields-row__column-6.fields-group
|
||||
= f.input :enabled, as: :boolean, wrapper: :with_label, label: t('statuses_cleanup.enabled'), hint: t('statuses_cleanup.enabled_hint')
|
||||
.fields-row__column.fields-row__column-6.fields-group
|
||||
= f.input :min_status_age, wrapper: :with_label, label: t('statuses_cleanup.min_age_label'), collection: AccountStatusesCleanupPolicy::ALLOWED_MIN_STATUS_AGE.map(&:to_i), label_method: lambda { |i| t("statuses_cleanup.min_age.#{i}") }, include_blank: false, hint: false
|
||||
|
||||
.flash-message= t('statuses_cleanup.explanation')
|
||||
|
||||
%h4= t('statuses_cleanup.exceptions')
|
||||
|
||||
.fields-row
|
||||
.fields-row__column.fields-row__column-6.fields-group
|
||||
= f.input :keep_pinned, wrapper: :with_label, label: t('statuses_cleanup.keep_pinned'), hint: t('statuses_cleanup.keep_pinned_hint')
|
||||
.fields-row__column.fields-row__column-6.fields-group
|
||||
= f.input :keep_direct, wrapper: :with_label, label: t('statuses_cleanup.keep_direct'), hint: t('statuses_cleanup.keep_direct_hint')
|
||||
|
||||
.fields-row
|
||||
.fields-row__column.fields-row__column-6.fields-group
|
||||
= f.input :keep_self_fav, wrapper: :with_label, label: t('statuses_cleanup.keep_self_fav'), hint: t('statuses_cleanup.keep_self_fav_hint')
|
||||
.fields-row__column.fields-row__column-6.fields-group
|
||||
= f.input :keep_self_bookmark, wrapper: :with_label, label: t('statuses_cleanup.keep_self_bookmark'), hint: t('statuses_cleanup.keep_self_bookmark_hint')
|
||||
|
||||
.fields-row
|
||||
.fields-row__column.fields-row__column-6.fields-group
|
||||
= f.input :keep_polls, wrapper: :with_label, label: t('statuses_cleanup.keep_polls'), hint: t('statuses_cleanup.keep_polls_hint')
|
||||
.fields-row__column.fields-row__column-6.fields-group
|
||||
= f.input :keep_media, wrapper: :with_label, label: t('statuses_cleanup.keep_media'), hint: t('statuses_cleanup.keep_media_hint')
|
||||
|
||||
%h4= t('statuses_cleanup.interaction_exceptions')
|
||||
|
||||
.fields-row
|
||||
.fields-row__column.fields-row__column-6.fields-group
|
||||
= f.input :min_favs, wrapper: :with_label, label: t('statuses_cleanup.min_favs'), hint: t('statuses_cleanup.min_favs_hint'), input_html: { min: 1, placeholder: t('statuses_cleanup.ignore_favs') }
|
||||
.fields-row__column.fields-row__column-6.fields-group
|
||||
= f.input :min_reblogs, wrapper: :with_label, label: t('statuses_cleanup.min_reblogs'), hint: t('statuses_cleanup.min_reblogs_hint'), input_html: { min: 1, placeholder: t('statuses_cleanup.ignore_reblogs') }
|
||||
|
||||
.flash-message= t('statuses_cleanup.interaction_exceptions_explanation')
|
96
app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb
Normal file
96
app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb
Normal file
@ -0,0 +1,96 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
class Scheduler::AccountsStatusesCleanupScheduler
|
||||
include Sidekiq::Worker
|
||||
|
||||
# This limit is mostly to be nice to the fediverse at large and not
|
||||
# generate too much traffic.
|
||||
# This also helps limiting the running time of the scheduler itself.
|
||||
MAX_BUDGET = 50
|
||||
|
||||
# This is an attempt to spread the load across instances, as various
|
||||
# accounts are likely to have various followers.
|
||||
PER_ACCOUNT_BUDGET = 5
|
||||
|
||||
# This is an attempt to limit the workload generated by status removal
|
||||
# jobs to something the particular instance can handle.
|
||||
PER_THREAD_BUDGET = 5
|
||||
|
||||
# Those avoid loading an instance that is already under load
|
||||
MAX_DEFAULT_SIZE = 2
|
||||
MAX_DEFAULT_LATENCY = 5
|
||||
MAX_PUSH_SIZE = 5
|
||||
MAX_PUSH_LATENCY = 10
|
||||
# 'pull' queue has lower priority jobs, and it's unlikely that pushing
|
||||
# deletes would cause much issues with this queue if it didn't cause issues
|
||||
# with default and push. Yet, do not enqueue deletes if the instance is
|
||||
# lagging behind too much.
|
||||
MAX_PULL_SIZE = 500
|
||||
MAX_PULL_LATENCY = 300
|
||||
|
||||
# This is less of an issue in general, but deleting old statuses is likely
|
||||
# to cause delivery errors, and thus increase the number of jobs to be retried.
|
||||
# This doesn't directly translate to load, but connection errors and a high
|
||||
# number of dead instances may lead to this spiraling out of control if
|
||||
# unchecked.
|
||||
MAX_RETRY_SIZE = 50_000
|
||||
|
||||
sidekiq_options retry: 0, lock: :until_executed
|
||||
|
||||
def perform
|
||||
return if under_load?
|
||||
|
||||
budget = compute_budget
|
||||
first_policy_id = last_processed_id
|
||||
|
||||
loop do
|
||||
num_processed_accounts = 0
|
||||
|
||||
scope = AccountStatusesCleanupPolicy.where(enabled: true)
|
||||
scope.where(Account.arel_table[:id].gt(first_policy_id)) if first_policy_id.present?
|
||||
scope.find_each(order: :asc) do |policy|
|
||||
num_deleted = AccountStatusesCleanupService.new.call(policy, [budget, PER_ACCOUNT_BUDGET].min)
|
||||
num_processed_accounts += 1 unless num_deleted.zero?
|
||||
budget -= num_deleted
|
||||
if budget.zero?
|
||||
save_last_processed_id(policy.id)
|
||||
break
|
||||
end
|
||||
end
|
||||
|
||||
# The idea here is to loop through all policies at least once until the budget is exhausted
|
||||
# and start back after the last processed account otherwise
|
||||
break if budget.zero? || (num_processed_accounts.zero? && first_policy_id.nil?)
|
||||
first_policy_id = nil
|
||||
end
|
||||
end
|
||||
|
||||
def compute_budget
|
||||
threads = Sidekiq::ProcessSet.new.filter { |x| x['queues'].include?('push') }.map { |x| x['concurrency'] }.sum
|
||||
[PER_THREAD_BUDGET * threads, MAX_BUDGET].min
|
||||
end
|
||||
|
||||
def under_load?
|
||||
return true if Sidekiq::Stats.new.retry_size > MAX_RETRY_SIZE
|
||||
queue_under_load?('default', MAX_DEFAULT_SIZE, MAX_DEFAULT_LATENCY) || queue_under_load?('push', MAX_PUSH_SIZE, MAX_PUSH_LATENCY) || queue_under_load?('pull', MAX_PULL_SIZE, MAX_PULL_LATENCY)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def queue_under_load?(name, max_size, max_latency)
|
||||
queue = Sidekiq::Queue.new(name)
|
||||
queue.size > max_size || queue.latency > max_latency
|
||||
end
|
||||
|
||||
def last_processed_id
|
||||
Redis.current.get('account_statuses_cleanup_scheduler:last_account_id')
|
||||
end
|
||||
|
||||
def save_last_processed_id(id)
|
||||
if id.nil?
|
||||
Redis.current.del('account_statuses_cleanup_scheduler:last_account_id')
|
||||
else
|
||||
Redis.current.set('account_statuses_cleanup_scheduler:last_account_id', id, ex: 1.hour.seconds)
|
||||
end
|
||||
end
|
||||
end
|
Reference in New Issue
Block a user