Fix resolving accounts sometimes creating duplicate records for a given AP id (#15364)
* Fix ResolveAccountService accepting mismatching acct: URI * Set attributes that should be updated regardless of suspension * Fix key fetching * Automatically merge remote accounts with duplicate `uri` * Add tests * Add "tootctl accounts fix-duplicates" Finds duplicate accounts sharing a same ActivityPub `id`, re-fetch them and merge them under the canonical `acct:` URI. Co-authored-by: Claire <claire.github-309c@sitedethib.com>
This commit is contained in:
		| @@ -28,7 +28,7 @@ class ActivityPub::FetchRemoteAccountService < BaseService | ||||
|  | ||||
|     return unless only_key || verified_webfinger? | ||||
|  | ||||
|     ActivityPub::ProcessAccountService.new.call(@username, @domain, @json, only_key: only_key) | ||||
|     ActivityPub::ProcessAccountService.new.call(@username, @domain, @json, only_key: only_key, verified_webfinger: !only_key) | ||||
|   rescue Oj::ParseError | ||||
|     nil | ||||
|   end | ||||
|   | ||||
| @@ -28,6 +28,8 @@ class ActivityPub::ProcessAccountService < BaseService | ||||
|         update_account | ||||
|         process_tags | ||||
|         process_attachments | ||||
|  | ||||
|         process_duplicate_accounts! if @options[:verified_webfinger] | ||||
|       else | ||||
|         raise Mastodon::RaceConditionError | ||||
|       end | ||||
| @@ -69,34 +71,42 @@ class ActivityPub::ProcessAccountService < BaseService | ||||
|     @account.protocol            = :activitypub | ||||
|  | ||||
|     set_suspension! | ||||
|     set_immediate_protocol_attributes! | ||||
|     set_fetchable_key! unless @account.suspended? && @account.suspension_origin_local? | ||||
|     set_immediate_attributes! unless @account.suspended? | ||||
|     set_fetchable_attributes! unless @options[:only_keys] || @account.suspended? | ||||
|     set_fetchable_attributes! unless @options[:only_key] || @account.suspended? | ||||
|  | ||||
|     @account.save_with_optional_media! | ||||
|   end | ||||
|  | ||||
|   def set_immediate_attributes! | ||||
|   def set_immediate_protocol_attributes! | ||||
|     @account.inbox_url               = @json['inbox'] || '' | ||||
|     @account.outbox_url              = @json['outbox'] || '' | ||||
|     @account.shared_inbox_url        = (@json['endpoints'].is_a?(Hash) ? @json['endpoints']['sharedInbox'] : @json['sharedInbox']) || '' | ||||
|     @account.followers_url           = @json['followers'] || '' | ||||
|     @account.featured_collection_url = @json['featured'] || '' | ||||
|     @account.devices_url             = @json['devices'] || '' | ||||
|     @account.url                     = url || @uri | ||||
|     @account.uri                     = @uri | ||||
|     @account.actor_type              = actor_type | ||||
|   end | ||||
|  | ||||
|   def set_immediate_attributes! | ||||
|     @account.featured_collection_url = @json['featured'] || '' | ||||
|     @account.devices_url             = @json['devices'] || '' | ||||
|     @account.display_name            = @json['name'] || '' | ||||
|     @account.note                    = @json['summary'] || '' | ||||
|     @account.locked                  = @json['manuallyApprovesFollowers'] || false | ||||
|     @account.fields                  = property_values || {} | ||||
|     @account.also_known_as           = as_array(@json['alsoKnownAs'] || []).map { |item| value_or_id(item) } | ||||
|     @account.actor_type              = actor_type | ||||
|     @account.discoverable            = @json['discoverable'] || false | ||||
|   end | ||||
|  | ||||
|   def set_fetchable_key! | ||||
|     @account.public_key        = public_key || '' | ||||
|   end | ||||
|  | ||||
|   def set_fetchable_attributes! | ||||
|     @account.avatar_remote_url = image_url('icon')  || '' unless skip_download? | ||||
|     @account.header_remote_url = image_url('image') || '' unless skip_download? | ||||
|     @account.public_key        = public_key || '' | ||||
|     @account.statuses_count    = outbox_total_items    if outbox_total_items.present? | ||||
|     @account.following_count   = following_total_items if following_total_items.present? | ||||
|     @account.followers_count   = followers_total_items if followers_total_items.present? | ||||
| @@ -140,6 +150,12 @@ class ActivityPub::ProcessAccountService < BaseService | ||||
|     VerifyAccountLinksWorker.perform_async(@account.id) | ||||
|   end | ||||
|  | ||||
|   def process_duplicate_accounts! | ||||
|     return unless Account.where(uri: @account.uri).where.not(id: @account.id).exists? | ||||
|  | ||||
|     AccountMergingWorker.perform_async(@account.id) | ||||
|   end | ||||
|  | ||||
|   def actor_type | ||||
|     if @json['type'].is_a?(Array) | ||||
|       @json['type'].find { |type| ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES.include?(type) } | ||||
|   | ||||
| @@ -49,7 +49,7 @@ class ResolveAccountService < BaseService | ||||
|     # Now it is certain, it is definitely a remote account, and it | ||||
|     # either needs to be created, or updated from fresh data | ||||
|  | ||||
|     process_account! | ||||
|     fetch_account! | ||||
|   rescue Webfinger::Error, Oj::ParseError => e | ||||
|     Rails.logger.debug "Webfinger query for #{@uri} failed: #{e}" | ||||
|     nil | ||||
| @@ -104,16 +104,12 @@ class ResolveAccountService < BaseService | ||||
|     acct.gsub(/\Aacct:/, '').split('@') | ||||
|   end | ||||
|  | ||||
|   def process_account! | ||||
|   def fetch_account! | ||||
|     return unless activitypub_ready? | ||||
|  | ||||
|     RedisLock.acquire(lock_options) do |lock| | ||||
|       if lock.acquired? | ||||
|         @account = Account.find_remote(@username, @domain) | ||||
|  | ||||
|         next if actor_json.nil? | ||||
|  | ||||
|         @account = ActivityPub::ProcessAccountService.new.call(@username, @domain, actor_json) | ||||
|         @account = ActivityPub::FetchRemoteAccountService.new.call(actor_url) | ||||
|       else | ||||
|         raise Mastodon::RaceConditionError | ||||
|       end | ||||
| @@ -136,13 +132,6 @@ class ResolveAccountService < BaseService | ||||
|     @actor_url ||= @webfinger.link('self', 'href') | ||||
|   end | ||||
|  | ||||
|   def actor_json | ||||
|     return @actor_json if defined?(@actor_json) | ||||
|  | ||||
|     json        = fetch_resource(actor_url, false) | ||||
|     @actor_json = supported_context?(json) && equals_or_includes_any?(json['type'], ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES) ? json : nil | ||||
|   end | ||||
|  | ||||
|   def gone_from_origin? | ||||
|     @gone | ||||
|   end | ||||
|   | ||||
							
								
								
									
										18
									
								
								app/workers/account_merging_worker.rb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								app/workers/account_merging_worker.rb
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| # frozen_string_literal: true | ||||
|  | ||||
| class AccountMergingWorker | ||||
|   include Sidekiq::Worker | ||||
|  | ||||
|   sidekiq_options queue: 'pull' | ||||
|  | ||||
|   def perform(account_id) | ||||
|     account = Account.find(account_id) | ||||
|  | ||||
|     return true if account.nil? || account.local? | ||||
|  | ||||
|     Account.where(uri: account.uri).where.not(id: account.id).find_each do |duplicate| | ||||
|       account.merge_with!(duplicate) | ||||
|       duplicate.destroy | ||||
|     end | ||||
|   end | ||||
| end | ||||
| @@ -236,6 +236,25 @@ module Mastodon | ||||
|       say('OK', :green) | ||||
|     end | ||||
|  | ||||
|     desc 'fix-duplicates', 'Find duplicate remote accounts and merge them' | ||||
|     option :dry_run, type: :boolean | ||||
|     long_desc <<-LONG_DESC | ||||
|       Merge known remote accounts sharing an ActivityPub actor identifier. | ||||
|  | ||||
|       Such duplicates can occur when a remote server admin misconfigures their | ||||
|       domain configuration. | ||||
|     LONG_DESC | ||||
|     def fix_duplicates | ||||
|       Account.remote.select(:uri, 'count(*)').group(:uri).having('count(*) > 1').pluck_each(:uri) do |uri| | ||||
|         say("Duplicates found for #{uri}") | ||||
|         begin | ||||
|           ActivityPub::FetchRemotAccountService.new.call(uri) unless options[:dry_run] | ||||
|         rescue => e | ||||
|           say("Error processing #{uri}: #{e}", :red) | ||||
|         end | ||||
|       end | ||||
|     end | ||||
|  | ||||
|     desc 'backup USERNAME', 'Request a backup for a user' | ||||
|     long_desc <<-LONG_DESC | ||||
|       Request a new backup for an account with a given USERNAME. | ||||
|   | ||||
| @@ -60,7 +60,22 @@ RSpec.describe ResolveAccountService, type: :service do | ||||
|  | ||||
|   context 'with a legitimate webfinger redirection' do | ||||
|     before do | ||||
|       webfinger = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo' }] } | ||||
|       webfinger = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] } | ||||
|       stub_request(:get, 'https://redirected.example.com/.well-known/webfinger?resource=acct:Foo@redirected.example.com').to_return(body: Oj.dump(webfinger), headers: { 'Content-Type': 'application/jrd+json' }) | ||||
|     end | ||||
|  | ||||
|     it 'returns new remote account' do | ||||
|       account = subject.call('Foo@redirected.example.com') | ||||
|  | ||||
|       expect(account.activitypub?).to eq true | ||||
|       expect(account.acct).to eq 'foo@ap.example.com' | ||||
|       expect(account.inbox_url).to eq 'https://ap.example.com/users/foo/inbox' | ||||
|     end | ||||
|   end | ||||
|  | ||||
|   context 'with a misconfigured redirection' do | ||||
|     before do | ||||
|       webfinger = { subject: 'acct:Foo@redirected.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] } | ||||
|       stub_request(:get, 'https://redirected.example.com/.well-known/webfinger?resource=acct:Foo@redirected.example.com').to_return(body: Oj.dump(webfinger), headers: { 'Content-Type': 'application/jrd+json' }) | ||||
|     end | ||||
|  | ||||
| @@ -75,9 +90,9 @@ RSpec.describe ResolveAccountService, type: :service do | ||||
|  | ||||
|   context 'with too many webfinger redirections' do | ||||
|     before do | ||||
|       webfinger = { subject: 'acct:foo@evil.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo' }] } | ||||
|       webfinger = { subject: 'acct:foo@evil.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] } | ||||
|       stub_request(:get, 'https://redirected.example.com/.well-known/webfinger?resource=acct:Foo@redirected.example.com').to_return(body: Oj.dump(webfinger), headers: { 'Content-Type': 'application/jrd+json' }) | ||||
|       webfinger2 = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo' }] } | ||||
|       webfinger2 = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] } | ||||
|       stub_request(:get, 'https://evil.example.com/.well-known/webfinger?resource=acct:foo@evil.example.com').to_return(body: Oj.dump(webfinger2), headers: { 'Content-Type': 'application/jrd+json' }) | ||||
|     end | ||||
|  | ||||
| @@ -111,6 +126,41 @@ RSpec.describe ResolveAccountService, type: :service do | ||||
|     end | ||||
|   end | ||||
|  | ||||
|   context 'with an already-known actor changing acct: URI' do | ||||
|     let!(:duplicate) { Fabricate(:account, username: 'foo', domain: 'old.example.com', uri: 'https://ap.example.com/users/foo') } | ||||
|     let!(:status)    { Fabricate(:status, account: duplicate, text: 'foo') } | ||||
|  | ||||
|     it 'returns new remote account' do | ||||
|       account = subject.call('foo@ap.example.com') | ||||
|  | ||||
|       expect(account.activitypub?).to eq true | ||||
|       expect(account.domain).to eq 'ap.example.com' | ||||
|       expect(account.inbox_url).to eq 'https://ap.example.com/users/foo/inbox' | ||||
|       expect(account.uri).to eq 'https://ap.example.com/users/foo' | ||||
|     end | ||||
|  | ||||
|     it 'merges accounts' do | ||||
|       account = subject.call('foo@ap.example.com') | ||||
|  | ||||
|       expect(status.reload.account_id).to eq account.id | ||||
|       expect(Account.where(uri: account.uri).count).to eq 1 | ||||
|     end | ||||
|   end | ||||
|  | ||||
|   context 'with an already-known acct: URI changing ActivityPub id' do | ||||
|     let!(:old_account) { Fabricate(:account, username: 'foo', domain: 'ap.example.com', uri: 'https://old.example.com/users/foo', last_webfingered_at: nil) } | ||||
|     let!(:status)    { Fabricate(:status, account: old_account, text: 'foo') } | ||||
|  | ||||
|     it 'returns new remote account' do | ||||
|       account = subject.call('foo@ap.example.com') | ||||
|  | ||||
|       expect(account.activitypub?).to eq true | ||||
|       expect(account.domain).to eq 'ap.example.com' | ||||
|       expect(account.inbox_url).to eq 'https://ap.example.com/users/foo/inbox' | ||||
|       expect(account.uri).to eq 'https://ap.example.com/users/foo' | ||||
|     end | ||||
|   end | ||||
|  | ||||
|   it 'processes one remote account at a time using locks' do | ||||
|     wait_for_start = true | ||||
|     fail_occurred  = false | ||||
|   | ||||
		Reference in New Issue
	
	Block a user