Merge branch 'main' into glitch-soc/merge-upstream

Conflicts:
- `README.md`:
  Upstream updated the copyright year; we don't mention it, so we kept our version.
- `app/controllers/admin/dashboard_controller.rb`:
  Not really a conflict: the upstream change (removing the spam checker) was too
  close to glitch-soc changes. Ported upstream changes.
- `app/models/form/admin_settings.rb`:
  Same.
- `app/services/remove_status_service.rb`:
  Same.
- `app/views/admin/settings/edit.html.haml`:
  Same.
- `config/settings.yml`:
  Same.
- `config/environments/production.rb`:
  Not a real conflict, upstream added a default HTTP header, but we have
  extra headers in glitch-soc.
  Added the header.
This commit is contained in:
Claire
2021-04-20 12:17:14 +02:00
100 changed files with 1904 additions and 1077 deletions

View File

@@ -0,0 +1,25 @@
# frozen_string_literal: true
# Gathers the inboxes reachable from a given account: those of its followers,
# those of the accounts that filed reports targeting it, and those of every
# enabled relay.
class AccountReachFinder
  # @param account the account whose reach is computed; it must respond to
  #   `followers` and `targeted_reports`
  def initialize(account)
    @account = account
  end

  # @return the concatenation of follower, reporter and relay inboxes with
  #   duplicates removed (first occurrence wins)
  def inboxes
    account_side = followers_inboxes + reporters_inboxes
    (account_side + relay_inboxes).uniq
  end

  private

  # Inboxes of the accounts following @account.
  def followers_inboxes
    followers = @account.followers
    followers.inboxes
  end

  # Inboxes of the accounts referenced by the `account_id` of the reports
  # that target @account.
  def reporters_inboxes
    reporter_ids = @account.targeted_reports.select(:account_id)
    Account.where(id: reporter_ids).inboxes
  end

  # Inbox URLs of all enabled relays.
  def relay_inboxes
    active_relays = Relay.enabled
    active_relays.pluck(:inbox_url)
  end
end

View File

@@ -88,7 +88,6 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
resolve_thread(@status)
fetch_replies(@status)
check_for_spam
distribute(@status)
forward_for_reply
end
@@ -498,10 +497,6 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
Tombstone.exists?(uri: object_uri)
end
# Hands @status to SpamCheck.perform; the return value is whatever that call
# returns and is not inspected here.
def check_for_spam
SpamCheck.perform(@status)
end
def forward_for_reply
return unless @status.distributable? && @json['signature'].present? && reply_to_local?

View File

@@ -10,6 +10,8 @@ class ActivityPub::Activity::Flag < ActivityPub::Activity
target_accounts.each do |target_account|
target_statuses = target_statuses_by_account[target_account.id]
next if target_account.suspended?
ReportService.new.call(
@account,
target_account,

View File

@@ -7,7 +7,6 @@ class Admin::SystemCheck::SidekiqProcessCheck < Admin::SystemCheck::BaseCheck
mailers
pull
scheduler
ingress
).freeze
def pass?

View File

@@ -4,6 +4,8 @@ module ApplicationExtension
extend ActiveSupport::Concern
included do
validates :website, url: true, if: :website?
validates :name, length: { maximum: 60 }
validates :website, url: true, length: { maximum: 2_000 }, if: :website?
validates :redirect_uri, length: { maximum: 2_000 }
end
end

View File

@@ -118,7 +118,7 @@ class Formatter
end
def format_field(account, str, **options)
html = account.local? ? encode_and_link_urls(str, me: true) : reformat(str)
html = account.local? ? encode_and_link_urls(str, me: true, with_domain: true) : reformat(str)
html = encode_custom_emojis(html, account.emojis, options[:autoplay]) if options[:custom_emojify]
html.html_safe # rubocop:disable Rails/OutputSafety
end
@@ -187,7 +187,7 @@ class Formatter
elsif entity[:hashtag]
link_to_hashtag(entity)
elsif entity[:screen_name]
link_to_mention(entity, accounts)
link_to_mention(entity, accounts, options)
end
end
end
@@ -352,22 +352,37 @@ class Formatter
encode(entity[:url])
end
def link_to_mention(entity, linkable_accounts)
def link_to_mention(entity, linkable_accounts, options = {})
acct = entity[:screen_name]
return link_to_account(acct) unless linkable_accounts
return link_to_account(acct, options) unless linkable_accounts
account = linkable_accounts.find { |item| TagManager.instance.same_acct?(item.acct, acct) }
account ? mention_html(account) : "@#{encode(acct)}"
same_username_hits = 0
account = nil
username, domain = acct.split('@')
domain = nil if TagManager.instance.local_domain?(domain)
linkable_accounts.each do |item|
same_username = item.username.casecmp(username).zero?
same_domain = item.domain.nil? ? domain.nil? : item.domain.casecmp(domain)&.zero?
if same_username && !same_domain
same_username_hits += 1
elsif same_username && same_domain
account = item
end
end
account ? mention_html(account, with_domain: same_username_hits.positive? || options[:with_domain]) : "@#{encode(acct)}"
end
def link_to_account(acct)
def link_to_account(acct, options = {})
username, domain = acct.split('@')
domain = nil if TagManager.instance.local_domain?(domain)
account = EntityCache.instance.mention(username, domain)
account ? mention_html(account) : "@#{encode(acct)}"
account ? mention_html(account, with_domain: options[:with_domain]) : "@#{encode(acct)}"
end
def link_to_hashtag(entity)
@@ -388,7 +403,7 @@ class Formatter
"<a href=\"#{encode(tag_url(tag))}\" class=\"mention hashtag\" rel=\"tag\">#<span>#{encode(tag)}</span></a>"
end
def mention_html(account)
"<span class=\"h-card\"><a href=\"#{encode(ActivityPub::TagManager.instance.url_for(account))}\" class=\"u-url mention\">@<span>#{encode(account.username)}</span></a></span>"
# Builds the h-card markup for a mention of `account`.
#
# @param account the mentioned account; must respond to `username` and `pretty_acct`
# @param with_domain [Boolean] when true the visible handle is `account.pretty_acct`,
#   otherwise just `account.username`
# @return [String] the mention markup, with the URL and the handle passed through `encode`
def mention_html(account, with_domain: false)
  profile_url = encode(ActivityPub::TagManager.instance.url_for(account))
  handle = encode(with_domain ? account.pretty_acct : account.username)

  "<span class=\"h-card\"><a href=\"#{profile_url}\" class=\"u-url mention\">@<span>#{handle}</span></a></span>"
end
end

View File

@@ -28,10 +28,14 @@ class PotentialFriendshipTracker
redis.zrem("interactions:#{account_id}", target_account_id)
end
def get(account_id, limit: 20, offset: 0)
account_ids = redis.zrevrange("interactions:#{account_id}", offset, limit)
return [] if account_ids.empty?
Account.searchable.where(id: account_ids)
# Returns the accounts recorded in `account`'s interaction set, highest
# score first.
#
# @param account an object responding to `id`; selects the
#   "interactions:<id>" sorted set
# @param limit [Integer] maximum number of accounts to return
# @return [Array] at most `limit` searchable accounts, in Redis rank order;
#   empty when `limit` is below 1 or nothing is recorded
def get(account, limit)
  # Decide this before talking to Redis: a non-positive limit can never
  # produce results, and a negative stop index would make ZREVRANGE count
  # from the end of the set.
  return [] if limit < 1

  # ZREVRANGE's stop index is inclusive, so `limit` members span 0..limit - 1.
  account_ids = redis.zrevrange("interactions:#{account.id}", 0, limit - 1)
  return [] if account_ids.empty?

  # The lookup is keyed by record id so the Redis ranking can be re-applied;
  # members that are not searchable accounts are dropped.
  accounts = Account.searchable.where(id: account_ids).index_by(&:id)
  account_ids.filter_map { |id| accounts[id.to_i] }
end
end
end

View File

@@ -1,198 +0,0 @@
# frozen_string_literal: true
# Duplicate-content check for a single status.
#
# A trail of text digests is kept per account in a Redis sorted set. A new
# status is considered spam when at least THRESHOLD remembered digests match
# its own digest; it is then reported automatically, otherwise its digest is
# appended to the trail. `SpamCheck.perform` is the entry point that ties
# these steps together.
class SpamCheck
include Redisable
include ActionView::Helpers::TextHelper
# Threshold over which two Nilsimsa values are considered
# to refer to the same text
NILSIMSA_COMPARE_THRESHOLD = 95
# Nilsimsa doesn't work well on small inputs, so below
# this size, we check only for exact matches with MD5
NILSIMSA_MIN_SIZE = 10
# How long to keep the trail of digests between updates,
# there is no reason to store it forever
EXPIRE_SET_AFTER = 1.week.seconds
# How many digests to keep in an account's trail. If it's
# too small, spam could rotate around different message templates
MAX_TRAIL_SIZE = 10
# How many detected duplicates to allow through before
# considering the message as spam
THRESHOLD = 5
# @param status the status to examine; its `account` is the one whose
#   trail is consulted and updated
def initialize(status)
@account = status.account
@status = status
end
# Whether the check should not run at all for this status: the feature is
# disabled, the account is already flagged or trusted, or the status
# contains no unsolicited mention or is a solicited reply.
def skip?
disabled? || already_flagged? || trusted? || no_unsolicited_mentions? || solicited_reply?
end
# Whether enough remembered digests match this status.
#
# Returns false when there is no text to hash. Otherwise only trail entries
# produced by the same algorithm are compared: Nilsimsa entries match when
# their compare value reaches NILSIMSA_COMPARE_THRESHOLD, MD5 entries when
# the digests are equal.
def spam?
if insufficient_data?
false
elsif nilsimsa?
digests_over_threshold?('nilsimsa') { |_, other_digest| nilsimsa_compare_value(digest, other_digest) >= NILSIMSA_COMPARE_THRESHOLD }
else
digests_over_threshold?('md5') { |_, other_digest| other_digest == digest }
end
end
# Files the automated report for this status.
def flag!
auto_report_status!
end
# Appends this status' digest to the account's trail, trims the trail to
# the MAX_TRAIL_SIZE highest-scored entries and refreshes its expiry.
def remember!
# The scores in sorted sets don't actually have enough bits to hold an exact
# value of our snowflake IDs, so we use it only for its ordering property. To
# get the correct status ID back, we have to save it in the string value
redis.zadd(redis_key, @status.id, digest_with_algorithm)
redis.zremrangebyrank(redis_key, 0, -(MAX_TRAIL_SIZE + 1))
redis.expire(redis_key, EXPIRE_SET_AFTER)
end
# Deletes the account's whole trail.
def reset!
redis.del(redis_key)
end
# The normalized text that gets hashed (memoized).
#
# Built from the status text with mentions removed and, for non-local
# statuses, tags stripped; the spoiler text is prepended, then the result is
# NFKC-normalized, downcased and has its whitespace collapsed.
def hashable_text
return @hashable_text if defined?(@hashable_text)
@hashable_text = @status.text
@hashable_text = remove_mentions(@hashable_text)
@hashable_text = strip_tags(@hashable_text) unless @status.local?
@hashable_text = normalize_unicode(@status.spoiler_text + ' ' + @hashable_text)
@hashable_text = remove_whitespace(@hashable_text)
end
# True when nothing is left to hash after normalization.
def insufficient_data?
hashable_text.blank?
end
# Hex digest of the hashable text (memoized): Nilsimsa for texts longer
# than NILSIMSA_MIN_SIZE, MD5 otherwise.
def digest
@digest ||= begin
if nilsimsa?
Nilsimsa.new(hashable_text).hexdigest
else
Digest::MD5.hexdigest(hashable_text)
end
end
end
# The trail entry for this status: "<algorithm>:<digest>:<status id>".
def digest_with_algorithm
if nilsimsa?
['nilsimsa', digest, @status.id].join(':')
else
['md5', digest, @status.id].join(':')
end
end
class << self
# Entry point: does nothing when the check is skipped; otherwise flags the
# status if it is spam and remembers its digest if it is not.
def perform(status)
spam_check = new(status)
return if spam_check.skip?
if spam_check.spam?
spam_check.flag!
else
spam_check.remember!
end
end
end
private
# True unless the `spam_check_enabled` setting is truthy.
def disabled?
!Setting.spam_check_enabled
end
# Removes mentions from `text`.
#
# Local statuses: everything matching Account::MENTION_RE is deleted.
# Other statuses: the text is parsed as an HTML fragment and every <a>
# element whose href is the URL of a mentioned account is unlinked.
def remove_mentions(text)
return text.gsub(Account::MENTION_RE, '') if @status.local?
Nokogiri::HTML.fragment(text).tap do |html|
mentions = @status.mentions.map { |mention| ActivityPub::TagManager.instance.url_for(mention.account) }
html.traverse do |element|
element.unlink if element.name == 'a' && mentions.include?(element['href'])
end
end.to_s
end
# NFKC-normalizes and downcases `text`.
def normalize_unicode(text)
text.unicode_normalize(:nfkc).downcase
end
# Collapses every whitespace run into one space and strips both ends.
def remove_whitespace(text)
text.gsub(/\s+/, ' ').strip
end
# Reports @account on behalf of Account.representative.
#
# Status IDs are attached only when the status is distributable: the
# matching public/unlisted statuses from the trail plus this status. When it
# is not distributable `status_ids` stays nil and nil is passed along.
def auto_report_status!
status_ids = Status.where(visibility: %i(public unlisted)).where(id: matching_status_ids).pluck(:id) + [@status.id] if @status.distributable?
ReportService.new.call(Account.representative, @account, status_ids: status_ids, comment: I18n.t('spam_check.spam_detected'))
end
# True when the account is silenced or has an unresolved report filed by
# account -99.
# NOTE(review): -99 is presumably the ID of the representative account used
# by auto_report_status! — confirm.
def already_flagged?
@account.silenced? || @account.targeted_reports.unresolved.where(account_id: -99).exists?
end
# True when the account's trust level is above :untrusted, or the account
# is local and belongs to a staff user.
def trusted?
@account.trust_level > Account::TRUST_LEVELS[:untrusted] || (@account.local? && @account.user_staff?)
end
# True when every mention is silent, is between two non-local accounts, or
# targets an account that follows the author (also true with no mentions).
def no_unsolicited_mentions?
@status.mentions.all? { |mention| mention.silent? || (!@account.local? && !mention.account.local?) || mention.account.following?(@account) }
end
# True when the status has a thread and that thread mentions the author.
def solicited_reply?
!@status.thread.nil? && @status.thread.mentions.where(account: @account).exists?
end
# Compare value of two hex-encoded Nilsimsa digests.
#
# Both digests are unpacked to raw bytes, then the first 32 bytes are XORed
# pairwise and the Nilsimsa::POPC entries for the results are summed.
# NOTE(review): POPC is presumably a per-byte bit-count table — confirm
# against the nilsimsa gem.
def nilsimsa_compare_value(first, second)
first = [first].pack('H*')
second = [second].pack('H*')
bits = 0
0.upto(31) do |i|
bits += Nilsimsa::POPC[255 & (first[i].ord ^ second[i].ord)].ord
end
128 - bits # -128 <= Nilsimsa Compare Value <= 128
end
# Whether the hashable text is long enough to use Nilsimsa instead of MD5.
def nilsimsa?
hashable_text.size > NILSIMSA_MIN_SIZE
end
# Every entry currently in the account's trail.
def other_digests
redis.zrange(redis_key, 0, -1)
end
# Counts the trail entries whose algorithm equals `filter_algorithm` and for
# which the block returns a truthy value, and tells whether that count
# reaches THRESHOLD. The block receives (algorithm, digest, status_id), all
# as strings split out of the stored entry.
def digests_over_threshold?(filter_algorithm)
other_digests.select do |record|
algorithm, other_digest, status_id = record.split(':')
next unless algorithm == filter_algorithm
yield algorithm, other_digest, status_id
end.size >= THRESHOLD
end
# Status IDs (as strings, the third field of each entry) of the trail
# entries that match this status' digest under the algorithm in use.
def matching_status_ids
if nilsimsa?
other_digests.filter_map { |record| record.split(':')[2] if record.start_with?('nilsimsa') && nilsimsa_compare_value(digest, record.split(':')[1]) >= NILSIMSA_COMPARE_THRESHOLD }
else
other_digests.filter_map { |record| record.split(':')[2] if record.start_with?('md5') && record.split(':')[1] == digest }
end
end
# Redis key of the account's trail (memoized).
def redis_key
@redis_key ||= "spam_check:#{@account.id}"
end
end

View File

@@ -6,11 +6,22 @@ class StatusReachFinder
end
def inboxes
Account.where(id: reached_account_ids).inboxes
(reached_account_inboxes + followers_inboxes + relay_inboxes).uniq
end
private
# Inboxes of the accounts listed by `reached_account_ids`, or an empty
# array for reblogs.
def reached_account_inboxes
  # A reblog is never interacted with directly; all interactions are
  # assumed to target the original status instead.
  return [] if @status.reblog?

  reached_accounts = Account.where(id: reached_account_ids)
  reached_accounts.inboxes
end
def reached_account_ids
[
replied_to_account_id,
@@ -49,4 +60,16 @@ class StatusReachFinder
# The `account_id` values of the replies to @status.
def replies_account_ids
@status.replies.pluck(:account_id)
end
# Inboxes of the followers of the account @status belongs to.
def followers_inboxes
@status.account.followers.inboxes
end
# Inbox URLs of all enabled relays when @status has public visibility;
# an empty array for any other visibility.
def relay_inboxes
  return [] unless @status.public_visibility?

  Relay.enabled.pluck(:inbox_url)
end
end

View File

@@ -22,14 +22,6 @@ class TagManager
uri.normalized_host
end
# Case-insensitively checks whether `needle` designates the account whose
# canonical handle is `canonical`.
#
# @param canonical [String] the reference handle
# @param needle [String] the handle to test, optionally "username@domain"
# @return true on a direct case-insensitive match; otherwise the result of
#   `local_domain?(domain) && <username matches canonical>`, so a falsy
#   value from `local_domain?` is returned as-is
def same_acct?(canonical, needle)
  matches_canonical = ->(candidate) { canonical.casecmp(candidate).zero? }
  return true if matches_canonical.call(needle)

  local_part, host = needle.split('@')
  local_domain?(host) && matches_canonical.call(local_part)
end
def local_url?(url)
uri = Addressable::URI.parse(url).normalize
return false unless uri.host