Merge branch 'main' into glitch-soc/merge-upstream

This commit is contained in:
Claire
2022-02-08 18:23:53 +01:00
26 changed files with 628 additions and 594 deletions

View File

@ -63,15 +63,29 @@ class ActivityPub::RepliesController < ActivityPub::BaseController
end
def next_page
only_other_accounts = !(@replies&.last&.account_id == @account.id && @replies.size == DESCENDANTS_LIMIT)
if only_other_accounts?
# Only consider remote accounts
return nil if @replies.size < DESCENDANTS_LIMIT
account_status_replies_url(
@account,
@status,
page: true,
min_id: only_other_accounts && !only_other_accounts? ? nil : @replies&.last&.id,
only_other_accounts: only_other_accounts
)
account_status_replies_url(
@account,
@status,
page: true,
min_id: @replies&.last&.id,
only_other_accounts: true
)
else
# For now, we're serving only self-replies, but next page might be other accounts
next_only_other_accounts = @replies&.last&.account_id != @account.id || @replies.size < DESCENDANTS_LIMIT
account_status_replies_url(
@account,
@status,
page: true,
min_id: next_only_other_accounts ? nil : @replies&.last&.id,
only_other_accounts: next_only_other_accounts
)
end
end
def page_params

View File

@ -7,27 +7,24 @@ module Localized
around_action :set_locale
end
def set_locale
locale = current_user.locale if respond_to?(:user_signed_in?) && user_signed_in?
locale ||= session[:locale] ||= default_locale
locale = default_locale unless I18n.available_locales.include?(locale.to_sym)
I18n.with_locale(locale) do
yield
end
# Around-action callback: runs the wrapped block with the locale requested
# for this request, or the application default when none was requested.
def set_locale(&block)
  active_locale = requested_locale || I18n.default_locale
  I18n.with_locale(active_locale, &block)
end
private
def default_locale
if ENV['DEFAULT_LOCALE'].present?
I18n.default_locale
else
request_locale || I18n.default_locale
end
# Works out which locale the current request asks for, trying in order:
# the +locale+ request parameter, the signed-in user's locale, and the
# Accept-Language header (the header is only consulted when the
# DEFAULT_LOCALE environment variable is blank). Returns nil when none of
# these yields a value.
def requested_locale
  candidate = available_locale_or_nil(params[:locale])

  if respond_to?(:user_signed_in?) && user_signed_in?
    candidate ||= available_locale_or_nil(current_user.locale)
  end

  if ENV['DEFAULT_LOCALE'].blank?
    candidate ||= http_accept_language
  end

  candidate
end
def request_locale
http_accept_language.language_region_compatible_from(I18n.available_locales)
# Picks the best match between the request's Accept-Language header and
# I18n.available_locales. Returns nil when the header is absent.
def http_accept_language
  headers = request.headers
  return unless headers.key?('Accept-Language')

  parser = HttpAcceptLanguage::Parser.new(headers.fetch('Accept-Language'))
  parser.language_region_compatible_from(I18n.available_locales)
end
# Returns +locale_name+ as a Symbol when it names one of
# I18n.available_locales, otherwise nil.
#
# The value can come straight from request parameters, so anything that
# cannot be converted to a Symbol (nil, arrays, nested parameters) is
# rejected here instead of raising NoMethodError.
def available_locale_or_nil(locale_name)
  return unless locale_name.respond_to?(:to_sym)

  candidate = locale_name.to_sym
  candidate if I18n.available_locales.include?(candidate)
end
end

View File

@ -1,94 +1,237 @@
# frozen_string_literal: true
module LanguagesHelper
HUMAN_LOCALES = {
af: 'Afrikaans',
ar: 'العربية',
ast: 'Asturianu',
bg: 'Български',
bn: 'বাংলা',
br: 'Breton',
ca: 'Català',
co: 'Corsu',
cs: 'Čeština',
cy: 'Cymraeg',
da: 'Dansk',
de: 'Deutsch',
el: 'Ελληνικά',
en: 'English',
eo: 'Esperanto',
ISO_639_1 = {
aa: ['Afar', 'Afaraf'].freeze,
ab: ['Abkhaz', 'аҧсуа бызшәа'].freeze,
ae: ['Avestan', 'avesta'].freeze,
af: ['Afrikaans', 'Afrikaans'].freeze,
ak: ['Akan', 'Akan'].freeze,
am: ['Amharic', 'አማርኛ'].freeze,
an: ['Aragonese', 'aragonés'].freeze,
ar: ['Arabic', 'اللغة العربية'].freeze,
as: ['Assamese', 'অসমীয়া'].freeze,
av: ['Avaric', 'авар мацӀ'].freeze,
ay: ['Aymara', 'aymar aru'].freeze,
az: ['Azerbaijani', 'azərbaycan dili'].freeze,
ba: ['Bashkir', 'башҡорт теле'].freeze,
be: ['Belarusian', 'беларуская мова'].freeze,
bg: ['Bulgarian', 'български език'].freeze,
bh: ['Bihari', 'भोजपुरी'].freeze,
bi: ['Bislama', 'Bislama'].freeze,
bm: ['Bambara', 'bamanankan'].freeze,
bn: ['Bengali', 'বাংলা'].freeze,
bo: ['Tibetan', 'བོད་ཡིག'].freeze,
br: ['Breton', 'brezhoneg'].freeze,
bs: ['Bosnian', 'bosanski jezik'].freeze,
ca: ['Catalan', 'Català'].freeze,
ce: ['Chechen', 'нохчийн мотт'].freeze,
ch: ['Chamorro', 'Chamoru'].freeze,
co: ['Corsican', 'corsu'].freeze,
cr: ['Cree', 'ᓀᐦᐃᔭᐍᐏᐣ'].freeze,
cs: ['Czech', 'čeština'].freeze,
cu: ['Old Church Slavonic', 'ѩзыкъ словѣньскъ'].freeze,
cv: ['Chuvash', 'чӑваш чӗлхи'].freeze,
cy: ['Welsh', 'Cymraeg'].freeze,
da: ['Danish', 'dansk'].freeze,
de: ['German', 'Deutsch'].freeze,
dv: ['Divehi', 'Dhivehi'].freeze,
dz: ['Dzongkha', 'རྫོང་ཁ'].freeze,
ee: ['Ewe', 'Eʋegbe'].freeze,
el: ['Greek', 'Ελληνικά'].freeze,
en: ['English', 'English'].freeze,
eo: ['Esperanto', 'Esperanto'].freeze,
es: ['Spanish', 'Español'].freeze,
et: ['Estonian', 'eesti'].freeze,
eu: ['Basque', 'euskara'].freeze,
fa: ['Persian', 'فارسی'].freeze,
ff: ['Fula', 'Fulfulde'].freeze,
fi: ['Finnish', 'suomi'].freeze,
fj: ['Fijian', 'Vakaviti'].freeze,
fo: ['Faroese', 'føroyskt'].freeze,
fr: ['French', 'Français'].freeze,
fy: ['Western Frisian', 'Frysk'].freeze,
ga: ['Irish', 'Gaeilge'].freeze,
gd: ['Scottish Gaelic', 'Gàidhlig'].freeze,
gl: ['Galician', 'galego'].freeze,
gu: ['Gujarati', 'ગુજરાતી'].freeze,
gv: ['Manx', 'Gaelg'].freeze,
ha: ['Hausa', 'هَوُسَ'].freeze,
he: ['Hebrew', 'עברית'].freeze,
hi: ['Hindi', 'हिन्दी'].freeze,
ho: ['Hiri Motu', 'Hiri Motu'].freeze,
hr: ['Croatian', 'Hrvatski'].freeze,
ht: ['Haitian', 'Kreyòl ayisyen'].freeze,
hu: ['Hungarian', 'magyar'].freeze,
hy: ['Armenian', 'Հայերեն'].freeze,
hz: ['Herero', 'Otjiherero'].freeze,
ia: ['Interlingua', 'Interlingua'].freeze,
id: ['Indonesian', 'Bahasa Indonesia'].freeze,
ie: ['Interlingue', 'Interlingue'].freeze,
ig: ['Igbo', 'Asụsụ Igbo'].freeze,
ii: ['Nuosu', 'ꆈꌠ꒿ Nuosuhxop'].freeze,
ik: ['Inupiaq', 'Iñupiaq'].freeze,
io: ['Ido', 'Ido'].freeze,
is: ['Icelandic', 'Íslenska'].freeze,
it: ['Italian', 'Italiano'].freeze,
iu: ['Inuktitut', 'ᐃᓄᒃᑎᑐᑦ'].freeze,
ja: ['Japanese', '日本語'].freeze,
jv: ['Javanese', 'basa Jawa'].freeze,
ka: ['Georgian', 'ქართული'].freeze,
kg: ['Kongo', 'Kikongo'].freeze,
ki: ['Kikuyu', 'Gĩkũyũ'].freeze,
kj: ['Kwanyama', 'Kuanyama'].freeze,
kk: ['Kazakh', 'қазақ тілі'].freeze,
kl: ['Kalaallisut', 'kalaallisut'].freeze,
km: ['Khmer', 'ខេមរភាសា'].freeze,
kn: ['Kannada', 'ಕನ್ನಡ'].freeze,
ko: ['Korean', '한국어'].freeze,
kr: ['Kanuri', 'Kanuri'].freeze,
ks: ['Kashmiri', 'कश्मीरी'].freeze,
ku: ['Kurdish', 'Kurdî'].freeze,
kv: ['Komi', 'коми кыв'].freeze,
kw: ['Cornish', 'Kernewek'].freeze,
ky: ['Kyrgyz', 'Кыргызча'].freeze,
la: ['Latin', 'latine'].freeze,
lb: ['Luxembourgish', 'Lëtzebuergesch'].freeze,
lg: ['Ganda', 'Luganda'].freeze,
li: ['Limburgish', 'Limburgs'].freeze,
ln: ['Lingala', 'Lingála'].freeze,
lo: ['Lao', 'ພາສາ'].freeze,
lt: ['Lithuanian', 'lietuvių kalba'].freeze,
lu: ['Luba-Katanga', 'Tshiluba'].freeze,
lv: ['Latvian', 'latviešu valoda'].freeze,
mg: ['Malagasy', 'fiteny malagasy'].freeze,
mh: ['Marshallese', 'Kajin M̧ajeļ'].freeze,
mi: ['Māori', 'te reo Māori'].freeze,
mk: ['Macedonian', 'македонски јазик'].freeze,
ml: ['Malayalam', 'മലയാളം'].freeze,
mn: ['Mongolian', 'Монгол хэл'].freeze,
mr: ['Marathi', 'मराठी'].freeze,
ms: ['Malay', 'Bahasa Malaysia'].freeze,
mt: ['Maltese', 'Malti'].freeze,
my: ['Burmese', 'ဗမာစာ'].freeze,
na: ['Nauru', 'Ekakairũ Naoero'].freeze,
nb: ['Norwegian Bokmål', 'Norsk bokmål'].freeze,
nd: ['Northern Ndebele', 'isiNdebele'].freeze,
ne: ['Nepali', 'नेपाली'].freeze,
ng: ['Ndonga', 'Owambo'].freeze,
nl: ['Dutch', 'Nederlands'].freeze,
nn: ['Norwegian Nynorsk', 'Norsk nynorsk'].freeze,
no: ['Norwegian', 'Norsk'].freeze,
nr: ['Southern Ndebele', 'isiNdebele'].freeze,
nv: ['Navajo', 'Diné bizaad'].freeze,
ny: ['Chichewa', 'chiCheŵa'].freeze,
oc: ['Occitan', 'occitan'].freeze,
oj: ['Ojibwe', 'ᐊᓂᔑᓈᐯᒧᐎᓐ'].freeze,
om: ['Oromo', 'Afaan Oromoo'].freeze,
or: ['Oriya', 'ଓଡ଼ିଆ'].freeze,
os: ['Ossetian', 'ирон æвзаг'].freeze,
pa: ['Panjabi', 'ਪੰਜਾਬੀ'].freeze,
pi: ['Pāli', 'पाऴि'].freeze,
pl: ['Polish', 'Polski'].freeze,
ps: ['Pashto', 'پښتو'].freeze,
pt: ['Portuguese', 'Português'].freeze,
qu: ['Quechua', 'Runa Simi'].freeze,
rm: ['Romansh', 'rumantsch grischun'].freeze,
rn: ['Kirundi', 'Ikirundi'].freeze,
ro: ['Romanian', 'Română'].freeze,
ru: ['Russian', 'Русский'].freeze,
rw: ['Kinyarwanda', 'Ikinyarwanda'].freeze,
sa: ['Sanskrit', 'संस्कृतम्'].freeze,
sc: ['Sardinian', 'sardu'].freeze,
sd: ['Sindhi', 'सिन्धी'].freeze,
se: ['Northern Sami', 'Davvisámegiella'].freeze,
sg: ['Sango', 'yângâ tî sängö'].freeze,
si: ['Sinhala', 'සිංහල'].freeze,
sk: ['Slovak', 'slovenčina'].freeze,
sl: ['Slovenian', 'slovenščina'].freeze,
sn: ['Shona', 'chiShona'].freeze,
so: ['Somali', 'Soomaaliga'].freeze,
sq: ['Albanian', 'Shqip'].freeze,
sr: ['Serbian', 'српски језик'].freeze,
ss: ['Swati', 'SiSwati'].freeze,
st: ['Southern Sotho', 'Sesotho'].freeze,
su: ['Sundanese', 'Basa Sunda'].freeze,
sv: ['Swedish', 'Svenska'].freeze,
sw: ['Swahili', 'Kiswahili'].freeze,
ta: ['Tamil', 'தமிழ்'].freeze,
te: ['Telugu', 'తెలుగు'].freeze,
tg: ['Tajik', 'тоҷикӣ'].freeze,
th: ['Thai', 'ไทย'].freeze,
ti: ['Tigrinya', 'ትግርኛ'].freeze,
tk: ['Turkmen', 'Türkmen'].freeze,
tl: ['Tagalog', 'Wikang Tagalog'].freeze,
tn: ['Tswana', 'Setswana'].freeze,
to: ['Tonga', 'faka Tonga'].freeze,
tr: ['Turkish', 'Türkçe'].freeze,
ts: ['Tsonga', 'Xitsonga'].freeze,
tt: ['Tatar', 'татар теле'].freeze,
tw: ['Twi', 'Twi'].freeze,
ty: ['Tahitian', 'Reo Tahiti'].freeze,
ug: ['Uyghur', 'ئۇيغۇرچە‎'].freeze,
uk: ['Ukrainian', 'Українська'].freeze,
ur: ['Urdu', 'اردو'].freeze,
uz: ['Uzbek', 'Ўзбек'].freeze,
ve: ['Venda', 'Tshivenḓa'].freeze,
vi: ['Vietnamese', 'Tiếng Việt'].freeze,
vo: ['Volapük', 'Volapük'].freeze,
wa: ['Walloon', 'walon'].freeze,
wo: ['Wolof', 'Wollof'].freeze,
xh: ['Xhosa', 'isiXhosa'].freeze,
yi: ['Yiddish', 'ייִדיש'].freeze,
yo: ['Yoruba', 'Yorùbá'].freeze,
za: ['Zhuang', 'Saɯ cueŋƅ'].freeze,
zh: ['Chinese', '中文'].freeze,
zu: ['Zulu', 'isiZulu'].freeze,
}.freeze
ISO_639_3 = {
ast: ['Asturian', 'Asturianu'].freeze,
kab: ['Kabyle', 'Taqbaylit'].freeze,
kmr: ['Northern Kurdish', 'Kurmancî'].freeze,
zgh: ['Standard Moroccan Tamazight', 'ⵜⴰⵎⴰⵣⵉⵖⵜ'].freeze,
}.freeze
SUPPORTED_LOCALES = {}.merge(ISO_639_1).merge(ISO_639_3).freeze
# For ISO-639-1 and ISO-639-3 language codes, we have their official
# names, but for some translations, we need the names of the
# regional variants specifically
REGIONAL_LOCALE_NAMES = {
'es-AR': 'Español (Argentina)',
'es-MX': 'Español (México)',
es: 'Español',
et: 'Eesti',
eu: 'Euskara',
fa: 'فارسی',
fi: 'Suomi',
fr: 'Français',
ga: 'Gaeilge',
gd: 'Gàidhlig',
gl: 'Galego',
he: 'עברית',
hi: 'हिन्दी',
hr: 'Hrvatski',
hu: 'Magyar',
hy: 'Հայերեն',
id: 'Bahasa Indonesia',
io: 'Ido',
is: 'Íslenska',
it: 'Italiano',
ja: '日本語',
ka: 'ქართული',
kab: 'Taqbaylit',
kk: 'Қазақша',
kmr: 'Kurmancî',
kn: 'ಕನ್ನಡ',
ko: '한국어',
ku: 'سۆرانی',
lt: 'Lietuvių',
lv: 'Latviešu',
mk: 'Македонски',
ml: 'മലയാളം',
mr: 'मराठी',
ms: 'Bahasa Melayu',
nl: 'Nederlands',
nn: 'Nynorsk',
no: 'Norsk',
oc: 'Occitan',
pl: 'Polski',
'pt-BR': 'Português (Brasil)',
'pt-PT': 'Português (Portugal)',
pt: 'Português',
ro: 'Română',
ru: 'Русский',
sa: 'संस्कृतम्',
sc: 'Sardu',
si: 'සිංහල',
sk: 'Slovenčina',
sl: 'Slovenščina',
sq: 'Shqip',
'sr-Latn': 'Srpski (latinica)',
sr: 'Српски',
sv: 'Svenska',
ta: 'தமிழ்',
te: 'తెలుగు',
th: 'ไทย',
tr: 'Türkçe',
uk: 'Українська',
ur: 'اُردُو',
vi: 'Tiếng Việt',
zgh: 'ⵜⴰⵎⴰⵣⵉⵖⵜ',
'zh-CN': '简体中文',
'zh-HK': '繁體中文(香港)',
'zh-TW': '繁體中文(臺灣)',
zh: '中文',
}.freeze
def human_locale(locale)
if locale == 'und'
I18n.t('generic.none')
elsif (supported_locale = SUPPORTED_LOCALES[locale.to_sym])
supported_locale[1]
elsif (regional_locale = REGIONAL_LOCALE_NAMES[locale.to_sym])
regional_locale
else
HUMAN_LOCALES[locale.to_sym] || locale
locale
end
end
# Reduces a locale string such as "en_US" or "ja-JP" to its language code
# and returns that code if it is a supported locale, otherwise nil.
#
# Input consisting only of separators (e.g. "-" or "_") splits to an empty
# list and therefore has no language code; it is treated as invalid.
def valid_locale_or_nil(str)
  return if str.blank?

  code, = str.to_s.split(/[_-]/) # Strip out the region from e.g. en_US or ja-JP

  return unless code && valid_locale?(code)

  code
end
# Whether +locale+ (String or Symbol) is a key of SUPPORTED_LOCALES.
# Values that cannot be converted to a Symbol, such as nil, are simply
# not valid rather than an error.
def valid_locale?(locale)
  locale.respond_to?(:to_sym) && SUPPORTED_LOCALES.key?(locale.to_sym)
end
end

View File

@ -2,7 +2,7 @@
module SettingsHelper
def filterable_languages
LanguageDetector.instance.language_names.select(&LanguagesHelper::HUMAN_LOCALES.method(:key?))
LanguagesHelper::SUPPORTED_LOCALES.keys
end
def hash_to_object(hash)

View File

@ -112,7 +112,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
url: @status_parser.url || @status_parser.uri,
account: @account,
text: converted_object_type? ? converted_text : (@status_parser.text || ''),
language: @status_parser.language || detected_language,
language: @status_parser.language,
spoiler_text: converted_object_type? ? '' : (@status_parser.spoiler_text || ''),
created_at: @status_parser.created_at,
edited_at: @status_parser.edited_at,
@ -370,10 +370,6 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
Formatter.instance.linkify([@status_parser.title.presence, @status_parser.spoiler_text.presence, @status_parser.url || @status_parser.uri].compact.join("\n\n"))
end
def detected_language
LanguageDetector.instance.detect(@status_parser.text, @account) if supported_object_type?
end
# True when a MIME type is given and it is not among
# MediaAttachment.supported_mime_types. A blank MIME type is never
# reported as unsupported.
def unsupported_media_type?(mime_type)
  return false if mime_type.blank?

  !MediaAttachment.supported_mime_types.include?(mime_type)
end

View File

@ -1,101 +0,0 @@
# frozen_string_literal: true
# Singleton wrapper around the CLD3 language identifier. Cleans status text
# (HTML, links, mentions, hashtags, custom emoji) before identification and
# only trusts the result when the input looks reliable enough.
class LanguageDetector
  include Singleton

  # Minimum number of whitespace-separated words for text to count as
  # long enough on its own.
  WORDS_THRESHOLD = 4

  # Runs of characters from the scripts that reliable_input? accepts as a
  # signal even when the text is short.
  RELIABLE_CHARACTERS_RE = /[\p{Hebrew}\p{Arabic}\p{Syriac}\p{Thaana}\p{Nko}\p{Han}\p{Katakana}\p{Hiragana}\p{Hangul}\p{Thai}]+/m

  def initialize
    @identifier = CLD3::NNetLanguageIdentifier.new(1, 2048)
  end

  # Detects the language of +text+.
  #
  # Returns nil when the cleaned-up text is blank. Otherwise returns the
  # detected language code as a Symbol, falling back to the account's
  # default locale (which is nil for non-local accounts).
  def detect(text, account)
    input_text = prepare_text(text)

    return if input_text.blank?

    detect_language_code(input_text) || default_locale(account)
  end

  # Unique two-letter codes (as Symbols) for every language name CLD3
  # exposes. Computed once and cached for the lifetime of the singleton.
  def language_names
    @language_names ||= CLD3::TaskContextParams::LANGUAGE_NAMES.map { |name| iso6391(name.to_s).to_sym }.uniq
  end

  private

  def prepare_text(text)
    simplify_text(text).strip
  end

  def unreliable_input?(text)
    !reliable_input?(text)
  end

  # Input is considered reliable if it has enough words, or if enough of it
  # is written in one of the scripts matched by RELIABLE_CHARACTERS_RE.
  def reliable_input?(text)
    sufficient_text_length?(text) || language_specific_character_set?(text)
  end

  def sufficient_text_length?(text)
    text.split(/\s+/).size >= WORDS_THRESHOLD
  end

  # True when more than 30% of the characters belong to runs matched by
  # RELIABLE_CHARACTERS_RE.
  def language_specific_character_set?(text)
    words = text.scan(RELIABLE_CHARACTERS_RE)

    return false if words.empty?

    words.sum(&:size).to_f / text.size > 0.3
  end

  # Returns the detected code as a Symbol, or nil when the input is
  # unreliable or the identifier does not report a reliable result.
  def detect_language_code(text)
    return if unreliable_input?(text)

    result = @identifier.find_language(text)

    iso6391(result.language.to_s).to_sym if result&.reliable?
  end

  # Maps a BCP 47 tag to the alpha-2 code of its primary language subtag.
  def iso6391(bcp47)
    iso639 = bcp47.split('-').first

    # CLD3 returns grandfathered language code for Hebrew
    return 'he' if iso639 == 'iw'

    ISO_639.find(iso639).alpha2
  end

  # Strips everything that is not natural-language content: HTML, URLs,
  # mentions and custom emoji shortcodes. Hashtags are turned into plain
  # lower-case words, and whitespace runs are collapsed.
  def simplify_text(text)
    new_text = remove_html(text)
    new_text.gsub!(FetchLinkCardService::URL_PATTERN, '\1')
    new_text.gsub!(Account::MENTION_RE, '')
    new_text.gsub!(Tag::HASHTAG_RE) { |string| string.gsub(/[#_]/, '#' => '', '_' => ' ').gsub(/[a-z][A-Z]|[a-zA-Z][\d]/) { |s| s.insert(1, ' ') }.downcase }
    new_text.gsub!(/:#{CustomEmoji::SHORTCODE_RE_FRAGMENT}:/, '')
    new_text.gsub!(/\s+/, ' ')
    new_text
  end

  # Builds a scrubber that only permits <br> and <p> tags.
  def new_scrubber
    scrubber = Rails::Html::PermitScrubber.new
    scrubber.tags = %w(br p)
    scrubber
  end

  def scrubber
    @scrubber ||= new_scrubber
  end

  # Scrubs the HTML down to <br>/<p>, then converts those into newlines.
  def remove_html(text)
    text = Loofah.fragment(text).scrub!(scrubber).to_s
    text.gsub!('<br>', "\n")
    text.gsub!('</p><p>', "\n\n")
    text.gsub!(/(^<p>|<\/p>$)/, '')
    text
  end

  # Fallback for local accounts only: the user's locale, or the
  # application default. Returns nil for non-local accounts.
  def default_locale(account)
    account.user_locale&.to_sym || I18n.default_locale if account.local?
  end
end

View File

@ -2,6 +2,20 @@
class LinkDetailsExtractor
include ActionView::Helpers::TagHelper
include LanguagesHelper
# Some publications wrap the JSON-LD data inside their <script> tags
# in commented-out CDATA blocks. These wrappers need to be removed
# before attempting to parse the JSON
CDATA_JUNK_PATTERN = %r{^[\s]*(
(/\*[\s]*<!\[CDATA\[[\s]*\*/) # Block comment style opening
|
(//[\s]*<!\[CDATA\[) # Single-line comment style opening
|
(/\*[\s]*\]\]>[\s]*\*/) # Block comment style closing
|
(//[\s]*\]\]>) # Single-line comment style closing
)[\s]*$}x
class StructuredData
SUPPORTED_TYPES = %w(
@ -61,6 +75,10 @@ class LinkDetailsExtractor
publisher.dig('logo', 'url')
end
def valid?
json.present?
end
private
def author
@ -134,11 +152,11 @@ class LinkDetailsExtractor
end
def title
structured_data&.headline || opengraph_tag('og:title') || document.xpath('//title').map(&:content).first
html_entities.decode(structured_data&.headline || opengraph_tag('og:title') || document.xpath('//title').map(&:content).first)
end
def description
structured_data&.description || opengraph_tag('og:description') || meta_tag('description')
html_entities.decode(structured_data&.description || opengraph_tag('og:description') || meta_tag('description'))
end
def image
@ -146,11 +164,11 @@ class LinkDetailsExtractor
end
def canonical_url
valid_url_or_nil(opengraph_tag('og:url') || link_tag('canonical'), same_origin_only: true) || @original_url.to_s
valid_url_or_nil(link_tag('canonical') || opengraph_tag('og:url'), same_origin_only: true) || @original_url.to_s
end
def provider_name
structured_data&.publisher_name || opengraph_tag('og:site_name')
html_entities.decode(structured_data&.publisher_name || opengraph_tag('og:site_name'))
end
def provider_url
@ -158,7 +176,7 @@ class LinkDetailsExtractor
end
def author_name
structured_data&.author_name || opengraph_tag('og:author') || opengraph_tag('og:author:username')
html_entities.decode(structured_data&.author_name || opengraph_tag('og:author') || opengraph_tag('og:author:username'))
end
def author_url
@ -201,14 +219,6 @@ class LinkDetailsExtractor
nil
end
def valid_locale_or_nil(str)
return nil if str.blank?
code, = str.split(/_-/) # Strip out the region from e.g. en_US or ja-JA
locale = ISO_639.find(code)
locale&.alpha2
end
# Returns the href of the first <link> element whose rel attribute equals
# +name+, or nil when the document has no such element.
def link_tag(name)
  hrefs = document.xpath("//link[@rel=\"#{name}\"]").map { |node| node['href'] }
  hrefs.first
end
@ -223,10 +233,24 @@ class LinkDetailsExtractor
def structured_data
@structured_data ||= begin
json_ld = document.xpath('//script[@type="application/ld+json"]').map(&:content).first
json_ld.present? ? StructuredData.new(json_ld) : nil
rescue Oj::ParseError
nil
# Some publications have more than one JSON-LD definition on the page,
# and some of those definitions aren't valid JSON either, so we have
# to loop through here until we find something that is the right type
# and doesn't break
document.xpath('//script[@type="application/ld+json"]').filter_map do |element|
json_ld = element.content&.gsub(CDATA_JUNK_PATTERN, '')
next if json_ld.blank?
structured_data = StructuredData.new(html_entities.decode(json_ld))
next unless structured_data.valid?
structured_data
rescue Oj::ParseError, EncodingError
Rails.logger.debug("Invalid JSON-LD in #{@original_url}")
next
end.first
end
end
@ -246,4 +270,8 @@ class LinkDetailsExtractor
detector.strip_tags = true
end
end
# Lazily builds the HTMLEntities coder and reuses it on later calls.
def html_entities
  return @html_entities if @html_entities

  @html_entities = HTMLEntities.new
end
end

View File

@ -6,7 +6,7 @@ class AccountSuggestions::GlobalSource < AccountSuggestions::Source
end
def get(account, skip_account_ids: [], limit: 40)
account_ids = account_ids_for_locale(account.user_locale) - [account.id] - skip_account_ids
account_ids = account_ids_for_locale(I18n.locale.to_str.split(/[_-]/).first) - [account.id] - skip_account_ids
as_ordered_suggestions(
scope(account).where(id: account_ids),

View File

@ -245,6 +245,10 @@ class User < ApplicationRecord
save!
end
# Language to use for new posts: the user's default-language setting when
# one is set, otherwise the user's interface locale.
#
# A blank setting (e.g. an empty string) counts as "not set", so it falls
# through to the locale instead of being returned as the language.
def preferred_posting_language
  settings.default_language.presence || locale
end
# Default visibility for new posts: the stored setting when there is one,
# otherwise 'private' for locked accounts and 'public' for all others.
def setting_default_privacy
  configured = settings.default_privacy
  return configured if configured

  account.locked? ? 'private' : 'public'
end

View File

@ -120,7 +120,7 @@ class ActivityPub::ProcessStatusUpdateService < BaseService
@status.text = @status_parser.text || ''
@status.spoiler_text = @status_parser.spoiler_text || ''
@status.sensitive = @account.sensitized? || @status_parser.sensitive || false
@status.language = @status_parser.language || detected_language
@status.language = @status_parser.language
@status.edited_at = @status_parser.edited_at || Time.now.utc if significant_changes?
@status.save!
@ -210,10 +210,6 @@ class ActivityPub::ProcessStatusUpdateService < BaseService
{ redis: Redis.current, key: "create:#{@uri}", autorelease: 15.minutes.seconds }
end
def detected_language
LanguageDetector.instance.detect(@status_parser.text, @account)
end
def create_previous_edit!
# We only need to create a previous edit when no previous edits exist, e.g.
# when the status has never been edited. For other cases, we always create

View File

@ -2,6 +2,7 @@
class PostStatusService < BaseService
include Redisable
include LanguagesHelper
MIN_SCHEDULE_OFFSET = 5.minutes.freeze
@ -118,10 +119,6 @@ class PostStatusService < BaseService
raise Mastodon::ValidationError, I18n.t('media_attachments.validations.not_ready') if @media.any?(&:not_processed?)
end
def language_from_option(str)
ISO_639.find(str)&.alpha2
end
def process_mentions_service
ProcessMentionsService.new
end
@ -174,7 +171,7 @@ class PostStatusService < BaseService
sensitive: @sensitive,
spoiler_text: @options[:spoiler_text] || '',
visibility: @visibility,
language: language_from_option(@options[:language]) || @account.user&.setting_default_language&.presence || LanguageDetector.instance.detect(@text, @account),
language: valid_locale_or_nil(@options[:language].presence || @account.user&.preferred_posting_language || I18n.default_locale),
application: @options[:application],
content_type: @options[:content_type] || @account.user&.setting_default_content_type,
rate_limit: @options[:with_rate_limit],

View File

@ -1,5 +1,7 @@
# frozen_string_literal: true
require 'csv'
class ImportValidator < ActiveModel::Validator
KNOWN_HEADERS = [
'Account address',

View File

@ -10,7 +10,7 @@
.filter-subset.filter-subset--with-select
%strong= t('admin.follow_recommendations.language')
.input.select.optional
= select_tag :language, options_for_select(I18n.available_locales.map { |key| [human_locale(key), key]}, @language)
= select_tag :language, options_for_select(I18n.available_locales.map { |key| key.to_s.split(/[_-]/).first.to_sym }.uniq.map { |key| [human_locale(key), key]}, @language)
.filter-subset
%strong= t('admin.follow_recommendations.status')

View File

@ -27,7 +27,7 @@
= f.input :setting_default_privacy, collection: Status.selectable_visibilities, wrapper: :with_label, include_blank: false, label_method: lambda { |visibility| safe_join([I18n.t("statuses.visibilities.#{visibility}"), I18n.t("statuses.visibilities.#{visibility}_long")], ' - ') }, required: false, hint: false
.fields-group.fields-row__column.fields-row__column-6
= f.input :setting_default_language, collection: [nil] + filterable_languages.sort, wrapper: :with_label, label_method: lambda { |locale| locale.nil? ? I18n.t('statuses.language_detection') : human_locale(locale) }, required: false, include_blank: false, hint: false
= f.input :setting_default_language, collection: [nil] + filterable_languages, wrapper: :with_label, label_method: lambda { |locale| locale.nil? ? I18n.t('statuses.default_language') : human_locale(locale) }, required: false, include_blank: false, hint: false
.fields-group
= f.input :setting_default_sensitive, as: :boolean, wrapper: :with_label
@ -41,7 +41,7 @@
%h4= t 'preferences.public_timelines'
.fields-group
= f.input :chosen_languages, collection: filterable_languages.sort, wrapper: :with_block_label, include_blank: false, label_method: lambda { |locale| human_locale(locale) }, required: false, as: :check_boxes, collection_wrapper_tag: 'ul', item_wrapper_tag: 'li'
= f.input :chosen_languages, collection: filterable_languages, wrapper: :with_block_label, include_blank: false, label_method: lambda { |locale| human_locale(locale) }, required: false, as: :check_boxes, collection_wrapper_tag: 'ul', item_wrapper_tag: 'li'
.actions
= f.button :button, t('generic.save_changes'), type: :submit

View File

@ -6,7 +6,10 @@ class ActivityPub::ProcessingWorker
sidekiq_options backtrace: true, retry: 8
def perform(account_id, body, delivered_to_account_id = nil)
ActivityPub::ProcessCollectionService.new.call(body, Account.find(account_id), override_timestamps: true, delivered_to_account_id: delivered_to_account_id, delivery: true)
account = Account.find_by(id: account_id)
return if account.nil?
ActivityPub::ProcessCollectionService.new.call(body, account, override_timestamps: true, delivered_to_account_id: delivered_to_account_id, delivery: true)
rescue ActiveRecord::RecordInvalid => e
Rails.logger.debug "Error processing incoming ActivityPub object: #{e}"
end

View File

@ -66,7 +66,7 @@ class Scheduler::AccountsStatusesCleanupScheduler
end
def compute_budget
threads = Sidekiq::ProcessSet.new.filter { |x| x['queues'].include?('push') }.map { |x| x['concurrency'] }.sum
threads = Sidekiq::ProcessSet.new.select { |x| x['queues'].include?('push') }.map { |x| x['concurrency'] }.sum
[PER_THREAD_BUDGET * threads, MAX_BUDGET].min
end

View File

@ -16,28 +16,33 @@ class Scheduler::FollowRecommendationsScheduler
AccountSummary.refresh
FollowRecommendation.refresh
fallback_recommendations = FollowRecommendation.order(rank: :desc).limit(SET_SIZE).index_by(&:account_id)
fallback_recommendations = FollowRecommendation.order(rank: :desc).limit(SET_SIZE)
I18n.available_locales.each do |locale|
I18n.available_locales.map { |locale| locale.to_s.split(/[_-]/).first }.uniq.each do |locale|
recommendations = begin
if AccountSummary.safe.filtered.localized(locale).exists? # We can skip the work if no accounts with that language exist
FollowRecommendation.localized(locale).order(rank: :desc).limit(SET_SIZE).index_by(&:account_id)
FollowRecommendation.localized(locale).order(rank: :desc).limit(SET_SIZE).map { |recommendation| [recommendation.account_id, recommendation.rank] }
else
{}
[]
end
end
# Use language-agnostic results if there are not enough language-specific ones
missing = SET_SIZE - recommendations.keys.size
missing = SET_SIZE - recommendations.size
if missing.positive? && fallback_recommendations.size.positive?
max_fallback_rank = fallback_recommendations.first.rank || 0
# Language-specific results should be above language-agnostic ones,
# otherwise language-agnostic ones will always overshadow them
recommendations.map! { |(account_id, rank)| [account_id, rank + max_fallback_rank] }
if missing.positive?
added = 0
# Avoid duplicate results
fallback_recommendations.each_value do |recommendation|
next if recommendations.key?(recommendation.account_id)
fallback_recommendations.each do |recommendation|
next if recommendations.any? { |(account_id, _)| account_id == recommendation.account_id }
recommendations[recommendation.account_id] = recommendation
recommendations << [recommendation.account_id, recommendation.rank]
added += 1
break if added >= missing
@ -47,8 +52,8 @@ class Scheduler::FollowRecommendationsScheduler
redis.pipelined do
redis.del(key(locale))
recommendations.each_value do |recommendation|
redis.zadd(key(locale), recommendation.rank, recommendation.account_id)
recommendations.each do |(account_id, rank)|
redis.zadd(key(locale), rank, account_id)
end
end
end