Merge commit '425d77f8124a50fc033e8fb3bdf7b89a6a25f4fa' into glitch-soc/merge-upstream
Conflicts: - `.rubocop_todo.yml`: Upstream regenerated this file, glitch-soc had a specific ignore. - `README.md`: Upstream updated its README, but glitch-soc has a completely different one. Kept glitch-soc's README
This commit is contained in:
@@ -45,8 +45,11 @@ class Importer::BaseImporter
|
||||
# Remove documents from the index that no longer exist in the database
|
||||
def clean_up!
|
||||
index.scroll_batches do |documents|
|
||||
primary_key = index.adapter.target.primary_key
|
||||
raise ActiveRecord::UnknownPrimaryKey, index.adapter.target if primary_key.nil?
|
||||
|
||||
ids = documents.pluck('_id')
|
||||
existence_map = index.adapter.target.where(id: ids).pluck(:id).each_with_object({}) { |id, map| map[id.to_s] = true }
|
||||
existence_map = index.adapter.target.where(primary_key => ids).pluck(primary_key).each_with_object({}) { |id, map| map[id.to_s] = true }
|
||||
tmp = ids.reject { |id| existence_map[id] }
|
||||
|
||||
next if tmp.empty?
|
||||
|
||||
@@ -68,13 +68,26 @@ class Request
|
||||
# about 15s in total
|
||||
TIMEOUT = { connect_timeout: 5, read_timeout: 10, write_timeout: 10, read_deadline: 30 }.freeze
|
||||
|
||||
# Workaround for overly-eager decoding of percent-encoded characters in Addressable::URI#normalized_path
|
||||
# https://github.com/sporkmonger/addressable/issues/366
|
||||
URI_NORMALIZER = lambda do |uri|
|
||||
uri = HTTP::URI.parse(uri)
|
||||
|
||||
HTTP::URI.new(
|
||||
scheme: uri.normalized_scheme,
|
||||
authority: uri.normalized_authority,
|
||||
path: Addressable::URI.normalize_path(encode_non_ascii(uri.path)).presence || '/',
|
||||
query: encode_non_ascii(uri.query)
|
||||
)
|
||||
end
|
||||
|
||||
include RoutingHelper
|
||||
|
||||
def initialize(verb, url, **options)
|
||||
raise ArgumentError if url.blank?
|
||||
|
||||
@verb = verb
|
||||
@url = Addressable::URI.parse(url).normalize
|
||||
@url = URI_NORMALIZER.call(url)
|
||||
@http_client = options.delete(:http_client)
|
||||
@allow_local = options.delete(:allow_local)
|
||||
@options = options.merge(socket_class: use_proxy? || @allow_local ? ProxySocket : Socket)
|
||||
@@ -138,8 +151,14 @@ class Request
|
||||
%w(http https).include?(parsed_url.scheme) && parsed_url.host.present?
|
||||
end
|
||||
|
||||
NON_ASCII_PATTERN = /[^\x00-\x7F]+/
|
||||
|
||||
def encode_non_ascii(str)
|
||||
str&.gsub(NON_ASCII_PATTERN) { |substr| CGI.escape(substr.encode(Encoding::UTF_8)) }
|
||||
end
|
||||
|
||||
def http_client
|
||||
HTTP.use(:auto_inflate).follow(max_hops: 3)
|
||||
HTTP.use(:auto_inflate).use(normalize_uri: { normalizer: URI_NORMALIZER }).follow(max_hops: 3)
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -14,13 +14,14 @@ class RSS::Builder
|
||||
end
|
||||
|
||||
def to_xml
|
||||
('<?xml version="1.0" encoding="UTF-8"?>'.dup << Ox.dump(wrap_in_document, effort: :tolerant)).force_encoding('UTF-8')
|
||||
Ox.dump(wrap_in_document, effort: :tolerant).force_encoding('UTF-8')
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def wrap_in_document
|
||||
Ox::Document.new(version: '1.0').tap do |document|
|
||||
document << xml_instruct
|
||||
document << Ox::Element.new('rss').tap do |rss|
|
||||
rss['version'] = '2.0'
|
||||
rss['xmlns:webfeeds'] = 'http://webfeeds.org/rss/1.0'
|
||||
@@ -30,4 +31,11 @@ class RSS::Builder
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def xml_instruct
|
||||
Ox::Instruct.new(:xml).tap do |instruct|
|
||||
instruct[:version] = '1.0'
|
||||
instruct[:encoding] = 'UTF-8'
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -75,7 +75,7 @@ class TextFormatter
|
||||
entity[:indices].first
|
||||
end
|
||||
|
||||
result = ''.dup
|
||||
result = +''
|
||||
|
||||
last_index = entities.reduce(0) do |index, entity|
|
||||
indices = entity[:indices]
|
||||
|
||||
Reference in New Issue
Block a user