use Twitter::Extractor for creating links (#2502)
This commit is contained in:
		
							
								
								
									
										33
									
								
								app/lib/extractor.rb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								app/lib/extractor.rb
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,33 @@ | |||||||
|  | # frozen_string_literal: true | ||||||
|  |  | ||||||
# Entity extractor that extends Twitter::Extractor but supplies its own
# mention scanner based on Account::MENTION_RE instead of the upstream
# mention/list pattern.
module Extractor
  extend Twitter::Extractor

  module_function

  # Scans +text+ with Account::MENTION_RE and collects mention candidates.
  #
  # Each returned entry is a Hash with two keys:
  #   :screen_name - the first capture group of Account::MENTION_RE
  #   :indices     - [start, end] character offsets of the mention
  #
  # When a block is given, it is additionally called once per entry with
  # (screen_name, start, end). The entry list is returned either way.
  #
  # Returns [] when +text+ does not match Twitter::Regex[:at_signs]
  # (this also covers a nil +text+).
  def extract_mentions_or_lists_with_indices(text) # :yields: screen_name, start, end
    return [] unless text =~ Twitter::Regex[:at_signs]

    possible_entries = []

    text.to_s.scan(Account::MENTION_RE) do |screen_name, _|
      # Regexp.last_match is used instead of $LAST_MATCH_INFO so the method
      # does not depend on `require 'English'` having been evaluated somewhere.
      match_data = Regexp.last_match

      # Discard candidates whose trailing text matches end_mention_match.
      next if match_data.post_match =~ Twitter::Regex[:end_mention_match]

      # Start one character before capture group 1 -- presumably to include
      # the leading "@" (NOTE(review): confirm against Account::MENTION_RE).
      start_position = match_data.char_begin(1) - 1
      end_position = match_data.char_end(1)

      possible_entries << {
        screen_name: screen_name,
        indices: [start_position, end_position],
      }
    end

    if block_given?
      possible_entries.each do |mention|
        yield mention[:screen_name], mention[:indices].first, mention[:indices].last
      end
    end

    possible_entries
  end
end
| @@ -13,11 +13,10 @@ class Formatter | |||||||
|     return reformat(status.content) unless status.local? |     return reformat(status.content) unless status.local? | ||||||
|  |  | ||||||
|     html = status.text |     html = status.text | ||||||
|     html = encode_and_link_urls(html) |     html = encode_and_link_urls(html, status.mentions) | ||||||
|  |  | ||||||
|     html = simple_format(html, {}, sanitize: false) |     html = simple_format(html, {}, sanitize: false) | ||||||
|     html = html.delete("\n") |     html = html.delete("\n") | ||||||
|     html = link_mentions(html, status.mentions) |  | ||||||
|     html = link_hashtags(html) |  | ||||||
|  |  | ||||||
|     html.html_safe # rubocop:disable Rails/OutputSafety |     html.html_safe # rubocop:disable Rails/OutputSafety | ||||||
|   end |   end | ||||||
| @@ -37,8 +36,6 @@ class Formatter | |||||||
|     html = encode_and_link_urls(account.note) |     html = encode_and_link_urls(account.note) | ||||||
|     html = simple_format(html, {}, sanitize: false) |     html = simple_format(html, {}, sanitize: false) | ||||||
|     html = html.delete("\n") |     html = html.delete("\n") | ||||||
|     html = link_accounts(html) |  | ||||||
|     html = link_hashtags(html) |  | ||||||
|  |  | ||||||
|     html.html_safe # rubocop:disable Rails/OutputSafety |     html.html_safe # rubocop:disable Rails/OutputSafety | ||||||
|   end |   end | ||||||
| @@ -53,51 +50,66 @@ class Formatter | |||||||
|     HTMLEntities.new.encode(html) |     HTMLEntities.new.encode(html) | ||||||
|   end |   end | ||||||
|  |  | ||||||
|   def encode_and_link_urls(html) |   def encode_and_link_urls(html, mentions = nil) | ||||||
|     entities = Twitter::Extractor.extract_urls_with_indices(html, extract_url_without_protocol: false) |     entities = Extractor.extract_entities_with_indices(html, extract_url_without_protocol: false) | ||||||
|     entities = entities.sort_by { |entity| entity[:indices].first } |  | ||||||
|  |  | ||||||
|     chars = html.to_s.to_char_a |     rewrite(html.dup, entities) do |entity| | ||||||
|  |       if entity[:url] | ||||||
|  |         link_to_url(entity) | ||||||
|  |       elsif entity[:hashtag] | ||||||
|  |         link_to_hashtag(entity) | ||||||
|  |       elsif entity[:screen_name] | ||||||
|  |         link_to_mention(entity, mentions) | ||||||
|  |       end | ||||||
|  |     end | ||||||
|  |   end | ||||||
|  |  | ||||||
# Rebuilds +text+ with every entity's character range replaced by whatever
# the given block returns for that entity. Text between entities (and after
# the last one) is passed through +encode+ before being emitted.
#
# text     - source text; converted with to_s and split via to_char_a
# entities - objects exposing their range either through an #indices method
#            or an [:indices] key; processed in order of start index
#
# Yields each entity and expects the replacement markup in return.
# Returns the joined result String.
def rewrite(text, entities)
  chars = text.to_s.to_char_a

  # Entities may be plain hashes or objects with an #indices reader.
  span_of = lambda do |entity|
    entity.respond_to?(:indices) ? entity.indices : entity[:indices]
  end

  ordered = entities.sort_by { |entity| span_of.call(entity).first }

  pieces = []
  cursor = 0

  ordered.each do |entity|
    span = span_of.call(entity)
    # Plain text preceding this entity.
    pieces << encode(chars[cursor...span.first].join)
    # Replacement produced by the caller.
    pieces << yield(entity)
    cursor = span.last
  end

  # Remaining text after the final entity.
  pieces << encode(chars[cursor..-1].join)

  pieces.flatten.join
end
|  |  | ||||||
|  |   def link_to_url(entity) | ||||||
|  |     normalized_url = Addressable::URI.parse(entity[:url]).normalize | ||||||
|     html_attrs = { |     html_attrs = { | ||||||
|       target: '_blank', |       target: '_blank', | ||||||
|       rel: 'nofollow noopener', |       rel: 'nofollow noopener', | ||||||
|     } |     } | ||||||
|     result = '' |     Twitter::Autolink.send(:link_to_text, entity, link_html(entity[:url]), normalized_url, html_attrs) | ||||||
|  |  | ||||||
|     last_index = entities.reduce(0) do |index, entity| |  | ||||||
|       normalized_url = Addressable::URI.parse(entity[:url]).normalize |  | ||||||
|       indices = entity[:indices] |  | ||||||
|       result += encode(chars[index...indices.first].join) |  | ||||||
|       result += Twitter::Autolink.send(:link_to_text, entity, link_html(entity[:url]), normalized_url, html_attrs) |  | ||||||
|       indices.last |  | ||||||
|     end |  | ||||||
|     result += encode(chars[last_index..-1].join) |  | ||||||
|   end |   end | ||||||
|  |  | ||||||
|   def link_mentions(html, mentions) |   def link_to_mention(entity, mentions) | ||||||
|     html.gsub(Account::MENTION_RE) do |match| |     acct = entity[:screen_name] | ||||||
|       acct    = Account::MENTION_RE.match(match)[1] |     return link_to_account(acct) unless mentions | ||||||
|     mention = mentions.find { |item| TagManager.instance.same_acct?(item.account.acct, acct) } |     mention = mentions.find { |item| TagManager.instance.same_acct?(item.account.acct, acct) } | ||||||
|  |     mention ? mention_html(mention.account) : "@#{acct}" | ||||||
|       mention.nil? ? match : mention_html(match, mention.account) |  | ||||||
|     end |  | ||||||
|   end |   end | ||||||
|  |  | ||||||
|   def link_accounts(html) |   def link_to_account(acct) | ||||||
|     html.gsub(Account::MENTION_RE) do |match| |  | ||||||
|       acct = Account::MENTION_RE.match(match)[1] |  | ||||||
|     username, domain = acct.split('@') |     username, domain = acct.split('@') | ||||||
|     domain = nil if TagManager.instance.local_domain?(domain) |     domain = nil if TagManager.instance.local_domain?(domain) | ||||||
|     account = Account.find_remote(username, domain) |     account = Account.find_remote(username, domain) | ||||||
|  |     account ? mention_html(account) : "@#{acct}" | ||||||
|       account.nil? ? match : mention_html(match, account) |  | ||||||
|     end |  | ||||||
|   end |   end | ||||||
|  |  | ||||||
|   def link_hashtags(html) |   def link_to_hashtag(entity) | ||||||
|     html.gsub(Tag::HASHTAG_RE) do |match| |     hashtag_html(entity[:hashtag]) | ||||||
|       hashtag_html(match) |  | ||||||
|     end |  | ||||||
|   end |   end | ||||||
|  |  | ||||||
|   def link_html(url) |   def link_html(url) | ||||||
| @@ -110,12 +122,11 @@ class Formatter | |||||||
|     "<span class=\"invisible\">#{prefix}</span><span class=\"#{cutoff ? 'ellipsis' : ''}\">#{text}</span><span class=\"invisible\">#{suffix}</span>" |     "<span class=\"invisible\">#{prefix}</span><span class=\"#{cutoff ? 'ellipsis' : ''}\">#{text}</span><span class=\"invisible\">#{suffix}</span>" | ||||||
|   end |   end | ||||||
|  |  | ||||||
|   def hashtag_html(match) |   def hashtag_html(tag) | ||||||
|     prefix, _, affix = match.rpartition('#') |     "<a href=\"#{tag_url(tag.downcase)}\" class=\"mention hashtag\">#<span>#{tag}</span></a>" | ||||||
|     "#{prefix}<a href=\"#{tag_url(affix.downcase)}\" class=\"mention hashtag\">#<span>#{affix}</span></a>" |  | ||||||
|   end |   end | ||||||
|  |  | ||||||
|   def mention_html(match, account) |   def mention_html(account) | ||||||
|     "#{match.split('@').first}<span class=\"h-card\"><a href=\"#{TagManager.instance.url_for(account)}\" class=\"u-url mention\">@<span>#{account.username}</span></a></span>" |     "<span class=\"h-card\"><a href=\"#{TagManager.instance.url_for(account)}\" class=\"u-url mention\">@<span>#{account.username}</span></a></span>" | ||||||
|   end |   end | ||||||
| end | end | ||||||
|   | |||||||
| @@ -6,6 +6,10 @@ RSpec.describe Formatter do | |||||||
|   let(:local_status)  { Fabricate(:status, text: local_text, account: account) } |   let(:local_status)  { Fabricate(:status, text: local_text, account: account) } | ||||||
|   let(:remote_status) { Fabricate(:status, text: '<script>alert("Hello")</script> Beep boop', uri: 'beepboop', account: account) } |   let(:remote_status) { Fabricate(:status, text: '<script>alert("Hello")</script> Beep boop', uri: 'beepboop', account: account) } | ||||||
|  |  | ||||||
|  |   let(:local_text_with_mention) { "@#{account.username} @#{account.username}@example.com #{local_text}?x=@#{account.username} #hashtag" } | ||||||
|  |   let(:local_status_with_mention) { Fabricate(:status, text: local_text_with_mention, | ||||||
|  |                                               account: account, mentions: [Fabricate(:mention, account: account)]) } | ||||||
|  |  | ||||||
|   describe '#format' do |   describe '#format' do | ||||||
|     subject { Formatter.instance.format(local_status) } |     subject { Formatter.instance.format(local_status) } | ||||||
|  |  | ||||||
| @@ -21,6 +25,18 @@ RSpec.describe Formatter do | |||||||
|       expect(subject).to match('<a href="http://google.com/" rel="nofollow noopener" target="_blank"><span class="invisible">http://</span><span class="">google.com/</span><span class="invisible"></span></a>') |       expect(subject).to match('<a href="http://google.com/" rel="nofollow noopener" target="_blank"><span class="invisible">http://</span><span class="">google.com/</span><span class="invisible"></span></a>') | ||||||
|     end |     end | ||||||
|  |  | ||||||
|  |     it 'contains a mention' do | ||||||
|  |       result = Formatter.instance.format(local_status_with_mention) | ||||||
|  |       expect(result).to match "<a href=\"#{TagManager.instance.url_for(account)}\" class=\"u-url mention\">@<span>#{account.username}</span></a></span>" | ||||||
|  |       expect(result).to match %r{href=\"http://google.com/\?x=@#{account.username}} | ||||||
|  |       expect(result).not_to match "href=\"https://example.com/@#{account.username}" | ||||||
|  |     end | ||||||
|  |  | ||||||
|  |     it 'contains a hashtag' do | ||||||
|  |       result = Formatter.instance.format(local_status_with_mention) | ||||||
|  |       expect(result).to match("/tags/hashtag\" class=\"mention hashtag\">#<span>hashtag</span></a>") | ||||||
|  |     end | ||||||
|  |  | ||||||
|     context 'matches a stand-alone medium URL' do |     context 'matches a stand-alone medium URL' do | ||||||
|       let(:local_text) { 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4' } |       let(:local_text) { 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4' } | ||||||
|       it 'has valid url' do |       it 'has valid url' do | ||||||
|   | |||||||
| @@ -379,6 +379,10 @@ RSpec.describe Account, type: :model do | |||||||
|     it 'does not match URLs' do |     it 'does not match URLs' do | ||||||
|       expect(subject.match('Check this out https://medium.com/@alice/some-article#.abcdef123')).to be_nil |       expect(subject.match('Check this out https://medium.com/@alice/some-article#.abcdef123')).to be_nil | ||||||
|     end |     end | ||||||
|  |  | ||||||
|  |     xit 'does not match URL querystring' do | ||||||
|  |       expect(subject.match('https://example.com/?x=@alice')).to be_nil | ||||||
|  |     end | ||||||
|   end |   end | ||||||
|  |  | ||||||
|   describe 'validations' do |   describe 'validations' do | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user