Add support for linking XMPP URIs in toots (#12709)

* Fix wrong grouping in Twitter valid_url regex

* Add support for xmpp URIs

Fixes #9776

The difficult part is autolinking, because Twitter-text's extractor does
some pretty ad-hoc stuff to find things that “look like” URLs, and XMPP
URIs do not really match the assumptions of that lib, so it doesn't sound
wise to try to shoehorn them into the existing regex.

This is why I used a specific regex (very close to the RFC, although slightly
more permissive), and a specific scan function (a simplified version
of the generalized one from Twitter).

* Remove leading “xmpp:” from auto-linked text
pull/12817/head
ThibG 2020-01-11 02:15:25 +01:00 committed by Eugen Rochko
parent e9ea09d173
commit ea436b355b
4 changed files with 70 additions and 4 deletions

View File

@ -245,8 +245,9 @@ class Formatter
end
standard = Extractor.extract_entities_with_indices(text, options)
xmpp = Extractor.extract_xmpp_uris_with_indices(text, options)
Extractor.remove_overlapping_entities(special + standard)
Extractor.remove_overlapping_entities(special + standard + xmpp)
end
def link_to_url(entity, options = {})
@ -284,7 +285,7 @@ class Formatter
def link_html(url)
url = Addressable::URI.parse(url).to_s
prefix = url.match(/\Ahttps?:\/\/(www\.)?/).to_s
prefix = url.match(/\A(https?:\/\/(www\.)?|xmpp:)/).to_s
text = url[prefix.length, 30]
suffix = url[prefix.length + 30..-1]
cutoff = url[prefix.length..-1].length > 30

View File

@ -2,7 +2,7 @@
class Sanitize
module Config
HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', :relative].freeze
HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', :relative].freeze
CLASS_WHITELIST_TRANSFORMER = lambda do |env|
node = env[:node]

View File

@ -29,7 +29,7 @@ module Twitter
( # $1 total match
(#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
( # $3 URL
((https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)? # $4 Protocol (optional)
((?:https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)? # $4 Protocol (optional)
(#{REGEXEN[:valid_domain]}) # $5 Domain(s)
(?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
(/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor
@ -37,5 +37,54 @@ module Twitter
)
)
}iox
# Matches a single character allowed in the node (username) part of an
# XMPP address: an unreserved URL character, a percent-encoded sequence,
# or one of the punctuation characters listed in the bracket expression.
# The interpolated REGEXEN entries are defined outside this excerpt.
REGEXEN[:validate_nodeid] = /(?:
#{REGEXEN[:validate_url_unreserved]}|
#{REGEXEN[:validate_url_pct_encoded]}|
[!$()*+,;=]
)/iox
# Matches a single character allowed in the resource part of an XMPP
# address: an unreserved URL character, a percent-encoded sequence, or a
# sub-delimiter (as defined by REGEXEN[:validate_url_sub_delims], which is
# outside this excerpt).
REGEXEN[:validate_resid] = /(?:
#{REGEXEN[:validate_url_unreserved]}|
#{REGEXEN[:validate_url_pct_encoded]}|
#{REGEXEN[:validate_url_sub_delims]}
)/iox
# Matches an "xmpp:" URI together with the character that precedes it.
# Capture groups are numbered in the inline comments below; group 3 is the
# URI itself (without the preceding character) and is the group that
# Extractor#extract_xmpp_uris_with_indices reads. The authority, username,
# resource and query-string parts are all optional; only the "xmpp:" scheme
# and a domain are required for a match.
# Flags: i = case-insensitive, o = interpolate once, x = extended syntax
# (whitespace and "#" comments inside the pattern are ignored).
REGEXEN[:valid_xmpp_uri] = %r{
( # $1 total match
(#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
( # $3 URL
((?:xmpp):) # $4 Protocol
(//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # $5 Authority (optional)
(#{REGEXEN[:validate_nodeid]}+@)? # $6 Username in path (optional)
(#{REGEXEN[:valid_domain]}) # $7 Domain in path
(/#{REGEXEN[:validate_resid]}+)? # $8 Resource in path (optional)
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $9 Query String
)
)
}iox
end
module Extractor
  # Finds every XMPP URI in <tt>text</tt> and reports where each one sits.
  #
  # Returns an array of hashes of the form
  # <tt>{ url: "xmpp:...", indices: [start, end] }</tt>, where the indices
  # are the character offsets of the URI (capture group 3 of
  # <tt>Twitter::Regex[:valid_xmpp_uri]</tt>) as reported by
  # <tt>MatchData#char_begin</tt> / <tt>#char_end</tt>.
  #
  # An empty array is returned when <tt>text</tt> is <tt>nil</tt>/false or
  # contains no ":" at all (a cheap pre-check that avoids running the regex).
  #
  # When a block is given, it is called once per URI, after the whole text
  # has been scanned, with the URI and its start and end offsets. The
  # <tt>options</tt> argument is accepted but not consulted here.
  def extract_xmpp_uris_with_indices(text, options = {}) # :yields: uri, start, end
    return [] unless text && text.index(':')

    found = []
    text.to_s.scan(Twitter::Regex[:valid_xmpp_uri]) do
      match = Regexp.last_match
      # Group 3 is the URI proper, excluding the preceding character.
      found << { url: match[3], indices: [match.char_begin(3), match.char_end(3)] }
    end

    if block_given?
      found.each do |entity|
        first_index, last_index = entity[:indices]
        yield entity[:url], first_index, last_index
      end
    end

    found
  end
end
end

View File

@ -242,6 +242,22 @@ RSpec.describe Formatter do
is_expected.to include '/tags/hashtag%E3%82%BF%E3%82%B0" class="mention hashtag" rel="tag">#<span>hashtagタグ</span></a>'
end
end
# A text consisting solely of an XMPP URI: the expectation is that the
# subject (defined outside this excerpt) contains a link whose href is the
# complete URI, including the "xmpp:" scheme.
context 'given a stand-alone xmpp: URI' do
let(:text) { 'xmpp:user@instance.com' }
it 'matches the full URI' do
is_expected.to include 'href="xmpp:user@instance.com"'
end
end
# An XMPP URI embedded in a sentence and carrying a query string: the
# expected href keeps the "?join" query but stops before the following
# space, so the surrounding words are not part of the link target.
context 'given a an xmpp: URI with a query-string' do
let(:text) { 'please join xmpp:muc@instance.com?join right now' }
it 'matches the full URI' do
is_expected.to include 'href="xmpp:muc@instance.com?join"'
end
end
end
describe '#format_spoiler' do