We're not to-the-letter about percent-encoding, but
we should allow all the characters valid in the path,
query, and fragment of a URL.  This is mainly so we can
link effectively to Wikipedia pages with parentheses
in their titles:

        https://en.wikipedia.org/wiki/Atom_(standard)
        https://en.wikipedia.org/wiki/Git_(software)
---
 lib/PublicInbox/Linkify.pm |  5 ++++-
 t/linkify.t                | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index d4df689..ea7fd71 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -17,7 +17,10 @@ use Digest::SHA qw/sha1_hex/;
 my $SALT = rand;
 my $LINK_RE = qr{\b((?:ftps?|https?|nntps?|gopher)://
                 [\@:\w\.-]+/
-                ?[!,:~\$\@\w\+\&\?\.\%\;/#=-]*)}x;
+                (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
+                (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
+                (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)?
+                )}xi;
 
 sub new { bless {}, shift }
 
diff --git a/t/linkify.t b/t/linkify.t
index 586691a..49cbbd6 100644
--- a/t/linkify.t
+++ b/t/linkify.t
@@ -23,4 +23,38 @@ use PublicInbox::Linkify;
        is($s, qq(<a\nhref="$u">$u</a>;), 'trailing semicolon not in URL');
 }
 
+{
+       my $l = PublicInbox::Linkify->new;
+       my $u = 'http://example.com/url-with-(parens)';
+       my $s = "hello $u world";
+       $s = $l->linkify_1($s);
+       $s = $l->linkify_2($s);
+       is($s, qq(hello <a\nhref="$u">$u</a> world), 'URL preserved');
+
+       $u .= "?query=a";
+       $s = "hello $u world";
+       $s = $l->linkify_1($s);
+       $s = $l->linkify_2($s);
+       is($s, qq(hello <a\nhref="$u">$u</a> world), 'query preserved');
+
+       $u .= "#fragment";
+       $s = "hello $u world";
+       $s = $l->linkify_1($s);
+       $s = $l->linkify_2($s);
+       is($s, qq(hello <a\nhref="$u">$u</a> world),
+         'query + fragment preserved');
+
+       $u = "http://example.com/";;
+       $s = "hello $u world";
+       $s = $l->linkify_1($s);
+       $s = $l->linkify_2($s);
+       is($s, qq(hello <a\nhref="$u">$u</a> world), "root URL preserved");
+
+       $u = "http://example.com/#fragment";;
+       $s = "hello $u world";
+       $s = $l->linkify_1($s);
+       $s = $l->linkify_2($s);
+       is($s, qq(hello <a\nhref="$u">$u</a> world), "root + fragment");
+}
+
 done_testing();
-- 
EW

--
unsubscribe: [email protected]
archive: https://public-inbox.org/meta/

Reply via email to