This is an automated email from the ASF dual-hosted git repository. markus pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push: new bd8c847 NUTCH-2386 BasicURLNormalizer does not encode curly braces bd8c847 is described below commit bd8c8476b36a465159703c88b75eb08008650136 Author: Markus Jelsma <mar...@apache.org> AuthorDate: Wed Oct 25 15:00:33 2017 +0200 NUTCH-2386 BasicURLNormalizer does not encode curly braces --- .../apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java | 2 +- .../nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java b/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java index ffd22ce..b6033ae 100644 --- a/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java +++ b/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java @@ -250,7 +250,7 @@ public class BasicURLNormalizer extends Configured implements URLNormalizer { // Traverse over all bytes in this URL for (byte b: path.getBytes(utf8)) { // Is this a control character? 
- if (b < 33 || b == 91 || b == 93) { + if (b < 0x21 || b == 0x5B || b == 0x5D || b == 0x7B || b == 0x7D) { // Start escape sequence sb.append('%'); diff --git a/src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java b/src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java index 2625ea3..5cefbf3 100644 --- a/src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java +++ b/src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java @@ -171,6 +171,12 @@ public class TestBasicURLNormalizer { normalizeTest("http:////", "http:/"); normalizeTest("http:///////", "http:/"); } + + @Test + public void testCurlyBraces() throws Exception { + // check that leading and trailing spaces are removed + normalizeTest("http://foo.com/{{stuff}} ", "http://foo.com/%7B%7Bstuff%7D%7D"); + } private void normalizeTest(String weird, String normal) throws Exception { Assert.assertEquals("normalizing: " + weird, normal, @@ -181,4 +187,4 @@ public class TestBasicURLNormalizer { new TestBasicURLNormalizer().testNormalizer(); } -} \ No newline at end of file +} -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" <commits@nutch.apache.org>.