This is an automated email from the ASF dual-hosted git repository.

markus pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new bd8c847  NUTCH-2386 BasicURLNormalizer does not encode curly braces
bd8c847 is described below

commit bd8c8476b36a465159703c88b75eb08008650136
Author: Markus Jelsma <mar...@apache.org>
AuthorDate: Wed Oct 25 15:00:33 2017 +0200

    NUTCH-2386 BasicURLNormalizer does not encode curly braces
---
 .../apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java  | 2 +-
 .../nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java     | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java b/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
index ffd22ce..b6033ae 100644
--- a/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
+++ b/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
@@ -250,7 +250,7 @@ public class BasicURLNormalizer extends Configured implements URLNormalizer {
     // Traverse over all bytes in this URL
     for (byte b: path.getBytes(utf8)) {
       // Is this a control character?
-      if (b < 33 || b == 91 || b == 93) {
+      if (b < 0x21 || b == 0x5B || b == 0x5D || b == 0x7B || b == 0x7D) {
         // Start escape sequence 
         sb.append('%');
         
diff --git a/src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java b/src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java
index 2625ea3..5cefbf3 100644
--- a/src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java
+++ b/src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java
@@ -171,6 +171,12 @@ public class TestBasicURLNormalizer {
     normalizeTest("http:////", "http:/");
     normalizeTest("http:///////", "http:/");
   }
+  
+  @Test
+  public void testCurlyBraces() throws Exception {
+    // check that leading and trailing spaces are removed
+    normalizeTest("http://foo.com/{{stuff}} ", "http://foo.com/%7B%7Bstuff%7D%7D");
+  }
 
   private void normalizeTest(String weird, String normal) throws Exception {
     Assert.assertEquals("normalizing: " + weird, normal,
@@ -181,4 +187,4 @@ public class TestBasicURLNormalizer {
     new TestBasicURLNormalizer().testNormalizer();
   }
 
-}
\ No newline at end of file
+}

-- 
To stop receiving notification emails like this one, please contact
['"commits@nutch.apache.org" <commits@nutch.apache.org>'].

Reply via email to