This is an automated email from the ASF dual-hosted git repository.
markus pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new bd8c847 NUTCH-2386 BasicURLNormalizer does not encode curly braces
bd8c847 is described below
commit bd8c8476b36a465159703c88b75eb08008650136
Author: Markus Jelsma <[email protected]>
AuthorDate: Wed Oct 25 15:00:33 2017 +0200
NUTCH-2386 BasicURLNormalizer does not encode curly braces
---
.../apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java | 2 +-
.../nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java | 8 +++++++-
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java b/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
index ffd22ce..b6033ae 100644
--- a/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
+++ b/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
@@ -250,7 +250,7 @@ public class BasicURLNormalizer extends Configured implements URLNormalizer {
// Traverse over all bytes in this URL
for (byte b: path.getBytes(utf8)) {
// Is this a control character?
- if (b < 33 || b == 91 || b == 93) {
+ if (b < 0x21 || b == 0x5B || b == 0x5D || b == 0x7B || b == 0x7D) {
// Start escape sequence
sb.append('%');
diff --git a/src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java b/src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java
index 2625ea3..5cefbf3 100644
--- a/src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java
+++ b/src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java
@@ -171,6 +171,12 @@ public class TestBasicURLNormalizer {
normalizeTest("http:////", "http:/");
normalizeTest("http:///////", "http:/");
}
+
+ @Test
+ public void testCurlyBraces() throws Exception {
+ // check that curly braces are percent-encoded (%7B / %7D) and the trailing space is removed
+ normalizeTest("http://foo.com/{{stuff}} ", "http://foo.com/%7B%7Bstuff%7D%7D");
+ }
private void normalizeTest(String weird, String normal) throws Exception {
Assert.assertEquals("normalizing: " + weird, normal,
@@ -181,4 +187,4 @@ public class TestBasicURLNormalizer {
new TestBasicURLNormalizer().testNormalizer();
}
-}
\ No newline at end of file
+}
--
To stop receiving notification emails like this one, please contact
['"[email protected]" <[email protected]>'].