Author: snagel
Date: Mon Nov 12 21:20:55 2012
New Revision: 1408465
URL: http://svn.apache.org/viewvc?rev=1408465&view=rev
Log: NUTCH-1484 TableUtil unreverseURL fails on file:// URLs
Modified: nutch/branches/2.x/CHANGES.txt nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java nutch/branches/2.x/src/test/org/apache/nutch/util/TestTableUtil.java Modified: nutch/branches/2.x/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1408465&r1=1408464&r2=1408465&view=diff ============================================================================== --- nutch/branches/2.x/CHANGES.txt (original) +++ nutch/branches/2.x/CHANGES.txt Mon Nov 12 21:20:55 2012 @@ -2,6 +2,8 @@ Nutch Change Log Release 2.2 - Current Development +* NUTCH-1484 TableUtil unreverseURL fails on file:// URLs (Rogério Pereira Araújo via snagel) + * NUTCH-1451 Upgrade automaton jar to 1.11-8 (lewismc) * NUTCH-1496 ParserJob logs skipped urls with level info (Nathan Gass via lewismc) Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java?rev=1408465&r1=1408464&r2=1408465&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java Mon Nov 12 21:20:55 2012 @@ -93,8 +93,8 @@ public class TableUtil { pathBegin = reversedUrl.length(); String sub = reversedUrl.substring(0, pathBegin); - String[] splits = StringUtils.split(sub, ':'); // {<reversed host>, <port>, <protocol>} - + String[] splits = StringUtils.splitPreserveAllTokens(sub, ':'); // {<reversed host>, <port>, <protocol>} + buf.append(splits[1]); // add protocol buf.append("://"); reverseAppendSplits(splits[0], buf); // splits[0] is reversed Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/TestTableUtil.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/TestTableUtil.java?rev=1408465&r1=1408464&r2=1408465&view=diff 
============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/util/TestTableUtil.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/util/TestTableUtil.java Mon Nov 12 21:20:55 2012 @@ -28,6 +28,7 @@ public class TestTableUtil extends TestC String urlString5 = "http://foo.com?a=/a/b&c=0"; String urlString5rev = "http://foo.com/?a=/a/b&c=0"; String urlString6 = "http://foo.com"; + String urlString7 = "file:///var/www/index.html"; String reversedUrlString1 = "com.foo:http/"; String reversedUrlString2 = "com.foo:http:8900/"; @@ -35,6 +36,7 @@ public class TestTableUtil extends TestC String reversedUrlString4 = "com.baz.bar:http:8983/to/index.html?a=b&c=d"; String reversedUrlString5 = "com.foo:http/?a=/a/b&c=0"; String reversedUrlString6 = "com.foo:http"; + String reversedUrlString7 = ":file/var/www/index.html"; public void testReverseUrl() throws Exception { assertReverse(urlString1, reversedUrlString1); @@ -44,6 +46,7 @@ public class TestTableUtil extends TestC assertReverse(urlString5, reversedUrlString5); assertReverse(urlString5, reversedUrlString5); assertReverse(urlString6, reversedUrlString6); + assertReverse(urlString7, reversedUrlString7); } public void testUnreverseUrl() throws Exception { @@ -53,6 +56,7 @@ public class TestTableUtil extends TestC assertUnreverse(reversedUrlString4, urlString4); assertUnreverse(reversedUrlString5, urlString5rev); assertUnreverse(reversedUrlString6, urlString6); + assertUnreverse(reversedUrlString7, urlString7); } private static void assertReverse(String url, String expectedReversedUrl) throws Exception {