Author: ferdy
Date: Fri May 11 09:49:43 2012
New Revision: 1337091
URL: http://svn.apache.org/viewvc?rev=1337091&view=rev
Log:
NUTCH-1362 Fix error handling of urls with empty fields
Modified:
nutch/branches/nutchgora/CHANGES.txt
nutch/branches/nutchgora/src/java/org/apache/nutch/util/TableUtil.java
Modified: nutch/branches/nutchgora/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1337091&r1=1337090&r2=1337091&view=diff
==============================================================================
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Fri May 11 09:49:43 2012
@@ -1,6 +1,8 @@
Nutch Change Log
Release nutchgora - Current Development
+* NUTCH-1362 Fix error handling of urls with empty fields (lewis, ferdy)
+
* NUTCH-1026 Strip UTF-8 non-character codepoints (markus, ferdy)
* NUTCH-1358 Do not accept bogus arguments (ferdy)
Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/util/TableUtil.java
URL:
http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/util/TableUtil.java?rev=1337091&r1=1337090&r2=1337091&view=diff
==============================================================================
--- nutch/branches/nutchgora/src/java/org/apache/nutch/util/TableUtil.java (original)
+++ nutch/branches/nutchgora/src/java/org/apache/nutch/util/TableUtil.java Fri May 11 09:49:43 2012
@@ -21,6 +21,7 @@ import java.net.URL;
import java.nio.ByteBuffer;
import org.apache.avro.util.Utf8;
+import org.apache.commons.lang.StringUtils;
public class TableUtil {
@@ -63,7 +64,7 @@ public class TableUtil {
StringBuilder buf = new StringBuilder();
/* reverse host */
- reverseAppendSplits(host.split("\\."), buf);
+ reverseAppendSplits(host, buf);
/* add protocol */
buf.append(':');
@@ -92,11 +93,11 @@ public class TableUtil {
pathBegin = reversedUrl.length();
String sub = reversedUrl.substring(0, pathBegin);
- String[] splits = sub.split(":"); // {<reversed host>, <port>, <protocol>}
+ String[] splits = StringUtils.split(sub, ':'); // {<reversed host>, <port>, <protocol>}
buf.append(splits[1]); // add protocol
buf.append("://");
- reverseAppendSplits(splits[0].split("\\."), buf); // splits[0] is reversed
+ reverseAppendSplits(splits[0], buf); // splits[0] is reversed
// host
if (splits.length == 3) { // has a port
buf.append(':');
@@ -118,17 +119,22 @@ public class TableUtil {
return reversedUrl.substring(0, reversedUrl.indexOf(':'));
}
- private static void reverseAppendSplits(String[] splits, StringBuilder buf) {
- for (int i = splits.length - 1; i > 0; i--) {
- buf.append(splits[i]);
- buf.append('.');
+ private static void reverseAppendSplits(String string, StringBuilder buf) {
+ String[] splits = StringUtils.split(string,'.');
+ if (splits.length > 0) {
+ for (int i = splits.length - 1; i > 0; i--) {
+ buf.append(splits[i]);
+ buf.append('.');
+ }
+ buf.append(splits[0]);
+ } else {
+ buf.append(string);
}
- buf.append(splits[0]);
}
public static String reverseHost(String hostName) {
StringBuilder buf = new StringBuilder();
- reverseAppendSplits(hostName.split("\\."), buf);
+ reverseAppendSplits(hostName, buf);
return buf.toString();
}