Author: jnioche
Date: Fri Aug 23 08:40:19 2013
New Revision: 1516746
URL: http://svn.apache.org/r1516746
Log:
NUTCH-1629 Injector skips empty lines
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1516746&r1=1516745&r2=1516746&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Aug 23 08:40:19 2013
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Development Trunk
+* NUTCH-1629 Injector skips empty lines in seed files (kaveh minooie via jnioche)
+
* NUTCH-911 protocol-file to return proper protocol status (Peter Lundberg via snagel)
* NUTCH-806 Merge CrawlDBScanner with CrawlDBReader (jnioche)
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java?rev=1516746&r1=1516745&r2=1516746&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java Fri Aug 23 08:40:19 2013
@@ -84,9 +84,9 @@ public class Injector extends Configured
public void map(WritableComparable<?> key, Text value,
OutputCollector<Text, CrawlDatum> output, Reporter reporter)
throws IOException {
- String url = value.toString(); // value is line of text
+ String url = value.toString().trim(); // value is line of text
- if (url != null && url.trim().startsWith("#")) {
+ if (url != null && ( url.length() == 0 || url.startsWith("#") ) ) {
/* Ignore line that start with # */
return;
}