Author: jnioche
Date: Fri Aug 23 08:40:19 2013
New Revision: 1516746

URL: http://svn.apache.org/r1516746
Log:
NUTCH-1629 Injector skips empty lines

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1516746&r1=1516745&r2=1516746&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Aug 23 08:40:19 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Development Trunk
 
+* NUTCH-1629 Injector skips empty lines in seed files (kaveh minooie via jnioche)
+
 * NUTCH-911 protocol-file to return proper protocol status (Peter Lundberg via snagel)
 
 * NUTCH-806 Merge CrawlDBScanner with CrawlDBReader (jnioche)

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java?rev=1516746&r1=1516745&r2=1516746&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java Fri Aug 23 08:40:19 2013
@@ -84,9 +84,9 @@ public class Injector extends Configured
     public void map(WritableComparable<?> key, Text value,
                     OutputCollector<Text, CrawlDatum> output, Reporter reporter)
       throws IOException {
-      String url = value.toString();              // value is line of text
+      String url = value.toString().trim();              // value is line of text
 
-      if (url != null && url.trim().startsWith("#")) {
+      if (url != null && ( url.length() == 0 || url.startsWith("#") ) ) {
           /* Ignore line that start with # */
           return;
       }


Reply via email to