Author: jnioche
Date: Fri Aug 23 08:52:38 2013
New Revision: 1516752

URL: http://svn.apache.org/r1516752
Log:
NUTCH-1629 Injector skips empty lines

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/java/org/apache/nutch/crawl/InjectorJob.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1516752&r1=1516751&r2=1516752&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Fri Aug 23 08:52:38 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Current Development
 
+* NUTCH-1629 Injector skips empty lines in seed files (kaveh minooie via jnioche)
+
 * NUTCH-1624 Typo in WebTableReader line 486 (kaveh minooie via lewismc)
 
 * NUTCH-1294 IndexClean job with solr implementation. (Dan Rosher, lewismc, Claudiu Chis via feng)

Modified: nutch/branches/2.x/src/java/org/apache/nutch/crawl/InjectorJob.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/crawl/InjectorJob.java?rev=1516752&r1=1516751&r2=1516752&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/crawl/InjectorJob.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/crawl/InjectorJob.java Fri Aug 23 08:52:38 2013
@@ -110,9 +110,9 @@ public class InjectorJob extends NutchTo
 
     protected void map(LongWritable key, Text value, Context context)
         throws IOException, InterruptedException {
-      String url = value.toString(); // value is line of text
+      String url = value.toString().trim(); // value is line of text
       
-      if (url != null && url.trim().startsWith("#")) {
+      if (url != null && ( url.length() == 0 || url.startsWith("#") ) ) {
         /* Ignore line that start with # */
         return;
       }


Reply via email to