Author: jnioche
Date: Thu May 29 10:20:42 2014
New Revision: 1598241
URL: http://svn.apache.org/r1598241
Log:
NUTCH-1758 IndexChecker to send document to IndexWriters (jnioche)
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1598241&r1=1598240&r2=1598241&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu May 29 10:20:42 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development
+* NUTCH-1758 IndexChecker to send document to IndexWriters (jnioche)
+
* NUTCH-1786 CrawlDb should follow db.url.normalizers and db.url.filters (Diaa
via markus)
* NUTCH-1757 ParserChecker to take custom metadata as input (jnioche)
Modified:
nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java?rev=1598241&r1=1598240&r2=1598241&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java Thu May 29 10:20:42 2014
@@ -22,6 +22,7 @@ import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDatum;
@@ -84,6 +85,8 @@ public class IndexingFiltersChecker exte
ProtocolOutput output = protocol.getProtocolOutput(new Text(url), datum);
+ IndexWriters writers = new IndexWriters(getConf());
+
if (!output.getStatus().isSuccess()) {
System.out.println("Fetch failed with protocol status: " +
output.getStatus());
return 0;
@@ -150,6 +153,13 @@ public class IndexingFiltersChecker exte
}
}
}
+
+ if (conf.getBoolean("doIndex", false) && doc!=null){
+ writers.open(new JobConf(getConf()), "IndexingFilterChecker");
+ writers.write(doc);
+ writers.close();
+ }
+
return 0;
}