Author: lewismc
Date: Fri Aug 23 19:47:16 2013
New Revision: 1517003
URL: http://svn.apache.org/r1517003
Log:
NUTCH-1631 Display Document Count Added to Solr Server
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/java/org/apache/nutch/indexer/IndexerJob.java
nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java
nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java
Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1517003&r1=1517002&r2=1517003&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Fri Aug 23 19:47:16 2013
@@ -2,6 +2,8 @@ Nutch Change Log
Current Development
+* NUTCH-1631 Display Document Count Added To Solr Server (Furkan KAMACI via lewismc)
+
* NUTCH-1629 Injector skips empty lines in seed files (kaveh minooie via jnioche)
* NUTCH-1624 Typo in WebTableReader line 486 (kaveh minooie via lewismc)
Modified: nutch/branches/2.x/src/java/org/apache/nutch/indexer/IndexerJob.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/indexer/IndexerJob.java?rev=1517003&r1=1517002&r2=1517003&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/indexer/IndexerJob.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/indexer/IndexerJob.java Fri Aug 23 19:47:16 2013
@@ -25,6 +25,9 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.mapred.Counters.Counter;
+import org.apache.hadoop.mapreduce.CounterGroup;
+import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.nutch.crawl.GeneratorJob;
@@ -109,6 +112,7 @@ public abstract class IndexerJob extends
store.put(key, page);
}
context.write(key, doc);
+ context.getCounter("IndexerJob", "DocumentCount").increment(1);
}
}
Modified: nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java?rev=1517003&r1=1517002&r2=1517003&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java Fri Aug 23 19:47:16 2013
@@ -60,7 +60,8 @@ public class SolrIndexerJob extends Inde
if (getConf().getBoolean(SolrConstants.COMMIT_INDEX, true)) {
solr.commit();
}
- LOG.info("SolrIndexerJob: done.");
+ LOG.info("SolrIndexerJob: done. Total "+ SolrWriter.documentCount +
+ (SolrWriter.documentCount > 1 ? " documents are " : " document is ") +
"added.");
}
public int run(String[] args) throws Exception {
Modified: nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java?rev=1517003&r1=1517002&r2=1517003&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java Fri Aug 23 19:47:16 2013
@@ -41,7 +41,8 @@ public class SolrWriter implements Nutch
private final List<SolrInputDocument> inputDocs =
new ArrayList<SolrInputDocument>();
- private int commitSize;
+ private int commitSize;
+ protected static long documentCount = 0;
@Override
public void open(TaskAttemptContext job)
@@ -72,6 +73,7 @@ public class SolrWriter implements Nutch
}
inputDoc.setDocumentBoost(doc.getScore());
inputDocs.add(inputDoc);
+ documentCount++;
if (inputDocs.size() >= commitSize) {
try {
LOG.info("Adding " + Integer.toString(inputDocs.size()) + "
documents");