Author: jnioche
Date: Thu Jul 28 13:55:08 2011
New Revision: 1151852

URL: http://svn.apache.org/viewvc?rev=1151852&view=rev
Log:
NUTCH-1071 Crawldb update to total counts per status

Modified:
    nutch/branches/branch-1.4/CHANGES.txt
    nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDbReducer.java

Modified: nutch/branches/branch-1.4/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1151852&r1=1151851&r2=1151852&view=diff
==============================================================================
--- nutch/branches/branch-1.4/CHANGES.txt (original)
+++ nutch/branches/branch-1.4/CHANGES.txt Thu Jul 28 13:55:08 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 1.4 - Current development
 
+* NUTCH-1071 Crawldb update displays total number of URLs per status (jnioche)
+
 * NUTCH-1045 MimeUtil to rely on default config provided by Tika (jnioche)
 
 * NUTCH-1057 Fetcher thread time out configurable (markus)

Modified: nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=1151852&r1=1151851&r2=1151852&view=diff
==============================================================================
--- nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original)
+++ nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Thu Jul 28 13:55:08 2011
@@ -151,6 +151,7 @@ public class CrawlDbReducer implements R
     if (!fetchSet) {
       if (oldSet) {// at this point at least "old" should be present
         output.collect(key, old);
+        reporter.getCounter("CrawlDB status", CrawlDatum.getStatusName(result.getStatus())).increment(1);
       } else {
         LOG.warn("Missing fetch and old value, signature=" + signature);
       }
@@ -291,6 +292,7 @@ public class CrawlDbReducer implements R
     // remove generation time, if any
     result.getMetaData().remove(Nutch.WRITABLE_GENERATE_TIME_KEY);
     output.collect(key, result);
+    reporter.getCounter("CrawlDB status", CrawlDatum.getStatusName(result.getStatus())).increment(1);
   }
   
 }


Reply via email to