Author: jnioche
Date: Thu Jul 28 13:55:08 2011
New Revision: 1151852
URL: http://svn.apache.org/viewvc?rev=1151852&view=rev
Log:
NUTCH-1071 CrawlDb update reports the total number of URLs per status
Modified:
nutch/branches/branch-1.4/CHANGES.txt
nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
Modified: nutch/branches/branch-1.4/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1151852&r1=1151851&r2=1151852&view=diff
==============================================================================
--- nutch/branches/branch-1.4/CHANGES.txt (original)
+++ nutch/branches/branch-1.4/CHANGES.txt Thu Jul 28 13:55:08 2011
@@ -2,6 +2,8 @@ Nutch Change Log
Release 1.4 - Current development
+* NUTCH-1071 Crawldb update displays total number of URLs per status (jnioche)
+
* NUTCH-1045 MimeUtil to rely on default config provided by Tika (jnioche)
* NUTCH-1057 Fetcher thread time out configurable (markus)
Modified:
nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL:
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=1151852&r1=1151851&r2=1151852&view=diff
==============================================================================
---
nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
(original)
+++
nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
Thu Jul 28 13:55:08 2011
@@ -151,6 +151,7 @@ public class CrawlDbReducer implements R
if (!fetchSet) {
if (oldSet) {// at this point at least "old" should be present
output.collect(key, old);
+ reporter.getCounter("CrawlDB status",
CrawlDatum.getStatusName(result.getStatus())).increment(1);
} else {
LOG.warn("Missing fetch and old value, signature=" + signature);
}
@@ -291,6 +292,7 @@ public class CrawlDbReducer implements R
// remove generation time, if any
result.getMetaData().remove(Nutch.WRITABLE_GENERATE_TIME_KEY);
output.collect(key, result);
+ reporter.getCounter("CrawlDB status",
CrawlDatum.getStatusName(result.getStatus())).increment(1);
}
}