Author: jnioche
Date: Tue Jan  5 10:14:49 2010
New Revision: 895972

URL: http://svn.apache.org/viewvc?rev=895972&view=rev
Log:
NUTCH-658 : Add Counter for # of doc fetched in Reporter

Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java
    lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolStatus.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=895972&r1=895971&r2=895972&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Tue Jan  5 10:14:49 2010
@@ -2,6 +2,8 @@
 
 Unreleased Changes
 
+* NUTCH-658 Use counters to report fetching and parsing status (jnioche)
+
 * NUTCH-777 Upgrading to jetty6 broke unit tests (mattmann)
 
 * NUTCH-767 Update Tika to v0.5 for the MimeType detection (Julien Nioche via ab)

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=895972&r1=895971&r2=895972&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java Tue Jan  5 10:14:49 2010
@@ -607,6 +607,7 @@
                   LOG.debug("Denied by robots.txt: " + fit.url);
                 }
                 output(fit.url, fit.datum, null, ProtocolStatus.STATUS_ROBOTS_DENIED, CrawlDatum.STATUS_FETCH_GONE);
+                reporter.incrCounter("FetcherStatus", "robots_denied", 1);
                 continue;
               }
               if (rules.getCrawlDelay() > 0) {
@@ -615,6 +616,7 @@
                   fetchQueues.finishFetchItem(fit, true);
                   LOG.debug("Crawl-Delay for " + fit.url + " too long (" + rules.getCrawlDelay() + "), skipping");
                   output(fit.url, fit.datum, null, ProtocolStatus.STATUS_ROBOTS_DENIED, CrawlDatum.STATUS_FETCH_GONE);
+                  reporter.incrCounter("FetcherStatus", "robots_denied_maxcrawldelay", 1);
                   continue;
                 } else {
                   FetchItemQueue fiq = fetchQueues.getFetchItemQueue(fit.queueID);
@@ -630,6 +632,8 @@
 
               String urlString = fit.url.toString();
 
+              reporter.incrCounter("FetcherStatus", status.getName(), 1);
+              
               switch(status.getCode()) {
                 
               case ProtocolStatus.WOULDBLOCK:
@@ -664,6 +668,7 @@
                     } else {
                       // stop redirecting
                       redirecting = false;
+                      reporter.incrCounter("FetcherStatus", "FetchItem.notCreated.redirect", 1);
                     }
                   }
                 }
@@ -701,6 +706,7 @@
                   } else {
                     // stop redirecting
                     redirecting = false;
+                    reporter.incrCounter("FetcherStatus", "FetchItem.notCreated.redirect", 1);
                   }
                 } else {
                   // stop redirecting
@@ -926,6 +932,7 @@
       if (parseResult != null && !parseResult.isEmpty()) {
         Parse p = parseResult.get(content.getUrl());
         if (p != null) {
+          reporter.incrCounter("ParserStatus", ParseStatus.majorCodes[p.getData().getStatus().getMajorCode()], 1);
           return p.getData().getStatus();
         }
       }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java?rev=895972&r1=895971&r2=895972&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java Tue Jan  5 10:14:49 2010
@@ -93,6 +93,8 @@
       Parse parse = entry.getValue();
       ParseStatus parseStatus = parse.getData().getStatus();
       
+      reporter.incrCounter("ParserStatus", ParseStatus.majorCodes[parseStatus.getMajorCode()], 1);
+      
       if (!parseStatus.isSuccess()) {
         LOG.warn("Error parsing: " + key + ": " + parseStatus);
         parse = parseStatus.getEmptyParse(getConf());

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolStatus.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolStatus.java?rev=895972&r1=895971&r2=895972&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolStatus.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolStatus.java Tue Jan  5 10:14:49 2010
@@ -191,6 +191,10 @@
   public int getCode() {
     return code;
   }
+
+  public String getName() {
+    return codeToName.get(this.code);
+  }
   
   public void setCode(int code) {
     this.code = code;


Reply via email to