Author: lewismc
Date: Tue Mar 18 22:07:25 2014
New Revision: 1579072

URL: http://svn.apache.org/r1579072
Log:
NUTCH-1738 Expose number of URLs generated per batch in GeneratorJob

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java
    nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorReducer.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1579072&r1=1579071&r2=1579072&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Tue Mar 18 22:07:25 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Current Development
 
+* NUTCH-1738 Expose number of URLs generated per batch in GeneratorJob (Talat UYARER via lewismc)
+
 * NUTCH-1671 indexchecker to add digest field (snagel, lufeng)
 
 * NUTCH-1645 Junit Test Case for Adaptive Fetch Schedule class (Yasin Kılınç, lufeng, Sertac TURKEL via snagel)

Modified: nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java?rev=1579072&r1=1579071&r2=1579072&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorJob.java Tue Mar 18 22:07:25 2014
@@ -228,7 +228,7 @@ public class GeneratorJob extends NutchT
     String batchId =  getConf().get(BATCH_ID);
     long finish = System.currentTimeMillis();
     LOG.info("GeneratorJob: finished at " + sdf.format(finish) + ", time elapsed: " + TimingUtil.elapsedTime(start, finish));
-    LOG.info("GeneratorJob: generated batch id: " + batchId);
+    LOG.info("GeneratorJob: generated batch id: " + batchId + " containing " + GeneratorReducer.count + " URLs");
     return batchId;
   }
 

Modified: nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorReducer.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorReducer.java?rev=1579072&r1=1579071&r2=1579072&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorReducer.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/crawl/GeneratorReducer.java Tue Mar 18 22:07:25 2014
@@ -42,7 +42,7 @@ extends GoraReducer<SelectorEntry, WebPa
 
   private long limit;
   private long maxCount;
-  private long count = 0;
+  protected static long count = 0;
   private boolean byDomain = false;
   private Map<String, Integer> hostCountMap = new HashMap<String, Integer>();
   private Utf8 batchId;


Reply via email to