Author: ab
Date: Tue Oct 31 13:32:51 2006
New Revision: 469660

URL: http://svn.apache.org/viewvc?view=rev&rev=469660
Log:
When jobtracker is 'local' generate only one partition. This should fix
NUTCH-361 and NUTCH-136.

Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?view=diff&rev=469660&r1=469659&r2=469660
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Tue Oct 31 13:32:51 2006
@@ -307,18 +307,20 @@
     // map to inverted subset due for fetch, sort by link count
     JobConf job = new NutchJob(getConf());
     job.setJobName("generate: select " + segment);
-    
+
     if (numLists == -1) {                         // for politeness make
       numLists = job.getNumMapTasks();            // a partition per fetch task
     }
-
+    if ("local".equals(job.get("mapred.job.tracker")) && numLists != 1) {
+      // override
+      LOG.info("Generator: jobtracker is 'local', generating exactly one partition.");
+      numLists = 1;
+    }
     job.setLong("crawl.gen.curTime", curTime);
     job.setLong("crawl.topN", topN);
 
     job.setInputPath(new Path(dbDir, CrawlDatum.DB_DIR_NAME));
     job.setInputFormat(SequenceFileInputFormat.class);
-    job.setInputKeyClass(Text.class);
-    job.setInputValueClass(CrawlDatum.class);
 
     job.setMapperClass(Selector.class);
     job.setPartitionerClass(Selector.class);
@@ -342,8 +344,6 @@
 
     job.setInputPath(tempDir);
     job.setInputFormat(SequenceFileInputFormat.class);
-    job.setInputKeyClass(FloatWritable.class);
-    job.setInputValueClass(SelectorEntry.class);
 
     job.setMapperClass(SelectorInverseMapper.class);
     job.setPartitionerClass(PartitionUrlByHost.class);


Reply via email to