Author: ab
Date: Tue Oct 31 13:32:51 2006
New Revision: 469660

URL: http://svn.apache.org/viewvc?view=rev&rev=469660
Log:
When jobtracker is 'local' generate only one partition. This should fix NUTCH-361 and NUTCH-136.

Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?view=diff&rev=469660&r1=469659&r2=469660
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Tue Oct 31 13:32:51 2006
@@ -307,18 +307,20 @@
     // map to inverted subset due for fetch, sort by link count
     JobConf job = new NutchJob(getConf());
     job.setJobName("generate: select " + segment);
-
+
     if (numLists == -1) {                         // for politeness make
       numLists = job.getNumMapTasks();            // a partition per fetch task
     }
-
+    if ("local".equals(job.get("mapred.job.tracker")) && numLists != 1) {
+      // override
+      LOG.info("Generator: jobtracker is 'local', generating exactly one partition.");
+      numLists = 1;
+    }
     job.setLong("crawl.gen.curTime", curTime);
     job.setLong("crawl.topN", topN);
 
     job.setInputPath(new Path(dbDir, CrawlDatum.DB_DIR_NAME));
     job.setInputFormat(SequenceFileInputFormat.class);
-    job.setInputKeyClass(Text.class);
-    job.setInputValueClass(CrawlDatum.class);
 
     job.setMapperClass(Selector.class);
     job.setPartitionerClass(Selector.class);
@@ -342,8 +344,6 @@
 
     job.setInputPath(tempDir);
     job.setInputFormat(SequenceFileInputFormat.class);
-    job.setInputKeyClass(FloatWritable.class);
-    job.setInputValueClass(SelectorEntry.class);
 
     job.setMapperClass(SelectorInverseMapper.class);
     job.setPartitionerClass(PartitionUrlByHost.class);