Author: cutting
Date: Wed Jul 12 01:16:37 2006
New Revision: 421185

URL: http://svn.apache.org/viewvc?rev=421185&view=rev
Log:
Patch a bug introduced by Hadoop 0.4.0, which requires specified input directories to exist.

Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java?rev=421185&r1=421184&r2=421185&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java Wed Jul 12 01:16:37 2006
@@ -65,7 +65,8 @@
     if (LOG.isInfoEnabled()) { LOG.info("CrawlDb update: done"); }
   }
 
-  public static JobConf createJob(Configuration config, Path crawlDb) {
+  public static JobConf createJob(Configuration config, Path crawlDb)
+    throws IOException {
     Path newCrawlDb =
       new Path(crawlDb,
                Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
@@ -73,7 +74,11 @@
     JobConf job = new NutchJob(config);
     job.setJobName("crawldb " + crawlDb);
 
-    job.addInputPath(new Path(crawlDb, CrawlDatum.DB_DIR_NAME));
+
+    Path current = new Path(crawlDb, CrawlDatum.DB_DIR_NAME);
+    if (FileSystem.get(job).exists(current)) {
+      job.addInputPath(current);
+    }
     job.setInputFormat(SequenceFileInputFormat.class);
     job.setInputKeyClass(UTF8.class);
     job.setInputValueClass(CrawlDatum.class);