Author: cutting
Date: Wed Jul 12 01:16:37 2006
New Revision: 421185

URL: http://svn.apache.org/viewvc?rev=421185&view=rev
Log:
Fix a bug introduced by the upgrade to Hadoop 0.4.0, which requires that
specified input directories exist: the current crawl db directory is now
added as a job input only if it exists.

Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java?rev=421185&r1=421184&r2=421185&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java Wed Jul 12 01:16:37 2006
@@ -65,7 +65,8 @@
     if (LOG.isInfoEnabled()) { LOG.info("CrawlDb update: done"); }
   }
 
-  public static JobConf createJob(Configuration config, Path crawlDb) {
+  public static JobConf createJob(Configuration config, Path crawlDb)
+    throws IOException {
     Path newCrawlDb =
       new Path(crawlDb,
                Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
@@ -73,7 +74,11 @@
     JobConf job = new NutchJob(config);
     job.setJobName("crawldb " + crawlDb);
 
-    job.addInputPath(new Path(crawlDb, CrawlDatum.DB_DIR_NAME));
+
+    Path current = new Path(crawlDb, CrawlDatum.DB_DIR_NAME);
+    if (FileSystem.get(job).exists(current)) {
+      job.addInputPath(current);
+    }
     job.setInputFormat(SequenceFileInputFormat.class);
     job.setInputKeyClass(UTF8.class);
     job.setInputValueClass(CrawlDatum.class);


Reply via email to