Author: tejasp
Date: Thu Jan 2 19:40:18 2014
New Revision: 1554883
URL: http://svn.apache.org/r1554883
Log:
NUTCH-1670 set same crawldb directory in mergedb parameter
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1554883&r1=1554882&r2=1554883&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Jan 2 19:40:18 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Development Trunk
+* NUTCH-1670 set same crawldb directory in mergedb parameter (lufeng via
tejasp)
+
* NUTCH-1080 Type safe members, arguments for better readability (tejasp)
* NUTCH-1360 Support the storing of IP address connected to when web crawling
(lewismc, ferdy and Yasin Kılınç)
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java?rev=1554883&r1=1554882&r2=1554883&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java Thu Jan 2
19:40:18 2014
@@ -125,6 +125,8 @@ public class CrawlDbMerger extends Confi
}
JobClient.runJob(job);
FileSystem fs = FileSystem.get(getConf());
+ if(fs.exists(output))
+ fs.delete(output,true);
fs.mkdirs(output);
fs.rename(FileOutputFormat.getOutputPath(job), new Path(output,
CrawlDb.CURRENT_NAME));
long end = System.currentTimeMillis();