Author: dogacan Date: Sun Jun 7 17:12:18 2009 New Revision: 782412 URL: http://svn.apache.org/viewvc?rev=782412&view=rev Log: NUTCH-735 - crawl-tool.xml must be read before nutch-site.xml when invoked using crawl command. Patch by Susam Pal.
Modified: lucene/nutch/trunk/CHANGES.txt lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java Modified: lucene/nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=782412&r1=782411&r2=782412&view=diff ============================================================================== --- lucene/nutch/trunk/CHANGES.txt (original) +++ lucene/nutch/trunk/CHANGES.txt Sun Jun 7 17:12:18 2009 @@ -1,5 +1,10 @@ Nutch Change Log +Unreleased Changes + + 1. NUTCH-735 - crawl-tool.xml must be read before nutch-site.xml when + invoked using crawl command (Susam Pal via dogacan) + Release 1.0 - 2009-03-23 1. NUTCH-474 - Fetcher2 crawlDelay and blocking fix (Dogacan Guney via ab) Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java?rev=782412&r1=782411&r2=782412&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java Sun Jun 7 17:12:18 2009 @@ -54,8 +54,7 @@ return; } - Configuration conf = NutchConfiguration.create(); - conf.addResource("crawl-tool.xml"); + Configuration conf = NutchConfiguration.createCrawlConfiguration(); JobConf job = new NutchJob(conf); Path rootUrlDir = null; Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java?rev=782412&r1=782411&r2=782412&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java Sun Jun 7 17:12:18 2009 @@ 
-47,7 +47,17 @@ /** Create a {@link Configuration} for Nutch. */ public static Configuration create() { Configuration conf = new Configuration(); - addNutchResources(conf); + addNutchResources(conf, false); + return conf; + } + + /** + * Create a {@link Configuration for Nutch invoked with the command + * line crawl command, i.e. bin/nutch crawl ... + */ + public static Configuration createCrawlConfiguration() { + Configuration conf = new Configuration(); + addNutchResources(conf, true); return conf; } @@ -79,12 +89,23 @@ return conf; } - /** Add the standard Nutch resources to {@link Configuration}. */ - public static Configuration addNutchResources(Configuration conf) { + /** + * Add the standard Nutch resources to {@link Configuration}. + * + * @param conf Configuration object to which + * configuration is to be added. + * @param crawlConfiguration Whether configuration for command line + * crawl using 'bin/nutch crawl' command + * should be added. + */ + private static Configuration addNutchResources(Configuration conf, + boolean crawlConfiguration) { conf.addResource("nutch-default.xml"); + if (crawlConfiguration) { + conf.addResource("crawl-tool.xml"); + } conf.addResource("nutch-site.xml"); return conf; } - }