Author: dogacan
Date: Tue Aug 25 05:45:53 2009
New Revision: 807485

URL: http://svn.apache.org/viewvc?rev=807485&view=rev
Log:
Fetcher2 slow. Patch contributed by Julien Nioche.

Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/conf/nutch-default.xml
    lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=807485&r1=807484&r2=807485&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Tue Aug 25 05:45:53 2009
@@ -5,6 +5,8 @@
  1. NUTCH-735 - crawl-tool.xml must be read before nutch-site.xml when
     invoked using crawl command (Susam Pal via dogacan)
 
+ 2. NUTCH-721 - Fetcher2 Slow (Julien Nioche via dogacan)
+
 Release 1.0 - 2009-03-23
 
  1. NUTCH-474 - Fetcher2 crawlDelay and blocking fix (Dogacan Guney via ab)

Modified: lucene/nutch/trunk/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/conf/nutch-default.xml?rev=807485&r1=807484&r2=807485&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/nutch-default.xml (original)
+++ lucene/nutch/trunk/conf/nutch-default.xml Tue Aug 25 05:45:53 2009
@@ -575,7 +575,7 @@
 
 <property>
   <name>fetcher.threads.per.host.by.ip</name>
-  <value>true</value>
+  <value>false</value>
   <description>If true, then fetcher will count threads by IP address,
   to which the URL's host name resolves. If false, only host name will be used.
   NOTE: this should be set to the same value as

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=807485&r1=807484&r2=807485&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java Tue Aug 25 05:45:53 2009
@@ -460,8 +460,7 @@
       this.protocolFactory = new ProtocolFactory(conf);
       this.normalizers = new URLNormalizers(conf, URLNormalizers.SCOPE_FETCHER);
       this.maxCrawlDelay = conf.getInt("fetcher.max.crawl.delay", 30) * 1000;
-      // backward-compatible default setting
-      this.byIP = conf.getBoolean("fetcher.threads.per.host.by.ip", true);
+      this.byIP = conf.getBoolean("fetcher.threads.per.host.by.ip", false);
       this.maxRedirect = conf.getInt("http.redirect.max", 3);
       this.ignoreExternalLinks = conf.getBoolean("db.ignore.external.links", false);
 