Author: jnioche
Date: Mon May 12 12:58:41 2014
New Revision: 1593954
URL: http://svn.apache.org/r1593954
Log:
NUTCH-1613 Timeouts in protocol-httpclient when crawling same host with >2
threads
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
Modified: nutch/branches/2.x/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1593954&r1=1593953&r2=1593954&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Mon May 12 12:58:41 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Current Development
+* NUTCH-1613 Timeouts in protocol-httpclient when crawling same host with >2 threads (brian44 via jnioche)
+
* NUTCH-1182 fetcher to log hung threads (snagel)
* NUTCH-1618 Turn speculative execution off for Fetching (talat)
Modified:
nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java?rev=1593954&r1=1593953&r2=1593954&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java Mon May 12 12:58:41 2014
@@ -190,6 +190,10 @@ public class Http extends HttpBase {
params.setSendBufferSize(BUFFER_SIZE);
params.setReceiveBufferSize(BUFFER_SIZE);
params.setMaxTotalConnections(maxThreadsTotal);
+
+ //Also set max connections per host to maxThreadsTotal since all threads
+ //might be used to fetch from the same host - otherwise timeout errors can occur
+ params.setDefaultMaxConnectionsPerHost(maxThreadsTotal);
// executeMethod(HttpMethod) seems to ignore the connection timeout on
// the connection manager.