Author: ab
Date: Mon Mar 2 09:11:03 2009
New Revision: 749247

URL: http://svn.apache.org/viewvc?rev=749247&view=rev
Log:
NUTCH-419 Unavailable robots.txt kills fetch.

Modified:
    lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java

Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java?rev=749247&r1=749246&r2=749247&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java Mon Mar 2 09:11:03 2009
@@ -176,6 +176,10 @@
       params.setDefaultMaxConnectionsPerHost(maxThreadsTotal);
     }
 
+    // executeMethod(HttpMethod) seems to ignore the connection timeout on the connection manager.
+    // set it explicitly on the HttpClient.
+    client.getParams().setConnectionManagerTimeout(timeout);
+
     HostConfiguration hostConf = client.getHostConfiguration();
     ArrayList headers = new ArrayList();
     // Set the User Agent in the header