Author: markus
Date: Tue Jul 19 15:40:34 2011
New Revision: 1148406
URL: http://svn.apache.org/viewvc?rev=1148406&view=rev
Log:
NUTCH-1057 Fetcher thread time out configurable
Modified:
nutch/branches/branch-1.4/CHANGES.txt
nutch/branches/branch-1.4/conf/nutch-default.xml
nutch/branches/branch-1.4/src/java/org/apache/nutch/fetcher/Fetcher.java
Modified: nutch/branches/branch-1.4/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1148406&r1=1148405&r2=1148406&view=diff
==============================================================================
--- nutch/branches/branch-1.4/CHANGES.txt (original)
+++ nutch/branches/branch-1.4/CHANGES.txt Tue Jul 19 15:40:34 2011
@@ -2,6 +2,8 @@ Nutch Change Log
Release 1.4 - Current development
+* NUTCH-1057 Fetcher thread time out configurable (markus)
+
* NUTCH-1037 Option to deduplicate anchors prior to indexing (markus)
* NUTCH-1050 Add segmentDir option to WebGraph (markus)
Modified: nutch/branches/branch-1.4/conf/nutch-default.xml
URL:
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/conf/nutch-default.xml?rev=1148406&r1=1148405&r2=1148406&view=diff
==============================================================================
--- nutch/branches/branch-1.4/conf/nutch-default.xml (original)
+++ nutch/branches/branch-1.4/conf/nutch-default.xml Tue Jul 19 15:40:34 2011
@@ -686,6 +686,15 @@
</description>
</property>
+<property>
+ <name>fetcher.threads.timeout.divisor</name>
+ <value>2</value>
+ <description>The thread time-out divisor to use. By default threads have a time-out
+ value of mapred.task.timeout / 2. Increase this setting if the fetcher waits too
+ long before killing hung threads.
+ </description>
+</property>
+
<!-- moreindexingfilter plugin properties -->
<property>
Modified:
nutch/branches/branch-1.4/src/java/org/apache/nutch/fetcher/Fetcher.java
URL:
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=1148406&r1=1148405&r2=1148406&view=diff
==============================================================================
--- nutch/branches/branch-1.4/src/java/org/apache/nutch/fetcher/Fetcher.java
(original)
+++ nutch/branches/branch-1.4/src/java/org/apache/nutch/fetcher/Fetcher.java
Tue Jul 19 15:40:34 2011
@@ -1010,6 +1010,9 @@ public class Fetcher extends Configured
int threadCount = getConf().getInt("fetcher.threads.fetch", 10);
if (LOG.isInfoEnabled()) { LOG.info("Fetcher: threads: " + threadCount); }
+ int timeoutDivisor = getConf().getInt("fetcher.threads.timeout.divisor",
2);
+ if (LOG.isInfoEnabled()) { LOG.info("Fetcher: time-out divisor: " +
timeoutDivisor); }
+
feeder = new QueueFeeder(input, fetchQueues, threadCount * 50);
//feeder.setPriority((Thread.MAX_PRIORITY + Thread.NORM_PRIORITY) / 2);
@@ -1027,7 +1030,7 @@ public class Fetcher extends Configured
}
// select a timeout that avoids a task timeout
- long timeout = getConf().getInt("mapred.task.timeout", 10*60*1000)/2;
+ long timeout = getConf().getInt("mapred.task.timeout",
10*60*1000)/timeoutDivisor;
do { // wait for threads to exit
try {