Author: markus
Date: Tue Jun 12 10:18:09 2012
New Revision: 1349230

URL: http://svn.apache.org/viewvc?rev=1349230&view=rev
Log:
NUTCH-1356 ParseUtil use ExecutorService instead of manually thread handling

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/ivy/ivy.xml
    nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1349230&r1=1349229&r2=1349230&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Jun 12 10:18:09 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 (trunk) Current Development:
 
+* NUTCH-1356 ParseUtil use ExecutorService instead of manually thread handling (ferdy via markus)
+
 * NUTCH-1352 Improve regex urlfilters/normalizers synchronization (ferdy via markus)
 
 * NUTCH-1024 Dynamically set fetchInterval by MIME-type (markus)

Modified: nutch/trunk/ivy/ivy.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/ivy/ivy.xml?rev=1349230&r1=1349229&r2=1349230&view=diff
==============================================================================
--- nutch/trunk/ivy/ivy.xml (original)
+++ nutch/trunk/ivy/ivy.xml Tue Jun 12 10:18:09 2012
@@ -62,6 +62,8 @@
                <dependency org="xerces" name="xercesImpl" rev="2.9.1" />
                <dependency org="xerces" name="xmlParserAPIs" rev="2.6.2" />
                <dependency org="oro" name="oro" rev="2.0.8" />
+               
+               <dependency org="com.google.guava" name="guava" rev="11.0.2" />
 
                <!--Configuration: test -->
 

Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java?rev=1349230&r1=1349229&r2=1349230&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java Tue Jun 12 10:18:09 2012
@@ -18,15 +18,18 @@ package org.apache.nutch.parse;
 
 // Commons Logging imports
 
-import java.util.concurrent.FutureTask;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.protocol.Content;
 
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+
 
 /**
  * A Utility class containing methods to simply perform parsing utilities such
@@ -43,7 +46,8 @@ public class ParseUtil {
   public static final Logger LOG = LoggerFactory.getLogger(ParseUtil.class);
   private ParserFactory parserFactory;
   /** Parser timeout set to 30 sec by default. Set -1 to deactivate **/
-  private int MAX_PARSE_TIME = 30;
+  private int maxParseTime = 30;
+  private ExecutorService executorService;
   
   /**
    * 
@@ -51,7 +55,9 @@ public class ParseUtil {
    */
   public ParseUtil(Configuration conf) {
     this.parserFactory = new ParserFactory(conf);
-    MAX_PARSE_TIME=conf.getInt("parser.timeout", 30);
+    maxParseTime=conf.getInt("parser.timeout", 30);
+    executorService = Executors.newCachedThreadPool(new ThreadFactoryBuilder()
+      .setNameFormat("parse-%d").setDaemon(true).build());
   }
   
   /**
@@ -83,7 +89,7 @@ public class ParseUtil {
       if (LOG.isDebugEnabled()) {
         LOG.debug("Parsing [" + content.getUrl() + "] with [" + parsers[i] + "]");
       }
-      if (MAX_PARSE_TIME!=-1)
+      if (maxParseTime!=-1)
        parseResult = runParser(parsers[i], content);
       else 
        parseResult = parsers[i].getParse(content);
@@ -133,7 +139,7 @@ public class ParseUtil {
     }
     
     ParseResult parseResult = null;
-    if (MAX_PARSE_TIME!=-1)
+    if (maxParseTime!=-1)
        parseResult = runParser(p, content);
     else 
        parseResult = p.getParse(content);
@@ -150,20 +156,14 @@ public class ParseUtil {
 
   private ParseResult runParser(Parser p, Content content) {
     ParseCallable pc = new ParseCallable(p, content);
-    FutureTask<ParseResult> task = new FutureTask<ParseResult>(pc);
+    Future<ParseResult> task = executorService.submit(pc);
     ParseResult res = null;
-    Thread t = new Thread(task);
-    t.start();
     try {
-      res = task.get(MAX_PARSE_TIME, TimeUnit.SECONDS);
-    } catch (TimeoutException e) {
-      LOG.warn("TIMEOUT parsing " + content.getUrl() + " with " + p);
+      res = task.get(maxParseTime, TimeUnit.SECONDS);
     } catch (Exception e) {
+      LOG.warn("Error parsing " + content.getUrl() + " with " + p, e);
       task.cancel(true);
-      res = null;
-      t.interrupt();
     } finally {
-      t = null;
       pc = null;
     }
     return res;


Reply via email to