Author: jukka
Date: Sun Apr 26 22:35:05 2009
New Revision: 768819

URL: http://svn.apache.org/viewvc?rev=768819&view=rev
Log:
TIKA-215: Use a thread pool in ParsingReader

ParsingReader can now use any java.util.concurrent.Executor (that runs tasks 
asynchronously in background) for the parsing task. Existing clients are not 
affected.


Modified:
    lucene/tika/trunk/CHANGES.txt
    lucene/tika/trunk/src/main/java/org/apache/tika/parser/ParsingReader.java

Modified: lucene/tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/CHANGES.txt?rev=768819&r1=768818&r2=768819&view=diff
==============================================================================
--- lucene/tika/trunk/CHANGES.txt (original)
+++ lucene/tika/trunk/CHANGES.txt Sun Apr 26 22:35:05 2009
@@ -13,6 +13,10 @@
     attacks, where a specially crafted input document can expand to
     practically infinite amount of output text. (TIKA-216)
 
+  * The ParsingReader class can now use a thread pool or a more complex
+    execution model (java.util.concurrent.Executor) for the background
+    parsing task. (TIKA-215)
+
 Release 0.3 - 03/09/2009
 ------------------------
 

Modified: lucene/tika/trunk/src/main/java/org/apache/tika/parser/ParsingReader.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/parser/ParsingReader.java?rev=768819&r1=768818&r2=768819&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/parser/ParsingReader.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/parser/ParsingReader.java Sun Apr 26 22:35:05 2009
@@ -26,6 +26,7 @@
 import java.io.PipedWriter;
 import java.io.Reader;
 import java.io.Writer;
+import java.util.concurrent.Executor;
 
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.sax.BodyContentHandler;
@@ -33,7 +34,7 @@
 
 /**
  * Reader for the text content from a given binary stream. This class
- * starts a background thread and uses a {@link Parser}
+ * uses a background parsing task with a {@link Parser}
  * ({@link AutoDetectParser} by default) to parse the text content from
  * a given input stream. The {@link BodyContentHandler} class and a pipe
  * is used to convert the push-based SAX event stream to the pull-based
@@ -71,7 +72,7 @@
     /**
      * An exception (if any) thrown by the parsing thread.
      */
-    private Throwable throwable;
+    private transient Throwable throwable;
 
     /**
      * Utility method that returns a {@link Metadata} instance
@@ -124,15 +125,49 @@
     /**
      * Creates a reader for the text content of the given binary stream
      * with the given document metadata. The given parser is used for
-     * parsing.
+     * parsing. A new background thread is started for the parsing task.
      *
      * @param parser parser instance
      * @param stream binary stream
      * @param metadata document metadata
      * @throws IOException if the document can not be parsed
      */
-    public ParsingReader(Parser parser, InputStream stream, Metadata metadata)
+    public ParsingReader(
+            Parser parser, InputStream stream, final Metadata metadata)
             throws IOException {
+        this(parser, stream, metadata, new Executor() {
+            public void execute(Runnable command) {
+                String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+                if (name != null) {
+                    name = "Apache Tika: " + name;
+                } else {
+                    name = "Apache Tika";
+                }
+                Thread thread = new Thread(command, name);
+                thread.setDaemon(true);
+                thread.start();
+            }
+        });
+    }
+
+    /**
+     * Creates a reader for the text content of the given binary stream
+     * with the given document metadata. The given parser is used for the
+     * parsing task that is run with the given executor. The given executor
+     * <em>must</em> run the parsing task asynchronously in a separate thread,
+     * since the current thread must return to the caller that can then
+     * consume the parsed text through the {@link Reader} interface.
+     *
+     * @param parser parser instance
+     * @param stream binary stream
+     * @param metadata document metadata
+     * @param executor executor for the parsing task
+     * @throws IOException if the document can not be parsed
+     * @since Apache Tika 0.4
+     */
+    public ParsingReader(
+            Parser parser, InputStream stream, Metadata metadata,
+            Executor executor) throws IOException {
         this.parser = parser;
         PipedReader pipedReader = new PipedReader();
         this.reader = new BufferedReader(pipedReader);
@@ -144,13 +179,7 @@
         this.stream = stream;
         this.metadata = metadata;
 
-        String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
-        if (name != null) {
-            name = "Apache Tika: " + name;
-        } else {
-            name = "Apache Tika";
-        }
-        new Thread(new ParsingThread(), name).start();
+        executor.execute(new ParsingTask());
 
         // TIKA-203: Buffer first character to force metadata extraction
         reader.mark(1);
@@ -159,9 +188,9 @@
     }
 
     /**
-     * The background parsing thread.
+     * The background parsing task.
      */
-    private class ParsingThread implements Runnable {
+    private class ParsingTask implements Runnable {
 
         /**
          * Parses the given binary stream and writes the text content


Reply via email to