This is an automated email from the ASF dual-hosted git repository.

nick pushed a commit to branch multiple-parsers
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 82f6f5f6068d72b2afcb6c47840b9124554afdbf
Author: Nick Burch <n...@gagravarr.org>
AuthorDate: Tue Mar 13 17:12:34 2018 +0000

    Bring over stream reset logic from ParserDecorator and update comments
---
 .../parser/multiple/AbstractMultipleParser.java    | 28 ++++++++++------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
index 4d3ff0c..6262dc1 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
@@ -167,13 +167,9 @@ public abstract class AbstractMultipleParser extends AbstractParser {
             ContentHandler handler, Exception exception);
     
     /**
-     * Delegates the call to one or more Parsers, 
-     * Delegates the call to the matching component parser.
-     * <p>
-     * Potential {@link RuntimeException}s, {@link IOException}s and
-     * {@link SAXException}s unrelated to the given input stream and content
-     * handler are automatically wrapped into {@link TikaException}s to better
-     * honor the {@link Parser} contract.
+     * Processes the given Stream through one or more parsers, 
+     *  resetting things between parsers as requested by policy.
+     * The actual processing is delegated to one or more {@link Parser}s
      */
     public void parse(
             InputStream stream, ContentHandler handler,
@@ -187,11 +183,8 @@ public abstract class AbstractMultipleParser extends AbstractParser {
         TemporaryResources tmp = new TemporaryResources();
         try {
             // Force the stream to be a Tika one
-            // Force the stream to be file-backed, so we can
-            //  re-wind it safely if required
-            // TODO Support an InputStreamFactory as an alternative to
-            //  Files, see TIKA-2585
-            // TODO Rewind support copy from ParserDecorator.withFallbacks
+            // Force the stream to be file-backed, so we can re-read safely
+            //  later if required for parser 2+
             // TODO Should we use RereadableInputStream instead?
             TikaInputStream taggedStream = TikaInputStream.get(stream, tmp);
             Path path = taggedStream.getPath();
@@ -202,8 +195,10 @@ public abstract class AbstractMultipleParser extends AbstractParser {
             // TODO Provide a way to supply a ContentHandlerFactory?
 
             for (Parser p : parsers) {
-                // TODO What's the best way to reset each time?
-                TikaInputStream parserStream = TikaInputStream.get(path);
+                // Indicate we may need to re-read the stream later
+                // TODO Support an InputStreamFactory as an alternative to
+                //  Files, see TIKA-2585
+                taggedStream.mark(-1);
                 
                 // Record that we used this parser
                 recordParserDetails(p, metadata);
@@ -213,7 +208,7 @@ public abstract class AbstractMultipleParser extends AbstractParser {
                 // Process if possible
                 Exception failure = null;
                 try {
-                    p.parse(parserStream, handler, metadata, context);
+                    p.parse(taggedStream, handler, metadata, context);
                 } catch (Exception e) {
                     recordParserFailure(p, e, metadata);
                     failure = e;
@@ -236,6 +231,9 @@ public abstract class AbstractMultipleParser extends AbstractParser {
                 
                 // TODO Handle metadata clashes based on the Policy
                 lastMetadata = cloneMetadata(metadata);
+                
+                // Prepare for the next parser, if present
+                taggedStream.reset();
             }
         } finally {
             tmp.dispose();

-- 
To stop receiving notification emails like this one, please contact
n...@apache.org.

Reply via email to