This is an automated email from the ASF dual-hosted git repository.

nick pushed a commit to branch multiple-parsers
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 97b97b345b49b7dd510af560598e6d1ab7baf28c
Author: Nick Burch <n...@gagravarr.org>
AuthorDate: Tue Mar 13 15:24:41 2018 +0000

    Move logic for recording embedded parser failures in the metadata to utils, and use for multiple parsers
---
 .../apache/tika/parser/RecursiveParserWrapper.java | 10 +++-------
 .../parser/multiple/AbstractMultipleParser.java    |  1 +
 .../tika/parser/multiple/FallbackParser.java       |  3 ---
 .../tika/parser/multiple/SupplementingParser.java  |  3 ---
 .../java/org/apache/tika/utils/ParserUtils.java    | 22 +++++++++++++++++++++-
 5 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
index 1e8e5b1..c426a42 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
@@ -31,7 +31,6 @@ import org.apache.tika.metadata.Property;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.sax.ContentHandlerFactory;
-import org.apache.tika.utils.ExceptionUtils;
 import org.apache.tika.utils.ParserUtils;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
@@ -85,8 +84,7 @@ public class RecursiveParserWrapper implements Parser {
     public final static Property EMBEDDED_RESOURCE_LIMIT_REACHED = 
                 
Property.internalBoolean(TikaCoreProperties.TIKA_META_EXCEPTION_PREFIX + 
"embedded_resource_limit_reached");
 
-    public final static Property EMBEDDED_EXCEPTION =
-            
Property.internalText(TikaCoreProperties.TIKA_META_EXCEPTION_PREFIX + 
"embedded_exception");
+    public final static Property EMBEDDED_EXCEPTIONx = 
ParserUtils.EMBEDDED_EXCEPTION;
     //move this to TikaCoreProperties?
     public final static Property EMBEDDED_RESOURCE_PATH = 
                 
Property.internalText(TikaCoreProperties.TIKA_META_PREFIX+"embedded_resource_path");
@@ -304,16 +302,14 @@ public class RecursiveParserWrapper implements Parser {
                     metadata.add(WRITE_LIMIT_REACHED, "true");
                 } else {
                     if (catchEmbeddedExceptions) {
-                        String trace = ExceptionUtils.getStackTrace(e);
-                        metadata.set(EMBEDDED_EXCEPTION, trace);
+                        ParserUtils.recordParserFailure(this, e, metadata);
                     } else {
                         throw e;
                     }
                 }
             } catch (TikaException e) {
                 if (catchEmbeddedExceptions) {
-                    String trace = ExceptionUtils.getStackTrace(e);
-                    metadata.set(EMBEDDED_EXCEPTION, trace);
+                    ParserUtils.recordParserFailure(this, e, metadata);
                 } else {
                     throw e;
                 }
diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
index 4695e0a..d857b35 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
@@ -216,6 +216,7 @@ public abstract class AbstractMultipleParser extends AbstractParser {
                 try {
                     p.parse(parserStream, handler, metadata, context);
                 } catch (Exception e) {
+                    recordParserFailure(p, e, metadata);
                     failure = e;
                 }
                 
diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/FallbackParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/FallbackParser.java
index 9b6a0bf..97a8aaf 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/multiple/FallbackParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/FallbackParser.java
@@ -61,9 +61,6 @@ public class FallbackParser extends AbstractMultipleParser {
         // If there was no exception, abort further parsers
         if (exception == null) return false;
         
-        // Record the details of this exception in the metadata
-        // TODO Share logic with the Recursive Parser Wrapper
-        
         // Have the next parser tried
         return true;
     }
diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/SupplementingParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/SupplementingParser.java
index fd5d037..c1dec34 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/multiple/SupplementingParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/SupplementingParser.java
@@ -72,9 +72,6 @@ public class SupplementingParser extends AbstractMultipleParser {
         // If there was no exception, just carry on to the next
         if (exception == null) return true;
         
-        // Record the details of this exception in the metadata
-        // TODO Share logic with the Recursive Parser Wrapper
-        
         // Have the next parser tried
         return true;
     }
diff --git a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
index 58105a6..c3c63ba 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
@@ -17,6 +17,8 @@
 package org.apache.tika.utils;
 
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
 
@@ -24,6 +26,11 @@ import org.apache.tika.parser.ParserDecorator;
  * Helper util methods for Parsers themselves.
  */
 public class ParserUtils {
+    public final static Property EMBEDDED_PARSER =
+            
Property.internalText(TikaCoreProperties.TIKA_META_EXCEPTION_PREFIX + 
"embedded_parser");
+    public final static Property EMBEDDED_EXCEPTION =
+            
Property.internalText(TikaCoreProperties.TIKA_META_EXCEPTION_PREFIX + 
"embedded_exception");
+    
     /**
      * Does a deep clone of a Metadata object.
      */
@@ -56,11 +63,24 @@ public class ParserUtils {
     }
 
     /**
-     * Records details of the {@link Parser} used to the Metadata,
+     * Records details of the {@link Parser} used to the {@link Metadata},
      *  typically wanted where multiple parsers could be picked between
      *  or used.
      */
     public static void recordParserDetails(Parser parser, Metadata metadata) {
         metadata.add("X-Parsed-By", getParserClassname(parser));
     }
+
+    /**
+     * Records details of a {@link Parser}'s failure to the
+     *  {@link Metadata}, so you can check what went wrong even if the
+     *  {@link Exception} wasn't immediately thrown (eg when several different
+     *  Parsers are used)
+     */
+    public static void recordParserFailure(Parser parser, Exception failure, 
+                                           Metadata metadata) {
+        String trace = ExceptionUtils.getStackTrace(failure);
+        metadata.add(EMBEDDED_EXCEPTION, trace);
+        metadata.add(EMBEDDED_PARSER, getParserClassname(parser));
+    }
 }

-- 
To stop receiving notification emails like this one, please contact
n...@apache.org.

Reply via email to