This is an automated email from the ASF dual-hosted git repository.

nick pushed a commit to branch multiple-parsers in repository https://gitbox.apache.org/repos/asf/tika.git
commit 97b97b345b49b7dd510af560598e6d1ab7baf28c Author: Nick Burch <n...@gagravarr.org> AuthorDate: Tue Mar 13 15:24:41 2018 +0000 Move logic for recording embedded parser failures in the metadata to utils, and use for multiple parsers --- .../apache/tika/parser/RecursiveParserWrapper.java | 10 +++------- .../parser/multiple/AbstractMultipleParser.java | 1 + .../tika/parser/multiple/FallbackParser.java | 3 --- .../tika/parser/multiple/SupplementingParser.java | 3 --- .../java/org/apache/tika/utils/ParserUtils.java | 22 +++++++++++++++++++++- 5 files changed, 25 insertions(+), 14 deletions(-) diff --git a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java index 1e8e5b1..c426a42 100644 --- a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java +++ b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java @@ -31,7 +31,6 @@ import org.apache.tika.metadata.Property; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.mime.MediaType; import org.apache.tika.sax.ContentHandlerFactory; -import org.apache.tika.utils.ExceptionUtils; import org.apache.tika.utils.ParserUtils; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; @@ -85,8 +84,7 @@ public class RecursiveParserWrapper implements Parser { public final static Property EMBEDDED_RESOURCE_LIMIT_REACHED = Property.internalBoolean(TikaCoreProperties.TIKA_META_EXCEPTION_PREFIX + "embedded_resource_limit_reached"); - public final static Property EMBEDDED_EXCEPTION = - Property.internalText(TikaCoreProperties.TIKA_META_EXCEPTION_PREFIX + "embedded_exception"); + public final static Property EMBEDDED_EXCEPTIONx = ParserUtils.EMBEDDED_EXCEPTION; //move this to TikaCoreProperties? 
public final static Property EMBEDDED_RESOURCE_PATH = Property.internalText(TikaCoreProperties.TIKA_META_PREFIX+"embedded_resource_path"); @@ -304,16 +302,14 @@ public class RecursiveParserWrapper implements Parser { metadata.add(WRITE_LIMIT_REACHED, "true"); } else { if (catchEmbeddedExceptions) { - String trace = ExceptionUtils.getStackTrace(e); - metadata.set(EMBEDDED_EXCEPTION, trace); + ParserUtils.recordParserFailure(this, e, metadata); } else { throw e; } } } catch (TikaException e) { if (catchEmbeddedExceptions) { - String trace = ExceptionUtils.getStackTrace(e); - metadata.set(EMBEDDED_EXCEPTION, trace); + ParserUtils.recordParserFailure(this, e, metadata); } else { throw e; } diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java index 4695e0a..d857b35 100644 --- a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java +++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java @@ -216,6 +216,7 @@ public abstract class AbstractMultipleParser extends AbstractParser { try { p.parse(parserStream, handler, metadata, context); } catch (Exception e) { + recordParserFailure(p, e, metadata); failure = e; } diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/FallbackParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/FallbackParser.java index 9b6a0bf..97a8aaf 100644 --- a/tika-core/src/main/java/org/apache/tika/parser/multiple/FallbackParser.java +++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/FallbackParser.java @@ -61,9 +61,6 @@ public class FallbackParser extends AbstractMultipleParser { // If there was no exception, abort further parsers if (exception == null) return false; - // Record the details of this exception in the metadata - // TODO Share logic with the Recursive Parser Wrapper - // Have the next parser tried return 
true; } diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/SupplementingParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/SupplementingParser.java index fd5d037..c1dec34 100644 --- a/tika-core/src/main/java/org/apache/tika/parser/multiple/SupplementingParser.java +++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/SupplementingParser.java @@ -72,9 +72,6 @@ public class SupplementingParser extends AbstractMultipleParser { // If there was no exception, just carry on to the next if (exception == null) return true; - // Record the details of this exception in the metadata - // TODO Share logic with the Recursive Parser Wrapper - // Have the next parser tried return true; } diff --git a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java index 58105a6..c3c63ba 100644 --- a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java +++ b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java @@ -17,6 +17,8 @@ package org.apache.tika.utils; import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.Property; +import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.parser.Parser; import org.apache.tika.parser.ParserDecorator; @@ -24,6 +26,11 @@ import org.apache.tika.parser.ParserDecorator; * Helper util methods for Parsers themselves. */ public class ParserUtils { + public final static Property EMBEDDED_PARSER = + Property.internalText(TikaCoreProperties.TIKA_META_EXCEPTION_PREFIX + "embedded_parser"); + public final static Property EMBEDDED_EXCEPTION = + Property.internalText(TikaCoreProperties.TIKA_META_EXCEPTION_PREFIX + "embedded_exception"); + /** * Does a deep clone of a Metadata object. 
*/ @@ -56,11 +63,24 @@ public class ParserUtils { } /** - * Records details of the {@link Parser} used to the Metadata, + * Records details of the {@link Parser} used to the {@link Metadata}, * typically wanted where multiple parsers could be picked between * or used. */ public static void recordParserDetails(Parser parser, Metadata metadata) { metadata.add("X-Parsed-By", getParserClassname(parser)); } + + /** + * Records details of a {@link Parser}'s failure to the + * {@link Metadata}, so you can check what went wrong even if the + * {@link Exception} wasn't immediately thrown (eg when several different + * Parsers are used) + */ + public static void recordParserFailure(Parser parser, Exception failure, + Metadata metadata) { + String trace = ExceptionUtils.getStackTrace(failure); + metadata.add(EMBEDDED_EXCEPTION, trace); + metadata.add(EMBEDDED_PARSER, getParserClassname(parser)); + } } -- To stop receiving notification emails like this one, please contact n...@apache.org.