This is an automated email from the ASF dual-hosted git repository. nick pushed a commit to branch multiple-parsers in repository https://gitbox.apache.org/repos/asf/tika.git
commit f4a926ca94c50a6158891c7746e725cd720a2faa
Author: Nick Burch <[email protected]>
AuthorDate: Tue Mar 13 15:13:19 2018 +0000

    Use utils for recording details of the parser used
---
 .../src/main/java/org/apache/tika/parser/CompositeParser.java   | 2 +-
 .../org/apache/tika/parser/multiple/AbstractMultipleParser.java | 4 ++--
 tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java  | 9 +++++++++
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
index 0098468..c5c95a6 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
@@ -272,7 +272,7 @@ public class CompositeParser extends AbstractParser {
         TikaInputStream taggedStream = TikaInputStream.get(stream, tmp);
         TaggedContentHandler taggedHandler =
             handler != null ? new TaggedContentHandler(handler) : null;
-        metadata.add("X-Parsed-By", ParserUtils.getParserClassname(parser));
+        ParserUtils.recordParserDetails(parser, metadata);
         try {
             parser.parse(taggedStream, taggedHandler, metadata, context);
         } catch (RuntimeException e) {
diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
index d66c541..4695e0a 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
@@ -205,8 +205,8 @@ public abstract class AbstractMultipleParser extends AbstractParser {
                 // TODO What's the best way to reset each time?
                 TikaInputStream parserStream = TikaInputStream.get(path);
 
-                // Record this parser
-                metadata.add("X-Parsed-By", getParserClassname(p));
+                // Record that we used this parser
+                recordParserDetails(p, metadata);
 
                 // TODO Handle metadata clashes based on the Policy
 
diff --git a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
index bdbb04c..58105a6 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
@@ -54,4 +54,13 @@ public class ParserUtils {
             return parser.getClass().getName();
         }
     }
+
+    /**
+     * Records details of the {@link Parser} used to the Metadata,
+     *  typically wanted where multiple parsers could be picked between
+     *  or used.
+     */
+    public static void recordParserDetails(Parser parser, Metadata metadata) {
+        metadata.add("X-Parsed-By", getParserClassname(parser));
+    }
 }

-- 
To stop receiving notification emails like this one, please contact [email protected].
