This is an automated email from the ASF dual-hosted git repository.

nick pushed a commit to branch multiple-parsers
in repository https://gitbox.apache.org/repos/asf/tika.git

commit f4a926ca94c50a6158891c7746e725cd720a2faa
Author: Nick Burch <n...@gagravarr.org>
AuthorDate: Tue Mar 13 15:13:19 2018 +0000

    Use utils for recording details of the parser used
---
 .../src/main/java/org/apache/tika/parser/CompositeParser.java    | 2 +-
 .../org/apache/tika/parser/multiple/AbstractMultipleParser.java  | 4 ++--
 tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java   | 9 +++++++++
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
index 0098468..c5c95a6 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
@@ -272,7 +272,7 @@ public class CompositeParser extends AbstractParser {
             TikaInputStream taggedStream = TikaInputStream.get(stream, tmp);
             TaggedContentHandler taggedHandler = 
                 handler != null ? new TaggedContentHandler(handler) : null;
-            metadata.add("X-Parsed-By", ParserUtils.getParserClassname(parser));
+            ParserUtils.recordParserDetails(parser, metadata);
             try {
                 parser.parse(taggedStream, taggedHandler, metadata, context);
             } catch (RuntimeException e) {
diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
index d66c541..4695e0a 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
@@ -205,8 +205,8 @@ public abstract class AbstractMultipleParser extends AbstractParser {
                 // TODO What's the best way to reset each time?
                 TikaInputStream parserStream = TikaInputStream.get(path);
                 
-                // Record this parser
-                metadata.add("X-Parsed-By", getParserClassname(p));
+                // Record that we used this parser
+                recordParserDetails(p, metadata);
                 
                 // TODO Handle metadata clashes based on the Policy
                 
diff --git a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
index bdbb04c..58105a6 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
@@ -54,4 +54,13 @@ public class ParserUtils {
             return parser.getClass().getName();
         }
     }
+
+    /**
+     * Records details of the {@link Parser} used to the Metadata,
+     *  typically wanted where multiple parsers could be picked between
+     *  or used.
+     */
+    public static void recordParserDetails(Parser parser, Metadata metadata) {
+        metadata.add("X-Parsed-By", getParserClassname(parser));
+    }
 }

-- 
To stop receiving notification emails like this one, please contact
n...@apache.org.

Reply via email to