This is an automated email from the ASF dual-hosted git repository. nick pushed a commit to branch multiple-parsers in repository https://gitbox.apache.org/repos/asf/tika.git
commit d229ab6f666cde8b007f568b13001a2c780ff477 Author: Nick Burch <[email protected]> AuthorDate: Tue Mar 13 15:10:16 2018 +0000 Pull common "Real Parser" identification logic out to utils --- .../java/org/apache/tika/parser/CompositeParser.java | 7 ++----- .../tika/parser/multiple/AbstractMultipleParser.java | 17 ++++------------- .../main/java/org/apache/tika/utils/ParserUtils.java | 14 ++++++++++++++ 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java index ea3968e..0098468 100644 --- a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java +++ b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java @@ -23,6 +23,7 @@ import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.mime.MediaTypeRegistry; import org.apache.tika.sax.TaggedContentHandler; +import org.apache.tika.utils.ParserUtils; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; @@ -271,11 +272,7 @@ public class CompositeParser extends AbstractParser { TikaInputStream taggedStream = TikaInputStream.get(stream, tmp); TaggedContentHandler taggedHandler = handler != null ? new TaggedContentHandler(handler) : null; - if (parser instanceof ParserDecorator){ - metadata.add("X-Parsed-By", ((ParserDecorator) parser).getWrappedParser().getClass().getName()); - } else { - metadata.add("X-Parsed-By", parser.getClass().getName()); - } + metadata.add("X-Parsed-By", ParserUtils.getParserClassname(parser)); try { parser.parse(taggedStream, taggedHandler, metadata, context); } catch (RuntimeException e) { diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java index 02d7e51..d66c541 100644 --- a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java +++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java @@ -34,7 +34,7 @@ import org.apache.tika.parser.AbstractParser; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; import org.apache.tika.parser.ParserDecorator; -import org.apache.tika.utils.ParserUtils; +import static org.apache.tika.utils.ParserUtils.*; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; @@ -180,7 +180,7 @@ public abstract class AbstractMultipleParser extends AbstractParser { Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // Track the metadata between parsers, so we can apply our policy - Metadata originalMetadata = ParserUtils.cloneMetadata(metadata); + Metadata originalMetadata = cloneMetadata(metadata); Metadata lastMetadata = originalMetadata; // Start tracking resources, so we can clean up when done @@ -206,7 +206,7 @@ public abstract class AbstractMultipleParser extends AbstractParser { TikaInputStream parserStream = TikaInputStream.get(path); // Record this parser - metadata.add("X-Parsed-By", getParserName(p)); + metadata.add("X-Parsed-By", getParserClassname(p)); // TODO Handle metadata clashes based on the Policy @@ -234,20 +234,11 @@ public abstract class AbstractMultipleParser extends AbstractParser { } // TODO Handle metadata clashes based on the Policy - lastMetadata = ParserUtils.cloneMetadata(metadata); + lastMetadata = cloneMetadata(metadata); } } finally { tmp.dispose(); } } - - private String getParserName(Parser parser) { - // TODO Share this logic with CompositeParser - if (parser instanceof ParserDecorator){ - return ((ParserDecorator) parser).getWrappedParser().getClass().getName(); - } else { - return parser.getClass().getName(); - } - } } diff --git a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java index 289cbc2..bdbb04c 100644 --- a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java +++ b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java @@ -17,6 +17,8 @@ package org.apache.tika.utils; import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.ParserDecorator; /** * Helper util methods for Parsers themselves. @@ -40,4 +42,16 @@ public class ParserUtils { } return clone; } + + /** + * Identifies the real class name of the {@link Parser}, unwrapping + * any {@link ParserDecorator} decorations on top of it. + */ + public static String getParserClassname(Parser parser) { + if (parser instanceof ParserDecorator){ + return ((ParserDecorator) parser).getWrappedParser().getClass().getName(); + } else { + return parser.getClass().getName(); + } + } } -- To stop receiving notification emails like this one, please contact [email protected].
