This is an automated email from the ASF dual-hosted git repository. nick pushed a commit to branch multiple-parsers in repository https://gitbox.apache.org/repos/asf/tika.git
commit 427417c5d17f1e03724f3e6ded64779bf7366677
Author: Nick Burch <[email protected]>
AuthorDate: Tue Mar 13 15:04:43 2018 +0000

    Prepare to track metadata between parsers
---
 .../org/apache/tika/parser/multiple/AbstractMultipleParser.java | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
index c47e762..46cd064 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java
@@ -34,6 +34,7 @@ import org.apache.tika.parser.AbstractParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
+import org.apache.tika.utils.ParserUtils;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
@@ -178,7 +179,11 @@ public abstract class AbstractMultipleParser extends AbstractParser {
             InputStream stream, ContentHandler handler,
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
+        // Track the metadata between parsers, so we can apply our policy
+        Metadata originalMetadata = ParserUtils.cloneMetadata(metadata);
+        Metadata lastMetadata = originalMetadata;
+
         // Start tracking resources, so we can clean up when done
         TemporaryResources tmp = new TemporaryResources();
         try {
             // Force the stream to be a Tika one
@@ -187,6 +192,7 @@ public abstract class AbstractMultipleParser extends AbstractParser {
             // TODO Support an InputStreamFactory as an alternative to
             // Files, see TIKA-2585
             // TODO Rewind support copy from ParserDecorator.withFallbacks
+            // TODO Should we use RereadableInputStream instead?
             TikaInputStream taggedStream = TikaInputStream.get(stream, tmp);
             Path path = taggedStream.getPath();
@@ -222,6 +228,7 @@ public abstract class AbstractMultipleParser extends AbstractParser {
                 }
                 // TODO Handle metadata clashes based on the Policy
+                lastMetadata = ParserUtils.cloneMetadata(metadata);
             }
         } finally {
             tmp.dispose();

-- 
To stop receiving notification emails like this one, please contact [email protected].
