This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch branch_3x in repository https://gitbox.apache.org/repos/asf/tika.git
commit 27b6605d11eee110db9a9a7b6f75449757e7e8ef
Author: tallison <[email protected]>
AuthorDate: Wed Oct 29 15:27:06 2025 -0400

    TIKA-4533 -- fix handling of TikaInputStreams with open containers -- fix merge conflicts (#2378)

    (cherry picked from commit 5dbfb15d04aef1c7e5a970e02add8ab60a08a406)
---
 .../tika/extractor/ParsingEmbeddedDocumentExtractor.java  |  2 +-
 .../src/main/java/org/apache/tika/io/TikaInputStream.java | 15 +++++++++++++++
 .../main/java/org/apache/tika/parser/DigestingParser.java |  9 +--------
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java b/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
index 4f2331359..cc6458473 100644
--- a/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
+++ b/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
@@ -105,7 +105,7 @@ public class ParsingEmbeddedDocumentExtractor implements EmbeddedDocumentExtract
             if (stream instanceof TikaInputStream) {
                 final Object container = ((TikaInputStream) stream).getOpenContainer();
                 if (container != null) {
-                    newStream.setOpenContainer(container);
+                    newStream.setOpenContainer(container, ((TikaInputStream) stream).getLength());
                 }
             }
             DELEGATING_PARSER.parse(newStream, new EmbeddedContentHandler(new BodyContentHandler(handler)),
diff --git a/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java b/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
index 8c2a85952..6ba1d2d97 100644
--- a/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
+++ b/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
@@ -669,6 +669,21 @@ public class TikaInputStream extends TaggedInputStream {
         }
     }
 
+    /**
+     * This is a hack for TIKA-4533 and is fixed correctly in Tika 4.x.
+     * Be careful and do not use this unless you know what you're doing!
+     *
+     * @param container
+     * @param length
+     */
+    public void setOpenContainer(Object container, long length) {
+        openContainer = container;
+        if (container instanceof Closeable) {
+            tmp.addResource((Closeable) container);
+        }
+        this.length = length;
+    }
+
     /**
      *
      * @param closeable
diff --git a/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java b/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java
index d0bcaa1f9..1a2d784ac 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java
@@ -20,9 +20,6 @@ package org.apache.tika.parser;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
-import java.nio.file.Files;
-import java.nio.file.Path;
 
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
@@ -65,11 +62,7 @@ public class DigestingParser extends ParserDecorator {
 
         try {
             if (embeddedStreamTranslator.shouldTranslate(tis, metadata)) {
-                Path tmpBytes = tmp.createTempFile();
-                try (OutputStream os = Files.newOutputStream(tmpBytes)) {
-                    embeddedStreamTranslator.translate(tis, metadata, os);
-                }
-                try (TikaInputStream translated = TikaInputStream.get(tmpBytes)) {
+                try (TikaInputStream translated = TikaInputStream.get(embeddedStreamTranslator.translate(tis, metadata))) {
                     digester.digest(translated, metadata, context);
                 }
             } else {
