This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 27b6605d11eee110db9a9a7b6f75449757e7e8ef
Author: tallison <[email protected]>
AuthorDate: Wed Oct 29 15:27:06 2025 -0400

    TIKA-4533 -- fix handling of TikaInputStreams with open containers -- fix merge conflicts (#2378)
    
    (cherry picked from commit 5dbfb15d04aef1c7e5a970e02add8ab60a08a406)
---
 .../tika/extractor/ParsingEmbeddedDocumentExtractor.java  |  2 +-
 .../src/main/java/org/apache/tika/io/TikaInputStream.java | 15 +++++++++++++++
 .../main/java/org/apache/tika/parser/DigestingParser.java |  9 +--------
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java b/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
index 4f2331359..cc6458473 100644
--- a/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
+++ b/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
@@ -105,7 +105,7 @@ public class ParsingEmbeddedDocumentExtractor implements EmbeddedDocumentExtract
             if (stream instanceof TikaInputStream) {
                 final Object container = ((TikaInputStream) stream).getOpenContainer();
                 if (container != null) {
-                    newStream.setOpenContainer(container);
+                    newStream.setOpenContainer(container, ((TikaInputStream) stream).getLength());
                 }
             }
             DELEGATING_PARSER.parse(newStream, new EmbeddedContentHandler(new BodyContentHandler(handler)),
diff --git a/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java b/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
index 8c2a85952..6ba1d2d97 100644
--- a/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
+++ b/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
@@ -669,6 +669,21 @@ public class TikaInputStream extends TaggedInputStream {
         }
     }
 
+    /**
+     * Sets the open container and stream length together. Hack for TIKA-4533, fixed
+     * correctly in Tika 4.x. Be careful and do not use this unless you know what you're doing!
+     *
+     * @param container stored as the open container; also passed to tmp.addResource if Closeable
+     * @param length value assigned to this stream's length field
+     */
+    public void setOpenContainer(Object container, long length) {
+        openContainer = container;
+        if (container instanceof Closeable) {
+            tmp.addResource((Closeable) container);
+        }
+        this.length = length;
+    }
+
     /**
      *
      * @param closeable
diff --git a/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java b/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java
index d0bcaa1f9..1a2d784ac 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java
@@ -20,9 +20,6 @@ package org.apache.tika.parser;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
-import java.nio.file.Files;
-import java.nio.file.Path;
 
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
@@ -65,11 +62,7 @@ public class DigestingParser extends ParserDecorator {
         try {
 
             if (embeddedStreamTranslator.shouldTranslate(tis, metadata)) {
-                Path tmpBytes = tmp.createTempFile();
-                try (OutputStream os = Files.newOutputStream(tmpBytes)) {
-                    embeddedStreamTranslator.translate(tis, metadata, os);
-                }
-                try (TikaInputStream translated = TikaInputStream.get(tmpBytes)) {
+                try (TikaInputStream translated = TikaInputStream.get(embeddedStreamTranslator.translate(tis, metadata))) {
                     digester.digest(translated, metadata, context);
                 }
             } else {

Reply via email to