This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_3x by this push:
     new 920741865 TIKA-4607 -- fix bug in DigestingParser
920741865 is described below

commit 920741865629ca4149862d045d382fe00a7322be
Author: tallison <[email protected]>
AuthorDate: Tue Dec 30 14:52:40 2025 -0500

    TIKA-4607 -- fix bug in DigestingParser
---
 .../java/org/apache/tika/parser/AutoDetectParser.java  |  2 +-
 .../java/org/apache/tika/parser/DigestingParser.java   | 15 +++++++++++++--
 .../tika/parser/microsoft/ooxml/OOXMLParserTest.java   | 18 ++++++++++++++++++
 3 files changed, 32 insertions(+), 3 deletions(-)

diff --git 
a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java 
b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
index 86eae692a..59882f896 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
@@ -231,7 +231,7 @@ public class AutoDetectParser extends CompositeParser {
 
     private void maybeSpool(TikaInputStream tis, AutoDetectParserConfig 
autoDetectParserConfig,
                             Metadata metadata) throws IOException {
-        if (tis.hasFile()) {
+        if (tis.hasFile() || tis.getOpenContainer() != null) {
             return;
         }
         if (autoDetectParserConfig.getSpoolToDisk() == null) {
diff --git 
a/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java 
b/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java
index c346b551d..548f071e1 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java
@@ -16,9 +16,11 @@
  */
 package org.apache.tika.parser;
 
-
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
 
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
@@ -61,7 +63,16 @@ public class DigestingParser extends ParserDecorator {
         try {
 
             if (embeddedStreamTranslator.shouldTranslate(tis, metadata)) {
-                try (TikaInputStream translated = 
TikaInputStream.get(embeddedStreamTranslator.translate(tis, metadata))) {
+                Path translatedBytes = tmp.createTempFile();
+                if (tis.getOpenContainer() == null) {
+                    //if there's no open container, then translate the bytes
+                    try (InputStream is = TikaInputStream.get(tis.getPath())) {
+                        Files.copy(embeddedStreamTranslator.translate(is, 
metadata), translatedBytes, StandardCopyOption.REPLACE_EXISTING);
+                    }
+                } else {
+                    Files.copy(embeddedStreamTranslator.translate(tis, 
metadata), translatedBytes, StandardCopyOption.REPLACE_EXISTING);
+                }
+                try (TikaInputStream translated = 
TikaInputStream.get(translatedBytes)) {
                     digester.digest(translated, metadata, context);
                 }
             } else {
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
index 36038a8ca..ffba11b0c 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
@@ -17,6 +17,7 @@
 package org.apache.tika.parser.microsoft.ooxml;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertNull;
 
 import java.util.List;
@@ -28,7 +29,10 @@ import org.apache.tika.TikaTest;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.Office;
 import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.DigestingParser;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.digestutils.CommonsDigester;
 import org.apache.tika.parser.microsoft.EMFParser;
 import org.apache.tika.parser.microsoft.OfficeParserConfig;
 
@@ -142,4 +146,18 @@ public class OOXMLParserTest extends TikaTest {
         assertContains("Example of a table",
                 metadataList.get(1).get(TikaCoreProperties.TIKA_CONTENT));
     }
+
+    @Test
+    public void testDigestTranslator() throws Exception {
+        Parser parser = TikaTest.AUTO_DETECT_PARSER;
+        Parser digestingParser = new DigestingParser(parser, new 
CommonsDigester(100000, "sha256"), false);
+        List<Metadata> metadataList = 
getRecursiveMetadata("testMSChart-govdocs-428996.pptx", digestingParser);
+        assertEquals(4, metadataList.size());
+        for (Metadata m : metadataList) {
+            assertNotNull(m.get("X-TIKA:digest:SHA256"));
+            //there was a zero-byte file exception thrown on the ole.bin file
+            //before TIKA-4607
+            assertNull(m.get(TikaCoreProperties.EMBEDDED_EXCEPTION));
+        }
+    }
 }

Reply via email to