This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_3x by this push:
new 920741865 TIKA-4607 -- fix bug in DigestingParser
920741865 is described below
commit 920741865629ca4149862d045d382fe00a7322be
Author: tallison <[email protected]>
AuthorDate: Tue Dec 30 14:52:40 2025 -0500
TIKA-4607 -- fix bug in DigestingParser
---
.../java/org/apache/tika/parser/AutoDetectParser.java | 2 +-
.../java/org/apache/tika/parser/DigestingParser.java | 15 +++++++++++++--
.../tika/parser/microsoft/ooxml/OOXMLParserTest.java | 18 ++++++++++++++++++
3 files changed, 32 insertions(+), 3 deletions(-)
diff --git a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
index 86eae692a..59882f896 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
@@ -231,7 +231,7 @@ public class AutoDetectParser extends CompositeParser {
private void maybeSpool(TikaInputStream tis, AutoDetectParserConfig
autoDetectParserConfig,
Metadata metadata) throws IOException {
- if (tis.hasFile()) {
+ if (tis.hasFile() || tis.getOpenContainer() != null) {
return;
}
if (autoDetectParserConfig.getSpoolToDisk() == null) {
diff --git a/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java b/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java
index c346b551d..548f071e1 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/DigestingParser.java
@@ -16,9 +16,11 @@
*/
package org.apache.tika.parser;
-
import java.io.IOException;
import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -61,7 +63,16 @@ public class DigestingParser extends ParserDecorator {
try {
if (embeddedStreamTranslator.shouldTranslate(tis, metadata)) {
- try (TikaInputStream translated = TikaInputStream.get(embeddedStreamTranslator.translate(tis, metadata))) {
+ Path translatedBytes = tmp.createTempFile();
+ if (tis.getOpenContainer() == null) {
+ //if there's no open container, then translate the bytes
+ try (InputStream is = TikaInputStream.get(tis.getPath())) {
+ Files.copy(embeddedStreamTranslator.translate(is, metadata), translatedBytes, StandardCopyOption.REPLACE_EXISTING);
+ }
+ } else {
+ Files.copy(embeddedStreamTranslator.translate(tis, metadata), translatedBytes, StandardCopyOption.REPLACE_EXISTING);
+ }
+ try (TikaInputStream translated = TikaInputStream.get(translatedBytes)) {
digester.digest(translated, metadata, context);
}
} else {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
index 36038a8ca..ffba11b0c 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
@@ -17,6 +17,7 @@
package org.apache.tika.parser.microsoft.ooxml;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import java.util.List;
@@ -28,7 +29,10 @@ import org.apache.tika.TikaTest;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.DigestingParser;
import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.digestutils.CommonsDigester;
import org.apache.tika.parser.microsoft.EMFParser;
import org.apache.tika.parser.microsoft.OfficeParserConfig;
@@ -142,4 +146,18 @@ public class OOXMLParserTest extends TikaTest {
assertContains("Example of a table",
metadataList.get(1).get(TikaCoreProperties.TIKA_CONTENT));
}
+
+ @Test
+ public void testDigestTranslator() throws Exception {
+ Parser parser = TikaTest.AUTO_DETECT_PARSER;
+ Parser digestingParser = new DigestingParser(parser, new CommonsDigester(100000, "sha256"), false);
+ List<Metadata> metadataList = getRecursiveMetadata("testMSChart-govdocs-428996.pptx", digestingParser);
+ assertEquals(4, metadataList.size());
+ for (Metadata m : metadataList) {
+ assertNotNull(m.get("X-TIKA:digest:SHA256"));
+ //there was a zero-byte file exception thrown on the ole.bin file
+ //before TIKA-4607
+ assertNull(m.get(TikaCoreProperties.EMBEDDED_EXCEPTION));
+ }
+ }
}