This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4533c
in repository https://gitbox.apache.org/repos/asf/tika.git

commit dd3e30dc56418a7ab68afe50a9c6af29f8322e79
Author: tallison <[email protected]>
AuthorDate: Thu Oct 30 17:13:53 2025 -0400

    TIKA-4533 - third time's the charm -- further refinement
---
 .../apache/tika/extractor/RUnpackExtractor.java    | 32 +++++++++++-----------
 .../src/test/java/org/apache/tika/TikaTest.java    |  9 ++++++
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/extractor/RUnpackExtractor.java b/tika-core/src/main/java/org/apache/tika/extractor/RUnpackExtractor.java
index 70c21ffb4..234c3155f 100644
--- a/tika-core/src/main/java/org/apache/tika/extractor/RUnpackExtractor.java
+++ b/tika-core/src/main/java/org/apache/tika/extractor/RUnpackExtractor.java
@@ -24,7 +24,6 @@ import java.io.InputStream;
 import java.io.OutputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.nio.file.StandardCopyOption;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -113,30 +112,31 @@ public class RUnpackExtractor extends ParsingEmbeddedDocumentExtractor {
 
     private void parseWithBytes(TikaInputStream tis, ContentHandler handler, Metadata metadata) throws TikaException, IOException, SAXException {
 
-        Path tmp = Files.createTempFile("tika-tmp-", ".bin");
+        //trigger spool to disk
+        Path rawBytes = tis.getPath();
+
+        //There may be a "translated" path for OLE2 etc
+        Path translated = null;
         try {
             //translate the stream or not
             if (embeddedStreamTranslator.shouldTranslate(tis, metadata)) {
-                try (OutputStream os = Files.newOutputStream(tmp)) {
+                translated = Files.createTempFile("tika-tmp-", ".bin");
+                try (OutputStream os = Files.newOutputStream(translated)) {
                     embeddedStreamTranslator.translate(tis, metadata, os);
                 }
-            } else {
-                Files.copy(tis, tmp, StandardCopyOption.REPLACE_EXISTING);
-            }
-
-            //now do the parse
-            if (tis.getOpenContainer() != null) {
-                parse(tis, handler, metadata);
-            } else {
-                try (TikaInputStream tisTmp = TikaInputStream.get(tmp)) {
-                    parse(tisTmp, handler, metadata);
-                }
             }
+            parse(tis, handler, metadata);
         } finally {
             try {
-                storeEmbeddedBytes(tmp, metadata);
+                if (translated != null) {
+                    storeEmbeddedBytes(translated, metadata);
+                } else {
+                    storeEmbeddedBytes(rawBytes, metadata);
+                }
             } finally {
-                Files.delete(tmp);
+                if (translated != null) {
+                    Files.delete(translated);
+                }
             }
         }
     }
diff --git a/tika-core/src/test/java/org/apache/tika/TikaTest.java b/tika-core/src/test/java/org/apache/tika/TikaTest.java
index a0a6377b8..4345c2a03 100644
--- a/tika-core/src/test/java/org/apache/tika/TikaTest.java
+++ b/tika-core/src/test/java/org/apache/tika/TikaTest.java
@@ -399,6 +399,15 @@ public abstract class TikaTest {
         }
     }
 
+    protected List<Metadata> getRecursiveMetadata(Path path, Parser parser, ParseContext parseContext,
+                                                  boolean suppressException) throws Exception {
+        Metadata metadata = new Metadata();
+        try (TikaInputStream tis = TikaInputStream.get(path, metadata)) {
+            return getRecursiveMetadata(tis, parser, metadata, parseContext,
+                    suppressException);
+        }
+    }
+
     protected List<Metadata> getRecursiveMetadata(Path path, Parser parser,
                                                   boolean suppressException) throws Exception {
         Metadata metadata = new Metadata();

Reply via email to