This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_3x by this push:
     new 6c7adfe778 TIKA-4617 -- stream translator should change names only in rare circumstances. (#2518)
6c7adfe778 is described below

commit 6c7adfe778fae702707a615d3fa843a205acbb41
Author: Tim Allison <[email protected]>
AuthorDate: Thu Jan 8 20:34:25 2026 -0500

    TIKA-4617 -- stream translator should change names only in rare circumstances. (#2518)
---
 .../tika/extractor/microsoft/MSEmbeddedStreamTranslator.java     | 9 +++++++--
 .../org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java  | 5 +++++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/extractor/microsoft/MSEmbeddedStreamTranslator.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/extractor/microsoft/MSEmbeddedStreamTranslator.java
index 24f7ec2d30..7e0794d080 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/extractor/microsoft/MSEmbeddedStreamTranslator.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/extractor/microsoft/MSEmbeddedStreamTranslator.java
@@ -22,6 +22,7 @@ import java.io.InputStream;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream;
 import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.poi.poifs.filesystem.DirectoryEntry;
 import org.apache.poi.poifs.filesystem.DocumentEntry;
 import org.apache.poi.poifs.filesystem.DocumentInputStream;
@@ -72,14 +73,18 @@ public class MSEmbeddedStreamTranslator implements 
EmbeddedStreamTranslator {
                 try {
                     Ole10Native ole = 
Ole10Native.createFromEmbeddedOleObject(poifs);
                     if (ole.getDataSize() > 0) {
-                        name = ole.getLabel();
+                        if (StringUtils.isAllBlank(name)) {
+                            name = ole.getLabel();
+                        }
                         data = ole.getDataBuffer();
                     }
                 } catch (Ole10NativeException ex) {
                     LOG.warn("Skipping invalid part", ex);
                 }
             } else {
-                name += '.' + type.getExtension();
+                if (! StringUtils.isAllBlank(type.getExtension()) && ! 
StringUtils.isAllBlank(name) && !name.contains(".")) {
+                    name += '.' + type.getExtension();
+                }
             }
             metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, name);
             return 
UnsynchronizedByteArrayInputStream.builder().setByteArray(data).get();
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
index ffba11b0c2..7c1ed12943 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
@@ -152,6 +152,7 @@ public class OOXMLParserTest extends TikaTest {
         Parser parser = TikaTest.AUTO_DETECT_PARSER;
         Parser digestingParser = new DigestingParser(parser, new 
CommonsDigester(100000, "sha256"), false);
         List<Metadata> metadataList = 
getRecursiveMetadata("testMSChart-govdocs-428996.pptx", digestingParser);
+
         assertEquals(4, metadataList.size());
         for (Metadata m : metadataList) {
             assertNotNull(m.get("X-TIKA:digest:SHA256"));
@@ -159,5 +160,9 @@ public class OOXMLParserTest extends TikaTest {
             //before TIKA-4607
             assertNull(m.get(TikaCoreProperties.EMBEDDED_EXCEPTION));
         }
+
+        assertEquals("/oleObject1.bin", 
metadataList.get(2).get(TikaCoreProperties.FINAL_EMBEDDED_RESOURCE_PATH));
+        assertEquals("application/vnd.ms-graph", 
metadataList.get(2).get(Metadata.CONTENT_TYPE));
+        
assertEquals("4cfadec808582492aeb5f1ae0f391dadbd3402affeef3e5488b4f6a07537aea5",
 metadataList.get(2).get("X-TIKA:digest:SHA256"));
     }
 }

Reply via email to