This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_3x by this push:
new 6c7adfe778 TIKA-4617 -- stream translator should change names only in
rare circumstances. (#2518)
6c7adfe778 is described below
commit 6c7adfe778fae702707a615d3fa843a205acbb41
Author: Tim Allison <[email protected]>
AuthorDate: Thu Jan 8 20:34:25 2026 -0500
TIKA-4617 -- stream translator should change names only in rare
circumstances. (#2518)
---
.../tika/extractor/microsoft/MSEmbeddedStreamTranslator.java | 9 +++++++--
.../org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java | 5 +++++
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/extractor/microsoft/MSEmbeddedStreamTranslator.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/extractor/microsoft/MSEmbeddedStreamTranslator.java
index 24f7ec2d30..7e0794d080 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/extractor/microsoft/MSEmbeddedStreamTranslator.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/extractor/microsoft/MSEmbeddedStreamTranslator.java
@@ -22,6 +22,7 @@ import java.io.InputStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream;
import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
+import org.apache.commons.lang3.StringUtils;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
@@ -72,14 +73,18 @@ public class MSEmbeddedStreamTranslator implements EmbeddedStreamTranslator {
try {
Ole10Native ole =
Ole10Native.createFromEmbeddedOleObject(poifs);
if (ole.getDataSize() > 0) {
- name = ole.getLabel();
+ if (StringUtils.isAllBlank(name)) {
+ name = ole.getLabel();
+ }
data = ole.getDataBuffer();
}
} catch (Ole10NativeException ex) {
LOG.warn("Skipping invalid part", ex);
}
} else {
- name += '.' + type.getExtension();
+ if (! StringUtils.isAllBlank(type.getExtension()) && !
StringUtils.isAllBlank(name) && !name.contains(".")) {
+ name += '.' + type.getExtension();
+ }
}
metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, name);
return
UnsynchronizedByteArrayInputStream.builder().setByteArray(data).get();
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
index ffba11b0c2..7c1ed12943 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
@@ -152,6 +152,7 @@ public class OOXMLParserTest extends TikaTest {
Parser parser = TikaTest.AUTO_DETECT_PARSER;
Parser digestingParser = new DigestingParser(parser, new
CommonsDigester(100000, "sha256"), false);
List<Metadata> metadataList =
getRecursiveMetadata("testMSChart-govdocs-428996.pptx", digestingParser);
+
assertEquals(4, metadataList.size());
for (Metadata m : metadataList) {
assertNotNull(m.get("X-TIKA:digest:SHA256"));
@@ -159,5 +160,9 @@ public class OOXMLParserTest extends TikaTest {
//before TIKA-4607
assertNull(m.get(TikaCoreProperties.EMBEDDED_EXCEPTION));
}
+
+ assertEquals("/oleObject1.bin",
metadataList.get(2).get(TikaCoreProperties.FINAL_EMBEDDED_RESOURCE_PATH));
+ assertEquals("application/vnd.ms-graph",
metadataList.get(2).get(Metadata.CONTENT_TYPE));
+
assertEquals("4cfadec808582492aeb5f1ae0f391dadbd3402affeef3e5488b4f6a07537aea5",
metadataList.get(2).get("X-TIKA:digest:SHA256"));
}
}