This is an automated email from the ASF dual-hosted git repository.

tballison pushed a commit to branch TIKA-4732
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 899a34247475e0f238ef40ed22f521b2b7a734e3
Author: Lawrence Moorehead <[email protected]>
AuthorDate: Fri May 15 12:24:05 2026 -0400

    Use supplied filename as RESOURCE_NAME_KEY during unpack
---
 .../apache/tika/pipes/core/server/PipesWorker.java | 10 ++++
 .../tika/server/standard/UnpackerResourceTest.java | 59 ++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java
index fb7553fee0..a96d070b57 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java
@@ -37,6 +37,7 @@ import org.apache.tika.extractor.EmbeddedDocumentExtractorFactory;
 import org.apache.tika.extractor.UnpackHandler;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.metadata.writefilter.MetadataWriteLimiterFactory;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
@@ -503,6 +504,15 @@ class PipesWorker implements Callable<PipesResult> {
         }
         // Use newMetadata() to apply any configured write limits
         Metadata metadata = localContext.newMetadata();
+        // Carry the caller-supplied resource name across the fresh-metadata boundary so
+        // detection, suffix selection, and the Frictionless manifest's name field see
+        // the logical filename rather than whatever the fetcher's path happens to be
+        // (e.g., a server-side spool prefix). TikaInputStream.get(path, metadata)
+        // already honors a pre-set RESOURCE_NAME_KEY.
+        String suppliedName = fetchEmitTuple.getMetadata().get(TikaCoreProperties.RESOURCE_NAME_KEY);
+        if (!StringUtils.isBlank(suppliedName)) {
+            metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, suppliedName);
+        }
         FetchHandler.TisOrResult tisOrResult = fetchHandler.fetch(fetchEmitTuple, metadata, localContext);
         if (tisOrResult.pipesResult() != null) {
             return new ParseDataOrPipesResult(null, tisOrResult.pipesResult());
diff --git a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceTest.java b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceTest.java
index ae200959b0..b8c62b17ae 100644
--- a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceTest.java
+++ b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/UnpackerResourceTest.java
@@ -556,6 +556,65 @@ public class UnpackerResourceTest extends CXFTestBase {
                         ", only-in-archive: " + difference(archiveDataFiles, manifestPaths));
     }
 
+    /**
+     * The Frictionless manifest's "name" field is supposed to carry the
+     * original filename of each resource. For the container (unpacked/0.<ext>),
+     * that name should be the filename the user supplied on the multipart
+     * upload -- not the server's internal spool filename.
+     */
+    @Test
+    public void testFrictionlessContainerManifestNameMatchesUploadFilename() throws Exception {
+        String configJson = """
+                {
+                  "parse-context": {
+                    "unpack-config": {
+                      "outputFormat": "FRICTIONLESS",
+                      "outputMode": "ZIPPED"
+                    }
+                  }
+                }
+                """;
+        String uploadFilename = "Doc1_ole.doc";
+        ContentDisposition fileCd = new ContentDisposition(
+                "form-data; name=\"file\"; filename=\"" + uploadFilename + "\"");
+        Attachment fileAtt = new Attachment("file",
+                ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV), fileCd);
+        Attachment configAtt = new Attachment("config", "application/json",
+                new ByteArrayInputStream(configJson.getBytes(StandardCharsets.UTF_8)));
+
+        Response response = WebClient
+                .create(endPoint + ALL_PATH)
+                .type("multipart/form-data")
+                .accept("application/zip")
+                .post(new MultipartBody(Arrays.asList(fileAtt, configAtt)));
+
+        assertEquals(200, response.getStatus());
+        Map<String, byte[]> data = readZipArchiveBytes((InputStream) response.getEntity());
+
+        byte[] dpBytes = data.get("datapackage.json");
+        assertNotNull(dpBytes, "datapackage.json should be present");
+        JsonNode dataPackage = MAPPER.readTree(dpBytes);
+
+        JsonNode containerResource = null;
+        for (JsonNode resource : dataPackage.get("resources")) {
+            String path = resource.get("path").asText();
+            if (path.equals("unpacked/0") || path.startsWith("unpacked/0.")) {
+                containerResource = resource;
+                break;
+            }
+        }
+        assertNotNull(containerResource,
+                "Manifest should list the container at unpacked/0. Resources: " +
+                        dataPackage.get("resources"));
+
+        JsonNode nameNode = containerResource.get("name");
+        assertNotNull(nameNode,
+                "Container resource should carry a 'name' field. Resource: " + containerResource);
+        assertEquals(uploadFilename, nameNode.asText(),
+                "Container's manifest name should be the user-supplied upload filename, " +
+                        "not the server's internal spool filename. Resource: " + containerResource);
+    }
+
     private static Set<String> difference(Set<String> a, Set<String> b) {
         Set<String> diff = new HashSet<>(a);
         diff.removeAll(b);

Reply via email to