This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4645-usability-scripts
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 53274892bbd0a23c2832890d19eb02bca59468cb
Author: tallison <[email protected]>
AuthorDate: Mon Feb 2 06:47:29 2026 -0500

    TIKA-4645 - usability scripts
---
 docs/advanced/integration-testing/tika-server.adoc | 473 +++++++++++++++++++++
 .../org/apache/tika/async/cli/PluginsWriter.java   |  52 ++-
 .../apache/tika/server/core/TikaServerProcess.java | 265 ++++++++----
 .../server/core/resource/PipesParsingHelper.java   | 288 +++++++------
 .../org/apache/tika/server/core/CXFTestBase.java   |  39 +-
 5 files changed, 907 insertions(+), 210 deletions(-)

diff --git a/docs/advanced/integration-testing/tika-server.adoc 
b/docs/advanced/integration-testing/tika-server.adoc
new file mode 100644
index 0000000000..85bca5f1fa
--- /dev/null
+++ b/docs/advanced/integration-testing/tika-server.adoc
@@ -0,0 +1,473 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Tika-Server Integration Testing
+
+Integration tests for `tika-server` to be run from a distribution ZIP.
+
+== Setup
+
+[source,bash]
+----
+# Create test directory
+mkdir -p /tmp/tika-server-test
+cd /tmp/tika-server-test
+
+# Copy and extract distribution
+cp /path/to/tika-server-standard-4.0.0-SNAPSHOT-bin.zip .
+unzip tika-server-standard-4.0.0-SNAPSHOT-bin.zip
+
+# Copy test files
+cp /path/to/test-documents/testPDF.pdf .
+cp /path/to/test-documents/testHTML.html .
+cp /path/to/test-documents/test_recursive_embedded.docx .
+----
+
+== Part 1: Default Mode Tests
+
+Start server in default mode (config endpoints disabled):
+
+[source,bash]
+----
+java -jar tika-server.jar --port 9998 &
+sleep 8
+curl -s http://localhost:9998/version
+----
+
+=== Test 1: GET /version
+
+[source,bash]
+----
+curl -s http://localhost:9998/version
+----
+
+*Expected:* `Apache Tika X.X.X`
+
+=== Test 2: PUT /detect/stream
+
+[source,bash]
+----
+curl -s -X PUT -T testPDF.pdf http://localhost:9998/detect/stream
+----
+
+*Expected:* `application/pdf`
+
+=== Test 3: PUT /tika/text
+
+[source,bash]
+----
+curl -s -X PUT -T testPDF.pdf http://localhost:9998/tika/text
+----
+
+*Expected:* Plain text content extracted from PDF.
+
+=== Test 4: PUT /tika/html
+
+[source,bash]
+----
+curl -s -X PUT -T testPDF.pdf http://localhost:9998/tika/html
+----
+
+*Expected:* HTML with metadata in `<meta>` tags and content in `<body>`.
+
+=== Test 5: PUT /tika/xml
+
+[source,bash]
+----
+curl -s -X PUT -T testPDF.pdf http://localhost:9998/tika/xml
+----
+
+*Expected:* XHTML content (starts with `<html xmlns=...>`).
+
+=== Test 6: PUT /tika/json
+
+[source,bash]
+----
+curl -s -X PUT -T testPDF.pdf http://localhost:9998/tika/json
+----
+
+*Expected:* JSON object with metadata and X-TIKA:content field.
+
+=== Test 7: PUT /meta
+
+[source,bash]
+----
+curl -s -X PUT -H "Accept: application/json" -T testPDF.pdf 
http://localhost:9998/meta
+----
+
+*Expected:* JSON object with metadata only (no content).
+
+=== Test 8: PUT /meta/{field}
+
+[source,bash]
+----
+curl -s -X PUT -T testPDF.pdf http://localhost:9998/meta/Content-Type
+----
+
+*Expected:* `Content-Type,application/pdf`
+
+=== Test 9: PUT /rmeta
+
+[source,bash]
+----
+curl -s -X PUT -T test_recursive_embedded.docx http://localhost:9998/rmeta
+----
+
+*Expected:* JSON array with metadata for main document and all embedded 
documents.
+
+=== Test 10: PUT /rmeta/text
+
+[source,bash]
+----
+curl -s -X PUT -T test_recursive_embedded.docx http://localhost:9998/rmeta/text
+----
+
+*Expected:* JSON array with ToTextContentHandler content.
+
+=== Test 11: PUT /language/stream
+
+[source,bash]
+----
+curl -s -X PUT -T testPDF.pdf http://localhost:9998/language/stream
+----
+
+*Expected:* Two-letter language code (e.g., `en`, `th`).
+
+=== Test 12: PUT /unpack/all
+
+[source,bash]
+----
+curl -s -X PUT -T test_recursive_embedded.docx 
http://localhost:9998/unpack/all -o /tmp/unpack.zip
+unzip -l /tmp/unpack.zip
+----
+
+*Expected:* ZIP file containing extracted embedded files plus `__TEXT__` and 
`__METADATA__` files.
+
+=== Test 13: GET /parsers
+
+[source,bash]
+----
+curl -s -H "Accept: text/plain" http://localhost:9998/parsers
+----
+
+*Expected:* Hierarchical list of available parsers.
+
+=== Test 14: GET /detectors
+
+[source,bash]
+----
+curl -s -H "Accept: text/plain" http://localhost:9998/detectors
+----
+
+*Expected:* List of available detectors.
+
+=== Test 15: GET /mime-types
+
+[source,bash]
+----
+curl -s -H "Accept: application/json" http://localhost:9998/mime-types
+----
+
+*Expected:* JSON object with all known MIME types.
+
+=== Test 16: POST /meta/form
+
+[source,bash]
+----
+curl -s -X POST -F "upload=@testPDF.pdf" -H "Accept: application/json" 
http://localhost:9998/meta/form
+----
+
+*Expected:* JSON metadata from multipart form upload.
+
+=== Test 17: POST /rmeta/form
+
+[source,bash]
+----
+curl -s -X POST -F "upload=@test_recursive_embedded.docx" 
http://localhost:9998/rmeta/form
+----
+
+*Expected:* JSON array with recursive metadata from multipart upload.
+
+=== Test 18: Config Endpoints Blocked (Default Mode)
+
+[source,bash]
+----
+curl -s -w "\nHTTP Status: %{http_code}\n" -X POST -F "file=@testPDF.pdf" 
http://localhost:9998/meta/config
+curl -s -w "\nHTTP Status: %{http_code}\n" -X POST -F "file=@testPDF.pdf" 
http://localhost:9998/rmeta/config
+curl -s -w "\nHTTP Status: %{http_code}\n" -X POST -F "file=@testPDF.pdf" 
http://localhost:9998/tika/config
+curl -s -w "\nHTTP Status: %{http_code}\n" -X POST -F "file=@testPDF.pdf" 
http://localhost:9998/unpack/config
+----
+
+*Expected:* All return HTTP 403 with message: "Config endpoints are disabled. 
Set enableUnsecureFeatures=true in server config."
+
+== Part 2: Tests with enableUnsecureFeatures
+
+Stop the default server and create a config file:
+
+[source,bash]
+----
+pkill -f "tika-server.jar"
+
+cat > tika-config-unsecure.json << 'EOF'
+{
+  "server": {
+    "port": 9998,
+    "host": "localhost",
+    "enableUnsecureFeatures": true
+  },
+  "parsers": [
+    {"default-parser": {}}
+  ],
+  "plugin-roots": "/tmp/tika-server-test/plugins"
+}
+EOF
+
+java -jar tika-server.jar -c tika-config-unsecure.json &
+sleep 10
+curl -s http://localhost:9998/version
+----
+
+=== Test 19: POST /meta/config
+
+[source,bash]
+----
+curl -s -X POST -F "file=@testPDF.pdf" -H "Accept: application/json" 
http://localhost:9998/meta/config
+----
+
+*Expected:* JSON metadata.
+
+=== Test 20: POST /meta/config with custom parser config
+
+[source,bash]
+----
+curl -s -X POST -F "file=@testPDF.pdf" \
+  -F 'config={"parsers":[{"pdf-parser":{"ocrStrategy":"NO_OCR"}}]}' \
+  -H "Accept: application/json" \
+  http://localhost:9998/meta/config
+----
+
+*Expected:* JSON metadata with custom PDF parser config applied.
+
+=== Test 21: POST /unpack/config
+
+[source,bash]
+----
+curl -s -X POST -F "file=@test_recursive_embedded.docx" 
http://localhost:9998/unpack/config -o /tmp/unpack-config.zip
+unzip -l /tmp/unpack-config.zip
+----
+
+*Expected:* ZIP with extracted embedded files.
+
+=== Test 22: POST /unpack/all/config
+
+[source,bash]
+----
+curl -s -X POST -F "file=@test_recursive_embedded.docx" 
http://localhost:9998/unpack/all/config -o /tmp/unpack-all.zip
+unzip -l /tmp/unpack-all.zip
+----
+
+*Expected:* ZIP with all recursively extracted files.
+
+== Server Options
+
+=== Test 23: Custom Port
+
+[source,bash]
+----
+java -jar tika-server.jar --port 9999 &
+sleep 8
+curl -s http://localhost:9999/version
+----
+
+*Expected:* Server responds on port 9999.
+
+=== Test 24: Custom Host
+
+[source,bash]
+----
+java -jar tika-server.jar --host 0.0.0.0 --port 9998 &
+----
+
+*Expected:* Server binds to all interfaces.
+
+=== Test 25: With Config File
+
+[source,bash]
+----
+java -jar tika-server.jar -c tika-config.json &
+----
+
+*Expected:* Server uses custom configuration.
+
+== Headers
+
+=== Test 26: X-Tika-OCRskipOcr Header
+
+[source,bash]
+----
+curl -s -X PUT -H "X-Tika-OCRskipOcr: true" -T testPDF.pdf 
http://localhost:9998/tika/text
+----
+
+*Expected:* Text extraction without OCR.
+
+=== Test 27: Content-Disposition Filename
+
+[source,bash]
+----
+curl -s -X PUT -H "Content-Disposition: attachment; filename=myfile.pdf" -T 
testPDF.pdf http://localhost:9998/meta/resourceName
+----
+
+*Expected:* Returns the filename from Content-Disposition header.
+
+== Error Handling
+
+=== Test 28: Non-existent Endpoint
+
+[source,bash]
+----
+curl -s -w "\nHTTP Status: %{http_code}\n" http://localhost:9998/nonexistent
+----
+
+*Expected:* 404 Not Found.
+
+=== Test 29: Invalid Method
+
+[source,bash]
+----
+curl -s -w "\nHTTP Status: %{http_code}\n" -X DELETE 
http://localhost:9998/tika/text
+----
+
+*Expected:* 405 Method Not Allowed.
+
+== Cleanup
+
+[source,bash]
+----
+pkill -f "tika-server.jar"
+rm -rf /tmp/tika-server-test
+----
+
+== Usability Test Results
+
+The following endpoints were tested and verified working:
+
+=== Default Mode (enableUnsecureFeatures=false)
+
+[cols="1,1,1", options="header"]
+|===
+|Endpoint |Method |Status
+
+|`/version` |GET |PASS
+|`/detect/stream` |PUT |PASS
+|`/tika` |PUT |PASS
+|`/tika/text` |PUT |PASS
+|`/tika/html` |PUT |PASS
+|`/tika/xml` |PUT |PASS
+|`/tika/json` |PUT |PASS
+|`/meta` |PUT |PASS
+|`/meta/{field}` |PUT |PASS
+|`/rmeta` |PUT |PASS
+|`/rmeta/text` |PUT |PASS
+|`/language/stream` |PUT |PASS
+|`/unpack/all` |PUT |PASS
+|`/parsers` |GET |PASS
+|`/detectors` |GET |PASS
+|`/mime-types` |GET |PASS
+|`/meta/form` |POST |PASS
+|`/rmeta/form` |POST |PASS
+|`/meta/config` |POST |BLOCKED (403) - Expected
+|`/rmeta/config` |POST |BLOCKED (403) - Expected
+|`/tika/config` |POST |BLOCKED (403) - Expected
+|`/unpack/config` |POST |BLOCKED (403) - Expected
+|===
+
+=== With enableUnsecureFeatures=true
+
+[cols="1,1,1", options="header"]
+|===
+|Endpoint |Method |Status
+
+|`/meta/config` |POST |PASS
+|`/rmeta/config` |POST |PASS
+|`/tika/config` |POST |PASS
+|`/unpack/config` |POST |PASS
+|`/unpack/all/config` |POST |PASS
+|===
+
+== Known Issues
+
+=== Issue 1: Language Detection Accuracy
+
+Short texts may not be detected reliably. The `/language/stream` endpoint 
works best with substantial text content.
+
+== Quick Reference
+
+=== Basic Parsing
+[source,bash]
+----
+# Text output
+curl -X PUT -T file.pdf http://localhost:9998/tika/text
+
+# HTML output
+curl -X PUT -T file.pdf http://localhost:9998/tika/html
+
+# JSON output (metadata + content)
+curl -X PUT -T file.pdf http://localhost:9998/tika/json
+----
+
+=== Metadata Only
+[source,bash]
+----
+curl -X PUT -H "Accept: application/json" -T file.pdf 
http://localhost:9998/meta
+----
+
+=== Recursive Metadata
+[source,bash]
+----
+curl -X PUT -T file.docx http://localhost:9998/rmeta
+curl -X PUT -T file.docx http://localhost:9998/rmeta/text
+----
+
+=== Detection
+[source,bash]
+----
+curl -X PUT -T file.pdf http://localhost:9998/detect/stream
+----
+
+=== Extract Embedded Files
+[source,bash]
+----
+curl -X PUT -T file.docx http://localhost:9998/unpack/all -o output.zip
+----
+
+== Implementation Notes
+
+=== Automatic Component Configuration
+
+The server automatically configures the required fetcher and emitter for 
pipes-based parsing:
+
+* **tika-server-fetcher**: A file-system-fetcher with `basePath` pointing to a 
dedicated temp directory for input files. This enables the `/tika`, `/rmeta`, 
and `/meta` endpoints to work with uploaded files.
+
+* **unpack-emitter**: A file-system-emitter with `basePath` pointing to a 
dedicated temp directory for unpacked files. This is only created when the 
`/unpack` endpoint is enabled (default). This enables the `/unpack/all` 
endpoint to return embedded files as a ZIP.
+
+Both temp directories are cleaned up on server shutdown.
+
+If a user config file does not include `plugin-roots`, the server 
automatically adds a default value pointing to a `plugins` directory in the 
current working directory.
+
+=== Security Boundary
+
+Child processes (pipes workers) are configured with `basePath` rather than 
`allowAbsolutePaths`, ensuring they can only access files within their 
designated temp directories. This provides a security boundary between the 
parent server process and forked child processes.
diff --git 
a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java
 
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java
index c6e7a30af8..1257c48e4c 100644
--- 
a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java
+++ 
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java
@@ -17,7 +17,6 @@
 package org.apache.tika.async.cli;
 
 import java.io.IOException;
-import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
@@ -52,30 +51,59 @@ public class PluginsWriter {
             }
         }
         try {
-            String jsonTemplate = new 
String(getClass().getResourceAsStream("/config-template.json").readAllBytes(), 
StandardCharsets.UTF_8);
-            String json = jsonTemplate.replace("FETCHER_BASE_PATH", 
baseInput.toAbsolutePath().toString());
-            json = json.replace("EMITTER_BASE_PATH", 
baseOutput.toAbsolutePath().toString());
-            String pluginString = 
StringUtils.isBlank(simpleAsyncConfig.getPluginsDir()) ? "plugins" : 
simpleAsyncConfig.getPluginsDir();
+            ObjectMapper objectMapper = TikaObjectMapperFactory.getMapper();
+            ObjectNode root = (ObjectNode) objectMapper.readTree(
+                    getClass().getResourceAsStream("/config-template.json"));
+
+            // Set fetcher basePath
+            ObjectNode fetchers = (ObjectNode) root.get("fetchers");
+            if (fetchers != null && fetchers.has("fsf")) {
+                ObjectNode fsf = (ObjectNode) fetchers.get("fsf");
+                if (fsf != null && fsf.has("file-system-fetcher")) {
+                    ObjectNode fsFetcher = (ObjectNode) 
fsf.get("file-system-fetcher");
+                    fsFetcher.put("basePath", 
baseInput.toAbsolutePath().toString());
+                }
+            }
+
+            // Set emitter basePath
+            ObjectNode emitters = (ObjectNode) root.get("emitters");
+            if (emitters != null && emitters.has("fse")) {
+                ObjectNode fse = (ObjectNode) emitters.get("fse");
+                if (fse != null && fse.has("file-system-emitter")) {
+                    ObjectNode fsEmitter = (ObjectNode) 
fse.get("file-system-emitter");
+                    fsEmitter.put("basePath", 
baseOutput.toAbsolutePath().toString());
+                }
+            }
+
+            // Set pipes-iterator basePath
+            ObjectNode pipesIterator = (ObjectNode) root.get("pipes-iterator");
+            if (pipesIterator != null && 
pipesIterator.has("file-system-pipes-iterator")) {
+                ObjectNode fsIterator = (ObjectNode) 
pipesIterator.get("file-system-pipes-iterator");
+                fsIterator.put("basePath", 
baseInput.toAbsolutePath().toString());
+            }
+
+            // Set plugin-roots
+            String pluginString = 
StringUtils.isBlank(simpleAsyncConfig.getPluginsDir()) ?
+                    "plugins" : simpleAsyncConfig.getPluginsDir();
             Path plugins = Paths.get(pluginString);
             if (Files.isDirectory(plugins)) {
                 pluginString = plugins.toAbsolutePath().toString();
             }
-            json = json.replace("PLUGIN_ROOTS", pluginString).replace("\\", 
"/");
-            PipesConfig pipesConfig = new PipesConfig();
-
-            pipesConfig.setNumClients(simpleAsyncConfig.getNumClients() == 
null ? 2 : simpleAsyncConfig.getNumClients());
+            root.put("plugin-roots", pluginString);
 
+            // Set pipes config
+            PipesConfig pipesConfig = new PipesConfig();
+            pipesConfig.setNumClients(simpleAsyncConfig.getNumClients() == 
null ?
+                    2 : simpleAsyncConfig.getNumClients());
             if (simpleAsyncConfig.getXmx() != null) {
                 pipesConfig.setForkedJvmArgs(new 
ArrayList<>(List.of(simpleAsyncConfig.getXmx())));
             }
             if (simpleAsyncConfig.getTimeoutMs() != null) {
                 pipesConfig.setTimeoutMillis(simpleAsyncConfig.getTimeoutMs());
             }
-            ObjectMapper objectMapper = TikaObjectMapperFactory.getMapper();
-            ObjectNode root = (ObjectNode) 
objectMapper.readTree(json.getBytes(StandardCharsets.UTF_8));
             root.set("pipes", objectMapper.valueToTree(pipesConfig));
 
-            Files.writeString(output, root.toString());
+            
objectMapper.writerWithDefaultPrettyPrinter().writeValue(output.toFile(), root);
         } catch (Exception e) {
             throw new IOException(e);
         }
diff --git 
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java
 
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java
index d28cbb96c8..fdc8883f3b 100644
--- 
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java
+++ 
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java
@@ -27,7 +27,6 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
-import java.util.Locale;
 import java.util.Set;
 
 import org.apache.commons.cli.CommandLine;
@@ -444,6 +443,18 @@ public class TikaServerProcess {
         return endpoints.contains("tika") || endpoints.contains("rmeta");
     }
 
+    /**
+     * Determines if the /unpack endpoint is enabled based on configured 
endpoints.
+     */
+    private static boolean isUnpackEndpointEnabled(TikaServerConfig 
tikaServerConfig) {
+        List<String> endpoints = tikaServerConfig.getEndpoints();
+        // If no endpoints specified, all default endpoints are loaded 
(including unpack)
+        if (endpoints == null || endpoints.isEmpty()) {
+            return true;
+        }
+        return endpoints.contains("unpack");
+    }
+
     /**
      * Initializes the PipesParsingHelper for pipes-based parsing with process 
isolation.
      * <p>
@@ -452,22 +463,42 @@ public class TikaServerProcess {
      * <p>
      * If no config file is provided, a minimal default configuration will be 
created.
      * The plugin-roots will default to a "plugins" directory at the same 
level as the server jar.
+     * <p>
+     * A dedicated temp directory is created for input files, and a 
file-system-fetcher
+     * is configured with basePath pointing to that directory. This ensures 
child processes
+     * can only access files in the designated temp directory (security 
boundary).
      *
      * @param tikaServerConfig the server configuration
      * @return the PipesParsingHelper
      * @throws Exception if pipes initialization fails
      */
     private static PipesParsingHelper initPipesParsingHelper(TikaServerConfig 
tikaServerConfig) throws Exception {
-        // Load or create config
+        // Create dedicated temp directory for input files
+        Path inputTempDirectory = 
Files.createTempDirectory("tika-server-input-");
+        LOG.info("Created input temp directory: {}", inputTempDirectory);
+
+        // Only create unpack temp directory if /unpack endpoint is enabled
+        Path unpackTempDirectory = null;
+        if (isUnpackEndpointEnabled(tikaServerConfig)) {
+            unpackTempDirectory = 
Files.createTempDirectory("tika-server-unpack-");
+            LOG.info("Created unpack temp directory: {}", unpackTempDirectory);
+        }
+
+        // Load or create config, adding the fetcher (and emitter if unpack is 
enabled)
         Path configPath;
         if (tikaServerConfig.hasConfigFile()) {
             configPath = tikaServerConfig.getConfigPath();
         } else {
-            configPath = createDefaultConfig();
+            configPath = createDefaultConfig(inputTempDirectory, 
unpackTempDirectory);
         }
 
         TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(configPath);
 
+        // Ensure fetcher (and emitter if unpack is enabled) are configured 
with correct basePaths
+        configPath = ensureServerComponents(configPath, tikaJsonConfig,
+                inputTempDirectory, unpackTempDirectory);
+        tikaJsonConfig = TikaJsonConfig.load(configPath);
+
         // Load or create PipesConfig with defaults
         PipesConfig pipesConfig = tikaJsonConfig.deserialize("pipes", 
PipesConfig.class);
         if (pipesConfig == null) {
@@ -480,13 +511,13 @@ public class TikaServerProcess {
         // Create PipesParser
         PipesParser pipesParser = PipesParser.load(tikaJsonConfig, 
pipesConfig, configPath);
 
-        // Try to determine unpack emitter basePath from config
-        Path unpackEmitterBasePath = getUnpackEmitterBasePath(tikaJsonConfig);
-
         // Create and return the helper
-        PipesParsingHelper helper = new PipesParsingHelper(pipesParser, 
pipesConfig, unpackEmitterBasePath);
+        PipesParsingHelper helper = new PipesParsingHelper(pipesParser, 
pipesConfig,
+                inputTempDirectory, unpackTempDirectory);
 
-        // Register shutdown hook to clean up PipesParser
+        // Register shutdown hook to clean up PipesParser and temp directories
+        final Path inputDirToClean = inputTempDirectory;
+        final Path unpackDirToClean = unpackTempDirectory;
         Runtime.getRuntime().addShutdownHook(new Thread(() -> {
             try {
                 LOG.info("Shutting down PipesParser");
@@ -494,62 +525,32 @@ public class TikaServerProcess {
             } catch (Exception e) {
                 LOG.warn("Error closing PipesParser", e);
             }
+            // Clean up temp directories
+            cleanupTempDirectory(inputDirToClean);
+            if (unpackDirToClean != null) {
+                cleanupTempDirectory(unpackDirToClean);
+            }
         }));
 
         return helper;
     }
 
-    /**
-     * Attempts to determine the basePath for the unpack-emitter from the 
config.
-     * Returns null if the emitter is not configured or basePath cannot be 
determined.
-     */
-    private static Path getUnpackEmitterBasePath(TikaJsonConfig 
tikaJsonConfig) {
+    private static void cleanupTempDirectory(Path tempDir) {
         try {
-            java.util.Map<String, com.fasterxml.jackson.databind.JsonNode> 
emitters =
-                    tikaJsonConfig.getComponents("emitters");
-            if (emitters == null || 
!emitters.containsKey(PipesParsingHelper.UNPACK_EMITTER_ID)) {
-                LOG.debug("No unpack-emitter configured, UNPACK mode will not 
be available");
-                return null;
-            }
-
-            com.fasterxml.jackson.databind.JsonNode emitterConfig =
-                    emitters.get(PipesParsingHelper.UNPACK_EMITTER_ID);
-            com.fasterxml.jackson.databind.JsonNode basePath = 
findBasePath(emitterConfig);
-            if (basePath != null && basePath.isTextual()) {
-                Path path = Path.of(basePath.asText());
-                if (Files.isDirectory(path)) {
-                    LOG.info("UNPACK mode enabled with basePath: {}", path);
-                    return path;
-                } else {
-                    LOG.warn("unpack-emitter basePath does not exist: {}", 
path);
-                }
+            if (Files.exists(tempDir)) {
+                Files.walk(tempDir)
+                        .sorted((a, b) -> -a.compareTo(b)) // Delete files 
before directories
+                        .forEach(p -> {
+                            try {
+                                Files.deleteIfExists(p);
+                            } catch (IOException e) {
+                                LOG.warn("Failed to delete: {}", p);
+                            }
+                        });
             }
-        } catch (Exception e) {
-            LOG.warn("Failed to determine unpack-emitter basePath", e);
+        } catch (IOException e) {
+            LOG.warn("Error cleaning up temp directory: {}", tempDir, e);
         }
-        return null;
-    }
-
-    /**
-     * Recursively searches for "basePath" in a JSON node.
-     */
-    private static com.fasterxml.jackson.databind.JsonNode findBasePath(
-            com.fasterxml.jackson.databind.JsonNode node) {
-        if (node == null) {
-            return null;
-        }
-        if (node.has("basePath")) {
-            return node.get("basePath");
-        }
-        for (com.fasterxml.jackson.databind.JsonNode child : node) {
-            if (child.isObject()) {
-                com.fasterxml.jackson.databind.JsonNode result = 
findBasePath(child);
-                if (result != null) {
-                    return result;
-                }
-            }
-        }
-        return null;
     }
 
     /**
@@ -559,36 +560,150 @@ public class TikaServerProcess {
 
     /**
      * Creates a default configuration file with plugin-roots set to the 
"plugins" directory
-     * relative to the current working directory.
+     * relative to the current working directory, the tika-server-fetcher 
configured
+     * with basePath pointing to the input temp directory, and optionally the 
unpack-emitter
+     * configured with basePath pointing to the unpack temp directory.
+     *
+     * @param inputTempDirectory the temp directory for input files
+     * @param unpackTempDirectory the temp directory for unpack output files 
(may be null)
      */
-    private static Path createDefaultConfig() throws IOException {
+    private static Path createDefaultConfig(Path inputTempDirectory,
+                                            Path unpackTempDirectory) throws 
IOException {
         Path pluginsDir = Path.of(DEFAULT_PLUGINS_DIR).toAbsolutePath();
 
-        String configJson = String.format(Locale.ROOT, """
-            {
-              "fetchers": {
-                "file-system-fetcher": {
-                  "file-system-fetcher": {
-                    "allowAbsolutePaths": true
-                  }
-                }
-              },
-              "pipes": {
-                "numClients": 4,
-                "timeoutMillis": 60000
-              },
-              "plugin-roots": "%s"
-            }
-            """, pluginsDir.toString().replace("\\", "/"));
+        com.fasterxml.jackson.databind.ObjectMapper mapper =
+                new com.fasterxml.jackson.databind.ObjectMapper();
+        com.fasterxml.jackson.databind.node.ObjectNode rootNode = 
mapper.createObjectNode();
+
+        // Create fetchers section
+        com.fasterxml.jackson.databind.node.ObjectNode fetchersNode = 
mapper.createObjectNode();
+        com.fasterxml.jackson.databind.node.ObjectNode fetcherNode = 
mapper.createObjectNode();
+        com.fasterxml.jackson.databind.node.ObjectNode fetcherTypeConfig = 
mapper.createObjectNode();
+        fetcherTypeConfig.put("basePath", 
inputTempDirectory.toAbsolutePath().toString());
+        fetcherNode.set("file-system-fetcher", fetcherTypeConfig);
+        fetchersNode.set(PipesParsingHelper.DEFAULT_FETCHER_ID, fetcherNode);
+        rootNode.set("fetchers", fetchersNode);
+
+        // Create emitters section if unpack is enabled
+        if (unpackTempDirectory != null) {
+            com.fasterxml.jackson.databind.node.ObjectNode emittersNode = 
mapper.createObjectNode();
+            com.fasterxml.jackson.databind.node.ObjectNode emitterNode = 
mapper.createObjectNode();
+            com.fasterxml.jackson.databind.node.ObjectNode emitterTypeConfig = 
mapper.createObjectNode();
+            emitterTypeConfig.put("basePath", 
unpackTempDirectory.toAbsolutePath().toString());
+            emitterTypeConfig.put("onExists", "REPLACE");
+            emitterNode.set("file-system-emitter", emitterTypeConfig);
+            emittersNode.set(PipesParsingHelper.UNPACK_EMITTER_ID, 
emitterNode);
+            rootNode.set("emitters", emittersNode);
+        }
+
+        // Create pipes section
+        com.fasterxml.jackson.databind.node.ObjectNode pipesNode = 
mapper.createObjectNode();
+        pipesNode.put("numClients", 4);
+        pipesNode.put("timeoutMillis", 60000);
+        rootNode.set("pipes", pipesNode);
+
+        // Set plugin-roots
+        rootNode.put("plugin-roots", pluginsDir.toString());
 
         Path tempConfig = Files.createTempFile("tika-server-default-config-", 
".json");
-        Files.writeString(tempConfig, configJson);
+        
mapper.writerWithDefaultPrettyPrinter().writeValue(tempConfig.toFile(), 
rootNode);
         tempConfig.toFile().deleteOnExit();
 
         LOG.info("Created default config with plugin-roots: {}", pluginsDir);
         return tempConfig;
     }
 
+    /**
+     * Ensures the tika-server-fetcher exists in the config with basePath 
pointing to
+     * the input temp directory. If unpackTempDirectory is provided, also 
ensures the
+     * unpack-emitter exists.
+     * <p>
+     * The fetcher is used by legacy endpoints (/tika, /rmeta, etc.) to read 
uploaded files
+     * that have been spooled to the input temp directory.
+     * <p>
+     * The emitter is used by /unpack endpoints to write unpacked files that 
are then
+     * streamed back to the client.
+     * <p>
+     * Both components are configured with basePath (not allowAbsolutePaths) 
so child processes
+     * can only access files within their designated temp directories 
(security boundary).
+     *
+     * @param originalConfigPath the original config file path
+     * @param tikaJsonConfig the parsed Tika JSON config
+     * @param inputTempDirectory the temp directory for input files
+     * @param unpackTempDirectory the temp directory for unpack output files 
(may be null)
+     * @return the config path to use (always a new merged config with fetcher 
and optionally emitter)
+     */
+    private static Path ensureServerComponents(Path originalConfigPath, 
TikaJsonConfig tikaJsonConfig,
+                                               Path inputTempDirectory,
+                                               Path unpackTempDirectory) 
throws IOException {
+        LOG.info("Configuring {} with basePath={}", 
PipesParsingHelper.DEFAULT_FETCHER_ID, inputTempDirectory);
+
+        // Read original config as a mutable tree
+        com.fasterxml.jackson.databind.ObjectMapper mapper =
+                new com.fasterxml.jackson.databind.ObjectMapper();
+        com.fasterxml.jackson.databind.node.ObjectNode rootNode =
+                (com.fasterxml.jackson.databind.node.ObjectNode) 
mapper.readTree(originalConfigPath.toFile());
+
+        // Get or create the fetchers section
+        com.fasterxml.jackson.databind.node.ObjectNode fetchersNode;
+        if (rootNode.has("fetchers") && rootNode.get("fetchers").isObject()) {
+            fetchersNode = (com.fasterxml.jackson.databind.node.ObjectNode) 
rootNode.get("fetchers");
+        } else {
+            fetchersNode = mapper.createObjectNode();
+            rootNode.set("fetchers", fetchersNode);
+        }
+
+        // Create the fetcher config with basePath
+        // Structure: "tika-server-fetcher": { "file-system-fetcher": { 
"basePath": "/tmp/..." } }
+        com.fasterxml.jackson.databind.node.ObjectNode fetcherTypeConfig = 
mapper.createObjectNode();
+        fetcherTypeConfig.put("basePath", 
inputTempDirectory.toAbsolutePath().toString());
+
+        com.fasterxml.jackson.databind.node.ObjectNode fetcherNode = 
mapper.createObjectNode();
+        fetcherNode.set("file-system-fetcher", fetcherTypeConfig);
+
+        fetchersNode.set(PipesParsingHelper.DEFAULT_FETCHER_ID, fetcherNode);
+
+        // Only add unpack-emitter if unpack endpoint is enabled
+        if (unpackTempDirectory != null) {
+            LOG.info("Configuring {} with basePath={}", 
PipesParsingHelper.UNPACK_EMITTER_ID, unpackTempDirectory);
+
+            // Get or create the emitters section
+            com.fasterxml.jackson.databind.node.ObjectNode emittersNode;
+            if (rootNode.has("emitters") && 
rootNode.get("emitters").isObject()) {
+                emittersNode = 
(com.fasterxml.jackson.databind.node.ObjectNode) rootNode.get("emitters");
+            } else {
+                emittersNode = mapper.createObjectNode();
+                rootNode.set("emitters", emittersNode);
+            }
+
+            // Create the emitter config with basePath
+            // Structure: "unpack-emitter": { "file-system-emitter": { 
"basePath": "/tmp/...", "onExists": "REPLACE" } }
+            com.fasterxml.jackson.databind.node.ObjectNode emitterTypeConfig = 
mapper.createObjectNode();
+            emitterTypeConfig.put("basePath", 
unpackTempDirectory.toAbsolutePath().toString());
+            emitterTypeConfig.put("onExists", "REPLACE");
+
+            com.fasterxml.jackson.databind.node.ObjectNode emitterNode = 
mapper.createObjectNode();
+            emitterNode.set("file-system-emitter", emitterTypeConfig);
+
+            emittersNode.set(PipesParsingHelper.UNPACK_EMITTER_ID, 
emitterNode);
+        }
+
+        // Ensure plugin-roots is set (required for child processes)
+        if (!rootNode.has("plugin-roots")) {
+            Path pluginsDir = Path.of(DEFAULT_PLUGINS_DIR).toAbsolutePath();
+            rootNode.put("plugin-roots", pluginsDir.toString());
+            LOG.info("Added default plugin-roots: {}", pluginsDir);
+        }
+
+        // Write merged config to temp file
+        Path mergedConfig = Files.createTempFile("tika-server-merged-config-", 
".json");
+        
mapper.writerWithDefaultPrettyPrinter().writeValue(mergedConfig.toFile(), 
rootNode);
+        mergedConfig.toFile().deleteOnExit();
+
+        LOG.debug("Created merged config: {}", mergedConfig);
+        return mergedConfig;
+    }
+
     private static class ServerDetails {
         JAXRSServerFactoryBean sf;
         String serverId;
diff --git 
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesParsingHelper.java
 
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesParsingHelper.java
index c88a1ec799..6b1a6fe699 100644
--- 
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesParsingHelper.java
+++ 
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesParsingHelper.java
@@ -20,7 +20,6 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.nio.file.Paths;
 import java.util.Collections;
 import java.util.List;
 import java.util.UUID;
@@ -50,18 +49,9 @@ import org.apache.tika.server.core.TikaServerParseException;
  * Helper class for pipes-based parsing in tika-server endpoints.
  * Handles temp file management, FetchEmitTuple creation, and result 
processing.
  * <p>
- * To use pipes-based parsing, your tika-config.json must include a 
file-system fetcher
- * with allowAbsolutePaths enabled:
- * <pre>
- * {
- *   "fetchers": {
- *     "file-system-fetcher": {
- *       "class": "org.apache.tika.pipes.fetcher.fs.FileSystemFetcher",
- *       "allowAbsolutePaths": true
- *     }
- *   }
- * }
- * </pre>
+ * The helper manages a dedicated temp directory for input files. A 
file-system-fetcher
+ * is configured with basePath pointing to this directory, ensuring child 
processes
+ * can only access files within the designated temp directory (no absolute 
paths).
  */
 public class PipesParsingHelper {
 
@@ -69,9 +59,9 @@ public class PipesParsingHelper {
 
     /**
      * The fetcher ID used for reading temp files.
-     * This fetcher must be configured in the JSON config with 
allowAbsolutePaths=true.
+     * This fetcher is configured with basePath = inputTempDirectory.
      */
-    public static final String DEFAULT_FETCHER_ID = "file-system-fetcher";
+    public static final String DEFAULT_FETCHER_ID = "tika-server-fetcher";
 
     private final PipesParser pipesParser;
     private final PipesConfig pipesConfig;
@@ -83,33 +73,42 @@ public class PipesParsingHelper {
      *
      * @param pipesParser the PipesParser instance
      * @param pipesConfig the PipesConfig instance
+     * @param inputTempDirectory the temp directory for input files. The 
file-system-fetcher
+     *                           is configured with basePath = this directory.
      * @param unpackEmitterBasePath the basePath where the unpack-emitter 
writes files.
      *                              This is where the server will find the zip 
files created
      *                              by UNPACK mode. May be null if UNPACK mode 
won't be used.
      */
-    public PipesParsingHelper(PipesParser pipesParser, PipesConfig 
pipesConfig, Path unpackEmitterBasePath) {
+    public PipesParsingHelper(PipesParser pipesParser, PipesConfig pipesConfig,
+                              Path inputTempDirectory, Path 
unpackEmitterBasePath) {
         this.pipesParser = pipesParser;
         this.pipesConfig = pipesConfig;
+        this.inputTempDirectory = inputTempDirectory;
         this.unpackEmitterBasePath = unpackEmitterBasePath;
 
-        // Determine input temp directory
-        String configTempDir = pipesConfig.getTempDirectory();
-        if (configTempDir != null && !configTempDir.isBlank()) {
-            this.inputTempDirectory = Paths.get(configTempDir);
-            if (!Files.isDirectory(this.inputTempDirectory)) {
-                throw new IllegalArgumentException(
-                        "Configured tempDirectory does not exist or is not a 
directory: " + configTempDir);
-            }
-        } else {
-            this.inputTempDirectory = null; // Use system default
+        if (inputTempDirectory == null || 
!Files.isDirectory(inputTempDirectory)) {
+            throw new IllegalArgumentException(
+                    "inputTempDirectory must be a valid directory: " + 
inputTempDirectory);
         }
+        LOG.info("PipesParsingHelper initialized with inputTempDirectory: {}", 
inputTempDirectory);
+    }
+
+    /**
+     * Gets the input temp directory path.
+     * @return the input temp directory
+     */
+    public Path getInputTempDirectory() {
+        return inputTempDirectory;
     }
 
     /**
      * Parses content using pipes-based parsing with process isolation.
      * <p>
-     * The TikaInputStream should already be spooled to a temp file via {@link 
TikaInputStream#getPath()}.
-     * The caller is responsible for closing the TikaInputStream, which will 
clean up any temp files.
+     * This method spools the input to the dedicated temp directory and uses a 
relative
+     * filename in the FetchKey. The file-system-fetcher is configured with 
basePath
+     * pointing to this directory, so the child process can only access files 
there.
+     * <p>
+     * The caller is responsible for closing the TikaInputStream.
      *
      * @param tis the TikaInputStream containing the content to parse
      * @param metadata metadata to pass to the parser (may include filename, 
content-type, etc.)
@@ -122,17 +121,22 @@ public class PipesParsingHelper {
     public List<Metadata> parse(TikaInputStream tis, Metadata metadata,
                                  ParseContext parseContext, ParseMode 
parseMode) throws IOException {
         String requestId = UUID.randomUUID().toString();
+        Path tempFile = null;
 
         try {
-            // Get the backing file path from the spooled TikaInputStream
-            Path inputFile = tis.getPath();
-            LOG.debug("parse: using file {} ({} bytes)", inputFile, 
Files.size(inputFile));
+            // Spool input to our dedicated temp directory with proper suffix
+            String suffix = getSuffix(metadata);
+            tempFile = Files.createTempFile(inputTempDirectory, "tika-", 
suffix);
+            Files.copy(tis, tempFile, 
java.nio.file.StandardCopyOption.REPLACE_EXISTING);
+
+            String relativeName = tempFile.getFileName().toString();
+            LOG.debug("parse: spooled to {} ({} bytes)", relativeName, 
Files.size(tempFile));
 
             // Set parse mode in context
             parseContext.set(ParseMode.class, parseMode);
 
-            // Create FetchEmitTuple - use NO_EMIT since we're using 
PASSBACK_ALL
-            FetchKey fetchKey = new FetchKey(DEFAULT_FETCHER_ID, 
inputFile.toAbsolutePath().toString());
+            // Create FetchEmitTuple with relative filename (basePath is 
configured in fetcher)
+            FetchKey fetchKey = new FetchKey(DEFAULT_FETCHER_ID, relativeName);
 
             FetchEmitTuple tuple = new FetchEmitTuple(
                     requestId,
@@ -153,9 +157,33 @@ public class PipesParsingHelper {
             throw new TikaServerParseException("Parsing interrupted");
         } catch (PipesException e) {
             throw new TikaServerParseException(e);
+        } finally {
+            // Clean up temp file
+            if (tempFile != null) {
+                try {
+                    Files.deleteIfExists(tempFile);
+                } catch (IOException e) {
+                    LOG.warn("Failed to delete temp file: {}", tempFile, e);
+                }
+            }
         }
     }
 
+    /**
+     * Returns the resource name's extension (with its dot), or ".tmp" when no usable one exists.
+     */
+    private String getSuffix(Metadata metadata) {
+        String resourceName = 
metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
+        if (resourceName != null) {
+            int lastDot = resourceName.lastIndexOf('.');
+            if (lastDot > 0 && lastDot < resourceName.length() - 1) {
+                return resourceName.substring(lastDot);
+            }
+        }
+        // Default suffix
+        return ".tmp";
+    }
+
     /**
      * Processes the PipesResult and returns the metadata list.
      */
@@ -260,10 +288,11 @@ public class PipesParsingHelper {
      * extracted embedded documents.
      * <p>
      * This method:
-     * 1. Configures UnpackConfig with zipEmbeddedFiles=true
-     * 2. The pipes child process extracts embedded files and creates a zip
-     * 3. The zip is emitted to the configured file-system emitter
-     * 4. Returns the path to the zip file for streaming
+     * 1. Spools input to the dedicated temp directory
+     * 2. Configures UnpackConfig with zipEmbeddedFiles=true
+     * 3. The pipes child process extracts embedded files and creates a zip
+     * 4. The zip is emitted to the configured file-system emitter
+     * 5. Returns the path to the zip file for streaming
      * <p>
      * The caller is responsible for deleting the zip file after streaming.
      *
@@ -277,42 +306,47 @@ public class PipesParsingHelper {
     public UnpackResult parseUnpack(TikaInputStream tis, Metadata metadata,
                                     ParseContext parseContext, boolean 
saveAll) throws IOException {
         String requestId = UUID.randomUUID().toString();
+        Path tempFile = null;
 
-        // Get the backing file path from the spooled TikaInputStream
-        Path inputFile = tis.getPath();
-        LOG.debug("parseUnpack: using file {} ({} bytes), requestId={}",
-                inputFile, Files.size(inputFile), requestId);
-
-        // Set parse mode to UNPACK
-        parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
-        // Configure UnpackConfig - use existing or create new
-        UnpackConfig unpackConfig = parseContext.get(UnpackConfig.class);
-        if (unpackConfig == null) {
-            unpackConfig = new UnpackConfig();
-        }
+        try {
+            // Spool input to our dedicated temp directory with proper suffix
+            String suffix = getSuffix(metadata);
+            tempFile = Files.createTempFile(inputTempDirectory, 
"tika-unpack-", suffix);
+            Files.copy(tis, tempFile, 
java.nio.file.StandardCopyOption.REPLACE_EXISTING);
+
+            String relativeName = tempFile.getFileName().toString();
+            LOG.debug("parseUnpack: spooled to {} ({} bytes), requestId={}",
+                    relativeName, Files.size(tempFile), requestId);
+
+            // Set parse mode to UNPACK
+            parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+            // Configure UnpackConfig - use existing or create new
+            UnpackConfig unpackConfig = parseContext.get(UnpackConfig.class);
+            if (unpackConfig == null) {
+                unpackConfig = new UnpackConfig();
+            }
 
-        // Enable zip creation in the child process
-        unpackConfig.setZipEmbeddedFiles(true);
+            // Enable zip creation in the child process
+            unpackConfig.setZipEmbeddedFiles(true);
 
-        // Set suffix strategy to DETECTED so files get their proper 
extensions (e.g., .wav, .jpg)
-        unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
+            // Set suffix strategy to DETECTED so files get their proper 
extensions (e.g., .wav, .jpg)
+            
unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
 
-        // Set emitter to our file-system emitter
-        unpackConfig.setEmitter(UNPACK_EMITTER_ID);
+            // Set emitter to our file-system emitter
+            unpackConfig.setEmitter(UNPACK_EMITTER_ID);
 
-        // Include original document if saveAll is requested
-        if (saveAll) {
-            unpackConfig.setIncludeOriginal(true);
-            unpackConfig.setIncludeMetadataInZip(true);
-        }
+            // Include original document if saveAll is requested
+            if (saveAll) {
+                unpackConfig.setIncludeOriginal(true);
+                unpackConfig.setIncludeMetadataInZip(true);
+            }
 
-        parseContext.set(UnpackConfig.class, unpackConfig);
+            parseContext.set(UnpackConfig.class, unpackConfig);
 
-        // Create FetchEmitTuple - the emitKey will be used to determine the 
zip file location
-        // The zip file will be written to: emitter.basePath + "/" + emitKey + 
"-embedded.zip"
-        FetchKey fetchKey = new FetchKey(DEFAULT_FETCHER_ID, 
inputFile.toAbsolutePath().toString());
-        EmitKey emitKey = new EmitKey(UNPACK_EMITTER_ID, requestId);
+            // Create FetchEmitTuple with relative filename (basePath is 
configured in fetcher)
+            FetchKey fetchKey = new FetchKey(DEFAULT_FETCHER_ID, relativeName);
+            EmitKey emitKey = new EmitKey(UNPACK_EMITTER_ID, requestId);
 
         FetchEmitTuple tuple = new FetchEmitTuple(
                 requestId,
@@ -322,70 +356,80 @@ public class PipesParsingHelper {
                 parseContext
         );
 
-        // Execute parse via pipes
-        PipesResult result;
-        try {
-            result = pipesParser.parse(tuple);
-        } catch (InterruptedException e) {
-            Thread.currentThread().interrupt();
-            throw new TikaServerParseException("Parsing interrupted");
-        } catch (PipesException e) {
-            throw new TikaServerParseException(e);
-        }
+            // Execute parse via pipes
+            PipesResult result;
+            try {
+                result = pipesParser.parse(tuple);
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+                throw new TikaServerParseException("Parsing interrupted");
+            } catch (PipesException e) {
+                throw new TikaServerParseException(e);
+            }
 
-        // Check for errors
-        if (result.isProcessCrash() || result.isFatal() || 
result.isInitializationFailure()) {
-            LOG.warn("UNPACK parse failed: {} - {}", result.status(), 
result.message());
-            throw new WebApplicationException(
-                    "Parse failed: " + result.status(),
-                    mapStatusToHttpResponse(result.status()));
-        }
+            // Check for errors
+            if (result.isProcessCrash() || result.isFatal() || 
result.isInitializationFailure()) {
+                LOG.warn("UNPACK parse failed: {} - {}", result.status(), 
result.message());
+                throw new WebApplicationException(
+                        "Parse failed: " + result.status(),
+                        mapStatusToHttpResponse(result.status()));
+            }
 
-        if (result.isTaskException()) {
-            LOG.warn("UNPACK task exception: {} - {}", result.status(), 
result.message());
-            throw new WebApplicationException(
-                    "Parse failed: " + result.message(),
-                    Response.Status.INTERNAL_SERVER_ERROR);
-        }
+            if (result.isTaskException()) {
+                LOG.warn("UNPACK task exception: {} - {}", result.status(), 
result.message());
+                throw new WebApplicationException(
+                        "Parse failed: " + result.message(),
+                        Response.Status.INTERNAL_SERVER_ERROR);
+            }
 
-        // Get metadata list from result
-        List<Metadata> metadataList = Collections.emptyList();
-        EmitData emitData = result.emitData();
-        if (emitData != null && emitData.getMetadataList() != null) {
-            metadataList = emitData.getMetadataList();
-        }
+            // Get metadata list from result
+            List<Metadata> metadataList = Collections.emptyList();
+            EmitData emitData = result.emitData();
+            if (emitData != null && emitData.getMetadataList() != null) {
+                metadataList = emitData.getMetadataList();
+            }
 
-        // Check for parse exceptions in the container document metadata
-        // These should return appropriate HTTP status codes
-        if (!metadataList.isEmpty()) {
-            Metadata containerMetadata = metadataList.get(0);
-            String containerException = 
containerMetadata.get(TikaCoreProperties.CONTAINER_EXCEPTION);
-            if (containerException != null) {
-                // Map exception type to HTTP status
-                // 422 (Unprocessable Entity) for parse-related exceptions
-                int status = 422; // Default for parse exceptions
-                if (containerException.contains("EncryptedDocumentException") 
||
-                        containerException.contains("TikaException") ||
-                        containerException.contains("NullPointerException") ||
-                        containerException.contains("IllegalStateException")) {
-                    status = 422;
+            // Check for parse exceptions in the container document metadata
+            // These should return appropriate HTTP status codes
+            if (!metadataList.isEmpty()) {
+                Metadata containerMetadata = metadataList.get(0);
+                String containerException = 
containerMetadata.get(TikaCoreProperties.CONTAINER_EXCEPTION);
+                if (containerException != null) {
+                    // Map exception type to HTTP status
+                    // 422 (Unprocessable Entity) for parse-related exceptions
+                    int status = 422; // Default for parse exceptions
+                    if 
(containerException.contains("EncryptedDocumentException") ||
+                            containerException.contains("TikaException") ||
+                            
containerException.contains("NullPointerException") ||
+                            
containerException.contains("IllegalStateException")) {
+                        status = 422;
+                    }
+                    // Build response with exception string as body for stack 
trace support
+                    Response response = Response.status(status)
+                            .entity(containerException)
+                            .type("text/plain")
+                            .build();
+                    throw new WebApplicationException(response);
                 }
-                // Build response with exception string as body for stack 
trace support
-                Response response = Response.status(status)
-                        .entity(containerException)
-                        .type("text/plain")
-                        .build();
-                throw new WebApplicationException(response);
             }
-        }
 
-        // Determine the zip file path
-        // Regular format: emitter.basePath + "/" + emitKey + "-embedded.zip"
-        // Frictionless format: emitter.basePath + "/" + emitKey + 
"-frictionless.zip"
-        boolean isFrictionless = unpackConfig.getOutputFormat() == 
UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS;
-        Path zipFile = getEmittedZipPath(requestId, isFrictionless);
+            // Determine the zip file path
+            // Regular format: emitter.basePath + "/" + emitKey + 
"-embedded.zip"
+            // Frictionless format: emitter.basePath + "/" + emitKey + 
"-frictionless.zip"
+            boolean isFrictionless = unpackConfig.getOutputFormat() == 
UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS;
+            Path zipFile = getEmittedZipPath(requestId, isFrictionless);
 
-        return new UnpackResult(zipFile, metadataList);
+            return new UnpackResult(zipFile, metadataList);
+        } finally {
+            // Clean up temp file
+            if (tempFile != null) {
+                try {
+                    Files.deleteIfExists(tempFile);
+                } catch (IOException e) {
+                    LOG.warn("Failed to delete temp file: {}", tempFile, e);
+                }
+            }
+        }
     }
 
     /**
diff --git 
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
 
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
index d11d21984d..9cbdb7a11d 100644
--- 
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
+++ 
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
@@ -196,7 +196,12 @@ public abstract class CXFTestBase {
 
             this.tika = TikaLoader.load(tmp);
 
+            // Create input temp directory for pipes-based parsing
+            Path inputTempDirectory = 
Files.createTempDirectory("tika-server-test-input-");
+
             // Initialize PipesParsingHelper for pipes-based parsing
+            // Merge the fetcher config with basePath pointing to the temp 
directory
+            this.pipesConfigPath = mergeFetcherConfig(this.pipesConfigPath, 
inputTempDirectory);
             TikaJsonConfig tikaJsonConfig = 
TikaJsonConfig.load(this.pipesConfigPath);
             PipesConfig pipesConfig = tikaJsonConfig.deserialize("pipes", 
PipesConfig.class);
             if (pipesConfig == null) {
@@ -204,7 +209,8 @@ public abstract class CXFTestBase {
             }
             pipesConfig.setEmitStrategy(new 
EmitStrategyConfig(EmitStrategy.PASSBACK_ALL));
             this.pipesParser = PipesParser.load(tikaJsonConfig, pipesConfig, 
this.pipesConfigPath);
-            PipesParsingHelper pipesParsingHelper = new 
PipesParsingHelper(this.pipesParser, pipesConfig, getUnpackEmitterBasePath());
+            PipesParsingHelper pipesParsingHelper = new 
PipesParsingHelper(this.pipesParser, pipesConfig,
+                    inputTempDirectory, getUnpackEmitterBasePath());
 
             TikaResource.init(tika, new ServerStatus(), pipesParsingHelper);
         } finally {
@@ -259,6 +265,37 @@ public abstract class CXFTestBase {
         return tempConfig;
     }
 
+    /**
+     * Writes a copy of the pipes config, with the tika-server-fetcher added, to a new temp file.
+     * The fetcher is a file-system-fetcher whose basePath is the input temp directory.
+     */
+    private Path mergeFetcherConfig(Path configPath, Path inputTempDirectory) 
throws IOException {
+        ObjectMapper mapper = new ObjectMapper();
+        com.fasterxml.jackson.databind.node.ObjectNode root =
+                (com.fasterxml.jackson.databind.node.ObjectNode) 
mapper.readTree(configPath.toFile());
+
+        // Get or create fetchers section
+        com.fasterxml.jackson.databind.node.ObjectNode fetchers =
+                (com.fasterxml.jackson.databind.node.ObjectNode) 
root.get("fetchers");
+        if (fetchers == null) {
+            fetchers = mapper.createObjectNode();
+            root.set("fetchers", fetchers);
+        }
+
+        // Create the tika-server-fetcher with basePath
+        com.fasterxml.jackson.databind.node.ObjectNode fetcherTypeConfig = 
mapper.createObjectNode();
+        fetcherTypeConfig.put("basePath", 
inputTempDirectory.toAbsolutePath().toString());
+
+        com.fasterxml.jackson.databind.node.ObjectNode fetcherNode = 
mapper.createObjectNode();
+        fetcherNode.set("file-system-fetcher", fetcherTypeConfig);
+
+        fetchers.set(PipesParsingHelper.DEFAULT_FETCHER_ID, fetcherNode);
+
+        Path tempConfig = Files.createTempFile("tika-server-pipes-fetcher-", 
".json");
+        
mapper.writerWithDefaultPrettyPrinter().writeValue(tempConfig.toFile(), root);
+        return tempConfig;
+    }
+
     /**
      * Creates a default test config with pipes configuration.
      * If the tika config contains metadata-filters, they are merged into the 
pipes config.

Reply via email to