This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 1c06d308a9 TIKA-4645-usability-scripts and bug fixes  (#2577)
1c06d308a9 is described below

commit 1c06d308a9e2ddba4744d4d860fb23e9c4210516
Author: Tim Allison <[email protected]>
AuthorDate: Mon Feb 2 14:30:43 2026 -0500

    TIKA-4645-usability-scripts and bug fixes  (#2577)
---
 docs/advanced/integration-testing/tika-app.adoc    | 398 +++++++++++++++++
 docs/advanced/integration-testing/tika-server.adoc | 473 +++++++++++++++++++++
 docs/modules/ROOT/pages/migration-to-4x/index.adoc |  17 +
 .../pages/migration-to-4x/migrating-to-4x.adoc     |  17 +-
 .../main/java/org/apache/tika/cli/AsyncHelper.java |  16 +
 .../src/main/java/org/apache/tika/cli/TikaCLI.java |  55 ++-
 .../java/org/apache/tika/cli/AsyncHelperTest.java  |  43 ++
 .../test/java/org/apache/tika/cli/TikaCLITest.java |  51 +++
 .../src/test/resources/s3/tika-config-s3.json      |   5 +-
 .../ocr/configs/tika-config-restricted-gdal.json   |   5 -
 .../apache/tika/parser/ocr/tesseract-config.json   |   5 -
 .../org/apache/tika/async/cli/PluginsWriter.java   |  52 ++-
 .../org/apache/tika/async/cli/TikaAsyncCLI.java    |  58 ++-
 .../src/main/resources/config-template.json        |   5 +-
 .../apache/tika/async/cli/AsyncCliParserTest.java  |  90 ++++
 .../apache/tika/server/core/TikaServerProcess.java | 265 ++++++++----
 .../server/core/resource/PipesParsingHelper.java   | 288 +++++++------
 .../org/apache/tika/server/core/CXFTestBase.java   |  39 +-
 18 files changed, 1617 insertions(+), 265 deletions(-)

diff --git a/docs/advanced/integration-testing/tika-app.adoc 
b/docs/advanced/integration-testing/tika-app.adoc
new file mode 100644
index 0000000000..ea0b846173
--- /dev/null
+++ b/docs/advanced/integration-testing/tika-app.adoc
@@ -0,0 +1,398 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Tika-App Integration Testing
+
+Integration tests for `tika-app` to be run from a distribution ZIP.
+
+== Setup
+
+[source,bash]
+----
+# Create test directory
+mkdir -p /tmp/tika-app-test
+cd /tmp/tika-app-test
+
+# Copy and extract distribution
+cp /path/to/tika-app-4.0.0-SNAPSHOT.zip .
+unzip tika-app-4.0.0-SNAPSHOT.zip
+cd tika-app-4.0.0-SNAPSHOT
+
+# Get test files
+cp 
/path/to/tika-main/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testPDF.pdf
 .
+cp 
/path/to/tika-main/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/test_recursive_embedded.docx
 .
+cp 
/path/to/tika-main/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testHTML.html
 .
+----
+
+== Test Cases
+
+=== Test 1: Basic Text Extraction
+
+[source,bash]
+----
+java -jar tika-app.jar --text testPDF.pdf
+----
+
+*Expected:* Outputs extracted text from PDF.
+
+=== Test 2: Metadata Extraction
+
+[source,bash]
+----
+java -jar tika-app.jar --metadata testPDF.pdf
+----
+
+*Expected:* Outputs key=value metadata pairs.
+
+=== Test 3: JSON Output with Pretty Print
+
+[source,bash]
+----
+java -jar tika-app.jar --json --pretty-print testPDF.pdf
+----
+
+*Expected:* Clean, readable JSON output with metadata.
+
+=== Test 4: File Type Detection
+
+[source,bash]
+----
+java -jar tika-app.jar --detect testPDF.pdf
+----
+
+*Expected:* Returns `application/pdf`
+
+=== Test 5: Non-existent File Handling
+
+[source,bash]
+----
+java -jar tika-app.jar --text nonexistent_file.pdf
+----
+
+*Expected:* Clear error message (currently shows a confusing 
"MalformedURLException: no protocol" error instead).
+
+=== Test 6: Recursive JSON Output
+
+[source,bash]
+----
+java -jar tika-app.jar --jsonRecursive test_recursive_embedded.docx
+----
+
+*Expected:* JSON array with metadata and content for main doc and all embedded 
documents.
+
+=== Test 7: Stdin Input
+
+[source,bash]
+----
+echo "Hello World" | java -jar tika-app.jar --text
+----
+
+*Expected:* Outputs "Hello World"
+
+=== Test 8: Extract Attachments (-z)
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/extract-out
+java -jar tika-app.jar -z --extract-dir=/tmp/tika-app-test/extract-out 
test_recursive_embedded.docx
+ls /tmp/tika-app-test/extract-out
+----
+
+*Expected:* Creates .json metadata file and extracts embedded files to 
extract-out directory.
+
+=== Test 9: Recursive Extract (-Z)
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/extract-recursive
+java -jar tika-app.jar -Z --extract-dir=/tmp/tika-app-test/extract-recursive 
test_recursive_embedded.docx
+ls -R /tmp/tika-app-test/extract-recursive
+----
+
+*Expected:* Extracts all nested embedded documents recursively.
+
+=== Test 10: Batch Mode (Simple)
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/batch-input
+mkdir -p /tmp/tika-app-test/batch-output
+cp testPDF.pdf testHTML.html /tmp/tika-app-test/batch-input/
+java -jar tika-app.jar /tmp/tika-app-test/batch-input 
/tmp/tika-app-test/batch-output
+ls /tmp/tika-app-test/batch-output
+----
+
+*Expected:* Creates .json files for each input file in output directory.
+
+=== Test 10b: Batch Mode with Output Options
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/batch-output2
+java -jar tika-app.jar -J -t /tmp/tika-app-test/batch-input 
/tmp/tika-app-test/batch-output2
+ls /tmp/tika-app-test/batch-output2
+----
+
+*Expected:* Creates .json files with text content (X-TIKA:content_handler 
should be ToTextContentHandler).
+
+=== Test 11: Version Check
+
+[source,bash]
+----
+java -jar tika-app.jar --version
+----
+
+*Expected:* Returns `Apache Tika X.X.X`
+
+=== Test 12: List Parsers
+
+[source,bash]
+----
+java -jar tika-app.jar --list-parsers
+----
+
+*Expected:* Hierarchical list of available parsers.
+
+=== Test 13: Language Detection
+
+[source,bash]
+----
+java -jar tika-app.jar --language testPDF.pdf
+----
+
+*Expected:* Returns detected language code.
+
+=== Test 14: Digest Computation
+
+[source,bash]
+----
+java -jar tika-app.jar --digest=md5 --json testPDF.pdf
+----
+
+*Expected:* JSON output includes `X-TIKA:digest:MD5` field.
+
+=== Test 15: URL Input
+
+[source,bash]
+----
+java -jar tika-app.jar --detect https://www.apache.org/
+----
+
+*Expected:* Returns `text/html`
+
+=== Test 16: XMP Output
+
+[source,bash]
+----
+java -jar tika-app.jar --xmp testPDF.pdf
+----
+
+*Expected:* Valid XMP metadata in RDF/XML format.
+
+=== Test 17: Boilerpipe Main Content
+
+[source,bash]
+----
+java -jar tika-app.jar --text-main testHTML.html
+----
+
+*Expected:* Returns only main content, not boilerplate.
+
+=== Test 18: Depth Limiting
+
+[source,bash]
+----
+java -jar tika-app.jar --maxEmbeddedDepth=1 --text test_recursive_embedded.docx
+----
+
+*Expected:* Limited depth of embedded document extraction.
+
+=== Test 19: GUI Mode
+
+[source,bash]
+----
+java -jar tika-app.jar
+----
+
+*Expected:* Opens GUI (skip in headless environments).
+
+== Advanced Tests: Custom Config
+
+These tests require creating a custom tika-config.json file.
+
+=== Test 20: Create Custom Config File
+
+Create `/tmp/tika-app-test/my-config.json`:
+[source,json]
+----
+{
+  "content-handler-factory": {
+    "basic-content-handler-factory": {
+      "type": "TEXT",
+      "writeLimit": 100000,
+      "throwOnWriteLimitReached": false
+    }
+  },
+  "parsers": [
+    {
+      "default-parser": {}
+    },
+    {
+      "pdf-parser": {
+        "extractActions": true,
+        "extractInlineImages": true,
+        "ocrStrategy": "NO_OCR"
+      }
+    },
+    {
+      "ooxml-parser": {
+        "includeDeletedContent": true,
+        "includeMoveFromContent": true,
+        "extractMacros": true
+      }
+    }
+  ],
+  "fetchers": {
+    "fsf": {
+      "file-system-fetcher": {
+        "basePath": "/tmp/tika-app-test/batch-input",
+        "extractFileSystemMetadata": true
+      }
+    }
+  },
+  "emitters": {
+    "fse": {
+      "file-system-emitter": {
+        "basePath": "/tmp/tika-app-test/config-output",
+        "fileExtension": "json",
+        "onExists": "REPLACE"
+      }
+    }
+  },
+  "pipes-iterator": {
+    "file-system-pipes-iterator": {
+      "basePath": "/tmp/tika-app-test/batch-input",
+      "countTotal": true,
+      "fetcherId": "fsf",
+      "emitterId": "fse"
+    }
+  },
+  "pipes": {
+    "parseMode": "RMETA",
+    "numClients": 2,
+    "timeoutMillis": 60000
+  },
+  "plugin-roots": "/tmp/tika-app-test/plugins"
+}
+----
+
+=== Test 21: Run with Custom Config
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/config-output
+java -jar tika-app.jar /tmp/tika-app-test/my-config.json
+ls /tmp/tika-app-test/config-output
+----
+
+*Expected:* Processes all files in batch-input using custom parser settings.
+
+=== Test 22: Async Mode with Config Flag
+
+[source,bash]
+----
+java -jar tika-app.jar -a --config=/tmp/tika-app-test/my-config.json
+----
+
+*Expected:* Same as Test 21 but using explicit async flag.
+
+=== Test 23: Unpack with Frictionless Format
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/frictionless-out
+java -jar tika-app.jar -Z --extract-dir=/tmp/tika-app-test/frictionless-out 
--unpack-format=FRICTIONLESS --unpack-include-metadata 
test_recursive_embedded.docx
+ls /tmp/tika-app-test/frictionless-out
+----
+
+*Expected:* Extracts embedded files in Frictionless data package format with 
metadata.json.
+
+=== Test 24: Unpack to Directory (not zipped)
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/unpack-dir-out
+java -jar tika-app.jar -Z --extract-dir=/tmp/tika-app-test/unpack-dir-out 
--unpack-mode=DIRECTORY test_recursive_embedded.docx
+ls -R /tmp/tika-app-test/unpack-dir-out
+----
+
+*Expected:* Extracts embedded files to directory structure instead of zipped.
+
+=== Test 25: Batch with Multiple Workers
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/multi-worker-out
+java -jar tika-app.jar -n 4 /tmp/tika-app-test/batch-input 
/tmp/tika-app-test/multi-worker-out
+----
+
+*Expected:* Processes files using 4 parallel forked clients.
+
+=== Test 26: Batch with Custom Timeout
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/timeout-out
+java -jar tika-app.jar -T 30000 /tmp/tika-app-test/batch-input 
/tmp/tika-app-test/timeout-out
+----
+
+*Expected:* Processes files with 30 second timeout per file.
+
+=== Test 27: Batch with Custom Heap
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/heap-out
+java -jar tika-app.jar -X 2g /tmp/tika-app-test/batch-input 
/tmp/tika-app-test/heap-out
+----
+
+*Expected:* Forked processes use 2GB heap.
+
+== Known Issues
+
+=== Issue 1: Confusing "no protocol" Error
+
+When a file doesn't exist, the error message is misleading:
+[source]
+----
+MalformedURLException: no protocol: nonexistent_file.pdf
+----
+
+The message should say "File not found" instead.
+
+=== Issue 2: INFO Message on Every Command
+
+Every command prints an INFO message to stderr about convenience features. Use 
`2>/dev/null` to suppress.
+
+=== Issue 3: Config Dump Options Not Implemented
+
+These options are not yet implemented in 4.x:
+
+* `--dump-minimal-config`
+* `--dump-current-config`
+* `--dump-static-config`
+* `--dump-static-full-config`
diff --git a/docs/advanced/integration-testing/tika-server.adoc 
b/docs/advanced/integration-testing/tika-server.adoc
new file mode 100644
index 0000000000..85bca5f1fa
--- /dev/null
+++ b/docs/advanced/integration-testing/tika-server.adoc
@@ -0,0 +1,473 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Tika-Server Integration Testing
+
+Integration tests for `tika-server` to be run from a distribution ZIP.
+
+== Setup
+
+[source,bash]
+----
+# Create test directory
+mkdir -p /tmp/tika-server-test
+cd /tmp/tika-server-test
+
+# Copy and extract distribution
+cp /path/to/tika-server-standard-4.0.0-SNAPSHOT-bin.zip .
+unzip tika-server-standard-4.0.0-SNAPSHOT-bin.zip
+
+# Copy test files
+cp /path/to/test-documents/testPDF.pdf .
+cp /path/to/test-documents/testHTML.html .
+cp /path/to/test-documents/test_recursive_embedded.docx .
+----
+
+== Part 1: Default Mode Tests
+
+Start server in default mode (config endpoints disabled):
+
+[source,bash]
+----
+java -jar tika-server.jar --port 9998 &
+sleep 8
+curl -s http://localhost:9998/version
+----
+
+=== Test 1: GET /version
+
+[source,bash]
+----
+curl -s http://localhost:9998/version
+----
+
+*Expected:* `Apache Tika X.X.X`
+
+=== Test 2: PUT /detect/stream
+
+[source,bash]
+----
+curl -s -X PUT -T testPDF.pdf http://localhost:9998/detect/stream
+----
+
+*Expected:* `application/pdf`
+
+=== Test 3: PUT /tika/text
+
+[source,bash]
+----
+curl -s -X PUT -T testPDF.pdf http://localhost:9998/tika/text
+----
+
+*Expected:* Plain text content extracted from PDF.
+
+=== Test 4: PUT /tika/html
+
+[source,bash]
+----
+curl -s -X PUT -T testPDF.pdf http://localhost:9998/tika/html
+----
+
+*Expected:* HTML with metadata in `<meta>` tags and content in `<body>`.
+
+=== Test 5: PUT /tika/xml
+
+[source,bash]
+----
+curl -s -X PUT -T testPDF.pdf http://localhost:9998/tika/xml
+----
+
+*Expected:* XHTML content (starts with `<html xmlns=...>`).
+
+=== Test 6: PUT /tika/json
+
+[source,bash]
+----
+curl -s -X PUT -T testPDF.pdf http://localhost:9998/tika/json
+----
+
+*Expected:* JSON object with metadata and X-TIKA:content field.
+
+=== Test 7: PUT /meta
+
+[source,bash]
+----
+curl -s -X PUT -H "Accept: application/json" -T testPDF.pdf 
http://localhost:9998/meta
+----
+
+*Expected:* JSON object with metadata only (no content).
+
+=== Test 8: PUT /meta/{field}
+
+[source,bash]
+----
+curl -s -X PUT -T testPDF.pdf http://localhost:9998/meta/Content-Type
+----
+
+*Expected:* `Content-Type,application/pdf`
+
+=== Test 9: PUT /rmeta
+
+[source,bash]
+----
+curl -s -X PUT -T test_recursive_embedded.docx http://localhost:9998/rmeta
+----
+
+*Expected:* JSON array with metadata for main document and all embedded 
documents.
+
+=== Test 10: PUT /rmeta/text
+
+[source,bash]
+----
+curl -s -X PUT -T test_recursive_embedded.docx http://localhost:9998/rmeta/text
+----
+
+*Expected:* JSON array with ToTextContentHandler content.
+
+=== Test 11: PUT /language/stream
+
+[source,bash]
+----
+curl -s -X PUT -T testPDF.pdf http://localhost:9998/language/stream
+----
+
+*Expected:* Two-letter language code (e.g., `en`, `th`).
+
+=== Test 12: PUT /unpack/all
+
+[source,bash]
+----
+curl -s -X PUT -T test_recursive_embedded.docx 
http://localhost:9998/unpack/all -o /tmp/unpack.zip
+unzip -l /tmp/unpack.zip
+----
+
+*Expected:* ZIP file containing extracted embedded files plus `__TEXT__` and 
`__METADATA__` files.
+
+=== Test 13: GET /parsers
+
+[source,bash]
+----
+curl -s -H "Accept: text/plain" http://localhost:9998/parsers
+----
+
+*Expected:* Hierarchical list of available parsers.
+
+=== Test 14: GET /detectors
+
+[source,bash]
+----
+curl -s -H "Accept: text/plain" http://localhost:9998/detectors
+----
+
+*Expected:* List of available detectors.
+
+=== Test 15: GET /mime-types
+
+[source,bash]
+----
+curl -s -H "Accept: application/json" http://localhost:9998/mime-types
+----
+
+*Expected:* JSON object with all known MIME types.
+
+=== Test 16: POST /meta/form
+
+[source,bash]
+----
+curl -s -X POST -F "upload=@testPDF.pdf" -H "Accept: application/json" 
http://localhost:9998/meta/form
+----
+
+*Expected:* JSON metadata from multipart form upload.
+
+=== Test 17: POST /rmeta/form
+
+[source,bash]
+----
+curl -s -X POST -F "upload=@test_recursive_embedded.docx" 
http://localhost:9998/rmeta/form
+----
+
+*Expected:* JSON array with recursive metadata from multipart upload.
+
+=== Test 18: Config Endpoints Blocked (Default Mode)
+
+[source,bash]
+----
+curl -s -w "\nHTTP Status: %{http_code}\n" -X POST -F "file=@testPDF.pdf" 
http://localhost:9998/meta/config
+curl -s -w "\nHTTP Status: %{http_code}\n" -X POST -F "file=@testPDF.pdf" 
http://localhost:9998/rmeta/config
+curl -s -w "\nHTTP Status: %{http_code}\n" -X POST -F "file=@testPDF.pdf" 
http://localhost:9998/tika/config
+curl -s -w "\nHTTP Status: %{http_code}\n" -X POST -F "file=@testPDF.pdf" 
http://localhost:9998/unpack/config
+----
+
+*Expected:* All return HTTP 403 with message: "Config endpoints are disabled. 
Set enableUnsecureFeatures=true in server config."
+
+== Part 2: Tests with enableUnsecureFeatures
+
+Stop the default server and create a config file:
+
+[source,bash]
+----
+pkill -f "tika-server.jar"
+
+cat > tika-config-unsecure.json << 'EOF'
+{
+  "server": {
+    "port": 9998,
+    "host": "localhost",
+    "enableUnsecureFeatures": true
+  },
+  "parsers": [
+    {"default-parser": {}}
+  ],
+  "plugin-roots": "/tmp/tika-server-test/plugins"
+}
+EOF
+
+java -jar tika-server.jar -c tika-config-unsecure.json &
+sleep 10
+curl -s http://localhost:9998/version
+----
+
+=== Test 19: POST /meta/config
+
+[source,bash]
+----
+curl -s -X POST -F "file=@testPDF.pdf" -H "Accept: application/json" 
http://localhost:9998/meta/config
+----
+
+*Expected:* JSON metadata.
+
+=== Test 20: POST /meta/config with custom parser config
+
+[source,bash]
+----
+curl -s -X POST -F "file=@testPDF.pdf" \
+  -F 'config={"parsers":[{"pdf-parser":{"ocrStrategy":"NO_OCR"}}]}' \
+  -H "Accept: application/json" \
+  http://localhost:9998/meta/config
+----
+
+*Expected:* JSON metadata with custom PDF parser config applied.
+
+=== Test 21: POST /unpack/config
+
+[source,bash]
+----
+curl -s -X POST -F "file=@test_recursive_embedded.docx" 
http://localhost:9998/unpack/config -o /tmp/unpack-config.zip
+unzip -l /tmp/unpack-config.zip
+----
+
+*Expected:* ZIP with extracted embedded files.
+
+=== Test 22: POST /unpack/all/config
+
+[source,bash]
+----
+curl -s -X POST -F "file=@test_recursive_embedded.docx" 
http://localhost:9998/unpack/all/config -o /tmp/unpack-all.zip
+unzip -l /tmp/unpack-all.zip
+----
+
+*Expected:* ZIP with all recursively extracted files.
+
+== Server Options
+
+=== Test 23: Custom Port
+
+[source,bash]
+----
+java -jar tika-server.jar --port 9999 &
+sleep 8
+curl -s http://localhost:9999/version
+----
+
+*Expected:* Server responds on port 9999.
+
+=== Test 24: Custom Host
+
+[source,bash]
+----
+java -jar tika-server.jar --host 0.0.0.0 --port 9998 &
+----
+
+*Expected:* Server binds to all interfaces.
+
+=== Test 25: With Config File
+
+[source,bash]
+----
+java -jar tika-server.jar -c tika-config.json &
+----
+
+*Expected:* Server uses custom configuration.
+
+== Headers
+
+=== Test 26: X-Tika-OCRskipOcr Header
+
+[source,bash]
+----
+curl -s -X PUT -H "X-Tika-OCRskipOcr: true" -T testPDF.pdf 
http://localhost:9998/tika/text
+----
+
+*Expected:* Text extraction without OCR.
+
+=== Test 27: Content-Disposition Filename
+
+[source,bash]
+----
+curl -s -X PUT -H "Content-Disposition: attachment; filename=myfile.pdf" -T 
testPDF.pdf http://localhost:9998/meta/resourceName
+----
+
+*Expected:* Returns the filename from Content-Disposition header.
+
+== Error Handling
+
+=== Test 28: Non-existent Endpoint
+
+[source,bash]
+----
+curl -s -w "\nHTTP Status: %{http_code}\n" http://localhost:9998/nonexistent
+----
+
+*Expected:* 404 Not Found.
+
+=== Test 29: Invalid Method
+
+[source,bash]
+----
+curl -s -w "\nHTTP Status: %{http_code}\n" -X DELETE 
http://localhost:9998/tika/text
+----
+
+*Expected:* 405 Method Not Allowed.
+
+== Cleanup
+
+[source,bash]
+----
+pkill -f "tika-server.jar"
+rm -rf /tmp/tika-server-test
+----
+
+== Usability Test Results
+
+The following endpoints were tested and verified working:
+
+=== Default Mode (enableUnsecureFeatures=false)
+
+[cols="1,1,1", options="header"]
+|===
+|Endpoint |Method |Status
+
+|`/version` |GET |PASS
+|`/detect/stream` |PUT |PASS
+|`/tika` |PUT |PASS
+|`/tika/text` |PUT |PASS
+|`/tika/html` |PUT |PASS
+|`/tika/xml` |PUT |PASS
+|`/tika/json` |PUT |PASS
+|`/meta` |PUT |PASS
+|`/meta/{field}` |PUT |PASS
+|`/rmeta` |PUT |PASS
+|`/rmeta/text` |PUT |PASS
+|`/language/stream` |PUT |PASS
+|`/unpack/all` |PUT |PASS
+|`/parsers` |GET |PASS
+|`/detectors` |GET |PASS
+|`/mime-types` |GET |PASS
+|`/meta/form` |POST |PASS
+|`/rmeta/form` |POST |PASS
+|`/meta/config` |POST |BLOCKED (403) - Expected
+|`/rmeta/config` |POST |BLOCKED (403) - Expected
+|`/tika/config` |POST |BLOCKED (403) - Expected
+|`/unpack/config` |POST |BLOCKED (403) - Expected
+|===
+
+=== With enableUnsecureFeatures=true
+
+[cols="1,1,1", options="header"]
+|===
+|Endpoint |Method |Status
+
+|`/meta/config` |POST |PASS
+|`/rmeta/config` |POST |PASS
+|`/tika/config` |POST |PASS
+|`/unpack/config` |POST |PASS
+|`/unpack/all/config` |POST |PASS
+|===
+
+== Known Issues
+
+=== Issue 1: Language Detection Accuracy
+
+Short texts may not be detected reliably. The `/language/stream` endpoint 
works best with substantial text content.
+
+== Quick Reference
+
+=== Basic Parsing
+[source,bash]
+----
+# Text output
+curl -X PUT -T file.pdf http://localhost:9998/tika/text
+
+# HTML output
+curl -X PUT -T file.pdf http://localhost:9998/tika/html
+
+# JSON output (metadata + content)
+curl -X PUT -T file.pdf http://localhost:9998/tika/json
+----
+
+=== Metadata Only
+[source,bash]
+----
+curl -X PUT -H "Accept: application/json" -T file.pdf 
http://localhost:9998/meta
+----
+
+=== Recursive Metadata
+[source,bash]
+----
+curl -X PUT -T file.docx http://localhost:9998/rmeta
+curl -X PUT -T file.docx http://localhost:9998/rmeta/text
+----
+
+=== Detection
+[source,bash]
+----
+curl -X PUT -T file.pdf http://localhost:9998/detect/stream
+----
+
+=== Extract Embedded Files
+[source,bash]
+----
+curl -X PUT -T file.docx http://localhost:9998/unpack/all -o output.zip
+----
+
+== Implementation Notes
+
+=== Automatic Component Configuration
+
+The server automatically configures the required fetcher and emitter for 
pipes-based parsing:
+
+* **tika-server-fetcher**: A file-system-fetcher with `basePath` pointing to a 
dedicated temp directory for input files. This enables the `/tika`, `/rmeta`, 
and `/meta` endpoints to work with uploaded files.
+
+* **unpack-emitter**: A file-system-emitter with `basePath` pointing to a 
dedicated temp directory for unpacked files. This is only created when the 
`/unpack` endpoint is enabled (default). This enables the `/unpack/all` 
endpoint to return embedded files as a ZIP.
+
+Both temp directories are cleaned up on server shutdown.
+
+If a user config file does not include `plugin-roots`, the server 
automatically adds a default value pointing to a `plugins` directory in the 
current working directory.
+
+=== Security Boundary
+
+Child processes (pipes workers) are configured with `basePath` rather than 
`allowAbsolutePaths`, ensuring they can only access files within their 
designated temp directories. This provides a security boundary between the 
parent server process and forked child processes.
diff --git a/docs/modules/ROOT/pages/migration-to-4x/index.adoc 
b/docs/modules/ROOT/pages/migration-to-4x/index.adoc
index 20a7c5cf48..eebf29f3db 100644
--- a/docs/modules/ROOT/pages/migration-to-4x/index.adoc
+++ b/docs/modules/ROOT/pages/migration-to-4x/index.adoc
@@ -31,3 +31,20 @@ See the xref:roadmap.adoc[Roadmap] for version timelines and 
support schedules.
 
 * xref:migration-to-4x/design-notes-4x.adoc[Design Notes] - Architectural 
decisions and design rationale
 * xref:migration-to-4x/serialization-4x.adoc[Serialization] - JSON 
serialization design and implementation details
+
+== TODOs / Missing Features in 4.x
+
+The following features from 3.x are not yet implemented in 4.x:
+
+=== Config Serialization
+
+The following tika-app options for dumping configuration are not yet available:
+
+* `--dump-minimal-config` - Print minimal TikaConfig
+* `--dump-current-config` - Print current TikaConfig
+* `--dump-static-config` - Print static config
+* `--dump-static-full-config` - Print static explicit config
+
+These require completing the JSON serialization support for TikaConfig 
objects. The underlying serialization infrastructure exists (see 
xref:migration-to-4x/serialization-4x.adoc[Serialization]) but the CLI 
integration is pending.
+
+*Workaround:* Manually create JSON config files using the template at 
`tika-pipes/tika-async-cli/src/main/resources/config-template.json` as a 
starting point.
diff --git a/docs/modules/ROOT/pages/migration-to-4x/migrating-to-4x.adoc 
b/docs/modules/ROOT/pages/migration-to-4x/migrating-to-4x.adoc
index c8cd0a7242..5c963f4809 100644
--- a/docs/modules/ROOT/pages/migration-to-4x/migrating-to-4x.adoc
+++ b/docs/modules/ROOT/pages/migration-to-4x/migrating-to-4x.adoc
@@ -76,16 +76,17 @@ The converter currently supports:
         "sortByPosition": true,
         "maxMainMemoryBytes": 1000000
       }
-    },
-    {
-      "default-parser": {
-        "_exclude": ["pdf-parser"]
-      }
     }
   ]
 }
 ----
 
+NOTE: When you configure a parser with specific settings in JSON, the loader 
automatically
+excludes it from SPI loading. The parser (e.g., `pdf-parser`) is not even 
instantiated in
+`default-parser` if there's a definition for it in the tika-config.json. 
Explicit `_exclude`
+directives are only needed when you want to disable a parser entirely without 
providing
+custom configuration.
+
 === Key Differences
 
 [cols="1,1,2"]
@@ -102,13 +103,9 @@ The converter currently supports:
 
 |Exclusions
 |`<parser-exclude class="..."/>`
-|`"_exclude": ["component-name"]`
+|`"_exclude": ["component-name"]` (only needed to disable a parser entirely)
 |===
 
-NOTE: When you configure a parser with specific settings in JSON, the loader 
automatically
-excludes it from SPI loading. Explicit exclusions are only needed when you 
want to disable
-a parser entirely without providing custom configuration.
-
 === Limitations
 
 The automatic converter has some limitations:
diff --git a/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java 
b/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java
index 38a0094f79..e3561ecf5f 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java
@@ -58,6 +58,22 @@ public class AsyncHelper {
                 argList.add(mode);
             } else if (arg.equals(UNPACK_INCLUDE_METADATA)) {
                 argList.add("--unpack-include-metadata");
+            } else if (arg.equals("-t") || arg.equals("--text")) {
+                // Translate TikaCLI text output to TikaAsyncCLI handler type
+                argList.add("-h");
+                argList.add("t");
+            } else if (arg.equals("--html")) {
+                // Translate TikaCLI html output to TikaAsyncCLI handler type
+                // Note: TikaCLI uses -h for html, but TikaAsyncCLI uses -h 
for handler type
+                argList.add("-h");
+                argList.add("h");
+            } else if (arg.equals("-x") || arg.equals("--xml")) {
+                // Translate TikaCLI xml output to TikaAsyncCLI handler type
+                argList.add("-h");
+                argList.add("x");
+            } else if (arg.equals("-J") || arg.equals("--jsonRecursive")) {
+                // TikaAsyncCLI always outputs JSON with recursive metadata 
(RMETA mode)
+                // This is already the default, so we just skip this arg
             } else {
                 argList.add(args[i]);
             }
diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java 
b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 5f388865c4..97ca90a489 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -38,7 +38,6 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.nio.file.StandardCopyOption;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
@@ -276,25 +275,11 @@ public class TikaCLI {
         if (args.length == 1 &&  args[0].endsWith(".json")) {
             TikaAsyncCLI.main(args);
             return;
-        };
-        //TODO -- are there other shortcuts?
-        Path tmpConfig = null;
-        try {
-            tmpConfig = Files.createTempFile("tika-config-", ".json");
-            
Files.copy(TikaCLI.class.getResourceAsStream("/tika-config-default-single-file.json"),
-                    tmpConfig, StandardCopyOption.REPLACE_EXISTING);
-            List<String> argList = new ArrayList<>();
-            argList.add("-c");
-            argList.add(tmpConfig.toAbsolutePath().toString());
-            for (String arg : args) {
-                argList.add(arg);
-            }
-            TikaAsyncCLI.main(argList.toArray(new String[0]));
-        } finally {
-            if (tmpConfig != null) {
-                Files.delete(tmpConfig);
-            }
         }
+        // For batch mode (two directories), pass directly to TikaAsyncCLI.
+        // It will create its own config with PluginsWriter that includes
+        // plugin-roots, fetcher, emitter, and pipes-iterator configuration.
+        TikaAsyncCLI.main(args);
     }
 
     /**
@@ -350,12 +335,34 @@ public class TikaCLI {
 
     private boolean testForAsync(String[] args) {
 
+        // Single .json file is a config file for async mode
+        if (args.length == 1 && args[0].endsWith(".json")) {
+            return true;
+        }
+
         if (args.length == 2) {
             if (Files.isDirectory(Paths.get(args[0]))) {
                 return true;
             }
         }
 
+        // Check if last two args are directories (batch mode with options)
+        if (args.length >= 2) {
+            String lastArg = args[args.length - 1];
+            String secondLastArg = args[args.length - 2];
+            // Make sure neither looks like an option value
+            if (!lastArg.startsWith("-") && !secondLastArg.startsWith("-")) {
+                try {
+                    if (Files.isDirectory(Paths.get(secondLastArg)) &&
+                        (Files.isDirectory(Paths.get(lastArg)) || 
!Files.exists(Paths.get(lastArg)))) {
+                        return true;
+                    }
+                } catch (Exception e) {
+                    // Invalid path, not batch mode
+                }
+            }
+        }
+
         for (String arg : args) {
             if (arg.equals("-a") || arg.equals("--async")) {
                 return true;
@@ -590,10 +597,12 @@ public class TikaCLI {
         out.println();
         out.println("    --config=<tika-config.xml>");
         out.println("        TikaConfig file. Must be specified before -g, -s, 
-f or the dump-x-config !");
-        out.println("    --dump-minimal-config  Print minimal TikaConfig");
-        out.println("    --dump-current-config  Print current TikaConfig");
-        out.println("    --dump-static-config   Print static config");
-        out.println("    --dump-static-full-config  Print static explicit 
config");
+        // TODO: TIKA-XXXX - Re-enable config dump options once JSON 
serialization is complete
+        // These options are not yet implemented in 4.x due to the migration 
from XML to JSON config
+        // out.println("    --dump-minimal-config  Print minimal TikaConfig");
+        // out.println("    --dump-current-config  Print current TikaConfig");
+        // out.println("    --dump-static-config   Print static config");
+        // out.println("    --dump-static-full-config  Print static explicit 
config");
         out.println("    
--convert-config-xml-to-json=<input.xml>,<output.json>");
         out.println("        Convert legacy XML config to JSON format (parsers 
section only)");
         out.println("");
diff --git a/tika-app/src/test/java/org/apache/tika/cli/AsyncHelperTest.java 
b/tika-app/src/test/java/org/apache/tika/cli/AsyncHelperTest.java
index 9885feac3f..a26f247500 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/AsyncHelperTest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/AsyncHelperTest.java
@@ -28,4 +28,47 @@ public class AsyncHelperTest {
         String[] expected = new String[]{"-c", "blah.json", "-i", 
"input.docx", "-o", "output/dir"};
         assertArrayEquals(expected, AsyncHelper.translateArgs(args));
     }
+
+    @Test
+    public void testTextHandler() throws Exception {
+        String[] args = new String[]{"-t", "input", "output"};
+        String[] expected = new String[]{"-h", "t", "input", "output"};
+        assertArrayEquals(expected, AsyncHelper.translateArgs(args));
+    }
+
+    @Test
+    public void testTextHandlerLong() throws Exception {
+        String[] args = new String[]{"--text", "input", "output"};
+        String[] expected = new String[]{"-h", "t", "input", "output"};
+        assertArrayEquals(expected, AsyncHelper.translateArgs(args));
+    }
+
+    @Test
+    public void testHtmlHandler() throws Exception {
+        String[] args = new String[]{"--html", "input", "output"};
+        String[] expected = new String[]{"-h", "h", "input", "output"};
+        assertArrayEquals(expected, AsyncHelper.translateArgs(args));
+    }
+
+    @Test
+    public void testXmlHandler() throws Exception {
+        String[] args = new String[]{"-x", "input", "output"};
+        String[] expected = new String[]{"-h", "x", "input", "output"};
+        assertArrayEquals(expected, AsyncHelper.translateArgs(args));
+    }
+
+    @Test
+    public void testJsonRecursiveSkipped() throws Exception {
+        // -J is the default in async mode, so it's just skipped
+        String[] args = new String[]{"-J", "-t", "input", "output"};
+        String[] expected = new String[]{"-h", "t", "input", "output"};
+        assertArrayEquals(expected, AsyncHelper.translateArgs(args));
+    }
+
+    @Test
+    public void testBatchModeWithOptions() throws Exception {
+        String[] args = new String[]{"-J", "-t", "/path/to/input", 
"/path/to/output"};
+        String[] expected = new String[]{"-h", "t", "/path/to/input", 
"/path/to/output"};
+        assertArrayEquals(expected, AsyncHelper.translateArgs(args));
+    }
 }
diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java 
b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
index 0de27d2354..8c3d78cd34 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
@@ -568,6 +568,57 @@ public class TikaCLITest {
                 "Should have at least 2 files (json + embedded), got " + 
fileNames.size() + ": " + fileNames);
     }
 
+    /**
+     * Test that --extract-dir option correctly sets the output directory
+     * for -z (shallow) extraction mode.
+     */
+    @Test
+    public void testExtractDirOption() throws Exception {
+        Path input = Paths.get(new URI(resourcePrefix + 
"/test_recursive_embedded.docx"));
+        Path pluginsDir = Paths.get("target/plugins");
+
+        // Test with -z (shallow extraction)
+        String[] params = {"-z",
+                "--extract-dir=" + extractDir.toAbsolutePath(),
+                "-p", pluginsDir.toAbsolutePath().toString(),
+                input.toAbsolutePath().toString()};
+
+        TikaCLI.main(params);
+
+        Set<String> fileNames = getFileNames(extractDir);
+
+        // Should have extracted files in the specified directory, not current 
dir
+        assertTrue(fileNames.stream().anyMatch(f -> f.endsWith(".json")),
+                "Should have a .json metadata file in extractDir, got: " + 
fileNames);
+        assertTrue(fileNames.stream().anyMatch(f -> f.contains("-embed/")),
+                "Should have extracted embedded files in extractDir, got: " + 
fileNames);
+    }
+
+    /**
+     * Test that --extract-dir option works with -Z (recursive) extraction.
+     */
+    @Test
+    public void testExtractDirOptionRecursive() throws Exception {
+        Path input = Paths.get(new URI(resourcePrefix + 
"/test_recursive_embedded.docx"));
+        Path pluginsDir = Paths.get("target/plugins");
+
+        // Test with -Z (recursive extraction)
+        String[] params = {"-Z",
+                "--extract-dir=" + extractDir.toAbsolutePath(),
+                "-p", pluginsDir.toAbsolutePath().toString(),
+                input.toAbsolutePath().toString()};
+
+        TikaCLI.main(params);
+
+        Set<String> fileNames = getFileNames(extractDir);
+
+        // Should have extracted files in the specified directory
+        assertTrue(fileNames.stream().anyMatch(f -> f.endsWith(".json")),
+                "Should have a .json metadata file in extractDir, got: " + 
fileNames);
+        assertTrue(fileNames.stream().anyMatch(f -> f.contains("-embed/")),
+                "Should have extracted embedded files in extractDir, got: " + 
fileNames);
+    }
+
     @Test
     public void testDefaultConfigException() throws Exception {
         //default xml parser will throw TikaException
diff --git 
a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json
 
b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json
index e16f0a9b6b..bca9d1a664 100644
--- 
a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json
+++ 
b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json
@@ -3,10 +3,7 @@
     {
       "default-parser": {
         "_exclude": [
-          "tesseract-ocr-parser",
-          "pdf-parser",
-          "ooxml-parser",
-          "office-parser"
+          "tesseract-ocr-parser"
         ]
       }
     },
diff --git 
a/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/resources/org/apache/tika/parser/ocr/configs/tika-config-restricted-gdal.json
 
b/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/resources/org/apache/tika/parser/ocr/configs/tika-config-restricted-gdal.json
index 40c05e4288..99cf597805 100644
--- 
a/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/resources/org/apache/tika/parser/ocr/configs/tika-config-restricted-gdal.json
+++ 
b/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/resources/org/apache/tika/parser/ocr/configs/tika-config-restricted-gdal.json
@@ -1,10 +1,5 @@
 {
   "parsers": [
-    {
-      "default-parser": {
-        "_exclude": ["gdal-parser"]
-      }
-    },
     {
       "gdal-parser": {
         "_mime-exclude": ["image/jpeg", "image/png", "image/jp2", "image/gif"]
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/parser/ocr/tesseract-config.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/parser/ocr/tesseract-config.json
index 00c67e9ebe..3474b85822 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/parser/ocr/tesseract-config.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/parser/ocr/tesseract-config.json
@@ -1,10 +1,5 @@
 {
   "parsers": [
-    {
-      "default-parser": {
-        "_exclude": ["tesseract-ocr-parser"]
-      }
-    },
     {
       "tesseract-ocr-parser": {
         "tesseractPath": "C:\\Program Files\\Tesseract OCR",
diff --git 
a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java
 
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java
index c6e7a30af8..1257c48e4c 100644
--- 
a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java
+++ 
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java
@@ -17,7 +17,6 @@
 package org.apache.tika.async.cli;
 
 import java.io.IOException;
-import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
@@ -52,30 +51,59 @@ public class PluginsWriter {
             }
         }
         try {
-            String jsonTemplate = new 
String(getClass().getResourceAsStream("/config-template.json").readAllBytes(), 
StandardCharsets.UTF_8);
-            String json = jsonTemplate.replace("FETCHER_BASE_PATH", 
baseInput.toAbsolutePath().toString());
-            json = json.replace("EMITTER_BASE_PATH", 
baseOutput.toAbsolutePath().toString());
-            String pluginString = 
StringUtils.isBlank(simpleAsyncConfig.getPluginsDir()) ? "plugins" : 
simpleAsyncConfig.getPluginsDir();
+            ObjectMapper objectMapper = TikaObjectMapperFactory.getMapper();
+            ObjectNode root = (ObjectNode) objectMapper.readTree(
+                    getClass().getResourceAsStream("/config-template.json"));
+
+            // Set fetcher basePath
+            ObjectNode fetchers = (ObjectNode) root.get("fetchers");
+            if (fetchers != null && fetchers.has("fsf")) {
+                ObjectNode fsf = (ObjectNode) fetchers.get("fsf");
+                if (fsf != null && fsf.has("file-system-fetcher")) {
+                    ObjectNode fsFetcher = (ObjectNode) 
fsf.get("file-system-fetcher");
+                    fsFetcher.put("basePath", 
baseInput.toAbsolutePath().toString());
+                }
+            }
+
+            // Set emitter basePath
+            ObjectNode emitters = (ObjectNode) root.get("emitters");
+            if (emitters != null && emitters.has("fse")) {
+                ObjectNode fse = (ObjectNode) emitters.get("fse");
+                if (fse != null && fse.has("file-system-emitter")) {
+                    ObjectNode fsEmitter = (ObjectNode) 
fse.get("file-system-emitter");
+                    fsEmitter.put("basePath", 
baseOutput.toAbsolutePath().toString());
+                }
+            }
+
+            // Set pipes-iterator basePath
+            ObjectNode pipesIterator = (ObjectNode) root.get("pipes-iterator");
+            if (pipesIterator != null && 
pipesIterator.has("file-system-pipes-iterator")) {
+                ObjectNode fsIterator = (ObjectNode) 
pipesIterator.get("file-system-pipes-iterator");
+                fsIterator.put("basePath", 
baseInput.toAbsolutePath().toString());
+            }
+
+            // Set plugin-roots
+            String pluginString = 
StringUtils.isBlank(simpleAsyncConfig.getPluginsDir()) ?
+                    "plugins" : simpleAsyncConfig.getPluginsDir();
             Path plugins = Paths.get(pluginString);
             if (Files.isDirectory(plugins)) {
                 pluginString = plugins.toAbsolutePath().toString();
             }
-            json = json.replace("PLUGIN_ROOTS", pluginString).replace("\\", 
"/");
-            PipesConfig pipesConfig = new PipesConfig();
-
-            pipesConfig.setNumClients(simpleAsyncConfig.getNumClients() == 
null ? 2 : simpleAsyncConfig.getNumClients());
+            root.put("plugin-roots", pluginString);
 
+            // Set pipes config
+            PipesConfig pipesConfig = new PipesConfig();
+            pipesConfig.setNumClients(simpleAsyncConfig.getNumClients() == 
null ?
+                    2 : simpleAsyncConfig.getNumClients());
             if (simpleAsyncConfig.getXmx() != null) {
                 pipesConfig.setForkedJvmArgs(new 
ArrayList<>(List.of(simpleAsyncConfig.getXmx())));
             }
             if (simpleAsyncConfig.getTimeoutMs() != null) {
                 pipesConfig.setTimeoutMillis(simpleAsyncConfig.getTimeoutMs());
             }
-            ObjectMapper objectMapper = TikaObjectMapperFactory.getMapper();
-            ObjectNode root = (ObjectNode) 
objectMapper.readTree(json.getBytes(StandardCharsets.UTF_8));
             root.set("pipes", objectMapper.valueToTree(pipesConfig));
 
-            Files.writeString(output, root.toString());
+            
objectMapper.writerWithDefaultPrettyPrinter().writeValue(output.toFile(), root);
         } catch (Exception e) {
             throw new IOException(e);
         }
diff --git 
a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
 
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
index 72531fcc66..4687845389 100644
--- 
a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
+++ 
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
@@ -25,6 +25,9 @@ import java.util.List;
 import java.util.Optional;
 import java.util.concurrent.TimeoutException;
 
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.CommandLineParser;
 import org.apache.commons.cli.DefaultParser;
@@ -115,6 +118,13 @@ public class TikaAsyncCLI {
                 tikaConfig = tmpTikaConfig;
                 PluginsWriter pluginsWriter = new 
PluginsWriter(simpleAsyncConfig, tikaConfig);
                 pluginsWriter.write(tikaConfig);
+            } else {
+                // User provided a config - ensure plugin-roots is set
+                tikaConfig = ensurePluginRoots(tikaConfig, 
simpleAsyncConfig.getPluginsDir());
+                if 
(!tikaConfig.equals(Paths.get(simpleAsyncConfig.getTikaConfig()))) {
+                    // A new merged config was created, mark for cleanup
+                    tmpTikaConfig = tikaConfig;
+                }
             }
 
             pipesIterator = buildPipesIterator(tikaConfig, simpleAsyncConfig);
@@ -260,10 +270,13 @@ public class TikaAsyncCLI {
                 throw new TikaConfigException("Input file/dir must exist: " + 
inputPath);
             }
             inputDir = inString;
-            if (Files.isRegularFile(inputPath)) {
-                outputDir = Paths.get(".").toAbsolutePath().toString();
-            } else {
-                outputDir = Paths.get("output").toAbsolutePath().toString();
+            // Only set default outputDir if not already specified via -o
+            if (outputDir == null) {
+                if (Files.isRegularFile(inputPath)) {
+                    outputDir = Paths.get(".").toAbsolutePath().toString();
+                } else {
+                    outputDir = 
Paths.get("output").toAbsolutePath().toString();
+                }
             }
         }
 
@@ -368,6 +381,43 @@ public class TikaAsyncCLI {
         parseContext.set(UnpackConfig.class, config);
     }
 
+    private static final String DEFAULT_PLUGINS_DIR = "plugins";
+
+    /**
+     * Ensures plugin-roots is set in the config. If missing, creates a merged 
config
+     * with plugin-roots set to {@code pluginsDir}, or to "plugins" when that is blank.
+     *
+     * @param originalConfigPath the user's config file path
+     * @param pluginsDir optional plugins directory from command line (may be 
null)
+     * @return the config path to use (original if plugin-roots exists, or a 
new merged config)
+     */
+    static Path ensurePluginRoots(Path originalConfigPath, String pluginsDir) 
throws IOException {
+        ObjectMapper mapper = new ObjectMapper();
+        JsonNode rootNode = mapper.readTree(originalConfigPath.toFile());
+
+        if (rootNode.has("plugin-roots")) {
+            // plugin-roots already set, use original config
+            return originalConfigPath;
+        }
+
+        // Need to add plugin-roots
+        ObjectNode mutableRoot = (ObjectNode) rootNode;
+        String pluginString = StringUtils.isBlank(pluginsDir) ? 
DEFAULT_PLUGINS_DIR : pluginsDir;
+        Path plugins = Paths.get(pluginString);
+        if (Files.isDirectory(plugins)) {
+            pluginString = plugins.toAbsolutePath().toString();
+        }
+        mutableRoot.put("plugin-roots", pluginString);
+
+        // Write merged config to temp file
+        Path mergedConfig = Files.createTempFile("tika-async-merged-config-", 
".json");
+        
mapper.writerWithDefaultPrettyPrinter().writeValue(mergedConfig.toFile(), 
mutableRoot);
+        mergedConfig.toFile().deleteOnExit();
+
+        LOG.info("Added default plugin-roots to config: {}", pluginString);
+        return mergedConfig;
+    }
+
     private static void usage(Options options) throws IOException {
         System.out.println("Two primary options:");
         System.out.println("\t1. Specify a tika-config.xml on the commandline 
that includes the definitions for async");
diff --git a/tika-pipes/tika-async-cli/src/main/resources/config-template.json 
b/tika-pipes/tika-async-cli/src/main/resources/config-template.json
index ee1efd49dc..15cd90b19f 100644
--- a/tika-pipes/tika-async-cli/src/main/resources/config-template.json
+++ b/tika-pipes/tika-async-cli/src/main/resources/config-template.json
@@ -53,10 +53,7 @@
       "basePath": "FETCHER_BASE_PATH",
       "countTotal": true,
       "fetcherId": "fsf",
-      "emitterId": "fse",
-      "onParseException": "EMIT",
-      "maxWaitMs": 600000,
-      "queueSize": 10000
+      "emitterId": "fse"
     }
   },
   "pipes": {
diff --git 
a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncCliParserTest.java
 
b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncCliParserTest.java
index 88f8371bdc..ef446d2fd7 100644
--- 
a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncCliParserTest.java
+++ 
b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncCliParserTest.java
@@ -17,9 +17,17 @@
 package org.apache.tika.async.cli;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
 
 import org.apache.tika.sax.BasicContentHandlerFactory;
 
@@ -83,4 +91,86 @@ public class AsyncCliParserTest {
     }
 
     //TODO -- test for file list with and without inputDir
+
+    @TempDir
+    Path tempDir;
+
+    @Test
+    public void testEnsurePluginRootsAddsDefault() throws Exception {
+        // Create a config without plugin-roots
+        Path configPath = tempDir.resolve("config-no-plugins.json");
+        Files.writeString(configPath, """
+            {
+              "pipes": {
+                "numClients": 2
+              }
+            }
+            """);
+
+        // ensurePluginRoots should create a new config with plugin-roots added
+        Path result = TikaAsyncCLI.ensurePluginRoots(configPath, null);
+
+        // Should return a different path (merged config)
+        assertFalse(result.equals(configPath), "Should create a new merged 
config");
+
+        // The merged config should have plugin-roots
+        ObjectMapper mapper = new ObjectMapper();
+        JsonNode root = mapper.readTree(result.toFile());
+        assertTrue(root.has("plugin-roots"), "Merged config should have 
plugin-roots");
+        assertEquals("plugins", root.get("plugin-roots").asText());
+
+        // Original config values should be preserved
+        assertTrue(root.has("pipes"));
+        assertEquals(2, root.get("pipes").get("numClients").asInt());
+
+        // Clean up
+        Files.deleteIfExists(result);
+    }
+
+    @Test
+    public void testEnsurePluginRootsPreservesExisting() throws Exception {
+        // Create a config with plugin-roots already set
+        Path configPath = tempDir.resolve("config-with-plugins.json");
+        Files.writeString(configPath, """
+            {
+              "plugin-roots": "/custom/plugins",
+              "pipes": {
+                "numClients": 4
+              }
+            }
+            """);
+
+        // ensurePluginRoots should return the original path (no merging 
needed)
+        Path result = TikaAsyncCLI.ensurePluginRoots(configPath, null);
+
+        // Should return the same path
+        assertEquals(configPath, result, "Should return original config when 
plugin-roots exists");
+    }
+
+    @Test
+    public void testEnsurePluginRootsUsesCommandLineOption() throws Exception {
+        // Create a config without plugin-roots
+        Path configPath = tempDir.resolve("config-no-plugins2.json");
+        Files.writeString(configPath, """
+            {
+              "pipes": {
+                "numClients": 2
+              }
+            }
+            """);
+
+        // ensurePluginRoots with a custom plugins dir
+        Path result = TikaAsyncCLI.ensurePluginRoots(configPath, 
"/my/custom/plugins");
+
+        // Should create a merged config with the custom plugins dir
+        assertFalse(result.equals(configPath));
+
+        ObjectMapper mapper = new ObjectMapper();
+        JsonNode root = mapper.readTree(result.toFile());
+        assertTrue(root.has("plugin-roots"));
+        assertEquals("/my/custom/plugins", root.get("plugin-roots").asText());
+
+        // Clean up
+        Files.deleteIfExists(result);
+    }
 }
diff --git 
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java
 
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java
index d28cbb96c8..fdc8883f3b 100644
--- 
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java
+++ 
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java
@@ -27,7 +27,6 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
-import java.util.Locale;
 import java.util.Set;
 
 import org.apache.commons.cli.CommandLine;
@@ -444,6 +443,18 @@ public class TikaServerProcess {
         return endpoints.contains("tika") || endpoints.contains("rmeta");
     }
 
+    /**
+     * Determines if the /unpack endpoint is enabled based on configured 
endpoints.
+     */
+    private static boolean isUnpackEndpointEnabled(TikaServerConfig 
tikaServerConfig) {
+        List<String> endpoints = tikaServerConfig.getEndpoints();
+        // If no endpoints specified, all default endpoints are loaded 
(including unpack)
+        if (endpoints == null || endpoints.isEmpty()) {
+            return true;
+        }
+        return endpoints.contains("unpack");
+    }
+
     /**
      * Initializes the PipesParsingHelper for pipes-based parsing with process 
isolation.
      * <p>
@@ -452,22 +463,42 @@ public class TikaServerProcess {
      * <p>
      * If no config file is provided, a minimal default configuration will be 
created.
      * The plugin-roots will default to a "plugins" directory at the same 
level as the server jar.
+     * <p>
+     * A dedicated temp directory is created for input files, and a 
file-system-fetcher
+     * is configured with basePath pointing to that directory. This ensures 
child processes
+     * can only access files in the designated temp directory (security 
boundary).
      *
      * @param tikaServerConfig the server configuration
      * @return the PipesParsingHelper
      * @throws Exception if pipes initialization fails
      */
     private static PipesParsingHelper initPipesParsingHelper(TikaServerConfig 
tikaServerConfig) throws Exception {
-        // Load or create config
+        // Create dedicated temp directory for input files
+        Path inputTempDirectory = 
Files.createTempDirectory("tika-server-input-");
+        LOG.info("Created input temp directory: {}", inputTempDirectory);
+
+        // Only create unpack temp directory if /unpack endpoint is enabled
+        Path unpackTempDirectory = null;
+        if (isUnpackEndpointEnabled(tikaServerConfig)) {
+            unpackTempDirectory = 
Files.createTempDirectory("tika-server-unpack-");
+            LOG.info("Created unpack temp directory: {}", unpackTempDirectory);
+        }
+
+        // Load or create config, adding the fetcher (and emitter if unpack is 
enabled)
         Path configPath;
         if (tikaServerConfig.hasConfigFile()) {
             configPath = tikaServerConfig.getConfigPath();
         } else {
-            configPath = createDefaultConfig();
+            configPath = createDefaultConfig(inputTempDirectory, 
unpackTempDirectory);
         }
 
         TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(configPath);
 
+        // Ensure fetcher (and emitter if unpack is enabled) are configured 
with correct basePaths
+        configPath = ensureServerComponents(configPath, tikaJsonConfig,
+                inputTempDirectory, unpackTempDirectory);
+        tikaJsonConfig = TikaJsonConfig.load(configPath);
+
         // Load or create PipesConfig with defaults
         PipesConfig pipesConfig = tikaJsonConfig.deserialize("pipes", 
PipesConfig.class);
         if (pipesConfig == null) {
@@ -480,13 +511,13 @@ public class TikaServerProcess {
         // Create PipesParser
         PipesParser pipesParser = PipesParser.load(tikaJsonConfig, 
pipesConfig, configPath);
 
-        // Try to determine unpack emitter basePath from config
-        Path unpackEmitterBasePath = getUnpackEmitterBasePath(tikaJsonConfig);
-
         // Create and return the helper
-        PipesParsingHelper helper = new PipesParsingHelper(pipesParser, 
pipesConfig, unpackEmitterBasePath);
+        PipesParsingHelper helper = new PipesParsingHelper(pipesParser, 
pipesConfig,
+                inputTempDirectory, unpackTempDirectory);
 
-        // Register shutdown hook to clean up PipesParser
+        // Register shutdown hook to clean up PipesParser and temp directories
+        final Path inputDirToClean = inputTempDirectory;
+        final Path unpackDirToClean = unpackTempDirectory;
         Runtime.getRuntime().addShutdownHook(new Thread(() -> {
             try {
                 LOG.info("Shutting down PipesParser");
@@ -494,62 +525,32 @@ public class TikaServerProcess {
             } catch (Exception e) {
                 LOG.warn("Error closing PipesParser", e);
             }
+            // Clean up temp directories
+            cleanupTempDirectory(inputDirToClean);
+            if (unpackDirToClean != null) {
+                cleanupTempDirectory(unpackDirToClean);
+            }
         }));
 
         return helper;
     }
 
-    /**
-     * Attempts to determine the basePath for the unpack-emitter from the 
config.
-     * Returns null if the emitter is not configured or basePath cannot be 
determined.
-     */
-    private static Path getUnpackEmitterBasePath(TikaJsonConfig 
tikaJsonConfig) {
+    private static void cleanupTempDirectory(Path tempDir) {
         try {
-            java.util.Map<String, com.fasterxml.jackson.databind.JsonNode> 
emitters =
-                    tikaJsonConfig.getComponents("emitters");
-            if (emitters == null || 
!emitters.containsKey(PipesParsingHelper.UNPACK_EMITTER_ID)) {
-                LOG.debug("No unpack-emitter configured, UNPACK mode will not 
be available");
-                return null;
-            }
-
-            com.fasterxml.jackson.databind.JsonNode emitterConfig =
-                    emitters.get(PipesParsingHelper.UNPACK_EMITTER_ID);
-            com.fasterxml.jackson.databind.JsonNode basePath = 
findBasePath(emitterConfig);
-            if (basePath != null && basePath.isTextual()) {
-                Path path = Path.of(basePath.asText());
-                if (Files.isDirectory(path)) {
-                    LOG.info("UNPACK mode enabled with basePath: {}", path);
-                    return path;
-                } else {
-                    LOG.warn("unpack-emitter basePath does not exist: {}", 
path);
-                }
+            if (Files.exists(tempDir)) {
+                Files.walk(tempDir)
+                        .sorted((a, b) -> -a.compareTo(b)) // Delete files 
before directories
+                        .forEach(p -> {
+                            try {
+                                Files.deleteIfExists(p);
+                            } catch (IOException e) {
+                                LOG.warn("Failed to delete: {}", p);
+                            }
+                        });
             }
-        } catch (Exception e) {
-            LOG.warn("Failed to determine unpack-emitter basePath", e);
+        } catch (IOException e) {
+            LOG.warn("Error cleaning up temp directory: {}", tempDir, e);
         }
-        return null;
-    }
-
-    /**
-     * Recursively searches for "basePath" in a JSON node.
-     */
-    private static com.fasterxml.jackson.databind.JsonNode findBasePath(
-            com.fasterxml.jackson.databind.JsonNode node) {
-        if (node == null) {
-            return null;
-        }
-        if (node.has("basePath")) {
-            return node.get("basePath");
-        }
-        for (com.fasterxml.jackson.databind.JsonNode child : node) {
-            if (child.isObject()) {
-                com.fasterxml.jackson.databind.JsonNode result = 
findBasePath(child);
-                if (result != null) {
-                    return result;
-                }
-            }
-        }
-        return null;
     }
 
     /**
@@ -559,36 +560,150 @@ public class TikaServerProcess {
 
     /**
      * Creates a default configuration file with plugin-roots set to the 
"plugins" directory
-     * relative to the current working directory.
+     * relative to the current working directory, the tika-server-fetcher 
configured
+     * with basePath pointing to the input temp directory, and optionally the 
unpack-emitter
+     * configured with basePath pointing to the unpack temp directory.
+     *
+     * @param inputTempDirectory the temp directory for input files
+     * @param unpackTempDirectory the temp directory for unpack output files 
(may be null)
      */
-    private static Path createDefaultConfig() throws IOException {
+    private static Path createDefaultConfig(Path inputTempDirectory,
+                                            Path unpackTempDirectory) throws 
IOException {
         Path pluginsDir = Path.of(DEFAULT_PLUGINS_DIR).toAbsolutePath();
 
-        String configJson = String.format(Locale.ROOT, """
-            {
-              "fetchers": {
-                "file-system-fetcher": {
-                  "file-system-fetcher": {
-                    "allowAbsolutePaths": true
-                  }
-                }
-              },
-              "pipes": {
-                "numClients": 4,
-                "timeoutMillis": 60000
-              },
-              "plugin-roots": "%s"
-            }
-            """, pluginsDir.toString().replace("\\", "/"));
+        com.fasterxml.jackson.databind.ObjectMapper mapper =
+                new com.fasterxml.jackson.databind.ObjectMapper();
+        com.fasterxml.jackson.databind.node.ObjectNode rootNode = 
mapper.createObjectNode();
+
+        // Create fetchers section
+        com.fasterxml.jackson.databind.node.ObjectNode fetchersNode = 
mapper.createObjectNode();
+        com.fasterxml.jackson.databind.node.ObjectNode fetcherNode = 
mapper.createObjectNode();
+        com.fasterxml.jackson.databind.node.ObjectNode fetcherTypeConfig = 
mapper.createObjectNode();
+        fetcherTypeConfig.put("basePath", 
inputTempDirectory.toAbsolutePath().toString());
+        fetcherNode.set("file-system-fetcher", fetcherTypeConfig);
+        fetchersNode.set(PipesParsingHelper.DEFAULT_FETCHER_ID, fetcherNode);
+        rootNode.set("fetchers", fetchersNode);
+
+        // Create emitters section if unpack is enabled
+        if (unpackTempDirectory != null) {
+            com.fasterxml.jackson.databind.node.ObjectNode emittersNode = 
mapper.createObjectNode();
+            com.fasterxml.jackson.databind.node.ObjectNode emitterNode = 
mapper.createObjectNode();
+            com.fasterxml.jackson.databind.node.ObjectNode emitterTypeConfig = 
mapper.createObjectNode();
+            emitterTypeConfig.put("basePath", 
unpackTempDirectory.toAbsolutePath().toString());
+            emitterTypeConfig.put("onExists", "REPLACE");
+            emitterNode.set("file-system-emitter", emitterTypeConfig);
+            emittersNode.set(PipesParsingHelper.UNPACK_EMITTER_ID, 
emitterNode);
+            rootNode.set("emitters", emittersNode);
+        }
+
+        // Create pipes section
+        com.fasterxml.jackson.databind.node.ObjectNode pipesNode = 
mapper.createObjectNode();
+        pipesNode.put("numClients", 4);
+        pipesNode.put("timeoutMillis", 60000);
+        rootNode.set("pipes", pipesNode);
+
+        // Set plugin-roots
+        rootNode.put("plugin-roots", pluginsDir.toString());
 
         Path tempConfig = Files.createTempFile("tika-server-default-config-", 
".json");
-        Files.writeString(tempConfig, configJson);
+        
mapper.writerWithDefaultPrettyPrinter().writeValue(tempConfig.toFile(), 
rootNode);
         tempConfig.toFile().deleteOnExit();
 
         LOG.info("Created default config with plugin-roots: {}", pluginsDir);
         return tempConfig;
     }
 
+    /**
+     * Ensures the tika-server-fetcher exists in the config with basePath 
pointing to
+     * the input temp directory. If unpackTempDirectory is provided, also 
ensures the
+     * unpack-emitter exists.
+     * <p>
+     * The fetcher is used by legacy endpoints (/tika, /rmeta, etc.) to read 
uploaded files
+     * that have been spooled to the input temp directory.
+     * <p>
+     * The emitter is used by /unpack endpoints to write unpacked files that 
are then
+     * streamed back to the client.
+     * <p>
+     * Both components are configured with basePath (not allowAbsolutePaths) 
so child processes
+     * can only access files within their designated temp directories 
(security boundary).
+     *
+     * @param originalConfigPath the original config file path
+     * @param tikaJsonConfig the parsed Tika JSON config
+     * @param inputTempDirectory the temp directory for input files
+     * @param unpackTempDirectory the temp directory for unpack output files 
(may be null)
+     * @return the config path to use (always a new merged config with fetcher 
and optionally emitter)
+     */
+    private static Path ensureServerComponents(Path originalConfigPath, 
TikaJsonConfig tikaJsonConfig,
+                                               Path inputTempDirectory,
+                                               Path unpackTempDirectory) 
throws IOException {
+        LOG.info("Configuring {} with basePath={}", 
PipesParsingHelper.DEFAULT_FETCHER_ID, inputTempDirectory);
+
+        // Read original config as a mutable tree
+        com.fasterxml.jackson.databind.ObjectMapper mapper =
+                new com.fasterxml.jackson.databind.ObjectMapper();
+        com.fasterxml.jackson.databind.node.ObjectNode rootNode =
+                (com.fasterxml.jackson.databind.node.ObjectNode) 
mapper.readTree(originalConfigPath.toFile());
+
+        // Get or create the fetchers section
+        com.fasterxml.jackson.databind.node.ObjectNode fetchersNode;
+        if (rootNode.has("fetchers") && rootNode.get("fetchers").isObject()) {
+            fetchersNode = (com.fasterxml.jackson.databind.node.ObjectNode) 
rootNode.get("fetchers");
+        } else {
+            fetchersNode = mapper.createObjectNode();
+            rootNode.set("fetchers", fetchersNode);
+        }
+
+        // Create the fetcher config with basePath
+        // Structure: "tika-server-fetcher": { "file-system-fetcher": { 
"basePath": "/tmp/..." } }
+        com.fasterxml.jackson.databind.node.ObjectNode fetcherTypeConfig = 
mapper.createObjectNode();
+        fetcherTypeConfig.put("basePath", 
inputTempDirectory.toAbsolutePath().toString());
+
+        com.fasterxml.jackson.databind.node.ObjectNode fetcherNode = 
mapper.createObjectNode();
+        fetcherNode.set("file-system-fetcher", fetcherTypeConfig);
+
+        fetchersNode.set(PipesParsingHelper.DEFAULT_FETCHER_ID, fetcherNode);
+
+        // Only add unpack-emitter if unpack endpoint is enabled
+        if (unpackTempDirectory != null) {
+            LOG.info("Configuring {} with basePath={}", 
PipesParsingHelper.UNPACK_EMITTER_ID, unpackTempDirectory);
+
+            // Get or create the emitters section
+            com.fasterxml.jackson.databind.node.ObjectNode emittersNode;
+            if (rootNode.has("emitters") && 
rootNode.get("emitters").isObject()) {
+                emittersNode = 
(com.fasterxml.jackson.databind.node.ObjectNode) rootNode.get("emitters");
+            } else {
+                emittersNode = mapper.createObjectNode();
+                rootNode.set("emitters", emittersNode);
+            }
+
+            // Create the emitter config with basePath
+            // Structure: "unpack-emitter": { "file-system-emitter": { 
"basePath": "/tmp/...", "onExists": "REPLACE" } }
+            com.fasterxml.jackson.databind.node.ObjectNode emitterTypeConfig = 
mapper.createObjectNode();
+            emitterTypeConfig.put("basePath", 
unpackTempDirectory.toAbsolutePath().toString());
+            emitterTypeConfig.put("onExists", "REPLACE");
+
+            com.fasterxml.jackson.databind.node.ObjectNode emitterNode = 
mapper.createObjectNode();
+            emitterNode.set("file-system-emitter", emitterTypeConfig);
+
+            emittersNode.set(PipesParsingHelper.UNPACK_EMITTER_ID, 
emitterNode);
+        }
+
+        // Ensure plugin-roots is set (required for child processes)
+        if (!rootNode.has("plugin-roots")) {
+            Path pluginsDir = Path.of(DEFAULT_PLUGINS_DIR).toAbsolutePath();
+            rootNode.put("plugin-roots", pluginsDir.toString());
+            LOG.info("Added default plugin-roots: {}", pluginsDir);
+        }
+
+        // Write merged config to temp file
+        Path mergedConfig = Files.createTempFile("tika-server-merged-config-", 
".json");
+        
mapper.writerWithDefaultPrettyPrinter().writeValue(mergedConfig.toFile(), 
rootNode);
+        mergedConfig.toFile().deleteOnExit();
+
+        LOG.debug("Created merged config: {}", mergedConfig);
+        return mergedConfig;
+    }
+
     private static class ServerDetails {
         JAXRSServerFactoryBean sf;
         String serverId;
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesParsingHelper.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesParsingHelper.java
index c88a1ec799..6b1a6fe699 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesParsingHelper.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesParsingHelper.java
@@ -20,7 +20,6 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.nio.file.Paths;
 import java.util.Collections;
 import java.util.List;
 import java.util.UUID;
@@ -50,18 +49,9 @@ import org.apache.tika.server.core.TikaServerParseException;
  * Helper class for pipes-based parsing in tika-server endpoints.
  * Handles temp file management, FetchEmitTuple creation, and result 
processing.
  * <p>
- * To use pipes-based parsing, your tika-config.json must include a 
file-system fetcher
- * with allowAbsolutePaths enabled:
- * <pre>
- * {
- *   "fetchers": {
- *     "file-system-fetcher": {
- *       "class": "org.apache.tika.pipes.fetcher.fs.FileSystemFetcher",
- *       "allowAbsolutePaths": true
- *     }
- *   }
- * }
- * </pre>
+ * The helper manages a dedicated temp directory for input files. A 
file-system-fetcher
+ * is configured with basePath pointing to this directory, ensuring child 
processes
+ * can only access files within the designated temp directory (no absolute 
paths).
  */
 public class PipesParsingHelper {
 
@@ -69,9 +59,9 @@ public class PipesParsingHelper {
 
     /**
      * The fetcher ID used for reading temp files.
-     * This fetcher must be configured in the JSON config with 
allowAbsolutePaths=true.
+     * This fetcher is configured with basePath = inputTempDirectory.
      */
-    public static final String DEFAULT_FETCHER_ID = "file-system-fetcher";
+    public static final String DEFAULT_FETCHER_ID = "tika-server-fetcher";
 
     private final PipesParser pipesParser;
     private final PipesConfig pipesConfig;
@@ -83,33 +73,42 @@ public class PipesParsingHelper {
      *
      * @param pipesParser the PipesParser instance
      * @param pipesConfig the PipesConfig instance
+     * @param inputTempDirectory the temp directory for input files. The 
file-system-fetcher
+     *                           is configured with basePath = this directory.
      * @param unpackEmitterBasePath the basePath where the unpack-emitter 
writes files.
      *                              This is where the server will find the zip 
files created
      *                              by UNPACK mode. May be null if UNPACK mode 
won't be used.
      */
-    public PipesParsingHelper(PipesParser pipesParser, PipesConfig 
pipesConfig, Path unpackEmitterBasePath) {
+    public PipesParsingHelper(PipesParser pipesParser, PipesConfig pipesConfig,
+                              Path inputTempDirectory, Path 
unpackEmitterBasePath) {
         this.pipesParser = pipesParser;
         this.pipesConfig = pipesConfig;
+        this.inputTempDirectory = inputTempDirectory;
         this.unpackEmitterBasePath = unpackEmitterBasePath;
 
-        // Determine input temp directory
-        String configTempDir = pipesConfig.getTempDirectory();
-        if (configTempDir != null && !configTempDir.isBlank()) {
-            this.inputTempDirectory = Paths.get(configTempDir);
-            if (!Files.isDirectory(this.inputTempDirectory)) {
-                throw new IllegalArgumentException(
-                        "Configured tempDirectory does not exist or is not a 
directory: " + configTempDir);
-            }
-        } else {
-            this.inputTempDirectory = null; // Use system default
+        if (inputTempDirectory == null || 
!Files.isDirectory(inputTempDirectory)) {
+            throw new IllegalArgumentException(
+                    "inputTempDirectory must be a valid directory: " + 
inputTempDirectory);
         }
+        LOG.info("PipesParsingHelper initialized with inputTempDirectory: {}", 
inputTempDirectory);
+    }
+
+    /**
+     * Gets the input temp directory path.
+     * @return the input temp directory
+     */
+    public Path getInputTempDirectory() {
+        return inputTempDirectory;
     }
 
     /**
      * Parses content using pipes-based parsing with process isolation.
      * <p>
-     * The TikaInputStream should already be spooled to a temp file via {@link 
TikaInputStream#getPath()}.
-     * The caller is responsible for closing the TikaInputStream, which will 
clean up any temp files.
+     * This method spools the input to the dedicated temp directory and uses a 
relative
+     * filename in the FetchKey. The file-system-fetcher is configured with 
basePath
+     * pointing to this directory, so the child process can only access files 
there.
+     * <p>
+     * The caller is responsible for closing the TikaInputStream.
      *
      * @param tis the TikaInputStream containing the content to parse
      * @param metadata metadata to pass to the parser (may include filename, 
content-type, etc.)
@@ -122,17 +121,22 @@ public class PipesParsingHelper {
     public List<Metadata> parse(TikaInputStream tis, Metadata metadata,
                                  ParseContext parseContext, ParseMode 
parseMode) throws IOException {
         String requestId = UUID.randomUUID().toString();
+        Path tempFile = null;
 
         try {
-            // Get the backing file path from the spooled TikaInputStream
-            Path inputFile = tis.getPath();
-            LOG.debug("parse: using file {} ({} bytes)", inputFile, 
Files.size(inputFile));
+            // Spool input to our dedicated temp directory with proper suffix
+            String suffix = getSuffix(metadata);
+            tempFile = Files.createTempFile(inputTempDirectory, "tika-", 
suffix);
+            Files.copy(tis, tempFile, 
java.nio.file.StandardCopyOption.REPLACE_EXISTING);
+
+            String relativeName = tempFile.getFileName().toString();
+            LOG.debug("parse: spooled to {} ({} bytes)", relativeName, 
Files.size(tempFile));
 
             // Set parse mode in context
             parseContext.set(ParseMode.class, parseMode);
 
-            // Create FetchEmitTuple - use NO_EMIT since we're using 
PASSBACK_ALL
-            FetchKey fetchKey = new FetchKey(DEFAULT_FETCHER_ID, 
inputFile.toAbsolutePath().toString());
+            // Create FetchEmitTuple with relative filename (basePath is 
configured in fetcher)
+            FetchKey fetchKey = new FetchKey(DEFAULT_FETCHER_ID, relativeName);
 
             FetchEmitTuple tuple = new FetchEmitTuple(
                     requestId,
@@ -153,9 +157,33 @@ public class PipesParsingHelper {
             throw new TikaServerParseException("Parsing interrupted");
         } catch (PipesException e) {
             throw new TikaServerParseException(e);
+        } finally {
+            // Clean up temp file
+            if (tempFile != null) {
+                try {
+                    Files.deleteIfExists(tempFile);
+                } catch (IOException e) {
+                    LOG.warn("Failed to delete temp file: {}", tempFile, e);
+                }
+            }
         }
     }
 
+    /**
+     * Extracts the file suffix from the metadata resource name, defaulting to ".tmp".
+     */
+    private String getSuffix(Metadata metadata) {
+        String resourceName = 
metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
+        if (resourceName != null) {
+            int lastDot = resourceName.lastIndexOf('.');
+            if (lastDot > 0 && lastDot < resourceName.length() - 1) {
+                return resourceName.substring(lastDot);
+            }
+        }
+        // Default suffix
+        return ".tmp";
+    }
+
     /**
      * Processes the PipesResult and returns the metadata list.
      */
@@ -260,10 +288,11 @@ public class PipesParsingHelper {
      * extracted embedded documents.
      * <p>
      * This method:
-     * 1. Configures UnpackConfig with zipEmbeddedFiles=true
-     * 2. The pipes child process extracts embedded files and creates a zip
-     * 3. The zip is emitted to the configured file-system emitter
-     * 4. Returns the path to the zip file for streaming
+     * 1. Spools input to the dedicated temp directory
+     * 2. Configures UnpackConfig with zipEmbeddedFiles=true
+     * 3. The pipes child process extracts embedded files and creates a zip
+     * 4. The zip is emitted to the configured file-system emitter
+     * 5. Returns the path to the zip file for streaming
      * <p>
      * The caller is responsible for deleting the zip file after streaming.
      *
@@ -277,42 +306,47 @@ public class PipesParsingHelper {
     public UnpackResult parseUnpack(TikaInputStream tis, Metadata metadata,
                                     ParseContext parseContext, boolean 
saveAll) throws IOException {
         String requestId = UUID.randomUUID().toString();
+        Path tempFile = null;
 
-        // Get the backing file path from the spooled TikaInputStream
-        Path inputFile = tis.getPath();
-        LOG.debug("parseUnpack: using file {} ({} bytes), requestId={}",
-                inputFile, Files.size(inputFile), requestId);
-
-        // Set parse mode to UNPACK
-        parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
-        // Configure UnpackConfig - use existing or create new
-        UnpackConfig unpackConfig = parseContext.get(UnpackConfig.class);
-        if (unpackConfig == null) {
-            unpackConfig = new UnpackConfig();
-        }
+        try {
+            // Spool input to our dedicated temp directory with proper suffix
+            String suffix = getSuffix(metadata);
+            tempFile = Files.createTempFile(inputTempDirectory, 
"tika-unpack-", suffix);
+            Files.copy(tis, tempFile, 
java.nio.file.StandardCopyOption.REPLACE_EXISTING);
+
+            String relativeName = tempFile.getFileName().toString();
+            LOG.debug("parseUnpack: spooled to {} ({} bytes), requestId={}",
+                    relativeName, Files.size(tempFile), requestId);
+
+            // Set parse mode to UNPACK
+            parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+            // Configure UnpackConfig - use existing or create new
+            UnpackConfig unpackConfig = parseContext.get(UnpackConfig.class);
+            if (unpackConfig == null) {
+                unpackConfig = new UnpackConfig();
+            }
 
-        // Enable zip creation in the child process
-        unpackConfig.setZipEmbeddedFiles(true);
+            // Enable zip creation in the child process
+            unpackConfig.setZipEmbeddedFiles(true);
 
-        // Set suffix strategy to DETECTED so files get their proper 
extensions (e.g., .wav, .jpg)
-        unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
+            // Set suffix strategy to DETECTED so files get their proper 
extensions (e.g., .wav, .jpg)
+            
unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
 
-        // Set emitter to our file-system emitter
-        unpackConfig.setEmitter(UNPACK_EMITTER_ID);
+            // Set emitter to our file-system emitter
+            unpackConfig.setEmitter(UNPACK_EMITTER_ID);
 
-        // Include original document if saveAll is requested
-        if (saveAll) {
-            unpackConfig.setIncludeOriginal(true);
-            unpackConfig.setIncludeMetadataInZip(true);
-        }
+            // Include original document if saveAll is requested
+            if (saveAll) {
+                unpackConfig.setIncludeOriginal(true);
+                unpackConfig.setIncludeMetadataInZip(true);
+            }
 
-        parseContext.set(UnpackConfig.class, unpackConfig);
+            parseContext.set(UnpackConfig.class, unpackConfig);
 
-        // Create FetchEmitTuple - the emitKey will be used to determine the 
zip file location
-        // The zip file will be written to: emitter.basePath + "/" + emitKey + 
"-embedded.zip"
-        FetchKey fetchKey = new FetchKey(DEFAULT_FETCHER_ID, 
inputFile.toAbsolutePath().toString());
-        EmitKey emitKey = new EmitKey(UNPACK_EMITTER_ID, requestId);
+            // Create FetchEmitTuple with relative filename (basePath is 
configured in fetcher)
+            FetchKey fetchKey = new FetchKey(DEFAULT_FETCHER_ID, relativeName);
+            EmitKey emitKey = new EmitKey(UNPACK_EMITTER_ID, requestId);
 
         FetchEmitTuple tuple = new FetchEmitTuple(
                 requestId,
@@ -322,70 +356,80 @@ public class PipesParsingHelper {
                 parseContext
         );
 
-        // Execute parse via pipes
-        PipesResult result;
-        try {
-            result = pipesParser.parse(tuple);
-        } catch (InterruptedException e) {
-            Thread.currentThread().interrupt();
-            throw new TikaServerParseException("Parsing interrupted");
-        } catch (PipesException e) {
-            throw new TikaServerParseException(e);
-        }
+            // Execute parse via pipes
+            PipesResult result;
+            try {
+                result = pipesParser.parse(tuple);
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+                throw new TikaServerParseException("Parsing interrupted");
+            } catch (PipesException e) {
+                throw new TikaServerParseException(e);
+            }
 
-        // Check for errors
-        if (result.isProcessCrash() || result.isFatal() || 
result.isInitializationFailure()) {
-            LOG.warn("UNPACK parse failed: {} - {}", result.status(), 
result.message());
-            throw new WebApplicationException(
-                    "Parse failed: " + result.status(),
-                    mapStatusToHttpResponse(result.status()));
-        }
+            // Check for errors
+            if (result.isProcessCrash() || result.isFatal() || 
result.isInitializationFailure()) {
+                LOG.warn("UNPACK parse failed: {} - {}", result.status(), 
result.message());
+                throw new WebApplicationException(
+                        "Parse failed: " + result.status(),
+                        mapStatusToHttpResponse(result.status()));
+            }
 
-        if (result.isTaskException()) {
-            LOG.warn("UNPACK task exception: {} - {}", result.status(), 
result.message());
-            throw new WebApplicationException(
-                    "Parse failed: " + result.message(),
-                    Response.Status.INTERNAL_SERVER_ERROR);
-        }
+            if (result.isTaskException()) {
+                LOG.warn("UNPACK task exception: {} - {}", result.status(), 
result.message());
+                throw new WebApplicationException(
+                        "Parse failed: " + result.message(),
+                        Response.Status.INTERNAL_SERVER_ERROR);
+            }
 
-        // Get metadata list from result
-        List<Metadata> metadataList = Collections.emptyList();
-        EmitData emitData = result.emitData();
-        if (emitData != null && emitData.getMetadataList() != null) {
-            metadataList = emitData.getMetadataList();
-        }
+            // Get metadata list from result
+            List<Metadata> metadataList = Collections.emptyList();
+            EmitData emitData = result.emitData();
+            if (emitData != null && emitData.getMetadataList() != null) {
+                metadataList = emitData.getMetadataList();
+            }
 
-        // Check for parse exceptions in the container document metadata
-        // These should return appropriate HTTP status codes
-        if (!metadataList.isEmpty()) {
-            Metadata containerMetadata = metadataList.get(0);
-            String containerException = 
containerMetadata.get(TikaCoreProperties.CONTAINER_EXCEPTION);
-            if (containerException != null) {
-                // Map exception type to HTTP status
-                // 422 (Unprocessable Entity) for parse-related exceptions
-                int status = 422; // Default for parse exceptions
-                if (containerException.contains("EncryptedDocumentException") 
||
-                        containerException.contains("TikaException") ||
-                        containerException.contains("NullPointerException") ||
-                        containerException.contains("IllegalStateException")) {
-                    status = 422;
+            // Check for parse exceptions in the container document metadata
+            // These should return appropriate HTTP status codes
+            if (!metadataList.isEmpty()) {
+                Metadata containerMetadata = metadataList.get(0);
+                String containerException = 
containerMetadata.get(TikaCoreProperties.CONTAINER_EXCEPTION);
+                if (containerException != null) {
+                    // Map exception type to HTTP status
+                    // 422 (Unprocessable Entity) for parse-related exceptions
+                    int status = 422; // Default for parse exceptions
+                    if 
(containerException.contains("EncryptedDocumentException") ||
+                            containerException.contains("TikaException") ||
+                            
containerException.contains("NullPointerException") ||
+                            
containerException.contains("IllegalStateException")) {
+                        status = 422;
+                    }
+                    // Build response with exception string as body for stack 
trace support
+                    Response response = Response.status(status)
+                            .entity(containerException)
+                            .type("text/plain")
+                            .build();
+                    throw new WebApplicationException(response);
                 }
-                // Build response with exception string as body for stack 
trace support
-                Response response = Response.status(status)
-                        .entity(containerException)
-                        .type("text/plain")
-                        .build();
-                throw new WebApplicationException(response);
             }
-        }
 
-        // Determine the zip file path
-        // Regular format: emitter.basePath + "/" + emitKey + "-embedded.zip"
-        // Frictionless format: emitter.basePath + "/" + emitKey + 
"-frictionless.zip"
-        boolean isFrictionless = unpackConfig.getOutputFormat() == 
UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS;
-        Path zipFile = getEmittedZipPath(requestId, isFrictionless);
+            // Determine the zip file path
+            // Regular format: emitter.basePath + "/" + emitKey + 
"-embedded.zip"
+            // Frictionless format: emitter.basePath + "/" + emitKey + 
"-frictionless.zip"
+            boolean isFrictionless = unpackConfig.getOutputFormat() == 
UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS;
+            Path zipFile = getEmittedZipPath(requestId, isFrictionless);
 
-        return new UnpackResult(zipFile, metadataList);
+            return new UnpackResult(zipFile, metadataList);
+        } finally {
+            // Clean up temp file
+            if (tempFile != null) {
+                try {
+                    Files.deleteIfExists(tempFile);
+                } catch (IOException e) {
+                    LOG.warn("Failed to delete temp file: {}", tempFile, e);
+                }
+            }
+        }
     }
 
     /**
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
index d11d21984d..9cbdb7a11d 100644
--- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
@@ -196,7 +196,12 @@ public abstract class CXFTestBase {
 
             this.tika = TikaLoader.load(tmp);
 
+            // Create input temp directory for pipes-based parsing
+            Path inputTempDirectory = 
Files.createTempDirectory("tika-server-test-input-");
+
             // Initialize PipesParsingHelper for pipes-based parsing
+            // Merge the fetcher config with basePath pointing to the temp 
directory
+            this.pipesConfigPath = mergeFetcherConfig(this.pipesConfigPath, 
inputTempDirectory);
             TikaJsonConfig tikaJsonConfig = 
TikaJsonConfig.load(this.pipesConfigPath);
             PipesConfig pipesConfig = tikaJsonConfig.deserialize("pipes", 
PipesConfig.class);
             if (pipesConfig == null) {
@@ -204,7 +209,8 @@ public abstract class CXFTestBase {
             }
             pipesConfig.setEmitStrategy(new 
EmitStrategyConfig(EmitStrategy.PASSBACK_ALL));
             this.pipesParser = PipesParser.load(tikaJsonConfig, pipesConfig, 
this.pipesConfigPath);
-            PipesParsingHelper pipesParsingHelper = new 
PipesParsingHelper(this.pipesParser, pipesConfig, getUnpackEmitterBasePath());
+            PipesParsingHelper pipesParsingHelper = new 
PipesParsingHelper(this.pipesParser, pipesConfig,
+                    inputTempDirectory, getUnpackEmitterBasePath());
 
             TikaResource.init(tika, new ServerStatus(), pipesParsingHelper);
         } finally {
@@ -259,6 +265,37 @@ public abstract class CXFTestBase {
         return tempConfig;
     }
 
+    /**
+     * Merges the tika-server-fetcher configuration into the pipes config.
+     * The fetcher is configured with basePath pointing to the input temp 
directory.
+     */
+    private Path mergeFetcherConfig(Path configPath, Path inputTempDirectory) 
throws IOException {
+        ObjectMapper mapper = new ObjectMapper();
+        com.fasterxml.jackson.databind.node.ObjectNode root =
+                (com.fasterxml.jackson.databind.node.ObjectNode) 
mapper.readTree(configPath.toFile());
+
+        // Get or create fetchers section
+        com.fasterxml.jackson.databind.node.ObjectNode fetchers =
+                (com.fasterxml.jackson.databind.node.ObjectNode) 
root.get("fetchers");
+        if (fetchers == null) {
+            fetchers = mapper.createObjectNode();
+            root.set("fetchers", fetchers);
+        }
+
+        // Create the tika-server-fetcher with basePath
+        com.fasterxml.jackson.databind.node.ObjectNode fetcherTypeConfig = 
mapper.createObjectNode();
+        fetcherTypeConfig.put("basePath", 
inputTempDirectory.toAbsolutePath().toString());
+
+        com.fasterxml.jackson.databind.node.ObjectNode fetcherNode = 
mapper.createObjectNode();
+        fetcherNode.set("file-system-fetcher", fetcherTypeConfig);
+
+        fetchers.set(PipesParsingHelper.DEFAULT_FETCHER_ID, fetcherNode);
+
+        Path tempConfig = Files.createTempFile("tika-server-pipes-fetcher-", 
".json");
+        
mapper.writerWithDefaultPrettyPrinter().writeValue(tempConfig.toFile(), root);
+        return tempConfig;
+    }
+
     /**
      * Creates a default test config with pipes configuration.
      * If the tika config contains metadata-filters, they are merged into the 
pipes config.


Reply via email to