This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4645-usability-scripts
in repository https://gitbox.apache.org/repos/asf/tika.git

commit b9400211c81fe609a2b008b9719c2587388695da
Author: tallison <[email protected]>
AuthorDate: Sun Feb 1 11:42:06 2026 -0500

    TIKA-4645 - usability scripts
---
 docs/advanced/integration-testing/tika-app.adoc    | 398 +++++++++++++++++++++
 docs/modules/ROOT/pages/migration-to-4x/index.adoc |  17 +
 .../main/java/org/apache/tika/cli/AsyncHelper.java |  16 +
 .../src/main/java/org/apache/tika/cli/TikaCLI.java |  55 +--
 .../java/org/apache/tika/cli/AsyncHelperTest.java  |  43 +++
 .../test/java/org/apache/tika/cli/TikaCLITest.java |  51 +++
 .../org/apache/tika/async/cli/TikaAsyncCLI.java    |  11 +-
 .../src/main/resources/config-template.json        |   5 +-
 8 files changed, 565 insertions(+), 31 deletions(-)

diff --git a/docs/advanced/integration-testing/tika-app.adoc 
b/docs/advanced/integration-testing/tika-app.adoc
new file mode 100644
index 0000000000..ea0b846173
--- /dev/null
+++ b/docs/advanced/integration-testing/tika-app.adoc
@@ -0,0 +1,398 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Tika-App Integration Testing
+
+Integration tests for `tika-app` to be run from a distribution ZIP.
+
+== Setup
+
+[source,bash]
+----
+# Create test directory
+mkdir -p /tmp/tika-app-test
+cd /tmp/tika-app-test
+
+# Copy and extract distribution
+cp /path/to/tika-app-4.0.0-SNAPSHOT.zip .
+unzip tika-app-4.0.0-SNAPSHOT.zip
+cd tika-app-4.0.0-SNAPSHOT
+
+# Get test files
+cp /path/to/tika-main/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testPDF.pdf .
+cp /path/to/tika-main/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/test_recursive_embedded.docx .
+cp /path/to/tika-main/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testHTML.html .
+----
+
+== Test Cases
+
+=== Test 1: Basic Text Extraction
+
+[source,bash]
+----
+java -jar tika-app.jar --text testPDF.pdf
+----
+
+*Expected:* Outputs extracted text from PDF.
+
+=== Test 2: Metadata Extraction
+
+[source,bash]
+----
+java -jar tika-app.jar --metadata testPDF.pdf
+----
+
+*Expected:* Outputs key=value metadata pairs.
+
+=== Test 3: JSON Output with Pretty Print
+
+[source,bash]
+----
+java -jar tika-app.jar --json --pretty-print testPDF.pdf
+----
+
+*Expected:* Clean, readable JSON output with metadata.
+
+=== Test 4: File Type Detection
+
+[source,bash]
+----
+java -jar tika-app.jar --detect testPDF.pdf
+----
+
+*Expected:* Returns `application/pdf`
+
+=== Test 5: Non-existent File Handling
+
+[source,bash]
+----
+java -jar tika-app.jar --text nonexistent_file.pdf
+----
+
+*Expected:* Clear error message (currently shows a confusing "MalformedURLException: no protocol").
+
+=== Test 6: Recursive JSON Output
+
+[source,bash]
+----
+java -jar tika-app.jar --jsonRecursive test_recursive_embedded.docx
+----
+
+*Expected:* JSON array with metadata and content for the main document and all embedded documents.
+
+=== Test 7: Stdin Input
+
+[source,bash]
+----
+echo "Hello World" | java -jar tika-app.jar --text
+----
+
+*Expected:* Outputs "Hello World"
+
+=== Test 8: Extract Attachments (-z)
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/extract-out
+java -jar tika-app.jar -z --extract-dir=/tmp/tika-app-test/extract-out test_recursive_embedded.docx
+ls /tmp/tika-app-test/extract-out
+----
+
+*Expected:* Creates a .json metadata file and extracts embedded files to the extract-out directory.
+
+=== Test 9: Recursive Extract (-Z)
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/extract-recursive
+java -jar tika-app.jar -Z --extract-dir=/tmp/tika-app-test/extract-recursive test_recursive_embedded.docx
+ls -R /tmp/tika-app-test/extract-recursive
+----
+
+*Expected:* Extracts all nested embedded documents recursively.
+
+=== Test 10: Batch Mode (Simple)
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/batch-input
+mkdir -p /tmp/tika-app-test/batch-output
+cp testPDF.pdf testHTML.html /tmp/tika-app-test/batch-input/
+java -jar tika-app.jar /tmp/tika-app-test/batch-input /tmp/tika-app-test/batch-output
+ls /tmp/tika-app-test/batch-output
+----
+
+*Expected:* Creates .json files for each input file in output directory.
+
+=== Test 10b: Batch Mode with Output Options
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/batch-output2
+java -jar tika-app.jar -J -t /tmp/tika-app-test/batch-input /tmp/tika-app-test/batch-output2
+ls /tmp/tika-app-test/batch-output2
+----
+
+*Expected:* Creates .json files with text content (`X-TIKA:content_handler` should be `ToTextContentHandler`).
+
+=== Test 11: Version Check
+
+[source,bash]
+----
+java -jar tika-app.jar --version
+----
+
+*Expected:* Returns `Apache Tika X.X.X`
+
+=== Test 12: List Parsers
+
+[source,bash]
+----
+java -jar tika-app.jar --list-parsers
+----
+
+*Expected:* Hierarchical list of available parsers.
+
+=== Test 13: Language Detection
+
+[source,bash]
+----
+java -jar tika-app.jar --language testPDF.pdf
+----
+
+*Expected:* Returns detected language code.
+
+=== Test 14: Digest Computation
+
+[source,bash]
+----
+java -jar tika-app.jar --digest=md5 --json testPDF.pdf
+----
+
+*Expected:* JSON output includes `X-TIKA:digest:MD5` field.
+
+=== Test 15: URL Input
+
+[source,bash]
+----
+java -jar tika-app.jar --detect https://www.apache.org/
+----
+
+*Expected:* Returns `text/html`
+
+=== Test 16: XMP Output
+
+[source,bash]
+----
+java -jar tika-app.jar --xmp testPDF.pdf
+----
+
+*Expected:* Valid XMP metadata in RDF/XML format.
+
+=== Test 17: Boilerpipe Main Content
+
+[source,bash]
+----
+java -jar tika-app.jar --text-main testHTML.html
+----
+
+*Expected:* Returns only main content, not boilerplate.
+
+=== Test 18: Depth Limiting
+
+[source,bash]
+----
+java -jar tika-app.jar --maxEmbeddedDepth=1 --text test_recursive_embedded.docx
+----
+
+*Expected:* Limited depth of embedded document extraction.
+
+=== Test 19: GUI Mode
+
+[source,bash]
+----
+java -jar tika-app.jar
+----
+
+*Expected:* Opens GUI (skip in headless environments).
+
+== Advanced Tests: Custom Config
+
+These tests require creating a custom tika-config.json file.
+
+=== Test 20: Create Custom Config File
+
+Create `/tmp/tika-app-test/my-config.json`:
+[source,json]
+----
+{
+  "content-handler-factory": {
+    "basic-content-handler-factory": {
+      "type": "TEXT",
+      "writeLimit": 100000,
+      "throwOnWriteLimitReached": false
+    }
+  },
+  "parsers": [
+    {
+      "default-parser": {}
+    },
+    {
+      "pdf-parser": {
+        "extractActions": true,
+        "extractInlineImages": true,
+        "ocrStrategy": "NO_OCR"
+      }
+    },
+    {
+      "ooxml-parser": {
+        "includeDeletedContent": true,
+        "includeMoveFromContent": true,
+        "extractMacros": true
+      }
+    }
+  ],
+  "fetchers": {
+    "fsf": {
+      "file-system-fetcher": {
+        "basePath": "/tmp/tika-app-test/batch-input",
+        "extractFileSystemMetadata": true
+      }
+    }
+  },
+  "emitters": {
+    "fse": {
+      "file-system-emitter": {
+        "basePath": "/tmp/tika-app-test/config-output",
+        "fileExtension": "json",
+        "onExists": "REPLACE"
+      }
+    }
+  },
+  "pipes-iterator": {
+    "file-system-pipes-iterator": {
+      "basePath": "/tmp/tika-app-test/batch-input",
+      "countTotal": true,
+      "fetcherId": "fsf",
+      "emitterId": "fse"
+    }
+  },
+  "pipes": {
+    "parseMode": "RMETA",
+    "numClients": 2,
+    "timeoutMillis": 60000
+  },
+  "plugin-roots": "/tmp/tika-app-test/plugins"
+}
+----
+
+=== Test 21: Run with Custom Config
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/config-output
+java -jar tika-app.jar /tmp/tika-app-test/my-config.json
+ls /tmp/tika-app-test/config-output
+----
+
+*Expected:* Processes all files in batch-input using custom parser settings.
+
+=== Test 22: Async Mode with Config Flag
+
+[source,bash]
+----
+java -jar tika-app.jar -a --config=/tmp/tika-app-test/my-config.json
+----
+
+*Expected:* Same as Test 21 but using explicit async flag.
+
+=== Test 23: Unpack with Frictionless Format
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/frictionless-out
+java -jar tika-app.jar -Z --extract-dir=/tmp/tika-app-test/frictionless-out --unpack-format=FRICTIONLESS --unpack-include-metadata test_recursive_embedded.docx
+ls /tmp/tika-app-test/frictionless-out
+----
+
+*Expected:* Extracts embedded files in Frictionless data package format with `metadata.json`.
+
+=== Test 24: Unpack to Directory (not zipped)
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/unpack-dir-out
+java -jar tika-app.jar -Z --extract-dir=/tmp/tika-app-test/unpack-dir-out --unpack-mode=DIRECTORY test_recursive_embedded.docx
+ls -R /tmp/tika-app-test/unpack-dir-out
+----
+
+*Expected:* Extracts embedded files to a directory structure instead of zipping them.
+
+=== Test 25: Batch with Multiple Workers
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/multi-worker-out
+java -jar tika-app.jar -n 4 /tmp/tika-app-test/batch-input /tmp/tika-app-test/multi-worker-out
+----
+
+*Expected:* Processes files using 4 parallel forked clients.
+
+=== Test 26: Batch with Custom Timeout
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/timeout-out
+java -jar tika-app.jar -T 30000 /tmp/tika-app-test/batch-input /tmp/tika-app-test/timeout-out
+----
+
+*Expected:* Processes files with 30 second timeout per file.
+
+=== Test 27: Batch with Custom Heap
+
+[source,bash]
+----
+mkdir -p /tmp/tika-app-test/heap-out
+java -jar tika-app.jar -X 2g /tmp/tika-app-test/batch-input /tmp/tika-app-test/heap-out
+----
+
+*Expected:* Forked processes use 2GB heap.
+
+== Known Issues
+
+=== Issue 1: Confusing "no protocol" Error
+
+When a file doesn't exist, the error message is misleading:
+[source]
+----
+MalformedURLException: no protocol: nonexistent_file.pdf
+----
+
+Should say "File not found".
+
+=== Issue 2: INFO Message on Every Command
+
+Every command prints an INFO message to stderr about convenience features. Use `2>/dev/null` to suppress it.
+
+=== Issue 3: Config Dump Options Not Implemented
+
+These options are not yet implemented in 4.x:
+
+* `--dump-minimal-config`
+* `--dump-current-config`
+* `--dump-static-config`
+* `--dump-static-full-config`
diff --git a/docs/modules/ROOT/pages/migration-to-4x/index.adoc 
b/docs/modules/ROOT/pages/migration-to-4x/index.adoc
index 20a7c5cf48..eebf29f3db 100644
--- a/docs/modules/ROOT/pages/migration-to-4x/index.adoc
+++ b/docs/modules/ROOT/pages/migration-to-4x/index.adoc
@@ -31,3 +31,20 @@ See the xref:roadmap.adoc[Roadmap] for version timelines and 
support schedules.
 
 * xref:migration-to-4x/design-notes-4x.adoc[Design Notes] - Architectural 
decisions and design rationale
 * xref:migration-to-4x/serialization-4x.adoc[Serialization] - JSON 
serialization design and implementation details
+
+== TODOs / Missing Features in 4.x
+
+The following features from 3.x are not yet implemented in 4.x:
+
+=== Config Serialization
+
+The following tika-app options for dumping configuration are not yet available:
+
+* `--dump-minimal-config` - Print minimal TikaConfig
+* `--dump-current-config` - Print current TikaConfig
+* `--dump-static-config` - Print static config
+* `--dump-static-full-config` - Print static explicit config
+
+These require completing the JSON serialization support for TikaConfig objects. The underlying serialization infrastructure exists (see xref:migration-to-4x/serialization-4x.adoc[Serialization]), but the CLI integration is pending.
+
+*Workaround:* Manually create JSON config files using the template at `tika-pipes/tika-async-cli/src/main/resources/config-template.json` as a starting point.
diff --git a/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java 
b/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java
index 38a0094f79..e3561ecf5f 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java
@@ -58,6 +58,22 @@ public class AsyncHelper {
                 argList.add(mode);
             } else if (arg.equals(UNPACK_INCLUDE_METADATA)) {
                 argList.add("--unpack-include-metadata");
+            } else if (arg.equals("-t") || arg.equals("--text")) {
+                // Translate TikaCLI text output to TikaAsyncCLI handler type
+                argList.add("-h");
+                argList.add("t");
+            } else if (arg.equals("--html")) {
+                // Translate TikaCLI html output to TikaAsyncCLI handler type
+                // Note: TikaCLI uses -h for html, but TikaAsyncCLI uses -h 
for handler type
+                argList.add("-h");
+                argList.add("h");
+            } else if (arg.equals("-x") || arg.equals("--xml")) {
+                // Translate TikaCLI xml output to TikaAsyncCLI handler type
+                argList.add("-h");
+                argList.add("x");
+            } else if (arg.equals("-J") || arg.equals("--jsonRecursive")) {
+                // TikaAsyncCLI always outputs JSON with recursive metadata 
(RMETA mode)
+                // This is already the default, so we just skip this arg
             } else {
                 argList.add(args[i]);
             }
diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java 
b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 5f388865c4..97ca90a489 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -38,7 +38,6 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.nio.file.StandardCopyOption;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
@@ -276,25 +275,11 @@ public class TikaCLI {
         if (args.length == 1 &&  args[0].endsWith(".json")) {
             TikaAsyncCLI.main(args);
             return;
-        };
-        //TODO -- are there other shortcuts?
-        Path tmpConfig = null;
-        try {
-            tmpConfig = Files.createTempFile("tika-config-", ".json");
-            
Files.copy(TikaCLI.class.getResourceAsStream("/tika-config-default-single-file.json"),
-                    tmpConfig, StandardCopyOption.REPLACE_EXISTING);
-            List<String> argList = new ArrayList<>();
-            argList.add("-c");
-            argList.add(tmpConfig.toAbsolutePath().toString());
-            for (String arg : args) {
-                argList.add(arg);
-            }
-            TikaAsyncCLI.main(argList.toArray(new String[0]));
-        } finally {
-            if (tmpConfig != null) {
-                Files.delete(tmpConfig);
-            }
         }
+        // For batch mode (two directories), pass directly to TikaAsyncCLI.
+        // It will create its own config with PluginsWriter that includes
+        // plugin-roots, fetcher, emitter, and pipes-iterator configuration.
+        TikaAsyncCLI.main(args);
     }
 
     /**
@@ -350,12 +335,34 @@ public class TikaCLI {
 
     private boolean testForAsync(String[] args) {
 
+        // Single .json file is a config file for async mode
+        if (args.length == 1 && args[0].endsWith(".json")) {
+            return true;
+        }
+
         if (args.length == 2) {
             if (Files.isDirectory(Paths.get(args[0]))) {
                 return true;
             }
         }
 
+        // Check if last two args are directories (batch mode with options)
+        if (args.length >= 2) {
+            String lastArg = args[args.length - 1];
+            String secondLastArg = args[args.length - 2];
+            // Make sure neither looks like an option flag (i.e. starts with '-')
+            if (!lastArg.startsWith("-") && !secondLastArg.startsWith("-")) {
+                try {
+                    if (Files.isDirectory(Paths.get(secondLastArg)) &&
+                        (Files.isDirectory(Paths.get(lastArg)) || 
!Files.exists(Paths.get(lastArg)))) {
+                        return true;
+                    }
+                } catch (Exception e) {
+                    // Invalid path, not batch mode
+                }
+            }
+        }
+
         for (String arg : args) {
             if (arg.equals("-a") || arg.equals("--async")) {
                 return true;
@@ -590,10 +597,12 @@ public class TikaCLI {
         out.println();
         out.println("    --config=<tika-config.xml>");
         out.println("        TikaConfig file. Must be specified before -g, -s, 
-f or the dump-x-config !");
-        out.println("    --dump-minimal-config  Print minimal TikaConfig");
-        out.println("    --dump-current-config  Print current TikaConfig");
-        out.println("    --dump-static-config   Print static config");
-        out.println("    --dump-static-full-config  Print static explicit 
config");
+        // TODO: TIKA-XXXX - Re-enable config dump options once JSON 
serialization is complete
+        // These options are not yet implemented in 4.x due to the migration 
from XML to JSON config
+        // out.println("    --dump-minimal-config  Print minimal TikaConfig");
+        // out.println("    --dump-current-config  Print current TikaConfig");
+        // out.println("    --dump-static-config   Print static config");
+        // out.println("    --dump-static-full-config  Print static explicit 
config");
         out.println("    
--convert-config-xml-to-json=<input.xml>,<output.json>");
         out.println("        Convert legacy XML config to JSON format (parsers 
section only)");
         out.println("");
diff --git a/tika-app/src/test/java/org/apache/tika/cli/AsyncHelperTest.java 
b/tika-app/src/test/java/org/apache/tika/cli/AsyncHelperTest.java
index 9885feac3f..a26f247500 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/AsyncHelperTest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/AsyncHelperTest.java
@@ -28,4 +28,47 @@ public class AsyncHelperTest {
         String[] expected = new String[]{"-c", "blah.json", "-i", 
"input.docx", "-o", "output/dir"};
         assertArrayEquals(expected, AsyncHelper.translateArgs(args));
     }
+
+    @Test
+    public void testTextHandler() throws Exception {
+        String[] args = new String[]{"-t", "input", "output"};
+        String[] expected = new String[]{"-h", "t", "input", "output"};
+        assertArrayEquals(expected, AsyncHelper.translateArgs(args));
+    }
+
+    @Test
+    public void testTextHandlerLong() throws Exception {
+        String[] args = new String[]{"--text", "input", "output"};
+        String[] expected = new String[]{"-h", "t", "input", "output"};
+        assertArrayEquals(expected, AsyncHelper.translateArgs(args));
+    }
+
+    @Test
+    public void testHtmlHandler() throws Exception {
+        String[] args = new String[]{"--html", "input", "output"};
+        String[] expected = new String[]{"-h", "h", "input", "output"};
+        assertArrayEquals(expected, AsyncHelper.translateArgs(args));
+    }
+
+    @Test
+    public void testXmlHandler() throws Exception {
+        String[] args = new String[]{"-x", "input", "output"};
+        String[] expected = new String[]{"-h", "x", "input", "output"};
+        assertArrayEquals(expected, AsyncHelper.translateArgs(args));
+    }
+
+    @Test
+    public void testJsonRecursiveSkipped() throws Exception {
+        // -J is the default in async mode, so it's just skipped
+        String[] args = new String[]{"-J", "-t", "input", "output"};
+        String[] expected = new String[]{"-h", "t", "input", "output"};
+        assertArrayEquals(expected, AsyncHelper.translateArgs(args));
+    }
+
+    @Test
+    public void testBatchModeWithOptions() throws Exception {
+        String[] args = new String[]{"-J", "-t", "/path/to/input", 
"/path/to/output"};
+        String[] expected = new String[]{"-h", "t", "/path/to/input", 
"/path/to/output"};
+        assertArrayEquals(expected, AsyncHelper.translateArgs(args));
+    }
 }
diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java 
b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
index 0de27d2354..8c3d78cd34 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
@@ -568,6 +568,57 @@ public class TikaCLITest {
                 "Should have at least 2 files (json + embedded), got " + 
fileNames.size() + ": " + fileNames);
     }
 
+    /**
+     * Test that --extract-dir option correctly sets the output directory
+     * for the -z (shallow) extraction mode.
+     */
+    @Test
+    public void testExtractDirOption() throws Exception {
+        Path input = Paths.get(new URI(resourcePrefix + 
"/test_recursive_embedded.docx"));
+        Path pluginsDir = Paths.get("target/plugins");
+
+        // Test with -z (shallow extraction)
+        String[] params = {"-z",
+                "--extract-dir=" + extractDir.toAbsolutePath(),
+                "-p", pluginsDir.toAbsolutePath().toString(),
+                input.toAbsolutePath().toString()};
+
+        TikaCLI.main(params);
+
+        Set<String> fileNames = getFileNames(extractDir);
+
+        // Should have extracted files in the specified directory, not current 
dir
+        assertTrue(fileNames.stream().anyMatch(f -> f.endsWith(".json")),
+                "Should have a .json metadata file in extractDir, got: " + 
fileNames);
+        assertTrue(fileNames.stream().anyMatch(f -> f.contains("-embed/")),
+                "Should have extracted embedded files in extractDir, got: " + 
fileNames);
+    }
+
+    /**
+     * Test that --extract-dir option works with -Z (recursive) extraction.
+     */
+    @Test
+    public void testExtractDirOptionRecursive() throws Exception {
+        Path input = Paths.get(new URI(resourcePrefix + 
"/test_recursive_embedded.docx"));
+        Path pluginsDir = Paths.get("target/plugins");
+
+        // Test with -Z (recursive extraction)
+        String[] params = {"-Z",
+                "--extract-dir=" + extractDir.toAbsolutePath(),
+                "-p", pluginsDir.toAbsolutePath().toString(),
+                input.toAbsolutePath().toString()};
+
+        TikaCLI.main(params);
+
+        Set<String> fileNames = getFileNames(extractDir);
+
+        // Should have extracted files in the specified directory
+        assertTrue(fileNames.stream().anyMatch(f -> f.endsWith(".json")),
+                "Should have a .json metadata file in extractDir, got: " + 
fileNames);
+        assertTrue(fileNames.stream().anyMatch(f -> f.contains("-embed/")),
+                "Should have extracted embedded files in extractDir, got: " + 
fileNames);
+    }
+
     @Test
     public void testDefaultConfigException() throws Exception {
         //default xml parser will throw TikaException
diff --git 
a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
 
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
index 72531fcc66..d1089a3261 100644
--- 
a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
+++ 
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
@@ -260,10 +260,13 @@ public class TikaAsyncCLI {
                 throw new TikaConfigException("Input file/dir must exist: " + 
inputPath);
             }
             inputDir = inString;
-            if (Files.isRegularFile(inputPath)) {
-                outputDir = Paths.get(".").toAbsolutePath().toString();
-            } else {
-                outputDir = Paths.get("output").toAbsolutePath().toString();
+            // Only set default outputDir if not already specified via -o
+            if (outputDir == null) {
+                if (Files.isRegularFile(inputPath)) {
+                    outputDir = Paths.get(".").toAbsolutePath().toString();
+                } else {
+                    outputDir = 
Paths.get("output").toAbsolutePath().toString();
+                }
             }
         }
 
diff --git a/tika-pipes/tika-async-cli/src/main/resources/config-template.json 
b/tika-pipes/tika-async-cli/src/main/resources/config-template.json
index ee1efd49dc..15cd90b19f 100644
--- a/tika-pipes/tika-async-cli/src/main/resources/config-template.json
+++ b/tika-pipes/tika-async-cli/src/main/resources/config-template.json
@@ -53,10 +53,7 @@
       "basePath": "FETCHER_BASE_PATH",
       "countTotal": true,
       "fetcherId": "fsf",
-      "emitterId": "fse",
-      "onParseException": "EMIT",
-      "maxWaitMs": 600000,
-      "queueSize": 10000
+      "emitterId": "fse"
     }
   },
   "pipes": {

Reply via email to