This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4645-usability-scripts in repository https://gitbox.apache.org/repos/asf/tika.git
commit b9400211c81fe609a2b008b9719c2587388695da Author: tallison <[email protected]> AuthorDate: Sun Feb 1 11:42:06 2026 -0500 TIKA-4645 - usability scripts --- docs/advanced/integration-testing/tika-app.adoc | 398 +++++++++++++++++++++ docs/modules/ROOT/pages/migration-to-4x/index.adoc | 17 + .../main/java/org/apache/tika/cli/AsyncHelper.java | 16 + .../src/main/java/org/apache/tika/cli/TikaCLI.java | 55 +-- .../java/org/apache/tika/cli/AsyncHelperTest.java | 43 +++ .../test/java/org/apache/tika/cli/TikaCLITest.java | 51 +++ .../org/apache/tika/async/cli/TikaAsyncCLI.java | 11 +- .../src/main/resources/config-template.json | 5 +- 8 files changed, 565 insertions(+), 31 deletions(-) diff --git a/docs/advanced/integration-testing/tika-app.adoc b/docs/advanced/integration-testing/tika-app.adoc new file mode 100644 index 0000000000..ea0b846173 --- /dev/null +++ b/docs/advanced/integration-testing/tika-app.adoc @@ -0,0 +1,398 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + += Tika-App Integration Testing + +Integration tests for `tika-app` to be run from a distribution ZIP. 
+ +== Setup + +[source,bash] +---- +# Create test directory +mkdir -p /tmp/tika-app-test +cd /tmp/tika-app-test + +# Copy and extract distribution +cp /path/to/tika-app-4.0.0-SNAPSHOT.zip . +unzip tika-app-4.0.0-SNAPSHOT.zip +cd tika-app-4.0.0-SNAPSHOT + +# Get test files +cp /path/to/tika-main/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testPDF.pdf . +cp /path/to/tika-main/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/test_recursive_embedded.docx . +cp /path/to/tika-main/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testHTML.html . +---- + +== Test Cases + +=== Test 1: Basic Text Extraction + +[source,bash] +---- +java -jar tika-app.jar --text testPDF.pdf +---- + +*Expected:* Outputs extracted text from PDF. + +=== Test 2: Metadata Extraction + +[source,bash] +---- +java -jar tika-app.jar --metadata testPDF.pdf +---- + +*Expected:* Outputs key=value metadata pairs. + +=== Test 3: JSON Output with Pretty Print + +[source,bash] +---- +java -jar tika-app.jar --json --pretty-print testPDF.pdf +---- + +*Expected:* Clean, readable JSON output with metadata. + +=== Test 4: File Type Detection + +[source,bash] +---- +java -jar tika-app.jar --detect testPDF.pdf +---- + +*Expected:* Returns `application/pdf` + +=== Test 5: Non-existent File Handling + +[source,bash] +---- +java -jar tika-app.jar --text nonexistent_file.pdf +---- + +*Expected:* Clear error message (currently shows confusing "MalformedURLException: no protocol"). + +=== Test 6: Recursive JSON Output + +[source,bash] +---- +java -jar tika-app.jar --jsonRecursive test_recursive_embedded.docx +---- + +*Expected:* JSON array with metadata and content for main doc and all embedded documents. 
+ +=== Test 7: Stdin Input + +[source,bash] +---- +echo "Hello World" | java -jar tika-app.jar --text +---- + +*Expected:* Outputs "Hello World" + +=== Test 8: Extract Attachments (-z) + +[source,bash] +---- +mkdir -p /tmp/tika-app-test/extract-out +java -jar tika-app.jar -z --extract-dir=/tmp/tika-app-test/extract-out test_recursive_embedded.docx +ls /tmp/tika-app-test/extract-out +---- + +*Expected:* Creates .json metadata file and extracts embedded files to extract-out directory. + +=== Test 9: Recursive Extract (-Z) + +[source,bash] +---- +mkdir -p /tmp/tika-app-test/extract-recursive +java -jar tika-app.jar -Z --extract-dir=/tmp/tika-app-test/extract-recursive test_recursive_embedded.docx +ls -R /tmp/tika-app-test/extract-recursive +---- + +*Expected:* Extracts all nested embedded documents recursively. + +=== Test 10: Batch Mode (Simple) + +[source,bash] +---- +mkdir -p /tmp/tika-app-test/batch-input +mkdir -p /tmp/tika-app-test/batch-output +cp testPDF.pdf testHTML.html /tmp/tika-app-test/batch-input/ +java -jar tika-app.jar /tmp/tika-app-test/batch-input /tmp/tika-app-test/batch-output +ls /tmp/tika-app-test/batch-output +---- + +*Expected:* Creates .json files for each input file in output directory. + +=== Test 10b: Batch Mode with Output Options + +[source,bash] +---- +mkdir -p /tmp/tika-app-test/batch-output2 +java -jar tika-app.jar -J -t /tmp/tika-app-test/batch-input /tmp/tika-app-test/batch-output2 +ls /tmp/tika-app-test/batch-output2 +---- + +*Expected:* Creates .json files with text content (X-TIKA:content_handler should be ToTextContentHandler). + +=== Test 11: Version Check + +[source,bash] +---- +java -jar tika-app.jar --version +---- + +*Expected:* Returns `Apache Tika X.X.X` + +=== Test 12: List Parsers + +[source,bash] +---- +java -jar tika-app.jar --list-parsers +---- + +*Expected:* Hierarchical list of available parsers. 
+ +=== Test 13: Language Detection + +[source,bash] +---- +java -jar tika-app.jar --language testPDF.pdf +---- + +*Expected:* Returns detected language code. + +=== Test 14: Digest Computation + +[source,bash] +---- +java -jar tika-app.jar --digest=md5 --json testPDF.pdf +---- + +*Expected:* JSON output includes `X-TIKA:digest:MD5` field. + +=== Test 15: URL Input + +[source,bash] +---- +java -jar tika-app.jar --detect https://www.apache.org/ +---- + +*Expected:* Returns `text/html` + +=== Test 16: XMP Output + +[source,bash] +---- +java -jar tika-app.jar --xmp testPDF.pdf +---- + +*Expected:* Valid XMP metadata in RDF/XML format. + +=== Test 17: Boilerpipe Main Content + +[source,bash] +---- +java -jar tika-app.jar --text-main testHTML.html +---- + +*Expected:* Returns only main content, not boilerplate. + +=== Test 18: Depth Limiting + +[source,bash] +---- +java -jar tika-app.jar --maxEmbeddedDepth=1 --text test_recursive_embedded.docx +---- + +*Expected:* Limited depth of embedded document extraction. + +=== Test 19: GUI Mode + +[source,bash] +---- +java -jar tika-app.jar +---- + +*Expected:* Opens GUI (skip in headless environments). + +== Advanced Tests: Custom Config + +These tests require creating a custom tika-config.json file. 
+ +=== Test 20: Create Custom Config File + +Create `/tmp/tika-app-test/my-config.json`: +[source,json] +---- +{ + "content-handler-factory": { + "basic-content-handler-factory": { + "type": "TEXT", + "writeLimit": 100000, + "throwOnWriteLimitReached": false + } + }, + "parsers": [ + { + "default-parser": {} + }, + { + "pdf-parser": { + "extractActions": true, + "extractInlineImages": true, + "ocrStrategy": "NO_OCR" + } + }, + { + "ooxml-parser": { + "includeDeletedContent": true, + "includeMoveFromContent": true, + "extractMacros": true + } + } + ], + "fetchers": { + "fsf": { + "file-system-fetcher": { + "basePath": "/tmp/tika-app-test/batch-input", + "extractFileSystemMetadata": true + } + } + }, + "emitters": { + "fse": { + "file-system-emitter": { + "basePath": "/tmp/tika-app-test/config-output", + "fileExtension": "json", + "onExists": "REPLACE" + } + } + }, + "pipes-iterator": { + "file-system-pipes-iterator": { + "basePath": "/tmp/tika-app-test/batch-input", + "countTotal": true, + "fetcherId": "fsf", + "emitterId": "fse" + } + }, + "pipes": { + "parseMode": "RMETA", + "numClients": 2, + "timeoutMillis": 60000 + }, + "plugin-roots": "/tmp/tika-app-test/plugins" +} +---- + +=== Test 21: Run with Custom Config + +[source,bash] +---- +mkdir -p /tmp/tika-app-test/config-output +java -jar tika-app.jar /tmp/tika-app-test/my-config.json +ls /tmp/tika-app-test/config-output +---- + +*Expected:* Processes all files in batch-input using custom parser settings. + +=== Test 22: Async Mode with Config Flag + +[source,bash] +---- +java -jar tika-app.jar -a --config=/tmp/tika-app-test/my-config.json +---- + +*Expected:* Same as Test 21 but using explicit async flag. 
+ +=== Test 23: Unpack with Frictionless Format + +[source,bash] +---- +mkdir -p /tmp/tika-app-test/frictionless-out +java -jar tika-app.jar -Z --extract-dir=/tmp/tika-app-test/frictionless-out --unpack-format=FRICTIONLESS --unpack-include-metadata test_recursive_embedded.docx +ls /tmp/tika-app-test/frictionless-out +---- + +*Expected:* Extracts embedded files in Frictionless data package format with metadata.json. + +=== Test 24: Unpack to Directory (not zipped) + +[source,bash] +---- +mkdir -p /tmp/tika-app-test/unpack-dir-out +java -jar tika-app.jar -Z --extract-dir=/tmp/tika-app-test/unpack-dir-out --unpack-mode=DIRECTORY test_recursive_embedded.docx +ls -R /tmp/tika-app-test/unpack-dir-out +---- + +*Expected:* Extracts embedded files to a directory structure instead of a zip archive. + +=== Test 25: Batch with Multiple Workers + +[source,bash] +---- +mkdir -p /tmp/tika-app-test/multi-worker-out +java -jar tika-app.jar -n 4 /tmp/tika-app-test/batch-input /tmp/tika-app-test/multi-worker-out +---- + +*Expected:* Processes files using 4 parallel forked clients. + +=== Test 26: Batch with Custom Timeout + +[source,bash] +---- +mkdir -p /tmp/tika-app-test/timeout-out +java -jar tika-app.jar -T 30000 /tmp/tika-app-test/batch-input /tmp/tika-app-test/timeout-out +---- + +*Expected:* Processes files with a 30-second timeout per file. + +=== Test 27: Batch with Custom Heap + +[source,bash] +---- +mkdir -p /tmp/tika-app-test/heap-out +java -jar tika-app.jar -X 2g /tmp/tika-app-test/batch-input /tmp/tika-app-test/heap-out +---- + +*Expected:* Forked processes use 2GB heap. + +== Known Issues + +=== Issue 1: Confusing "no protocol" Error + +When a file doesn't exist, the error message is misleading: +[source] +---- +MalformedURLException: no protocol: nonexistent_file.pdf +---- + +The message should instead say "File not found". + +=== Issue 2: INFO Message on Every Command + +Every command prints an INFO message to stderr about convenience features. Use `2>/dev/null` to suppress it. 
+ +=== Issue 3: Config Dump Options Not Implemented + +These options are not yet implemented in 4.x: + +* `--dump-minimal-config` +* `--dump-current-config` +* `--dump-static-config` +* `--dump-static-full-config` diff --git a/docs/modules/ROOT/pages/migration-to-4x/index.adoc b/docs/modules/ROOT/pages/migration-to-4x/index.adoc index 20a7c5cf48..eebf29f3db 100644 --- a/docs/modules/ROOT/pages/migration-to-4x/index.adoc +++ b/docs/modules/ROOT/pages/migration-to-4x/index.adoc @@ -31,3 +31,20 @@ See the xref:roadmap.adoc[Roadmap] for version timelines and support schedules. * xref:migration-to-4x/design-notes-4x.adoc[Design Notes] - Architectural decisions and design rationale * xref:migration-to-4x/serialization-4x.adoc[Serialization] - JSON serialization design and implementation details + +== TODOs / Missing Features in 4.x + +The following features from 3.x are not yet implemented in 4.x: + +=== Config Serialization + +The following tika-app options for dumping configuration are not yet available: + +* `--dump-minimal-config` - Print minimal TikaConfig +* `--dump-current-config` - Print current TikaConfig +* `--dump-static-config` - Print static config +* `--dump-static-full-config` - Print static explicit config + +These require completing the JSON serialization support for TikaConfig objects. The underlying serialization infrastructure exists (see xref:migration-to-4x/serialization-4x.adoc[Serialization]), but the CLI integration is pending. + +*Workaround:* Manually create JSON config files using the template at `tika-pipes/tika-async-cli/src/main/resources/config-template.json` as a starting point. 
diff --git a/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java b/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java index 38a0094f79..e3561ecf5f 100644 --- a/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java +++ b/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java @@ -58,6 +58,22 @@ public class AsyncHelper { argList.add(mode); } else if (arg.equals(UNPACK_INCLUDE_METADATA)) { argList.add("--unpack-include-metadata"); + } else if (arg.equals("-t") || arg.equals("--text")) { + // Translate TikaCLI text output to TikaAsyncCLI handler type + argList.add("-h"); + argList.add("t"); + } else if (arg.equals("--html")) { + // Translate TikaCLI html output to TikaAsyncCLI handler type + // Note: TikaCLI uses -h for html, but TikaAsyncCLI uses -h for handler type + argList.add("-h"); + argList.add("h"); + } else if (arg.equals("-x") || arg.equals("--xml")) { + // Translate TikaCLI xml output to TikaAsyncCLI handler type + argList.add("-h"); + argList.add("x"); + } else if (arg.equals("-J") || arg.equals("--jsonRecursive")) { + // TikaAsyncCLI always outputs JSON with recursive metadata (RMETA mode) + // This is already the default, so we just skip this arg } else { argList.add(args[i]); } diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java index 5f388865c4..97ca90a489 100644 --- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java +++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java @@ -38,7 +38,6 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardCopyOption; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; @@ -276,25 +275,11 @@ public class TikaCLI { if (args.length == 1 && args[0].endsWith(".json")) { TikaAsyncCLI.main(args); return; - }; - //TODO -- are there other shortcuts? 
- Path tmpConfig = null; - try { - tmpConfig = Files.createTempFile("tika-config-", ".json"); - Files.copy(TikaCLI.class.getResourceAsStream("/tika-config-default-single-file.json"), - tmpConfig, StandardCopyOption.REPLACE_EXISTING); - List<String> argList = new ArrayList<>(); - argList.add("-c"); - argList.add(tmpConfig.toAbsolutePath().toString()); - for (String arg : args) { - argList.add(arg); - } - TikaAsyncCLI.main(argList.toArray(new String[0])); - } finally { - if (tmpConfig != null) { - Files.delete(tmpConfig); - } } + // For batch mode (two directories), pass directly to TikaAsyncCLI. + // It will create its own config with PluginsWriter that includes + // plugin-roots, fetcher, emitter, and pipes-iterator configuration. + TikaAsyncCLI.main(args); } /** @@ -350,12 +335,34 @@ public class TikaCLI { private boolean testForAsync(String[] args) { + // Single .json file is a config file for async mode + if (args.length == 1 && args[0].endsWith(".json")) { + return true; + } + if (args.length == 2) { if (Files.isDirectory(Paths.get(args[0]))) { return true; } } + // Check if last two args are directories (batch mode with options) + if (args.length >= 2) { + String lastArg = args[args.length - 1]; + String secondLastArg = args[args.length - 2]; + // Make sure neither looks like an option value + if (!lastArg.startsWith("-") && !secondLastArg.startsWith("-")) { + try { + if (Files.isDirectory(Paths.get(secondLastArg)) && + (Files.isDirectory(Paths.get(lastArg)) || !Files.exists(Paths.get(lastArg)))) { + return true; + } + } catch (Exception e) { + // Invalid path, not batch mode + } + } + } + for (String arg : args) { if (arg.equals("-a") || arg.equals("--async")) { return true; @@ -590,10 +597,12 @@ public class TikaCLI { out.println(); out.println(" --config=<tika-config.xml>"); out.println(" TikaConfig file. 
Must be specified before -g, -s, -f or the dump-x-config !"); - out.println(" --dump-minimal-config Print minimal TikaConfig"); - out.println(" --dump-current-config Print current TikaConfig"); - out.println(" --dump-static-config Print static config"); - out.println(" --dump-static-full-config Print static explicit config"); + // TODO: TIKA-XXXX - Re-enable config dump options once JSON serialization is complete + // These options are not yet implemented in 4.x due to the migration from XML to JSON config + // out.println(" --dump-minimal-config Print minimal TikaConfig"); + // out.println(" --dump-current-config Print current TikaConfig"); + // out.println(" --dump-static-config Print static config"); + // out.println(" --dump-static-full-config Print static explicit config"); out.println(" --convert-config-xml-to-json=<input.xml>,<output.json>"); out.println(" Convert legacy XML config to JSON format (parsers section only)"); out.println(""); diff --git a/tika-app/src/test/java/org/apache/tika/cli/AsyncHelperTest.java b/tika-app/src/test/java/org/apache/tika/cli/AsyncHelperTest.java index 9885feac3f..a26f247500 100644 --- a/tika-app/src/test/java/org/apache/tika/cli/AsyncHelperTest.java +++ b/tika-app/src/test/java/org/apache/tika/cli/AsyncHelperTest.java @@ -28,4 +28,47 @@ public class AsyncHelperTest { String[] expected = new String[]{"-c", "blah.json", "-i", "input.docx", "-o", "output/dir"}; assertArrayEquals(expected, AsyncHelper.translateArgs(args)); } + + @Test + public void testTextHandler() throws Exception { + String[] args = new String[]{"-t", "input", "output"}; + String[] expected = new String[]{"-h", "t", "input", "output"}; + assertArrayEquals(expected, AsyncHelper.translateArgs(args)); + } + + @Test + public void testTextHandlerLong() throws Exception { + String[] args = new String[]{"--text", "input", "output"}; + String[] expected = new String[]{"-h", "t", "input", "output"}; + assertArrayEquals(expected, AsyncHelper.translateArgs(args)); + } + 
+ @Test + public void testHtmlHandler() throws Exception { + String[] args = new String[]{"--html", "input", "output"}; + String[] expected = new String[]{"-h", "h", "input", "output"}; + assertArrayEquals(expected, AsyncHelper.translateArgs(args)); + } + + @Test + public void testXmlHandler() throws Exception { + String[] args = new String[]{"-x", "input", "output"}; + String[] expected = new String[]{"-h", "x", "input", "output"}; + assertArrayEquals(expected, AsyncHelper.translateArgs(args)); + } + + @Test + public void testJsonRecursiveSkipped() throws Exception { + // -J is the default in async mode, so it's just skipped + String[] args = new String[]{"-J", "-t", "input", "output"}; + String[] expected = new String[]{"-h", "t", "input", "output"}; + assertArrayEquals(expected, AsyncHelper.translateArgs(args)); + } + + @Test + public void testBatchModeWithOptions() throws Exception { + String[] args = new String[]{"-J", "-t", "/path/to/input", "/path/to/output"}; + String[] expected = new String[]{"-h", "t", "/path/to/input", "/path/to/output"}; + assertArrayEquals(expected, AsyncHelper.translateArgs(args)); + } } diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java index 0de27d2354..8c3d78cd34 100644 --- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java +++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java @@ -568,6 +568,57 @@ public class TikaCLITest { "Should have at least 2 files (json + embedded), got " + fileNames.size() + ": " + fileNames); } + /** + * Test that --extract-dir option correctly sets the output directory + * for both -z (shallow) and -Z (recursive) extraction modes. 
+ */ + @Test + public void testExtractDirOption() throws Exception { + Path input = Paths.get(new URI(resourcePrefix + "/test_recursive_embedded.docx")); + Path pluginsDir = Paths.get("target/plugins"); + + // Test with -z (shallow extraction) + String[] params = {"-z", + "--extract-dir=" + extractDir.toAbsolutePath(), + "-p", pluginsDir.toAbsolutePath().toString(), + input.toAbsolutePath().toString()}; + + TikaCLI.main(params); + + Set<String> fileNames = getFileNames(extractDir); + + // Should have extracted files in the specified directory, not current dir + assertTrue(fileNames.stream().anyMatch(f -> f.endsWith(".json")), + "Should have a .json metadata file in extractDir, got: " + fileNames); + assertTrue(fileNames.stream().anyMatch(f -> f.contains("-embed/")), + "Should have extracted embedded files in extractDir, got: " + fileNames); + } + + /** + * Test that --extract-dir option works with -Z (recursive) extraction. + */ + @Test + public void testExtractDirOptionRecursive() throws Exception { + Path input = Paths.get(new URI(resourcePrefix + "/test_recursive_embedded.docx")); + Path pluginsDir = Paths.get("target/plugins"); + + // Test with -Z (recursive extraction) + String[] params = {"-Z", + "--extract-dir=" + extractDir.toAbsolutePath(), + "-p", pluginsDir.toAbsolutePath().toString(), + input.toAbsolutePath().toString()}; + + TikaCLI.main(params); + + Set<String> fileNames = getFileNames(extractDir); + + // Should have extracted files in the specified directory + assertTrue(fileNames.stream().anyMatch(f -> f.endsWith(".json")), + "Should have a .json metadata file in extractDir, got: " + fileNames); + assertTrue(fileNames.stream().anyMatch(f -> f.contains("-embed/")), + "Should have extracted embedded files in extractDir, got: " + fileNames); + } + @Test public void testDefaultConfigException() throws Exception { //default xml parser will throw TikaException diff --git a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java 
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java index 72531fcc66..d1089a3261 100644 --- a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java +++ b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java @@ -260,10 +260,13 @@ public class TikaAsyncCLI { throw new TikaConfigException("Input file/dir must exist: " + inputPath); } inputDir = inString; - if (Files.isRegularFile(inputPath)) { - outputDir = Paths.get(".").toAbsolutePath().toString(); - } else { - outputDir = Paths.get("output").toAbsolutePath().toString(); + // Only set default outputDir if not already specified via -o + if (outputDir == null) { + if (Files.isRegularFile(inputPath)) { + outputDir = Paths.get(".").toAbsolutePath().toString(); + } else { + outputDir = Paths.get("output").toAbsolutePath().toString(); + } } } diff --git a/tika-pipes/tika-async-cli/src/main/resources/config-template.json b/tika-pipes/tika-async-cli/src/main/resources/config-template.json index ee1efd49dc..15cd90b19f 100644 --- a/tika-pipes/tika-async-cli/src/main/resources/config-template.json +++ b/tika-pipes/tika-async-cli/src/main/resources/config-template.json @@ -53,10 +53,7 @@ "basePath": "FETCHER_BASE_PATH", "countTotal": true, "fetcherId": "fsf", - "emitterId": "fse", - "onParseException": "EMIT", - "maxWaitMs": 600000, - "queueSize": 10000 + "emitterId": "fse" } }, "pipes": {
