This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new a0b088749 TIKA-4517 -- improve async cli (#2365)
a0b088749 is described below

commit a0b088749c451d65af1580a5fa59ffcda5bbc710
Author: Tim Allison <[email protected]>
AuthorDate: Tue Oct 14 20:07:15 2025 -0400

    TIKA-4517 -- improve async cli (#2365)
---
 .../main/java/org/apache/tika/cli/AsyncHelper.java |  42 +++-----
 .../src/main/java/org/apache/tika/cli/TikaCLI.java |  30 ++----
 .../java/org/apache/tika/cli/AsyncHelperTest.java  |  31 ++++++
 .../apache/tika/async/cli/SimpleAsyncConfig.java   |  10 +-
 .../org/apache/tika/async/cli/TikaAsyncCLI.java    | 114 +++++++++++++++++----
 .../apache/tika/async/cli/AsyncCliParserTest.java  |   5 +-
 .../tika/async/cli/TikaConfigAsyncWriterTest.java  |   7 +-
 7 files changed, 162 insertions(+), 77 deletions(-)

diff --git a/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java b/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java
index a9cc2330c..f8189cf69 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java
@@ -17,45 +17,27 @@
 package org.apache.tika.cli;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 
 
 public class AsyncHelper {
+
+    private static final String TIKA_CONFIG_KEY = "--config=";
+
     public static String[] translateArgs(String[] args) {
         List<String> argList = new ArrayList<>();
-        if (args.length == 2) {
-            if (args[0].startsWith("-Z")) {
-                argList.add("-Z");
-                argList.add("-i");
-                argList.add(args[1]);
-                argList.add("-o");
-                argList.add(args[1]);
-                return argList.toArray(new String[0]);
-            } else if (args[0].startsWith("-") || args[1].startsWith("-")) {
-                argList.add(args[0]);
-                argList.add(args[1]);
-                return argList.toArray(new String[0]);
+        for (int i = 0; i < args.length; i++) {
+            String arg = args[i];
+            if (arg.startsWith(TIKA_CONFIG_KEY)) {
+                String c = arg.substring(TIKA_CONFIG_KEY.length());
+                argList.add("-c");
+                argList.add(c);
+            } else if (arg.equals("-a")) {
+                //do nothing
             } else {
-                argList.add("-i");
-                argList.add(args[0]);
-                argList.add("-o");
-                argList.add(args[1]);
-                return argList.toArray(new String[0]);
-            }
-        }
-        if (args.length == 3) {
-            if (args[0].equals("-Z") && ! args[1].startsWith("-") && ! 
args[2].startsWith("-")) {
-                argList.add("-Z");
-                argList.add("-i");
-                argList.add(args[1]);
-                argList.add("-o");
-                argList.add(args[2]);
-                return argList.toArray(new String[0]);
+                argList.add(args[i]);
             }
         }
-        argList.addAll(Arrays.asList(args));
-        argList.remove("-a");
         return argList.toArray(new String[0]);
     }
 }
diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 7706c0f59..28a9b29c7 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -276,11 +276,11 @@ public class TikaCLI {
             
Files.copy(TikaCLI.class.getResourceAsStream("/tika-config-default-single-file.xml"),
                     tmpConfig, StandardCopyOption.REPLACE_EXISTING);
             List<String> argList = new ArrayList<>();
+            argList.add("-c");
+            argList.add(tmpConfig.toAbsolutePath().toString());
             for (String arg : args) {
                 argList.add(arg);
             }
-            argList.add("-c");
-            argList.add(tmpConfig.toAbsolutePath().toString());
             TikaAsyncCLI.main(argList.toArray(new String[0]));
         } finally {
             if (tmpConfig != null) {
@@ -625,27 +625,15 @@ public class TikaCLI {
         out.println("    Specify two directories as args with no other args:");
         out.println("         java -jar tika-app.jar <inputDirectory> 
<outputDirectory>");
         out.println();
-        out.println("Batch Options:");
-        out.println("    -i  or --inputDir          Input directory");
-        out.println("    -o  or --outputDir         Output directory");
-        out.println("    -numConsumers              Number of processing 
threads");
-        out.println("    -bc                        Batch config file");
-        out.println("    -maxRestarts               Maximum number of times 
the ");
-        out.println("                               watchdog process will 
restart the forked process.");
-        out.println("    -timeoutThresholdMillis    Number of milliseconds 
allowed to a parse");
-        out.println("                               before the process is 
terminated and restarted");
-        out.println("    -fileList                  List of files to process, 
with");
-        out.println("                               paths relative to the 
input directory");
-        out.println("    -includeFilePat            Regular expression to 
determine which");
-        out.println("                               files to process, e.g. 
\"(?i)\\.pdf\"");
-        out.println("    -excludeFilePat            Regular expression to 
determine which");
-        out.println("                               files to avoid processing, 
e.g. \"(?i)\\.pdf\"");
-        out.println("    -maxFileSizeBytes          Skip files longer than 
this value");
+        out.println("Batch/Pipes Options:");
+        out.println("    -i                         Input directory");
+        out.println("    -o                         Output directory");
+        out.println("    -n                         Number of forked 
processes");
+        out.println("    -X                         -Xmx in the forked 
processes");
+        out.println("    -T                         Timeout in milliseconds");
+        out.println("    -Z                         Recursively unpack all the 
attachments, too");
         out.println();
-        out.println("    Control the type of output with -x, -h, -t and/or 
-J.");
         out.println();
-        out.println("    To modify forked process jvm args, prepend \"J\" as 
in:");
-        out.println("    -JXmx4g or -JDlog4j.configuration=file:log4j.xml.");
     }
 
     private void version() {
diff --git a/tika-app/src/test/java/org/apache/tika/cli/AsyncHelperTest.java b/tika-app/src/test/java/org/apache/tika/cli/AsyncHelperTest.java
new file mode 100644
index 000000000..8b1d79d10
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/cli/AsyncHelperTest.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.cli;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+
+import org.junit.jupiter.api.Test;
+
+public class AsyncHelperTest {
+
+    @Test
+    public void testBasic() throws Exception {
+        String[] args = new String[]{"-a", "--config=blah.xml", "-i", 
"input.docx", "-o", "output/dir"};
+        String[] expected = new String[]{"-c", "blah.xml", "-i", "input.docx", 
"-o", "output/dir"};
+        assertArrayEquals(expected, AsyncHelper.translateArgs(args));
+    }
+}
diff --git a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/SimpleAsyncConfig.java b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/SimpleAsyncConfig.java
index 0c3987165..e8c48f663 100644
--- a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/SimpleAsyncConfig.java
+++ b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/SimpleAsyncConfig.java
@@ -16,6 +16,8 @@
  */
 package org.apache.tika.async.cli;
 
+import org.apache.tika.sax.BasicContentHandlerFactory;
+
 class SimpleAsyncConfig {
 
     private String inputDir;
@@ -26,10 +28,11 @@ class SimpleAsyncConfig {
     private String fileList;
     private String tikaConfig;//path to the tikaConfig file to be used in the 
forked process
     private boolean extractBytes;
+    private final BasicContentHandlerFactory.HANDLER_TYPE handlerType;
 
     //TODO -- switch to a builder
     public SimpleAsyncConfig(String inputDir, String outputDir, Integer 
numClients, Long timeoutMs, String xmx, String fileList,
-                             String tikaConfig, boolean extractBytes) {
+                             String tikaConfig, 
BasicContentHandlerFactory.HANDLER_TYPE handlerType, boolean extractBytes) {
         this.inputDir = inputDir;
         this.outputDir = outputDir;
         this.numClients = numClients;
@@ -37,6 +40,7 @@ class SimpleAsyncConfig {
         this.xmx = xmx;
         this.fileList = fileList;
         this.tikaConfig = tikaConfig;
+        this.handlerType = handlerType;
         this.extractBytes = extractBytes;
     }
 
@@ -71,4 +75,8 @@ class SimpleAsyncConfig {
     public boolean isExtractBytes() {
         return extractBytes;
     }
+
+    public BasicContentHandlerFactory.HANDLER_TYPE getHandlerType() {
+        return handlerType;
+    }
 }
diff --git a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
index fe4377213..8dff25ab3 100644
--- a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
+++ b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
@@ -33,11 +33,13 @@ import org.slf4j.LoggerFactory;
 import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.pipes.core.FetchEmitTuple;
+import org.apache.tika.pipes.core.HandlerConfig;
 import org.apache.tika.pipes.core.async.AsyncProcessor;
 import org.apache.tika.pipes.core.emitter.EmitKey;
 import org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig;
 import org.apache.tika.pipes.core.fetcher.FetchKey;
 import org.apache.tika.pipes.core.pipesiterator.PipesIterator;
+import org.apache.tika.sax.BasicContentHandlerFactory;
 import org.apache.tika.utils.StringUtils;
 
 public class TikaAsyncCLI {
@@ -49,11 +51,11 @@ public class TikaAsyncCLI {
         Options options = new Options();
         options.addOption("i", "inputDir", true, "input directory");
         options.addOption("o", "outputDir", true, "output directory");
-
         options.addOption("n", "numClients", true, "number of forked clients");
-        options.addOption("x", "Xmx", true, "heap for the forked clients in 
usual jvm heap amount, e.g. -x 1g");
+        options.addOption("X", "Xmx", true, "heap for the forked clients in 
usual jvm heap amount, e.g. -x 1g");
         options.addOption("?", "help", false, "this help message");
-        options.addOption("t", "timeoutMs", true, "timeout for each parse in 
milliseconds");
+        options.addOption("T", "timeoutMs", true, "timeout for each parse in 
milliseconds");
+        options.addOption("h", "handlerType", true, "handler type: t=text, 
h=html, x=xml, b=body, i=ignore");
         options.addOption("l", "fileList", true, "file list");
         options.addOption("c", "config", true, "tikaConfig to inherit from -- 
" +
                 "commandline options will not overwrite existing iterators, 
emitters, fetchers and async");
@@ -72,12 +74,12 @@ public class TikaAsyncCLI {
 
     private static void processCommandLine(String[] args) throws Exception {
         if (args.length == 1) {
-            processWithTikaConfig(PipesIterator.build(Paths.get(args[0])), 
Paths.get(args[0]), false);
+            processWithTikaConfig(PipesIterator.build(Paths.get(args[0])), 
Paths.get(args[0]), null);
             return;
 
         }
         if (args.length == 2 && args[0].equals("-c")) {
-            processWithTikaConfig(PipesIterator.build(Paths.get(args[1])), 
Paths.get(args[1]), false);
+            processWithTikaConfig(PipesIterator.build(Paths.get(args[1])), 
Paths.get(args[1]), null);
             return;
         }
         SimpleAsyncConfig simpleAsyncConfig = parseCommandLine(args);
@@ -88,7 +90,7 @@ public class TikaAsyncCLI {
             TikaConfigAsyncWriter tikaConfigAsyncWriter = new 
TikaConfigAsyncWriter(simpleAsyncConfig);
             tikaConfigAsyncWriter.write(tikaConfig);
             PipesIterator pipesIterator = buildPipesIterator(tikaConfig, 
simpleAsyncConfig);
-            processWithTikaConfig(pipesIterator, tikaConfig, 
simpleAsyncConfig.isExtractBytes());
+            processWithTikaConfig(pipesIterator, tikaConfig, 
simpleAsyncConfig);
         } finally {
             if (tikaConfig != null) {
                 Files.delete(tikaConfig);
@@ -103,23 +105,24 @@ public class TikaAsyncCLI {
         }
         Path p = Paths.get(simpleAsyncConfig.getInputDir());
         if (Files.isRegularFile(p)) {
-            return new SingleFilePipesIterator(p.getFileName().toString(), 
simpleAsyncConfig.isExtractBytes());
+            return new SingleFilePipesIterator(p.getFileName().toString());
         }
         return PipesIterator.build(tikaConfig);
     }
 
     //not private for testing purposes
-    static SimpleAsyncConfig parseCommandLine(String[] args) throws 
ParseException, IOException {
+    static SimpleAsyncConfig parseCommandLine(String[] args) throws 
TikaConfigException, ParseException, IOException {
         if (args.length == 2 && ! args[0].startsWith("-")) {
             return new SimpleAsyncConfig(args[0], args[1], null,
-                    null, null, null, null, false);
+                    null, null, null, null,
+                    BasicContentHandlerFactory.HANDLER_TYPE.TEXT, false);
         }
 
         Options options = getOptions();
 
         CommandLineParser cliParser = new DefaultParser();
 
-        CommandLine line = cliParser.parse(options, args);
+        CommandLine line = cliParser.parse(options, args, true);
         if (line.hasOption("help")) {
             usage(options);
         }
@@ -130,6 +133,7 @@ public class TikaAsyncCLI {
         Integer numClients = null;
         String fileList = null;
         String tikaConfig = null;
+        BasicContentHandlerFactory.HANDLER_TYPE handlerType = 
BasicContentHandlerFactory.HANDLER_TYPE.TEXT;
         boolean extractBytes = false;
         if (line.hasOption("i")) {
             inputDir = line.getOptionValue("i");
@@ -137,11 +141,11 @@ public class TikaAsyncCLI {
         if (line.hasOption("o")) {
             outputDir = line.getOptionValue("o");
         }
-        if (line.hasOption("x")) {
-            xmx = line.getOptionValue("x");
+        if (line.hasOption("X")) {
+            xmx = line.getOptionValue("X");
         }
-        if (line.hasOption("t")) {
-            timeoutMs = Long.parseLong(line.getOptionValue("t"));
+        if (line.hasOption("T")) {
+            timeoutMs = Long.parseLong(line.getOptionValue("T"));
         }
         if (line.hasOption("n")) {
             numClients = Integer.parseInt(line.getOptionValue("n"));
@@ -155,18 +159,71 @@ public class TikaAsyncCLI {
         if (line.hasOption("Z")) {
             extractBytes = true;
         }
+        if (line.hasOption('h')) {
+            handlerType = getHandlerType(line.getOptionValue('h'));
+        }
+        if (line.getArgList().size() > 2) {
+            throw new TikaConfigException("Can't have more than 2 unknown 
args: " + line.getArgList());
+        }
+
+        if (line.getArgList().size() == 2) {
+            if (inputDir != null || outputDir != null) {
+                throw new TikaConfigException("Can only set inputDir and 
outputDir once. Extra args: " + line.getArgList());
+            }
+            String inString = line.getArgList().get(0);
+            String outString = line.getArgList().get(1);
+            if (inString.startsWith("-") || outString.startsWith("-")) {
+                throw new TikaConfigException("Found an unknown arg in one of 
the last two args: " + line.getArgList());
+            }
+            Path p = Paths.get(inString);
+            if (! Files.isDirectory(p) && ! Files.isRegularFile(p)) {
+                throw new TikaConfigException("Input file/dir must exist: " + 
p);
+            }
+            inputDir = inString;
+            outputDir = outString;
+        } else if (line.getArgList().size() == 1) {
+            if (inputDir != null) {
+                throw new TikaConfigException("Can only set inputDir once. 
Extra args: " + line.getArgList());
+            }
+            String inString = line.getArgList().get(0);
+            if (inString.startsWith("-")) {
+                throw new TikaConfigException("Found an unknown arg in one of 
the last arg: " + inString);
+            }
+            Path inputPath = Paths.get(inString);
+            if (! Files.isDirectory(inputPath) && ! 
Files.isRegularFile(inputPath)) {
+                throw new TikaConfigException("Input file/dir must exist: " + 
inputPath);
+            }
+            inputDir = inString;
+            if (Files.isRegularFile(inputPath)) {
+                outputDir = Paths.get(".").toAbsolutePath().toString();
+            } else {
+                outputDir = Paths.get("output").toAbsolutePath().toString();
+            }
+        }
 
         return new SimpleAsyncConfig(inputDir, outputDir,
-                numClients, timeoutMs, xmx, fileList, tikaConfig, 
extractBytes);
+                numClients, timeoutMs, xmx, fileList, tikaConfig, handlerType, 
extractBytes);
+    }
+
+    private static BasicContentHandlerFactory.HANDLER_TYPE 
getHandlerType(String t) throws TikaConfigException {
+        return switch (t) {
+            case "x" -> BasicContentHandlerFactory.HANDLER_TYPE.XML;
+            case "h" -> BasicContentHandlerFactory.HANDLER_TYPE.HTML;
+            case "b" -> BasicContentHandlerFactory.HANDLER_TYPE.BODY;
+            case "i" -> BasicContentHandlerFactory.HANDLER_TYPE.IGNORE;
+            case "t" -> BasicContentHandlerFactory.HANDLER_TYPE.TEXT;
+            default -> throw new TikaConfigException("Can't understand " + t + 
" as a handler type. Must be one of: x(ml), h(tml), b(ody), i(gnore), t(ext)");
+        };
     }
 
 
-    private static void processWithTikaConfig(PipesIterator pipesIterator, 
Path tikaConfigPath, boolean extractBytes) throws Exception {
+    private static void processWithTikaConfig(PipesIterator pipesIterator, 
Path tikaConfigPath, SimpleAsyncConfig asyncConfig) throws Exception {
         long start = System.currentTimeMillis();
         try (AsyncProcessor processor = new AsyncProcessor(tikaConfigPath, 
pipesIterator)) {
 
             for (FetchEmitTuple t : pipesIterator) {
-                configureExtractBytes(t, extractBytes);
+                configureExtractBytes(t, asyncConfig);
+                configureHandler(t, asyncConfig);
                 boolean offered = processor.offer(t, TIMEOUT_MS);
                 if (!offered) {
                     throw new TimeoutException("timed out waiting to add a 
fetch emit tuple");
@@ -186,8 +243,23 @@ public class TikaAsyncCLI {
         }
     }
 
-    private static void configureExtractBytes(FetchEmitTuple t, boolean 
extractBytes) {
-        if (! extractBytes) {
+    private static void configureHandler(FetchEmitTuple t, SimpleAsyncConfig 
asyncConfig) {
+        if (asyncConfig == null) {
+            return;
+        }
+        if (asyncConfig.getHandlerType() == 
BasicContentHandlerFactory.HANDLER_TYPE.TEXT) {
+            return;
+        }
+        HandlerConfig handlerConfig = new 
HandlerConfig(asyncConfig.getHandlerType(), HandlerConfig.PARSE_MODE.RMETA,
+                -1, -1, false);
+        t.getParseContext().set(HandlerConfig.class, handlerConfig);
+    }
+
+    private static void configureExtractBytes(FetchEmitTuple t, 
SimpleAsyncConfig asyncConfig) {
+        if (asyncConfig == null) {
+            return;
+        }
+        if (!asyncConfig.isExtractBytes()) {
             return;
         }
         ParseContext parseContext = t.getParseContext();
@@ -213,11 +285,9 @@ public class TikaAsyncCLI {
 
     private static class SingleFilePipesIterator extends PipesIterator {
         private final String fName;
-        private final boolean extractBytes;
-        public SingleFilePipesIterator(String string, boolean extractBytes) {
+        public SingleFilePipesIterator(String string) {
             super();
             this.fName = string;
-            this.extractBytes = extractBytes;
         }
 
         @Override
diff --git a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncCliParserTest.java b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncCliParserTest.java
index 4e38aac9c..9d3941cd8 100644
--- a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncCliParserTest.java
+++ b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncCliParserTest.java
@@ -21,6 +21,8 @@ import static org.junit.jupiter.api.Assertions.assertNull;
 
 import org.junit.jupiter.api.Test;
 
+import org.apache.tika.sax.BasicContentHandlerFactory;
+
 public class AsyncCliParserTest {
 
     @Test
@@ -69,13 +71,14 @@ public class AsyncCliParserTest {
     @Test
     public void testAll() throws Exception {
         SimpleAsyncConfig simpleAsyncConfig = TikaAsyncCLI.parseCommandLine(
-                new String[]{"-i", "input", "-o", "output", "-n", "5", "-t", 
"30000", "-x", "1g"});
+                new String[]{"-i", "input", "-o", "output", "-n", "5", "-T", 
"30000", "-X", "1g", "-h", "x"});
         assertEquals("input", simpleAsyncConfig.getInputDir());
         assertEquals("output", simpleAsyncConfig.getOutputDir());
         assertNull(simpleAsyncConfig.getFileList());
         assertEquals(5, simpleAsyncConfig.getNumClients());
         assertEquals(30000L, simpleAsyncConfig.getTimeoutMs());
         assertEquals("1g", simpleAsyncConfig.getXmx());
+        assertEquals(BasicContentHandlerFactory.HANDLER_TYPE.XML, 
simpleAsyncConfig.getHandlerType());
     }
 
     //TODO -- test for file list with and without inputDir
diff --git a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/TikaConfigAsyncWriterTest.java b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/TikaConfigAsyncWriterTest.java
index adafdafd6..7db2dd133 100644
--- a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/TikaConfigAsyncWriterTest.java
+++ b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/TikaConfigAsyncWriterTest.java
@@ -33,6 +33,7 @@ import org.w3c.dom.Node;
 import org.xml.sax.SAXException;
 
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.sax.BasicContentHandlerFactory;
 import org.apache.tika.utils.XMLReaderUtils;
 
 public class TikaConfigAsyncWriterTest {
@@ -42,7 +43,8 @@ public class TikaConfigAsyncWriterTest {
     public void testBasic(@TempDir Path dir) throws Exception {
         Path p = 
Paths.get(TikaConfigAsyncWriter.class.getResource("/configs/TIKA-4508-parsers.xml").toURI());
         SimpleAsyncConfig simpleAsyncConfig = new SimpleAsyncConfig("input", 
"output", 4,
-                10000L, "-Xmx1g", null, p.toAbsolutePath().toString(), false);
+                10000L, "-Xmx1g", null, p.toAbsolutePath().toString(),
+                BasicContentHandlerFactory.HANDLER_TYPE.TEXT, false);
         Path target = dir.resolve("combined.xml");
         TikaConfigAsyncWriter writer = new 
TikaConfigAsyncWriter(simpleAsyncConfig);
         writer.write(target);
@@ -56,7 +58,8 @@ public class TikaConfigAsyncWriterTest {
     public void testDontOverwriteEmitters(@TempDir Path dir) throws Exception {
         Path p = 
Paths.get(TikaConfigAsyncWriter.class.getResource("/configs/TIKA-4508-emitters.xml").toURI());
         SimpleAsyncConfig simpleAsyncConfig = new SimpleAsyncConfig("input", 
"output", 4,
-                10000L, "-Xmx1g", null, p.toAbsolutePath().toString(), false);
+                10000L, "-Xmx1g", null, p.toAbsolutePath().toString(),
+                BasicContentHandlerFactory.HANDLER_TYPE.TEXT, false);
         Path target = dir.resolve("combined.xml");
         TikaConfigAsyncWriter writer = new 
TikaConfigAsyncWriter(simpleAsyncConfig);
         writer.write(target);

Reply via email to