This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4507
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 97e90cb000d8154134da2ff373207da7afb00efc
Author: tallison <[email protected]>
AuthorDate: Mon Oct 6 09:52:05 2025 -0400

    TIKA-4507 -- improve tika-eval-app's commandline in 4.x
---
 .../src/main/java/org/apache/tika/eval/app/EvalConfig.java   |  8 ++++++++
 .../java/org/apache/tika/eval/app/ExtractComparerRunner.java | 12 ++++++++++++
 .../java/org/apache/tika/eval/app/ExtractProfileRunner.java  | 12 +++++++++++-
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/EvalConfig.java b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/EvalConfig.java
index 5525180ed..fc0d72f0a 100644
--- a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/EvalConfig.java
+++ b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/EvalConfig.java
@@ -85,4 +85,12 @@ public class EvalConfig {
                 jdbcDriverClass + '\'' + ", forceDrop=" + forceDrop + ", 
maxFilesToAdd=" + maxFilesToAdd + ", maxTokens=" + maxTokens + ", 
maxContentLength=" + maxContentLength +
                 ", numThreads=" + numWorkers + ", errorLogFile=" + 
errorLogFile + '}';
     }
+
+    public void setNumWorkers(int n) {
+        numWorkers = n;
+    }
+
+    public void setMaxExtractLength(long m) {
+        maxExtractLength = m;
+    }
 }
diff --git a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractComparerRunner.java b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractComparerRunner.java
index 57f98d601..8f86ab81e 100644
--- a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractComparerRunner.java
+++ b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractComparerRunner.java
@@ -77,8 +77,11 @@ public class ExtractComparerRunner {
                         + " If not specified, -extracts is crawled as 
is.").get())
                 
.addOption(Option.builder("d").longOpt("db").hasArg().desc("optional: db 
path").get())
                 
.addOption(Option.builder("c").longOpt("config").hasArg().desc("tika-eval json 
config file").get())
+                
.addOption(Option.builder("n").longOpt("numWorkers").hasArg().desc("number of 
worker threads").get())
+                
.addOption(Option.builder("m").longOpt("maxExtractLength").hasArg().desc("maximum
 extract length").get())
                 ;
     }
+
     public static void main(String[] args) throws Exception {
         DefaultParser defaultCLIParser = new DefaultParser();
         CommandLine commandLine = defaultCLIParser.parse(OPTIONS, args);
@@ -87,6 +90,15 @@ public class ExtractComparerRunner {
         Path extractsBDir = commandLine.hasOption('b') ? 
Paths.get(commandLine.getOptionValue('b')) : Paths.get(USAGE_FAIL("Must specify 
extractsB dir: -b"));
         Path inputDir = commandLine.hasOption('i') ? 
Paths.get(commandLine.getOptionValue('i')) : extractsADir;
         String dbPath = commandLine.hasOption('d') ? 
commandLine.getOptionValue('d') : USAGE_FAIL("Must specify the db name: -d");
+
+        if (commandLine.hasOption('n')) {
+            
evalConfig.setNumWorkers(Integer.parseInt(commandLine.getOptionValue('n')));
+        }
+
+        if (commandLine.hasOption('m')) {
+            
evalConfig.setMaxExtractLength(Long.parseLong(commandLine.getOptionValue('m')));
+        }
+
         String jdbcString = getJdbcConnectionString(dbPath);
         execute(inputDir, extractsADir, extractsBDir, jdbcString, evalConfig);
     }
diff --git a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractProfileRunner.java b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractProfileRunner.java
index 221df02fa..a73a2f579 100644
--- a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractProfileRunner.java
+++ b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractProfileRunner.java
@@ -76,8 +76,11 @@ public class ExtractProfileRunner {
                         + " If not specified, -extracts is crawled as 
is.").get())
                 
.addOption(Option.builder("d").longOpt("db").hasArg().desc("optional: db 
path").get())
                 
.addOption(Option.builder("c").longOpt("config").hasArg().desc("tika-eval json 
config file").get())
-                ;
+                
.addOption(Option.builder("n").longOpt("numWorkers").hasArg().desc("number of 
worker threads").get())
+                
.addOption(Option.builder("m").longOpt("maxExtractLength").hasArg().desc("maximum
 extract length").get())
+        ;
     }
+
     public static void main(String[] args) throws Exception {
         DefaultParser defaultCLIParser = new DefaultParser();
         CommandLine commandLine = defaultCLIParser.parse(OPTIONS, args);
@@ -86,6 +89,13 @@ public class ExtractProfileRunner {
         Path inputDir = commandLine.hasOption('i') ? 
Paths.get(commandLine.getOptionValue('i')) : extractsDir;
         String dbPath = commandLine.hasOption('d') ? 
commandLine.getOptionValue('d') : USAGE_FAIL("Must specify the db name: -d");
         String jdbcString = getJdbcConnectionString(dbPath);
+        if (commandLine.hasOption('n')) {
+            
evalConfig.setNumWorkers(Integer.parseInt(commandLine.getOptionValue('n')));
+        }
+
+        if (commandLine.hasOption('m')) {
+            
evalConfig.setMaxExtractLength(Long.parseLong(commandLine.getOptionValue('m')));
+        }
         execute(inputDir, extractsDir, jdbcString, evalConfig);
     }
 

Reply via email to