This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4507 in repository https://gitbox.apache.org/repos/asf/tika.git
commit 97e90cb000d8154134da2ff373207da7afb00efc Author: tallison <[email protected]> AuthorDate: Mon Oct 6 09:52:05 2025 -0400 TIKA-4507 -- improve tika-eval-app's commandline in 4.x --- .../src/main/java/org/apache/tika/eval/app/EvalConfig.java | 8 ++++++++ .../java/org/apache/tika/eval/app/ExtractComparerRunner.java | 12 ++++++++++++ .../java/org/apache/tika/eval/app/ExtractProfileRunner.java | 12 +++++++++++- 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/EvalConfig.java b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/EvalConfig.java index 5525180ed..fc0d72f0a 100644 --- a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/EvalConfig.java +++ b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/EvalConfig.java @@ -85,4 +85,12 @@ public class EvalConfig { jdbcDriverClass + '\'' + ", forceDrop=" + forceDrop + ", maxFilesToAdd=" + maxFilesToAdd + ", maxTokens=" + maxTokens + ", maxContentLength=" + maxContentLength + ", numThreads=" + numWorkers + ", errorLogFile=" + errorLogFile + '}'; } + + public void setNumWorkers(int n) { + numWorkers = n; + } + + public void setMaxExtractLength(long m) { + maxExtractLength = m; + } } diff --git a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractComparerRunner.java b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractComparerRunner.java index 57f98d601..8f86ab81e 100644 --- a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractComparerRunner.java +++ b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractComparerRunner.java @@ -77,8 +77,11 @@ public class ExtractComparerRunner { + " If not specified, -extracts is crawled as is.").get()) .addOption(Option.builder("d").longOpt("db").hasArg().desc("optional: db path").get()) .addOption(Option.builder("c").longOpt("config").hasArg().desc("tika-eval json config file").get()) + 
.addOption(Option.builder("n").longOpt("numWorkers").hasArg().desc("number of worker threads").get()) + .addOption(Option.builder("m").longOpt("maxExtractLength").hasArg().desc("maximum extract length").get()) ; } + public static void main(String[] args) throws Exception { DefaultParser defaultCLIParser = new DefaultParser(); CommandLine commandLine = defaultCLIParser.parse(OPTIONS, args); @@ -87,6 +90,15 @@ public class ExtractComparerRunner { Path extractsBDir = commandLine.hasOption('b') ? Paths.get(commandLine.getOptionValue('b')) : Paths.get(USAGE_FAIL("Must specify extractsB dir: -b")); Path inputDir = commandLine.hasOption('i') ? Paths.get(commandLine.getOptionValue('i')) : extractsADir; String dbPath = commandLine.hasOption('d') ? commandLine.getOptionValue('d') : USAGE_FAIL("Must specify the db name: -d"); + + if (commandLine.hasOption('n')) { + evalConfig.setNumWorkers(Integer.parseInt(commandLine.getOptionValue('n'))); + } + + if (commandLine.hasOption('m')) { + evalConfig.setMaxExtractLength(Long.parseLong(commandLine.getOptionValue('m'))); + } + String jdbcString = getJdbcConnectionString(dbPath); execute(inputDir, extractsADir, extractsBDir, jdbcString, evalConfig); } diff --git a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractProfileRunner.java b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractProfileRunner.java index 221df02fa..a73a2f579 100644 --- a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractProfileRunner.java +++ b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractProfileRunner.java @@ -76,8 +76,11 @@ public class ExtractProfileRunner { + " If not specified, -extracts is crawled as is.").get()) .addOption(Option.builder("d").longOpt("db").hasArg().desc("optional: db path").get()) .addOption(Option.builder("c").longOpt("config").hasArg().desc("tika-eval json config file").get()) - ;
+ .addOption(Option.builder("n").longOpt("numWorkers").hasArg().desc("number of worker threads").get()) + .addOption(Option.builder("m").longOpt("maxExtractLength").hasArg().desc("maximum extract length").get()) + ; } + public static void main(String[] args) throws Exception { DefaultParser defaultCLIParser = new DefaultParser(); CommandLine commandLine = defaultCLIParser.parse(OPTIONS, args); @@ -86,6 +89,13 @@ public class ExtractProfileRunner { Path inputDir = commandLine.hasOption('i') ? Paths.get(commandLine.getOptionValue('i')) : extractsDir; String dbPath = commandLine.hasOption('d') ? commandLine.getOptionValue('d') : USAGE_FAIL("Must specify the db name: -d"); String jdbcString = getJdbcConnectionString(dbPath); + if (commandLine.hasOption('n')) { + evalConfig.setNumWorkers(Integer.parseInt(commandLine.getOptionValue('n'))); + } + + if (commandLine.hasOption('m')) { + evalConfig.setMaxExtractLength(Long.parseLong(commandLine.getOptionValue('m'))); + } execute(inputDir, extractsDir, jdbcString, evalConfig); }
