This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch cli-tweaks in repository https://gitbox.apache.org/repos/asf/tika.git
commit 0f37509d5f82636faf9f2a52c2c5d6a2d69e6f7e Author: tallison <[email protected]> AuthorDate: Wed Mar 4 15:07:59 2026 -0500 cli tweaks --- .../org/apache/tika/async/cli/PluginsWriter.java | 20 +++++++--- .../org/apache/tika/async/cli/TikaAsyncCLI.java | 44 ++++++++++++++++++---- .../apache/tika/server/core/TikaServerProcess.java | 29 ++++++++++++-- 3 files changed, 77 insertions(+), 16 deletions(-) diff --git a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java index 7871bfb9a5..88bb26d631 100644 --- a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java +++ b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java @@ -85,11 +85,15 @@ public class PluginsWriter { } // Set plugin-roots - String pluginString = StringUtils.isBlank(simpleAsyncConfig.getPluginsDir()) ? - "plugins" : simpleAsyncConfig.getPluginsDir(); - Path plugins = Paths.get(pluginString); - if (Files.isDirectory(plugins)) { - pluginString = plugins.toAbsolutePath().toString(); + String pluginString; + if (!StringUtils.isBlank(simpleAsyncConfig.getPluginsDir())) { + pluginString = simpleAsyncConfig.getPluginsDir(); + Path plugins = Paths.get(pluginString); + if (Files.isDirectory(plugins)) { + pluginString = plugins.toAbsolutePath().toString(); + } + } else { + pluginString = TikaAsyncCLI.resolveDefaultPluginsDir(); } root.put("plugin-roots", pluginString); @@ -98,7 +102,11 @@ public class PluginsWriter { pipesConfig.setNumClients(simpleAsyncConfig.getNumClients() == null ? 2 : simpleAsyncConfig.getNumClients()); if (simpleAsyncConfig.getXmx() != null) { - pipesConfig.setForkedJvmArgs(new ArrayList<>(List.of(simpleAsyncConfig.getXmx()))); + String xmx = simpleAsyncConfig.getXmx(); + if (!xmx.startsWith("-")) { + xmx = "-Xmx" + xmx; + } + pipesConfig.setForkedJvmArgs(new ArrayList<>(List.of(xmx))); } if (simpleAsyncConfig.isContentOnly()) { pipesConfig.setParseMode(ParseMode.CONTENT_ONLY); diff --git a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java index a28e0b26f9..f05ef724e3 100644 --- a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java +++ b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java @@ -64,7 +64,7 @@ public class TikaAsyncCLI { options.addOption("i", "inputDir", true, "input directory"); options.addOption("o", "outputDir", true, "output directory"); options.addOption("n", "numClients", true, "number of forked clients"); - options.addOption("X", "Xmx", true, "heap for the forked clients in usual jvm heap amount, e.g. -X 1g"); + options.addOption(null, "Xmx", true, "heap for the forked clients, e.g. --Xmx 1g"); options.addOption("?", "help", false, "this help message"); options.addOption("T", "timeoutMs", true, "timeout for each parse in milliseconds"); options.addOption("h", "handlerType", true, "handler type: t=text, h=html, x=xml, m=markdown, b=body, i=ignore"); @@ -196,8 +196,8 @@ public class TikaAsyncCLI { if (line.hasOption("o")) { outputDir = line.getOptionValue("o"); } - if (line.hasOption("X")) { - xmx = line.getOptionValue("X"); + if (line.hasOption("Xmx")) { + xmx = line.getOptionValue("Xmx"); } if (line.hasOption("T")) { timeoutMs = Long.parseLong(line.getOptionValue("T")); @@ -391,6 +391,33 @@ public class TikaAsyncCLI { private static final String DEFAULT_PLUGINS_DIR = "plugins"; + /** + * Resolves the default plugins directory. Looks for a "plugins" directory + * next to the running jar first, then falls back to the current working directory. + * + * @return the resolved plugins directory path, or "plugins" if neither location exists + */ + static String resolveDefaultPluginsDir() { + try { + Path jarPath = Paths.get( + TikaAsyncCLI.class.getProtectionDomain().getCodeSource().getLocation().toURI()); + Path jarDir = jarPath.getParent(); + if (jarDir != null) { + Path pluginsNextToJar = jarDir.resolve(DEFAULT_PLUGINS_DIR); + if (Files.isDirectory(pluginsNextToJar)) { + return pluginsNextToJar.toAbsolutePath().toString(); + } + } + } catch (Exception e) { + // Fall through to cwd-relative + } + Path cwdPlugins = Paths.get(DEFAULT_PLUGINS_DIR); + if (Files.isDirectory(cwdPlugins)) { + return cwdPlugins.toAbsolutePath().toString(); + } + return DEFAULT_PLUGINS_DIR; + } + /** * Ensures plugin-roots is set in the config. If missing, creates a merged config * with a default plugin-roots value. @@ -410,10 +437,13 @@ public class TikaAsyncCLI { // Need to add plugin-roots ObjectNode mutableRoot = (ObjectNode) rootNode; - String pluginString = StringUtils.isBlank(pluginsDir) ? DEFAULT_PLUGINS_DIR : pluginsDir; - Path plugins = Paths.get(pluginString); - if (Files.isDirectory(plugins)) { - pluginString = plugins.toAbsolutePath().toString(); + String pluginString; + if (!StringUtils.isBlank(pluginsDir)) { + Path plugins = Paths.get(pluginsDir); + pluginString = Files.isDirectory(plugins) ? + plugins.toAbsolutePath().toString() : pluginsDir; + } else { + pluginString = resolveDefaultPluginsDir(); } mutableRoot.put("plugin-roots", pluginString); diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java index 5028c09000..3111d2f529 100644 --- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java +++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java @@ -549,10 +549,33 @@ public class TikaServerProcess { } } + private static final String DEFAULT_PLUGINS_DIR = "plugins"; + /** - * Default plugins directory name, relative to current working directory. + * Resolves the default plugins directory. Looks for a "plugins" directory + * next to the running jar first, then falls back to the current working directory. */ - private static final String DEFAULT_PLUGINS_DIR = "plugins"; + private static String resolveDefaultPluginsDir() { + try { + Path jarPath = Path.of( + TikaServerProcess.class.getProtectionDomain() + .getCodeSource().getLocation().toURI()); + Path jarDir = jarPath.getParent(); + if (jarDir != null) { + Path pluginsNextToJar = jarDir.resolve(DEFAULT_PLUGINS_DIR); + if (Files.isDirectory(pluginsNextToJar)) { + return pluginsNextToJar.toAbsolutePath().toString(); + } + } + } catch (Exception e) { + // Fall through to cwd-relative + } + Path cwdPlugins = Path.of(DEFAULT_PLUGINS_DIR); + if (Files.isDirectory(cwdPlugins)) { + return cwdPlugins.toAbsolutePath().toString(); + } + return DEFAULT_PLUGINS_DIR; + } /** * Creates or merges server configuration using ConfigMerger. @@ -586,7 +609,7 @@ public class TikaServerProcess { // Use PASSBACK_ALL strategy - results returned through socket .setEmitStrategy(EmitStrategy.PASSBACK_ALL) // Set plugin roots - .setPluginRoots(Path.of(DEFAULT_PLUGINS_DIR).toAbsolutePath().toString()); + .setPluginRoots(resolveDefaultPluginsDir()); // Only set default pipes config if there's no existing config // This allows user-provided config to specify their own numClients, etc.
