This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_3x by this push:
new 9d3f1e60e4 TIKA-4663 -- add cli option for markdown in 3.x to include
tika-batch (#2624)
9d3f1e60e4 is described below
commit 9d3f1e60e4b034cdeee6a37af0020a16d59b3828
Author: Tim Allison <[email protected]>
AuthorDate: Sun Feb 22 14:15:30 2026 -0500
TIKA-4663 -- add cli option for markdown in 3.x to include tika-batch
(#2624)
---
.../main/java/org/apache/tika/cli/BatchCommandLineBuilder.java | 3 +++
.../java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java | 10 ++++++++++
.../java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java | 8 ++++++++
.../tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java | 3 +++
.../org/apache/tika/batch/fs/default-tika-batch-config.xml | 2 +-
5 files changed, 25 insertions(+), 1 deletion(-)
diff --git
a/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java
b/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java
index ee88595e23..108326498c 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java
@@ -203,6 +203,9 @@ class BatchCommandLineBuilder {
map.remove("-T");
map.remove("--text-main");
map.put("-basicHandlerType", "body");
+ } else if (map.containsKey("--md")) {
+ map.remove("--md");
+ map.put("-basicHandlerType", "markdown");
}
if (map.containsKey("-J") || map.containsKey("--jsonRecursive")) {
diff --git
a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java
b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java
index d25b35cfd7..3359f75db7 100644
---
a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java
+++
b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java
@@ -173,6 +173,16 @@ public class TikaCLIBatchCommandLineTest {
}
+ @Test
+ public void testMarkdownMapping() throws Exception {
+ String[] params = {"-i", testInputPathForCommandLine, "-o",
"outputRoot", "--md"};
+ String[] commandLine = BatchCommandLineBuilder.build(params);
+ Map<String, String> attrs = mapify(commandLine);
+ assertEquals("markdown", attrs.get("-basicHandlerType"));
+ assertEquals(escapedInputPathForCommandLine, attrs.get("-inputDir"));
+ assertEquals("outputRoot", attrs.get("-outputDir"));
+ }
+
@Test
public void testOneDirOneFileException() throws Exception {
boolean ex = false;
diff --git
a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java
b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java
index 15557488b1..10649e6e44 100644
---
a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java
+++
b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java
@@ -158,6 +158,14 @@ public class TikaCLIBatchIntegrationTest {
}
}
+ @Test
+ public void testMarkdownBatchIntegration() throws Exception {
+ String[] params = {"-i", testInputDirForCommandLine, "-o",
tempOutputDirForCommandLine, "-numConsumers", "2", "--md"};
+ TikaCLI.main(params);
+ assertFileExists(tempOutputDir.resolve("bad_xml.xml.md"));
+ assertFileExists(tempOutputDir.resolve("coffee.xls.md"));
+ }
+
@Test
public void testProcessLogFileConfig() throws Exception {
String[] params = {"-i", testInputDirForCommandLine, "-o",
tempOutputDirForCommandLine, "-numConsumers", "2",
"-JDlog4j.configurationFile=" + customBatchLogging.toUri()};
diff --git
a/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
b/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
index 88a3bd085c..0a0d87fce5 100644
---
a/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
+++
b/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
@@ -268,6 +268,9 @@ public class BasicTikaFSConsumersBuilder extends
AbstractConsumersBuilder {
case HTML:
sb.append("html");
break;
+ case MARKDOWN:
+ sb.append("md");
+ break;
default:
sb.append("txt");
}
diff --git
a/tika-batch/src/main/resources/org/apache/tika/batch/fs/default-tika-batch-config.xml
b/tika-batch/src/main/resources/org/apache/tika/batch/fs/default-tika-batch-config.xml
index 51cbe697e8..36fe1460ec 100644
---
a/tika-batch/src/main/resources/org/apache/tika/batch/fs/default-tika-batch-config.xml
+++
b/tika-batch/src/main/resources/org/apache/tika/batch/fs/default-tika-batch-config.xml
@@ -67,7 +67,7 @@
<option opt="handleExisting" hasArg="true"
description="if an output file already exists, do you want to:
overwrite, rename or skip"/>
<option opt="basicHandlerType" hasArg="true"
- description="what type of content handler: xml, text, html,
body"/>
+ description="what type of content handler: xml, text, html,
body, markdown/md"/>
<option opt="outputSuffix" hasArg="true"
description="suffix to add to the end of the output file
name"/>
<option opt="timeoutThresholdMillis" hasArg="true"