This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_3x by this push:
     new 9d3f1e60e4 TIKA-4663 -- add cli option for markdown in 3.x to include tika-batch (#2624)
9d3f1e60e4 is described below

commit 9d3f1e60e4b034cdeee6a37af0020a16d59b3828
Author: Tim Allison <[email protected]>
AuthorDate: Sun Feb 22 14:15:30 2026 -0500

    TIKA-4663 -- add cli option for markdown in 3.x to include tika-batch (#2624)
---
 .../main/java/org/apache/tika/cli/BatchCommandLineBuilder.java |  3 +++
 .../java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java  | 10 ++++++++++
 .../java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java  |  8 ++++++++
 .../tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java    |  3 +++
 .../org/apache/tika/batch/fs/default-tika-batch-config.xml     |  2 +-
 5 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java b/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java
index ee88595e23..108326498c 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java
@@ -203,6 +203,9 @@ class BatchCommandLineBuilder {
             map.remove("-T");
             map.remove("--text-main");
             map.put("-basicHandlerType", "body");
+        } else if (map.containsKey("--md")) {
+            map.remove("--md");
+            map.put("-basicHandlerType", "markdown");
         }
 
         if (map.containsKey("-J") || map.containsKey("--jsonRecursive")) {
diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java
index d25b35cfd7..3359f75db7 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java
@@ -173,6 +173,16 @@ public class TikaCLIBatchCommandLineTest {
 
     }
 
+    @Test
+    public void testMarkdownMapping() throws Exception {
+        String[] params = {"-i", testInputPathForCommandLine, "-o", 
"outputRoot", "--md"};
+        String[] commandLine = BatchCommandLineBuilder.build(params);
+        Map<String, String> attrs = mapify(commandLine);
+        assertEquals("markdown", attrs.get("-basicHandlerType"));
+        assertEquals(escapedInputPathForCommandLine, attrs.get("-inputDir"));
+        assertEquals("outputRoot", attrs.get("-outputDir"));
+    }
+
     @Test
     public void testOneDirOneFileException() throws Exception {
         boolean ex = false;
diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java
index 15557488b1..10649e6e44 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java
@@ -158,6 +158,14 @@ public class TikaCLIBatchIntegrationTest {
         }
     }
 
+    @Test
+    public void testMarkdownBatchIntegration() throws Exception {
+        String[] params = {"-i", testInputDirForCommandLine, "-o", 
tempOutputDirForCommandLine, "-numConsumers", "2", "--md"};
+        TikaCLI.main(params);
+        assertFileExists(tempOutputDir.resolve("bad_xml.xml.md"));
+        assertFileExists(tempOutputDir.resolve("coffee.xls.md"));
+    }
+
     @Test
     public void testProcessLogFileConfig() throws Exception {
         String[] params = {"-i", testInputDirForCommandLine, "-o", 
tempOutputDirForCommandLine, "-numConsumers", "2", 
"-JDlog4j.configurationFile=" + customBatchLogging.toUri()};
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java b/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
index 88a3bd085c..0a0d87fce5 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
@@ -268,6 +268,9 @@ public class BasicTikaFSConsumersBuilder extends AbstractConsumersBuilder {
             case HTML:
                 sb.append("html");
                 break;
+            case MARKDOWN:
+                sb.append("md");
+                break;
             default:
                 sb.append("txt");
         }
diff --git a/tika-batch/src/main/resources/org/apache/tika/batch/fs/default-tika-batch-config.xml b/tika-batch/src/main/resources/org/apache/tika/batch/fs/default-tika-batch-config.xml
index 51cbe697e8..36fe1460ec 100644
--- a/tika-batch/src/main/resources/org/apache/tika/batch/fs/default-tika-batch-config.xml
+++ b/tika-batch/src/main/resources/org/apache/tika/batch/fs/default-tika-batch-config.xml
@@ -67,7 +67,7 @@
         <option opt="handleExisting" hasArg="true"
                 description="if an output file already exists, do you want to: 
overwrite, rename or skip"/>
         <option opt="basicHandlerType" hasArg="true"
-                description="what type of content handler: xml, text, html, 
body"/>
+                description="what type of content handler: xml, text, html, 
body, markdown/md"/>
         <option opt="outputSuffix" hasArg="true"
                 description="suffix to add to the end of the output file 
name"/>
         <option opt="timeoutThresholdMillis" hasArg="true"

Reply via email to