This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4653-markdown-handler
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 96a6d3c2b097d2a5ceac10af25c4587193ae0f2d
Author: tallison <[email protected]>
AuthorDate: Sun Feb 8 17:53:28 2026 -0500

    TIKA-4653 - add markdown contenthandler
---
 .../org/apache/tika/sax/BasicContentHandlerFactory.java    | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/sax/BasicContentHandlerFactory.java b/tika-core/src/main/java/org/apache/tika/sax/BasicContentHandlerFactory.java
index 16195b9de5..ddef58d96e 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/BasicContentHandlerFactory.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/BasicContentHandlerFactory.java
@@ -103,7 +103,7 @@ public class BasicContentHandlerFactory implements StreamingContentHandlerFactor
      * Tries to parse string into handler type.  Returns default if string is null or
      * parse fails.
      * <p/>
-     * Options: xml, html, text, body, ignore (no content)
+     * Options: xml, html, text, body, ignore (no content), markdown/md
      *
      * @param handlerTypeName string to parse
      * @param defaultType     type to return if parse fails
@@ -128,6 +128,9 @@ public class BasicContentHandlerFactory implements StreamingContentHandlerFactor
                 return HANDLER_TYPE.BODY;
             case "ignore":
                 return HANDLER_TYPE.IGNORE;
+            case "markdown":
+            case "md":
+                return HANDLER_TYPE.MARKDOWN;
             default:
                 return defaultType;
         }
@@ -159,6 +162,8 @@ public class BasicContentHandlerFactory implements StreamingContentHandlerFactor
                 return new ToHTMLContentHandler();
             case XML:
                 return new ToXMLContentHandler();
+            case MARKDOWN:
+                return new ToMarkdownContentHandler();
             default:
                 return new ToTextContentHandler();
         }
@@ -186,6 +191,9 @@ public class BasicContentHandlerFactory implements StreamingContentHandlerFactor
                     case XML:
                         return new WriteOutContentHandler(
                                 new ToXMLContentHandler(os, charset.name()), writeLimit);
+                    case MARKDOWN:
+                        return new WriteOutContentHandler(
+                                new ToMarkdownContentHandler(os, charset.name()), writeLimit);
                     default:
                         return new WriteOutContentHandler(
                                 new ToTextContentHandler(os, charset.name()), writeLimit);
@@ -200,6 +208,8 @@ public class BasicContentHandlerFactory implements StreamingContentHandlerFactor
                         return new ToHTMLContentHandler(os, charset.name());
                     case XML:
                         return new ToXMLContentHandler(os, charset.name());
+                    case MARKDOWN:
+                        return new ToMarkdownContentHandler(os, charset.name());
                     default:
                         return new ToTextContentHandler(os, charset.name());
 
@@ -230,7 +240,7 @@ public class BasicContentHandlerFactory implements StreamingContentHandlerFactor
      */
     public enum HANDLER_TYPE {
         BODY, IGNORE, //don't store content
-        TEXT, HTML, XML
+        TEXT, HTML, XML, MARKDOWN
     }
 
     public int getWriteLimit() {

Reply via email to