This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 9dfccdebdc02fc18bb94badaa8a71f93c3b26690
Author: Alexander Klimetschek <[email protected]>
AuthorDate: Wed Oct 7 10:47:33 2020 -0700

    add -C/--content cli option using WriteOutContentHandler (#312)
---
 tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java    | 14 +++++++++++++-
 .../src/test/java/org/apache/tika/cli/TikaCLITest.java     | 14 ++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 7a3a30c..216f9c6 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -105,6 +105,7 @@ import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.ContentHandlerFactory;
 import org.apache.tika.sax.ExpandedTitleContentHandler;
 import org.apache.tika.sax.RecursiveParserWrapperHandler;
+import org.apache.tika.sax.WriteOutContentHandler;
 import org.apache.tika.xmp.XMPMetadata;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -264,7 +265,15 @@ public class TikaCLI {
             return new BoilerpipeContentHandler(getOutputWriter(output, encoding));
         }
     };
-    
+
+    private final OutputType CONTENT = new OutputType() {
+        @Override
+        protected ContentHandler getContentHandler(
+            OutputStream output, Metadata metadata) throws Exception {
+            return new WriteOutContentHandler(getOutputWriter(output, encoding));
+        }
+    };
+
     private final OutputType METADATA = new OutputType() {
         @Override
         protected ContentHandler getContentHandler(
@@ -438,6 +447,8 @@ public class TikaCLI {
             type = TEXT;
         } else if (arg.equals("-T") || arg.equals("--text-main")) {
             type = TEXT_MAIN;
+        } else if (arg.equals("-C") || arg.equals("--content")) {
+            type = CONTENT;
         } else if (arg.equals("-m") || arg.equals("--metadata")) {
             type = METADATA;
         } else if (arg.equals("-l") || arg.equals("--language")) {
@@ -563,6 +574,7 @@ public class TikaCLI {
         out.println("    -h  or --html          Output HTML content");
         out.println("    -t  or --text          Output plain text content");
         out.println("    -T  or --text-main     Output plain text content (main content only)");
+        out.println("    -C  or --content       Output all text content");
         out.println("    -m  or --metadata      Output only metadata");
         out.println("    -j  or --json          Output metadata in JSON");
         out.println("    -y  or --xmp           Output metadata in XMP");
diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
index 0a0ae17..955433e 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
@@ -154,6 +154,20 @@ public class TikaCLITest {
     }
 
     /**
+     * Tests -C option of the cli
+     *
+     * @throws Exception
+     */
+    @Test
+    public void testContentOutput() throws Exception{
+        String[] params = {"-C", resourcePrefix + "testJsonMultipleInts.html"};
+        TikaCLI.main(params);
+        String out = outContent.toString(UTF_8.name());
+        assertTrue(out.contains("this is a title"));
+        assertTrue(out.contains("body"));
+    }
+
+    /**
      * Tests -f option of the cli
      *
      * @throws Exception

Reply via email to