Author: tallison
Date: Tue Mar 31 01:54:40 2015
New Revision: 1670237

URL: http://svn.apache.org/r1670237
Log:
TIKA-1330: add integration tests to TikaCLITest

Modified:
    tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
    tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/builders/SimpleLogReporterBuilder.java
    tika/trunk/tika-batch/src/main/resources/org/apache/tika/batch/fs/default-tika-batch-config.xml
    tika/trunk/tika-batch/src/test/resources/tika-batch-config-MockConsumersBuilder.xml
    tika/trunk/tika-batch/src/test/resources/tika-batch-config-broken.xml
    tika/trunk/tika-batch/src/test/resources/tika-batch-config-test.xml

Modified: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java?rev=1670237&r1=1670236&r2=1670237&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java (original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java Tue Mar 31 01:54:40 2015
@@ -125,7 +125,7 @@ public class TikaCLI {
             String[] batchArgs = BatchCommandLineBuilder.build(args);
             BatchProcessDriverCLI batchDriver = new 
BatchProcessDriverCLI(batchArgs);
             batchDriver.execute();
-            System.exit(0);
+            return;
         }
 
         BasicConfigurator.configure(

Modified: tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java?rev=1670237&r1=1670236&r2=1670237&view=diff
==============================================================================
--- tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java (original)
+++ tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java Tue Mar 31 01:54:40 2015
@@ -16,16 +16,26 @@
  */
 package org.apache.tika.cli;
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 import java.io.ByteArrayOutputStream;
 import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
 import java.io.PrintStream;
+import java.io.Reader;
 import java.net.URI;
+import java.util.List;
+
 import org.apache.commons.io.FileUtils;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.IOUtils;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.serialization.JsonMetadataList;
+import org.apache.tika.parser.RecursiveParserWrapper;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -378,4 +388,96 @@ public class TikaCLITest {
         assertTrue(content.contains("\\n\\nembed_0"));
     }
 
+    @Test
+    public void testSimplestBatchIntegration() throws Exception {
+        File tempDir = File.createTempFile("tika-cli-test-batch-", "");
+        tempDir.delete();
+        tempDir.mkdir();
+        ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
+        PrintStream writer = new PrintStream(outBuffer, true, 
IOUtils.UTF_8.name());
+        OutputStream os = System.out;
+        System.setOut(writer);
+        try {
+            String[] params = {escape(testDataFile.getAbsolutePath()),
+                    escape(tempDir.getAbsolutePath())};
+            TikaCLI.main(params);
+
+            StringBuffer allFiles = new StringBuffer();
+            assertTrue("bad_xml.xml.xml", new File(tempDir, 
"bad_xml.xml.xml").isFile());
+            assertTrue("coffee.xls.xml", new File(tempDir, 
"coffee.xls.xml").exists());
+        } finally {
+            //reset in case something went horribly wrong
+            System.setOut(new PrintStream(os, true, IOUtils.UTF_8.name()));
+            FileUtils.deleteDirectory(tempDir);
+        }
+    }
+
+    @Test
+    public void testBasicBatchIntegration() throws Exception {
+        File tempDir = File.createTempFile("tika-cli-test-batch-", "");
+        tempDir.delete();
+        tempDir.mkdir();
+        ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
+        PrintStream writer = new PrintStream(outBuffer, true, 
IOUtils.UTF_8.name());
+        OutputStream os = System.out;
+        System.setOut(writer);
+        try {
+            String[] params = {"-i", escape(testDataFile.getAbsolutePath()),
+                    "-o", escape(tempDir.getAbsolutePath()),
+                    "-numConsumers", "2",
+                    "-reporterSleepMillis", "100"};//report often to make sure
+            TikaCLI.main(params);
+
+            StringBuffer allFiles = new StringBuffer();
+            assertTrue("bad_xml.xml.xml", new File(tempDir, 
"bad_xml.xml.xml").isFile());
+            assertTrue("coffee.xls.xml", new File(tempDir, 
"coffee.xls.xml").exists());
+            String sysOutString = new String(outBuffer.toByteArray(), 
IOUtils.UTF_8);
+
+            assertEquals(-1, sysOutString.indexOf("There are 3 file processors 
still active"));
+            assertTrue(sysOutString.indexOf("There are 2 file processors") > 
-1);
+        } finally {
+            //reset in case something went horribly wrong
+            System.setOut(new PrintStream(os, true, IOUtils.UTF_8.name()));
+            FileUtils.deleteDirectory(tempDir);
+        }
+    }
+
+    @Test
+    public void testJsonRecursiveBatchIntegration() throws Exception {
+        File tempDir = File.createTempFile("tika-cli-test-batch-", "");
+        tempDir.delete();
+        tempDir.mkdir();
+        ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
+        PrintStream writer = new PrintStream(outBuffer, true, 
IOUtils.UTF_8.name());
+        OutputStream os = System.out;
+        System.setOut(writer);
+        Reader reader = null;
+        try {
+            String[] params = {"-i", escape(testDataFile.getAbsolutePath()),
+                    "-o", escape(tempDir.getAbsolutePath()),
+                    "-numConsumers", "10",
+                    "-J", //recursive Json
+                    "-t" //plain text in content
+            };
+            TikaCLI.main(params);
+            reader = new InputStreamReader(
+                    new FileInputStream(new File(tempDir, 
"test_recursive_embedded.docx.json")), IOUtils.UTF_8);
+            List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
+            assertEquals(12, metadataList.size());
+            
assertTrue(metadataList.get(6).get(RecursiveParserWrapper.TIKA_CONTENT).contains("human
 events"));
+        } finally {
+            IOUtils.closeQuietly(reader);
+            //reset in case something went horribly wrong
+            System.setOut(new PrintStream(os, true, IOUtils.UTF_8.name()));
+            FileUtils.deleteDirectory(tempDir);
+        }
+    }
+
+
+    public static String escape(String path) {
+        if (path.indexOf(' ') > -1){
+            return '"'+path+'"';
+        }
+        return path;
+    }
 }

Modified: tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/builders/SimpleLogReporterBuilder.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/builders/SimpleLogReporterBuilder.java?rev=1670237&r1=1670236&r2=1670237&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/builders/SimpleLogReporterBuilder.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/builders/SimpleLogReporterBuilder.java Tue Mar 31 01:54:40 2015
@@ -30,13 +30,13 @@ public class SimpleLogReporterBuilder im
 
     @Override
     public StatusReporter build(FileResourceCrawler crawler, ConsumersManager 
consumersManager,
-                                Node n, Map<String, String> 
commandlineArguments) {
-
-        Map<String, String> attributes = XMLDOMUtil.mapifyAttrs(n, 
commandlineArguments);
-        long sleepMillis = PropsUtil.getLong(attributes.get("sleepMillis"), 
1000L);
-        long staleThresholdMillis = 
PropsUtil.getLong(attributes.get("reporterStaleThresholdMillis"), 500000L);
-        StatusReporter reporter = new StatusReporter(crawler, 
consumersManager);
-        reporter.setSleepMillis(sleepMillis);
+                                Node n, Map<String, String> 
commandlineArguments) {
+
+        Map<String, String> attributes = XMLDOMUtil.mapifyAttrs(n, 
commandlineArguments);
+        long sleepMillis = 
PropsUtil.getLong(attributes.get("reporterSleepMillis"), 1000L);
+        long staleThresholdMillis = 
PropsUtil.getLong(attributes.get("reporterStaleThresholdMillis"), 500000L);
+        StatusReporter reporter = new StatusReporter(crawler, 
consumersManager);
+        reporter.setSleepMillis(sleepMillis);
         reporter.setStaleThresholdMillis(staleThresholdMillis);
         return reporter;
     }

Modified: tika/trunk/tika-batch/src/main/resources/org/apache/tika/batch/fs/default-tika-batch-config.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/resources/org/apache/tika/batch/fs/default-tika-batch-config.xml?rev=1670237&r1=1670236&r2=1670237&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/resources/org/apache/tika/batch/fs/default-tika-batch-config.xml (original)
+++ tika/trunk/tika-batch/src/main/resources/org/apache/tika/batch/fs/default-tika-batch-config.xml Tue Mar 31 01:54:40 2015
@@ -26,13 +26,13 @@
 <tika-batch-config
         maxAliveTimeSeconds="-1"
         pauseOnEarlyTerminationMillis="10000"
-        timeoutThresholdMillis="300000"
-        timeoutCheckPulseMillis="1000"
-        maxQueueSize="10000"
-        numConsumers="5">
-
-    <!-- options to allow on the commandline -->
-    <commandline>
+        timeoutThresholdMillis="300000"
+        timeoutCheckPulseMillis="1000"
+        maxQueueSize="10000"
+        numConsumers="default"> <!-- numConsumers = number of file consumers, 
"default" = number of processors -1 -->
+
+    <!-- options to allow on the commandline -->
+    <commandline>
         <option opt="c" longOpt="tika-config" hasArg="true"
                 description="TikaConfig file"/>
         <option opt="bc" longOpt="batch-config" hasArg="true"
@@ -72,12 +72,14 @@
         <option opt="timeoutThresholdMillis" hasArg="true"
                 description="how long to wait before determining that a 
consumer is stale"/>
         <option opt="includeFilePat" hasArg="true"
-                description="regex that specifies which files to process"/>
-        <option opt="excludeFilePat" hasArg="true"
-                description="regex that specifies which files to avoid 
processing"/>
-    </commandline>
-
-
+                description="regex that specifies which files to process"/>
+        <option opt="excludeFilePat" hasArg="true"
+                description="regex that specifies which files to avoid 
processing"/>
+        <option opt="reporterSleepMillis" hasArg="true"
+                description="millisecond between reports by the reporter"/>
+    </commandline>
+
+
     <!-- can specify inputDir="input", but the default config should not 
include this -->
     <!-- can also specify startDir="input/someDir" to specify which child 
directory
          to start processing -->
@@ -111,10 +113,10 @@
         <!-- overwritePolicy: "skip" a file if output file exists, "rename" a 
output file, "overwrite" -->
         <!-- can include e.g. outputDir="output", but we don't want to include 
this in the default! -->
         <outputstream class="FSOutputStreamFactory" encoding="UTF-8" 
outputSuffix="xml"/>
-    </consumers>
-
-    <!-- reporter and interrupter are optional -->
-    <reporter 
builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" 
sleepMillis="1000"
-              reporterStaleThresholdMillis="60000"/>
-    <interrupter 
builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
+    </consumers>
+
+    <!-- reporter and interrupter are optional -->
+    <reporter 
builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" 
reporterSleepMillis="1000"
+              reporterStaleThresholdMillis="60000"/>
+    <interrupter 
builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
 </tika-batch-config>
\ No newline at end of file

Modified: tika/trunk/tika-batch/src/test/resources/tika-batch-config-MockConsumersBuilder.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/tika-batch-config-MockConsumersBuilder.xml?rev=1670237&r1=1670236&r2=1670237&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/resources/tika-batch-config-MockConsumersBuilder.xml (original)
+++ tika/trunk/tika-batch/src/test/resources/tika-batch-config-MockConsumersBuilder.xml Tue Mar 31 01:54:40 2015
@@ -103,10 +103,10 @@
 
                <outputstream class="FSOutputStreamFactory"
                 encoding="UTF-8" outputSuffix="xml"/>
-       </consumers>
-       
-       <!-- reporter and interrupter are optional -->
-       <reporter 
builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" 
sleepMillis="1000"
-              reporterStaleThresholdMillis="500000"/>
-       <interrupter 
builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
+       </consumers>
+       
+       <!-- reporter and interrupter are optional -->
+       <reporter 
builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" 
reporterSleepMillis="1000"
+              reporterStaleThresholdMillis="500000"/>
+       <interrupter 
builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
 </tika-batch-config>
\ No newline at end of file

Modified: tika/trunk/tika-batch/src/test/resources/tika-batch-config-broken.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/tika-batch-config-broken.xml?rev=1670237&r1=1670236&r2=1670237&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/resources/tika-batch-config-broken.xml (original)
+++ tika/trunk/tika-batch/src/test/resources/tika-batch-config-broken.xml Tue Mar 31 01:54:40 2015
@@ -96,10 +96,10 @@
 
                <outputstream class="FSOutputStreamFactory"
                 encoding="UTF-8" outputSuffix="xml"/>
-       </consumers>
-       
-       <!-- reporter and interrupter are optional -->
-       <reporter 
builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" 
sleepMillis="1000"
-              reporterStaleThresholdMillis="500000"/>
-       <interrupter 
builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
+       </consumers>
+       
+       <!-- reporter and interrupter are optional -->
+       <reporter 
builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" 
reporterSleepMillis="1000"
+              reporterStaleThresholdMillis="500000"/>
+       <interrupter 
builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
 </tika-batch-config>
\ No newline at end of file

Modified: tika/trunk/tika-batch/src/test/resources/tika-batch-config-test.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/tika-batch-config-test.xml?rev=1670237&r1=1670236&r2=1670237&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/resources/tika-batch-config-test.xml (original)
+++ tika/trunk/tika-batch/src/test/resources/tika-batch-config-test.xml Tue Mar 31 01:54:40 2015
@@ -102,10 +102,10 @@
 
                <outputstream class="FSOutputStreamFactory"
                 encoding="UTF-8" outputSuffix="xml"/>
-       </consumers>
-       
-       <!-- reporter and interrupter are optional -->
-       <reporter 
builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" 
sleepMillis="1000"
-              reporterStaleThresholdMillis="500000"/>
-       <interrupter 
builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
+       </consumers>
+       
+       <!-- reporter and interrupter are optional -->
+       <reporter 
builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" 
reporterSleepMillis="1000"
+              reporterStaleThresholdMillis="500000"/>
+       <interrupter 
builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
 </tika-batch-config>
\ No newline at end of file


Reply via email to