Author: tallison
Date: Wed Apr  1 18:27:23 2015
New Revision: 1670749

URL: http://svn.apache.org/r1670749
Log:
TIKA-1330 clean up logging in tika-batch and in tika-app's integration of tika-batch

Added:
    tika/trunk/tika-app/src/main/resources/log4j_batch_process.properties
    tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java
    tika/trunk/tika-app/src/test/resources/log4j_batch_process_test.properties
Modified:
    tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java
    tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
    tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/FileResourceConsumer.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/ParallelFileProcessingResult.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/AbstractFSConsumer.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/BasicTikaFSConsumer.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java
    tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/RecursiveParserWrapperFSConsumer.java
    tika/trunk/tika-batch/src/test/resources/log4j.properties
    tika/trunk/tika-batch/src/test/resources/log4j_process.properties

Modified: 
tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java?rev=1670749&r1=1670748&r2=1670749&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java (original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java Wed Apr  1 18:27:23 2015
@@ -39,13 +39,13 @@ class BatchCommandLineBuilder {
     static Pattern JVM_OPTS_PATTERN = Pattern.compile("^(--?)J(.+)");
 
     protected static String[] build(String[] args) throws IOException {
-        Map<String, String> processArgs = new LinkedHashMap<String, String>();
-        Map<String, String> jvmOpts = new LinkedHashMap<String,String>();
-        //take the args, and divide them into process args and options for
-        //the parent jvm process (i.e. log files, etc)
-        mapifyArgs(args, processArgs, jvmOpts);
-
-        //now modify processArgs in place
+        Map<String, String> processArgs = new LinkedHashMap<String, String>();
+        Map<String, String> jvmOpts = new LinkedHashMap<String,String>();
+        //take the args, and divide them into process args and options for
+        //the child jvm process (i.e. log files, etc)
+        mapifyArgs(args, processArgs, jvmOpts);
+
+        //now modify processArgs in place
         translateCommandLine(args, processArgs);
 
         //maybe the user specified a different classpath?!
@@ -56,12 +56,23 @@ class BatchCommandLineBuilder {
             if (cp.contains(" ")){
                 cp = "\""+cp+"\"";
             }
-            jvmOpts.put("-cp", cp);
-        }
-
-        //now build the full command line
-        List<String> fullCommand = new ArrayList<String>();
-        fullCommand.add("java");
+            jvmOpts.put("-cp", cp);
+        }
+
+        boolean hasLog4j = false;
+        for (String k : jvmOpts.keySet()) {
+            if (k.startsWith("-Dlog4j.configuration=")) {
+                hasLog4j = true;
+                break;
+            }
+        }
+        //use the log4j config file inside the app /resources/log4j_batch_process.properties
+        if (! hasLog4j) {
+            jvmOpts.put("-Dlog4j.configuration=\"log4j_batch_process.properties\"", "");
+        }
+        //now build the full command line
+        List<String> fullCommand = new ArrayList<String>();
+        fullCommand.add("java");
         for (Map.Entry<String, String> e : jvmOpts.entrySet()) {
             fullCommand.add(e.getKey());
             if (e.getValue().length() > 0) {
@@ -79,16 +90,16 @@ class BatchCommandLineBuilder {
         return fullCommand.toArray(new String[fullCommand.size()]);
     }
 
-
-    /**
-     * Take the input args and separate them into args that belong on the commandline
-     * and those that belong as jvm args for the parent process.
-     * @param args -- literal args from TikaCLI commandline
-     * @param commandLine args that should be part of the batch commandline
-     * @param jvmArgs args that belong as jvm arguments for the parent process
-     */
-    private static void mapifyArgs(final String[] args,
-                                   final Map<String, String> commandLine,
+
+    /**
+     * Take the input args and separate them into args that belong on the commandline
+     * and those that belong as jvm args for the child process.
+     * @param args -- literal args from TikaCLI commandline
+     * @param commandLine args that should be part of the batch commandline
+     * @param jvmArgs args that belong as jvm arguments for the child process
+     */
+    private static void mapifyArgs(final String[] args,
+                                   final Map<String, String> commandLine,
                                    final Map<String, String> jvmArgs) {
 
         if (args.length == 0) {
@@ -187,8 +198,9 @@ class BatchCommandLineBuilder {
         if (map.containsKey("--outputDir") || map.containsKey("-o")) {
             String v1 = map.remove("--outputDir");
             String v2 = map.remove("-o");
-            String v = (v1 == null) ? v2 : v1;
-            map.put("-outputDir", v);
-        }
-    }
-}
+            String v = (v1 == null) ? v2 : v1;
+            map.put("-outputDir", v);
+        }
+
+    }
+}
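
Note: with the change above, if the user does not pass a -JDlog4j.configuration= option, the child JVM is launched with the bundled log4j_batch_process.properties (added below) as its default log4j configuration. A minimal sketch of inspecting the generated command line (the preview class is hypothetical and not part of this commit; it must live in the org.apache.tika.cli package because BatchCommandLineBuilder is package-private; argument values are illustrative):

    package org.apache.tika.cli;

    // Hypothetical preview class.  With no explicit -JDlog4j.configuration=
    // option in the input args, the printed tokens should now include
    // -Dlog4j.configuration="log4j_batch_process.properties".
    public class BatchCommandLinePreview {
        public static void main(String[] args) throws Exception {
            String[] childCommand = BatchCommandLineBuilder.build(
                    new String[]{"-i", "input", "-o", "output"});
            for (String token : childCommand) {
                System.out.println(token);
            }
        }
    }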

Modified: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java?rev=1670749&r1=1670748&r2=1670749&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java (original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java Wed Apr  1 18:27:23 2015
@@ -21,7 +21,6 @@ import javax.xml.transform.TransformerCo
 import javax.xml.transform.sax.SAXTransformerFactory;
 import javax.xml.transform.sax.TransformerHandler;
 import javax.xml.transform.stream.StreamResult;
-
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
@@ -65,7 +64,6 @@ import org.apache.poi.poifs.filesystem.D
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.tika.Tika;
 import org.apache.tika.batch.BatchProcessDriverCLI;
-import org.apache.tika.batch.fs.FSBatchProcessCLI;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.detect.CompositeDetector;
 import org.apache.tika.detect.DefaultDetector;
@@ -115,10 +113,17 @@ public class TikaCLI {
     private static final Log logger = LogFactory.getLog(TikaCLI.class);
 
     public static void main(String[] args) throws Exception {
+
+        String log4jFile = System.getProperty("log4j.configuration");
+        if (log4jFile == null || log4jFile.trim().length()==0) {
+            BasicConfigurator.configure(
+                    new WriterAppender(new SimpleLayout(), System.err));
+            Logger.getRootLogger().setLevel(Level.INFO);
+        }
+
         TikaCLI cli = new TikaCLI();
 
         if (cli.testForHelp(args)) {
-            FSBatchProcessCLI batchProcessCLI = new FSBatchProcessCLI(args);
             cli.usage();
             return;
         } else if (cli.testForBatch(args)) {
@@ -128,10 +133,6 @@ public class TikaCLI {
             return;
         }
 
-        BasicConfigurator.configure(
-                new WriterAppender(new SimpleLayout(), System.err));
-        Logger.getRootLogger().setLevel(Level.INFO);
-
         if (args.length > 0) {
             for (int i = 0; i < args.length; i++) {
                 cli.process(args[i]);
@@ -587,7 +588,7 @@ public class TikaCLI {
         out.println();
         out.println("    Simplest method.");
         out.println("    Specify two directories as args with no other args:");
-        out.println("         java -jar tika-app.jar <inputDirectory> <outputDirectory");
+        out.println("         java -jar tika-app.jar <inputDirectory> <outputDirectory>");
         out.println();
         out.println("Batch Options:");
         out.println("    -i  or --inputDir          Input directory");
@@ -610,7 +611,6 @@ public class TikaCLI {
         out.println();
         out.println("    To modify child process jvm args, prepend \"J\" as in:");
         out.println("    -JXmx4g or -JDlog4j.configuration=file:log4j.xml.");
-
     }
 
     private void version() {
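
Note: with the change above, TikaCLI installs the default stderr WriterAppender only when the log4j.configuration system property is not set. A minimal sketch of a programmatic caller supplying its own configuration so the default is skipped (the wrapper class and property value are illustrative, not part of this commit):

    // Hypothetical wrapper around TikaCLI.
    public class TikaCLIWithCustomLogging {
        public static void main(String[] args) throws Exception {
            // Because this property is set, TikaCLI.main() no longer adds the
            // default SimpleLayout/WriterAppender to the root logger.
            System.setProperty("log4j.configuration", "file:my-log4j.properties");
            org.apache.tika.cli.TikaCLI.main(args);
        }
    }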

Added: tika/trunk/tika-app/src/main/resources/log4j_batch_process.properties
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/resources/log4j_batch_process.properties?rev=1670749&view=auto
==============================================================================
--- tika/trunk/tika-app/src/main/resources/log4j_batch_process.properties (added)
+++ tika/trunk/tika-app/src/main/resources/log4j_batch_process.properties Wed Apr  1 18:27:23 2015
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#info,debug, error,fatal ...
+log4j.rootLogger=info,stdout
+
+#console
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+
+
+log4j.appender.stdout.layout.ConversionPattern=%m%n

Added: 
tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java?rev=1670749&view=auto
==============================================================================
--- tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java (added)
+++ tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java Wed Apr  1 18:27:23 2015
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.cli;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.io.Reader;
+import java.util.List;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.serialization.JsonMetadataList;
+import org.apache.tika.parser.RecursiveParserWrapper;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TikaCLIBatchIntegrationTest {
+
+    private File testDataFile = new File("src/test/resources/test-data");
+
+    private File tempDir;
+    private OutputStream out = null;
+    private OutputStream err = null;
+    private ByteArrayOutputStream outBuffer = null;
+
+    @Before
+    public void setup() throws Exception {
+        tempDir = File.createTempFile("tika-cli-test-batch-", "");
+        tempDir.delete();
+        tempDir.mkdir();
+        outBuffer = new ByteArrayOutputStream();
+        PrintStream outWriter = new PrintStream(outBuffer, true, IOUtils.UTF_8.name());
+        ByteArrayOutputStream errBuffer = new ByteArrayOutputStream();
+        PrintStream errWriter = new PrintStream(errBuffer, true, IOUtils.UTF_8.name());
+        out = System.out;
+        err = System.err;
+        System.setOut(outWriter);
+        System.setErr(errWriter);
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        System.setOut(new PrintStream(out, true, IOUtils.UTF_8.name()));
+        System.setErr(new PrintStream(err, true, IOUtils.UTF_8.name()));
+        FileUtils.deleteDirectory(tempDir);
+    }
+
+    @Test
+    public void testSimplestBatchIntegration() throws Exception {
+        String[] params = {escape(testDataFile.getAbsolutePath()),
+                escape(tempDir.getAbsolutePath())};
+        TikaCLI.main(params);
+
+        assertTrue("bad_xml.xml.xml", new File(tempDir, "bad_xml.xml.xml").isFile());
+        assertTrue("coffee.xls.xml", new File(tempDir, "coffee.xls.xml").exists());
+    }
+
+    @Test
+    public void testBasicBatchIntegration() throws Exception {
+        String[] params = {"-i", escape(testDataFile.getAbsolutePath()),
+                "-o", escape(tempDir.getAbsolutePath()),
+                "-numConsumers", "2"
+        };
+        TikaCLI.main(params);
+
+        assertTrue("bad_xml.xml.xml", new File(tempDir, "bad_xml.xml.xml").isFile());
+        assertTrue("coffee.xls.xml", new File(tempDir, "coffee.xls.xml").exists());
+    }
+
+    @Test
+    public void testJsonRecursiveBatchIntegration() throws Exception {
+        Reader reader = null;
+        try {
+            String[] params = {"-i", escape(testDataFile.getAbsolutePath()),
+                    "-o", escape(tempDir.getAbsolutePath()),
+                    "-numConsumers", "10",
+                    "-J", //recursive Json
+                    "-t" //plain text in content
+            };
+            TikaCLI.main(params);
+            reader = new InputStreamReader(
+                    new FileInputStream(new File(tempDir, "test_recursive_embedded.docx.json")), IOUtils.UTF_8);
+            List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
+            assertEquals(12, metadataList.size());
+            assertTrue(metadataList.get(6).get(RecursiveParserWrapper.TIKA_CONTENT).contains("human events"));
+        } finally {
+            IOUtils.closeQuietly(reader);
+        }
+    }
+
+    @Test
+    public void testProcessLogFileConfig() throws Exception {
+        String[] params = {"-i", escape(testDataFile.getAbsolutePath()),
+                "-o", escape(tempDir.getAbsolutePath()),
+                "-numConsumers", "2",
+                "-JDlog4j.configuration=log4j_batch_process_test.properties"};
+        TikaCLI.main(params);
+
+        assertTrue("bad_xml.xml.xml", new File(tempDir, "bad_xml.xml.xml").isFile());
+        assertTrue("coffee.xls.xml", new File(tempDir, "coffee.xls.xml").exists());
+        String sysOutString = new String(outBuffer.toByteArray(), IOUtils.UTF_8);
+        assertTrue(sysOutString.contains("MY_CUSTOM_LOG_CONFIG"));
+    }
+
+    public static String escape(String path) {
+        if (path.indexOf(' ') > -1) {
+            return '"' + path + '"';
+        }
+        return path;
+    }
+
+}

Modified: tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java?rev=1670749&r1=1670748&r2=1670749&view=diff
==============================================================================
--- tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java (original)
+++ tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java Wed Apr  1 18:27:23 2015
@@ -16,26 +16,17 @@
  */
 package org.apache.tika.cli;
 
-import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 import java.io.ByteArrayOutputStream;
 import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStream;
 import java.io.PrintStream;
-import java.io.Reader;
 import java.net.URI;
-import java.util.List;
 
 import org.apache.commons.io.FileUtils;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.IOUtils;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.serialization.JsonMetadataList;
-import org.apache.tika.parser.RecursiveParserWrapper;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -387,97 +378,4 @@ public class TikaCLITest {
         assertTrue(content.contains("\\n\\nembed_4\\n"));
         assertTrue(content.contains("\\n\\nembed_0"));
     }
-
-    @Test
-    public void testSimplestBatchIntegration() throws Exception {
-        File tempDir = File.createTempFile("tika-cli-test-batch-", "");
-        tempDir.delete();
-        tempDir.mkdir();
-        ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
-        PrintStream writer = new PrintStream(outBuffer, true, 
IOUtils.UTF_8.name());
-        OutputStream os = System.out;
-        System.setOut(writer);
-        try {
-            String[] params = {escape(testDataFile.getAbsolutePath()),
-                    escape(tempDir.getAbsolutePath())};
-            TikaCLI.main(params);
-
-            StringBuffer allFiles = new StringBuffer();
-            assertTrue("bad_xml.xml.xml", new File(tempDir, 
"bad_xml.xml.xml").isFile());
-            assertTrue("coffee.xls.xml", new File(tempDir, 
"coffee.xls.xml").exists());
-        } finally {
-            //reset in case something went horribly wrong
-            System.setOut(new PrintStream(os, true, IOUtils.UTF_8.name()));
-            FileUtils.deleteDirectory(tempDir);
-        }
-    }
-
-    @Test
-    public void testBasicBatchIntegration() throws Exception {
-        File tempDir = File.createTempFile("tika-cli-test-batch-", "");
-        tempDir.delete();
-        tempDir.mkdir();
-        ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
-        PrintStream writer = new PrintStream(outBuffer, true, 
IOUtils.UTF_8.name());
-        OutputStream os = System.out;
-        System.setOut(writer);
-        try {
-            String[] params = {"-i", escape(testDataFile.getAbsolutePath()),
-                    "-o", escape(tempDir.getAbsolutePath()),
-                    "-numConsumers", "2",
-                    "-reporterSleepMillis", "100"};//report often to make sure
-            TikaCLI.main(params);
-
-            StringBuffer allFiles = new StringBuffer();
-            assertTrue("bad_xml.xml.xml", new File(tempDir, 
"bad_xml.xml.xml").isFile());
-            assertTrue("coffee.xls.xml", new File(tempDir, 
"coffee.xls.xml").exists());
-            String sysOutString = new String(outBuffer.toByteArray(), 
IOUtils.UTF_8);
-
-            assertEquals(-1, sysOutString.indexOf("There are 3 file processors 
still active"));
-            assertTrue(sysOutString.indexOf("There are 2 file processors") > 
-1);
-        } finally {
-            //reset in case something went horribly wrong
-            System.setOut(new PrintStream(os, true, IOUtils.UTF_8.name()));
-            FileUtils.deleteDirectory(tempDir);
-        }
-    }
-
-    @Test
-    public void testJsonRecursiveBatchIntegration() throws Exception {
-        File tempDir = File.createTempFile("tika-cli-test-batch-", "");
-        tempDir.delete();
-        tempDir.mkdir();
-        ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
-        PrintStream writer = new PrintStream(outBuffer, true, 
IOUtils.UTF_8.name());
-        OutputStream os = System.out;
-        System.setOut(writer);
-        Reader reader = null;
-        try {
-            String[] params = {"-i", escape(testDataFile.getAbsolutePath()),
-                    "-o", escape(tempDir.getAbsolutePath()),
-                    "-numConsumers", "10",
-                    "-J", //recursive Json
-                    "-t" //plain text in content
-            };
-            TikaCLI.main(params);
-            reader = new InputStreamReader(
-                    new FileInputStream(new File(tempDir, 
"test_recursive_embedded.docx.json")), IOUtils.UTF_8);
-            List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
-            assertEquals(12, metadataList.size());
-            
assertTrue(metadataList.get(6).get(RecursiveParserWrapper.TIKA_CONTENT).contains("human
 events"));
-        } finally {
-            IOUtils.closeQuietly(reader);
-            //reset in case something went horribly wrong
-            System.setOut(new PrintStream(os, true, IOUtils.UTF_8.name()));
-            FileUtils.deleteDirectory(tempDir);
-        }
-    }
-
-
-    public static String escape(String path) {
-        if (path.indexOf(' ') > -1){
-            return '"'+path+'"';
-        }
-        return path;
-    }
 }

Added: 
tika/trunk/tika-app/src/test/resources/log4j_batch_process_test.properties
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/test/resources/log4j_batch_process_test.properties?rev=1670749&view=auto
==============================================================================
--- tika/trunk/tika-app/src/test/resources/log4j_batch_process_test.properties (added)
+++ tika/trunk/tika-app/src/test/resources/log4j_batch_process_test.properties Wed Apr  1 18:27:23 2015
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#info,debug, error,fatal ...
+log4j.rootLogger=info,stdout
+
+#console
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+
+
+log4j.appender.stdout.layout.ConversionPattern=MY_CUSTOM_LOG_CONFIG %m%n

Modified: 
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java?rev=1670749&r1=1670748&r2=1670749&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java Wed Apr  1 18:27:23 2015
@@ -127,15 +127,17 @@ public class BatchProcess implements Cal
      */
     public ParallelFileProcessingResult call()
             throws InterruptedException {
-        if (alreadyExecuted) {
-            throw new IllegalStateException("Can only execute BatchRunner 
once.");
-        }
-        //redirect streams
-        try {
-            outputStreamWriter = new PrintStream(System.err, true, 
IOUtils.UTF_8.toString());
-        } catch (IOException e) {
-            throw new RuntimeException("Can't redirect streams");
-        }
+        if (alreadyExecuted) {
+            throw new IllegalStateException("Can only execute BatchRunner once.");
+        }
+        //redirect streams; all organic warnings should go to System.err;
+        //System.err should be redirected to System.out
+        PrintStream sysErr = System.err;
+        try {
+            outputStreamWriter = new PrintStream(sysErr, true, IOUtils.UTF_8.toString());
+        } catch (IOException e) {
+            throw new RuntimeException("Can't redirect streams");
+        }
         System.setErr(System.out);
 
         ParallelFileProcessingResult result = null;
@@ -152,13 +154,13 @@ public class BatchProcess implements Cal
             TimeoutChecker timeoutChecker = new TimeoutChecker();
 
             try {
-                startConsumersManager();
-            } catch (BatchNoRestartError e) {
-                return new
-                        ParallelFileProcessingResult(0, 0, 0,
-                        0, BatchProcessDriverCLI.PROCESS_NO_RESTART_EXIT_CODE,
-                        
CAUSE_FOR_TERMINATION.CONSUMERS_MANAGER_DIDNT_INIT_IN_TIME_NO_RESTART.toString());
-
+                startConsumersManager();
+            } catch (BatchNoRestartError e) {
+                return new
+                        ParallelFileProcessingResult(0, 0, 0, 0,
+                        0, BatchProcessDriverCLI.PROCESS_NO_RESTART_EXIT_CODE,
+                        CAUSE_FOR_TERMINATION.CONSUMERS_MANAGER_DIDNT_INIT_IN_TIME_NO_RESTART.toString());
+
             }
 
             State state = mainLoop(completionService, timeoutChecker);
@@ -198,13 +200,12 @@ public class BatchProcess implements Cal
 
                 if (futureResult != null) {
                     state.removed++;
-                    IFileProcessorFutureResult result = futureResult.get();
-                    if (result instanceof FileConsumerFutureResult) {
-                        state.consumersRemoved++;
-                        state.processed += ((FileConsumerFutureResult) 
result).getFilesProcessed();
-                    } else if (result instanceof 
FileResourceCrawlerFutureResult) {
-                        state.crawlersRemoved++;
-                        if (fileResourceCrawler.wasTimedOut()) {
+                    IFileProcessorFutureResult result = futureResult.get();
+                    if (result instanceof FileConsumerFutureResult) {
+                        state.consumersRemoved++;
+                    } else if (result instanceof FileResourceCrawlerFutureResult) {
+                        state.crawlersRemoved++;
+                        if (fileResourceCrawler.wasTimedOut()) {
                             causeForTermination = 
CAUSE_FOR_TERMINATION.CRAWLER_TIMED_OUT;
                             break;
                         }
@@ -290,13 +291,12 @@ public class BatchProcess implements Cal
                 break;
             }
             try {
-                IFileProcessorFutureResult result = future.get();
-                if (result instanceof FileConsumerFutureResult) {
-                    FileConsumerFutureResult consumerResult = 
(FileConsumerFutureResult) result;
-                    state.processed += consumerResult.getFilesProcessed();
-                    FileStarted fileStarted = consumerResult.getFileStarted();
-                    if (fileStarted != null
-                            && fileStarted.getElapsedMillis() > 
timeoutThresholdMillis) {
+                IFileProcessorFutureResult result = future.get();
+                if (result instanceof FileConsumerFutureResult) {
+                    FileConsumerFutureResult consumerResult = (FileConsumerFutureResult) result;
+                    FileStarted fileStarted = consumerResult.getFileStarted();
+                    if (fileStarted != null
+                            && fileStarted.getElapsedMillis() > timeoutThresholdMillis) {
                         logger.warn(fileStarted.getResourceId()
                                 + "\t caused a file processor to hang or 
crash. You may need to remove "
                                 + "this file from your input set and rerun.");
@@ -345,18 +345,23 @@ public class BatchProcess implements Cal
                     "< for " + fs.getElapsedMillis() + " milliseconds after it 
started." +
                     " This exceeds the maxTimeoutMillis parameter");
         }
-        double elapsed = ((double) new Date().getTime() - (double) 
state.start) / 1000.0;
-        return new
-            ParallelFileProcessingResult(considered, added, state.processed,
-                elapsed, exitStatus, state.causeForTermination.toString());
-    }
-
-    private class State {
-        long start = -1;
-        int processed = 0;
-        int numConsumers = 0;
-        int numNonConsumers = 0;
-        int removed = 0;
+        double elapsed = ((double) new Date().getTime() - (double) state.start) / 1000.0;
+        int processed = 0;
+        int numExceptions = 0;
+        for (FileResourceConsumer c : consumersManager.getConsumers()) {
+            processed += c.getNumResourcesConsumed();
+            numExceptions += c.getNumHandledExceptions();
+        }
+        return new
+            ParallelFileProcessingResult(considered, added, processed, numExceptions,
+                elapsed, exitStatus, state.causeForTermination.toString());
+    }
+
+    private class State {
+        long start = -1;
+        int numConsumers = 0;
+        int numNonConsumers = 0;
+        int removed = 0;
         int consumersRemoved = 0;
         int crawlersRemoved = 0;
         CAUSE_FOR_TERMINATION causeForTermination = null;
@@ -574,12 +579,14 @@ public class BatchProcess implements Cal
                 }
             }
         }
-    }
-
-    private class TimeoutFutureResult implements IFileProcessorFutureResult {
-        private final int timedOutCount;
-
-        private TimeoutFutureResult(final int timedOutCount) {
+    }
+
+    private class TimeoutFutureResult implements IFileProcessorFutureResult {
+        //used to be used when more than one timeout was allowed
+        //TODO: get rid of this?
+        private final int timedOutCount;
+
+        private TimeoutFutureResult(final int timedOutCount) {
             this.timedOutCount = timedOutCount;
         }
 

Modified: 
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java?rev=1670749&r1=1670748&r2=1670749&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java Wed Apr  1 18:27:23 2015
@@ -102,21 +102,21 @@ public class BatchProcessDriverCLI {
     }
 
     public void execute() throws Exception {
-
-        interruptWatcherThread.setDaemon(true);
-        interruptWatcherThread.start();
-        logger.trace("about to start");
-        start();
-        int loopsAfterRestartMessageReceived = 0;
-        while (!userInterrupted) {
+
+        interruptWatcherThread.setDaemon(true);
+        interruptWatcherThread.start();
+        logger.info("about to start driver");
+        start();
+        int loopsAfterRestartMessageReceived = 0;
+        while (!userInterrupted) {
             Integer exit = null;
-            try {
-                logger.trace("about to check exit value");
-                exit = process.exitValue();
-                logger.trace("exit value:" + exit);
-                stop();
-            } catch (IllegalThreadStateException e) {
-                //hasn't exited
+            try {
+                logger.trace("about to check exit value");
+                exit = process.exitValue();
+                logger.info("The child process has finished with an exit value of: "+exit);
+                stop();
+            } catch (IllegalThreadStateException e) {
+                //hasn't exited
                 logger.trace("process has not exited; 
IllegalThreadStateException");
             }
 
@@ -135,13 +135,13 @@ public class BatchProcessDriverCLI {
                     " exit=" + exit + " receivedRestartMsg=" + 
receivedRestartMsg);
             //if we've gotten the message via stdout to restart
             //but the process hasn't exited yet, give it another
-            //chance
-            if (receivedRestartMsg && exit == null) {
-                loopsAfterRestartMessageReceived++;
-                logger.trace("Must restart, still not exited; loops after 
restart: " +
-                            loopsAfterRestartMessageReceived);
-                continue;
-            }
+            //chance
+            if (receivedRestartMsg && exit == null) {
+                loopsAfterRestartMessageReceived++;
+                logger.warn("Must restart, still not exited; loops after restart: " +
+                            loopsAfterRestartMessageReceived);
+                continue;
+            }
             if (loopsAfterRestartMessageReceived > 
waitNumLoopsAfterRestartmessage) {
                 logger.trace("About to try to restart because:" +
                         " exit=" + exit + " receivedRestartMsg=" + 
receivedRestartMsg);
@@ -153,13 +153,13 @@ public class BatchProcessDriverCLI {
                 }
             } else if (exit != null && exit != 
BatchProcessDriverCLI.PROCESS_NO_RESTART_EXIT_CODE
                     && exit != 
BatchProcessDriverCLI.PROCESS_COMPLETED_SUCCESSFULLY) {
-                logger.trace("About to try to restart because:" +
-                            " exit=" + exit + " receivedRestartMsg=" + 
receivedRestartMsg);
-
-                if (exit != null && exit == 
BatchProcessDriverCLI.PROCESS_RESTART_EXIT_CODE) {
-                    logger.info("Restarting on expected restart code");
-                } else {
-                    logger.warn("Restarting on unexpected restart code: 
"+exit);
+                logger.trace("About to try to restart because:" +
+                            " exit=" + exit + " receivedRestartMsg=" + 
receivedRestartMsg);
+
+                if (exit == BatchProcessDriverCLI.PROCESS_RESTART_EXIT_CODE) {
+                    logger.info("Restarting on expected restart code");
+                } else {
+                    logger.warn("Restarting on unexpected restart code: 
"+exit);
                 }
                 boolean restarted = restart(exit, receivedRestartMsg);
                 if (!restarted) {
@@ -170,17 +170,18 @@ public class BatchProcessDriverCLI {
                 logger.trace("Will not restart: "+exit);
                 break;
             }
-        }
-        logger.trace("about to call shutdown driver now");
-        shutdownDriverNow();
-    }
-
-    private void shutdownDriverNow() {
-        if (process != null) {
-            for (int i = 0; i < 10; i++) {
-
-                logger.trace("trying to shut down: "+i);
-                try {
+        }
+        logger.trace("about to call shutdown driver now");
+        shutdownDriverNow();
+        logger.info("Process driver has completed");
+    }
+
+    private void shutdownDriverNow() {
+        if (process != null) {
+            for (int i = 0; i < 60; i++) {
+
+                logger.trace("trying to shut down: "+i);
+                try {
                     int exit = process.exitValue();
                     logger.trace("trying to stop:"+exit);
                     stop();
@@ -192,13 +193,13 @@ public class BatchProcessDriverCLI {
                 try {
                     Thread.sleep(1000);
                 } catch (InterruptedException e) {
-                    //swallow
-                }
-            }
-            logger.error("Process didn't stop after 10 seconds after shutdown. 
" +
-                    "I am forcefully killing it.");
-        }
-        interruptWatcherThread.interrupt();
+                    //swallow
+                }
+            }
+            logger.error("Process didn't stop after 60 seconds after shutdown. " +
+                    "I am forcefully killing it.");
+        }
+        interruptWatcherThread.interrupt();
     }
 
     public int getNumRestarts() {
@@ -260,12 +261,17 @@ public class BatchProcessDriverCLI {
         interruptWriter = new InterruptWriter(process.getOutputStream());
         interruptWriterThread = new Thread(interruptWriter);
         interruptWriterThread.start();
-
-    }
-
-    public void setRedirectChildProcessToStdOut(boolean 
redirectChildProcessToStdOut) {
-        this.redirectChildProcessToStdOut = redirectChildProcessToStdOut;
-    }
+
+    }
+
+    /**
+     * Typically only used for testing.  This determines whether or not
+     * to redirect child process's stdOut to driver's stdout
+     * @param redirectChildProcessToStdOut should the driver redirect the child's stdout
+     */
+    public void setRedirectChildProcessToStdOut(boolean redirectChildProcessToStdOut) {
+        this.redirectChildProcessToStdOut = redirectChildProcessToStdOut;
+    }
 
     /**
      * Class to watch stdin from the driver for anything that is typed.
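
Note: a minimal sketch of driving the batch child process directly, mirroring its stdout as the tests do (the launcher class is hypothetical and not part of this commit; it assumes the String[] constructor that TikaCLI already uses and lives in org.apache.tika.cli so the package-private BatchCommandLineBuilder is visible; argument values are illustrative):

    package org.apache.tika.cli;

    import org.apache.tika.batch.BatchProcessDriverCLI;

    // Hypothetical launcher.
    public class BatchDriverSketch {
        public static void main(String[] args) throws Exception {
            String[] childCommand = BatchCommandLineBuilder.build(
                    new String[]{"-i", "input", "-o", "output", "-numConsumers", "2"});
            BatchProcessDriverCLI driver = new BatchProcessDriverCLI(childCommand);
            // mirror the child process's stdout in this process, as the tests do
            driver.setRedirectChildProcessToStdOut(true);
            driver.execute();
            System.out.println("restarts: " + driver.getNumRestarts());
        }
    }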

Modified: 
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/FileResourceConsumer.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/FileResourceConsumer.java?rev=1670749&r1=1670748&r2=1670749&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/FileResourceConsumer.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/FileResourceConsumer.java Wed Apr  1 18:27:23 2015
@@ -20,21 +20,24 @@ package org.apache.tika.batch;
 import javax.xml.stream.XMLOutputFactory;
 import javax.xml.stream.XMLStreamException;
 import javax.xml.stream.XMLStreamWriter;
-import java.io.Closeable;
-import java.io.Flushable;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-import java.util.Date;
+import java.io.Closeable;
+import java.io.Flushable;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.util.Date;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.Callable;
-import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 
-import org.apache.log4j.Level;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import org.slf4j.MarkerFactory;
+import org.xml.sax.ContentHandler;
 
 
 /**
@@ -42,23 +45,29 @@ import org.slf4j.MarkerFactory;
  * goal of this class is to abstract out the multithreading
  * and recordkeeping components.
  * <p/>
- */
-public abstract class FileResourceConsumer implements 
Callable<IFileProcessorFutureResult> {
-
-    private static enum STATE {
-        NOT_YET_STARTED,
-        ACTIVELY_CONSUMING,
-        SWALLOWED_POISON,
+ */
+public abstract class FileResourceConsumer implements 
Callable<IFileProcessorFutureResult> {
+
+    private enum STATE {
+        NOT_YET_STARTED,
+        ACTIVELY_CONSUMING,
+        SWALLOWED_POISON,
         THREAD_INTERRUPTED,
         EXCEEDED_MAX_CONSEC_WAIT_MILLIS,
         ASKED_TO_SHUTDOWN,
         TIMED_OUT,
         CONSUMER_EXCEPTION,
         CONSUMER_ERROR,
-        COMPLETED
-    }
-
-    public static String TIME_OUT = "timeout";
+        COMPLETED
+    }
+
+    public static String TIMED_OUT = "timed_out";
+    public static String OOM = "oom";
+    public static String IO_IS = "io_on_inputstream";
+    public static String IO_OS = "io_on_outputstream";
+    public static String PARSE_ERR = "parse_err";
+    public static String PARSE_EX = "parse_ex";
+
     public static String ELAPSED_MILLIS = "elapsedMS";
 
     private static AtomicInteger numConsumers = new AtomicInteger(-1);
@@ -248,34 +257,35 @@ public abstract class FileResourceConsum
             FileStarted tmp = currentFile;
             if (tmp == null) {
                 return null;
-            }
-            if (tmp.getElapsedMillis() > staleThresholdMillis) {
-                setEndedState(STATE.TIMED_OUT);
-                logWithResourceId(Level.FATAL, TIME_OUT,
-                        tmp.getResourceId(), ELAPSED_MILLIS, 
Long.toString(tmp.getElapsedMillis()));
-                return tmp;
-            }
-        }
-        return null;
-    }
-
-    protected void logWithResourceId(Level level, String type, String 
resourceId, String... attrs) {
-        logWithResourceId(level, type, resourceId, null, attrs);
-    }
-
-    /**
-     * Use this for structured output that captures resourceId and other 
attributes.
-     *
-     * @param level level
-     * @param type entity name for exception
-     * @param resourceId resourceId string
-     * @param t throwable can be null
-     * @param attrs (array of key0, value0, key1, value1, etc.)
-     */
-    protected void logWithResourceId(Level level, String type, String 
resourceId, Throwable t, String... attrs) {
-
-        StringWriter writer = new StringWriter();
-        try {
+            }
+            if (tmp.getElapsedMillis() > staleThresholdMillis) {
+                setEndedState(STATE.TIMED_OUT);
+                logger.error("{}", getXMLifiedLogMsg(
+                        TIMED_OUT,
+                        tmp.getResourceId(),
+                        ELAPSED_MILLIS, Long.toString(tmp.getElapsedMillis())));
+                return tmp;
+            }
+        }
+        return null;
+    }
+
+    protected String getXMLifiedLogMsg(String type, String resourceId, String... attrs) {
+        return getXMLifiedLogMsg(type, resourceId, null, attrs);
+    }
+
+    /**
+     * Use this for structured output that captures resourceId and other attributes.
+     *
+     * @param type entity name for exception
+     * @param resourceId resourceId string
+     * @param t throwable can be null
+     * @param attrs (array of key0, value0, key1, value1, etc.)
+     */
+    protected String getXMLifiedLogMsg(String type, String resourceId, Throwable t, String... attrs) {
+
+        StringWriter writer = new StringWriter();
+        try {
             XMLStreamWriter xml = 
xmlOutputFactory.createXMLStreamWriter(writer);
             xml.writeStartDocument();
             xml.writeStartElement(type);
@@ -299,23 +309,7 @@ public abstract class FileResourceConsum
         } catch (XMLStreamException e) {
             logger.error("error writing xml stream for: " + resourceId, t);
         }
-        switch (level.toInt()) {
-            case Level.FATAL_INT:
-                logger.error(MarkerFactory.getMarker("FATAL"), 
writer.toString());
-                break;
-            case Level.ERROR_INT:
-                logger.error(writer.toString());
-                break;
-            case Level.WARN_INT:
-                logger.warn(writer.toString());
-                break;
-            case Level.DEBUG_INT :
-                logger.debug(writer.toString());
-                break;
-            case Level.TRACE_INT :
-                logger.trace(writer.toString());
-                break;
-        };
+        return writer.toString();
     }
 
     private FileResource getNextFileResource() throws InterruptedException {
@@ -388,10 +382,49 @@ public abstract class FileResourceConsum
         synchronized(lock) {
             if (currentState == STATE.NOT_YET_STARTED ||
                     currentState == STATE.ACTIVELY_CONSUMING ||
-                    currentState == STATE.ASKED_TO_SHUTDOWN) {
-                currentState = cause;
-            }
-        }
-    }
-
+                    currentState == STATE.ASKED_TO_SHUTDOWN) {
+                currentState = cause;
+            }
+        }
+    }
+
+    /**
+     * Utility method to handle logging equivalently among all
+     * implementing classes.  Use, override or avoid as desired.
+     * <p>
+     * This will throw Errors, but it will catch all Exceptions and log them
+     * @param resourceId resourceId
+     * @param parser parser to use
+     * @param is inputStream (will be closed by this method!)
+     * @param handler handler for the content
+     * @param m metadata
+     * @param parseContext parse context
+     * @throws Throwable
+     */
+    protected void parse(final String resourceId, final Parser parser, InputStream is,
+                         final ContentHandler handler,
+                         final Metadata m, final ParseContext parseContext) throws Throwable {
+
+        try {
+            parser.parse(is, handler, m, parseContext);
+        } catch (Throwable t) {
+            if (t instanceof OutOfMemoryError) {
+                logger.error(getXMLifiedLogMsg(OOM,
+                        resourceId, t));
+                throw t;
+            } else if (t instanceof Error) {
+                logger.error(getXMLifiedLogMsg(PARSE_ERR,
+                        resourceId, t));
+                throw t;
+            } else {
+                //warn, but do not rethrow
+                logger.warn(getXMLifiedLogMsg(PARSE_EX,
+                        resourceId, t));
+                incrementHandledExceptions();
+            }
+        } finally {
+            close(is);
+        }
+    }
+
 }
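
Note: a minimal sketch of how an implementing consumer might call the shared parse(...) helper added above (the sample class, its constructor wiring and the parser/handler choices are illustrative and not part of this commit; it assumes the usual processFileResource(FileResource) contract):

    package org.apache.tika.batch.sample;

    import java.io.InputStream;
    import java.util.concurrent.ArrayBlockingQueue;

    import org.apache.tika.batch.FileResource;
    import org.apache.tika.batch.FileResourceConsumer;
    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.parser.AutoDetectParser;
    import org.apache.tika.parser.ParseContext;
    import org.apache.tika.sax.BodyContentHandler;

    // Hypothetical consumer used only to illustrate the new helper.
    class SampleConsumer extends FileResourceConsumer {

        SampleConsumer(ArrayBlockingQueue<FileResource> queue) {
            super(queue);
        }

        @Override
        public boolean processFileResource(FileResource fileResource) {
            Metadata m = fileResource.getMetadata();
            try {
                InputStream is = fileResource.openInputStream();
                // parse(...) logs OutOfMemoryErrors and other Errors in the
                // XML-ified format and rethrows them; it logs and swallows
                // Exceptions via incrementHandledExceptions(); it always
                // closes the stream.
                parse(fileResource.getResourceId(), new AutoDetectParser(), is,
                        new BodyContentHandler(), m, new ParseContext());
                return true;
            } catch (Throwable t) {
                if (t instanceof Error) {
                    throw (Error) t;
                }
                return false;
            }
        }
    }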

Modified: 
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/ParallelFileProcessingResult.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/ParallelFileProcessingResult.java?rev=1670749&r1=1670748&r2=1670749&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/ParallelFileProcessingResult.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/ParallelFileProcessingResult.java Wed Apr  1 18:27:23 2015
@@ -18,22 +18,26 @@ package org.apache.tika.batch;
  */
 
 public class ParallelFileProcessingResult {
-    private final int considered;
-    private final int added;
-    private final int consumed;
-    private final double secondsElapsed;
-    private final int exitStatus;
-    private final String causeForTermination;
-
-    public ParallelFileProcessingResult(int considered, int added, int 
consumed, double secondsElapsed,
-                                        int exitStatus,
-                                        String causeForTermination) {
-        this.considered = considered;
-        this.added = added;
-        this.consumed = consumed;
-        this.secondsElapsed = secondsElapsed;
-        this.exitStatus = exitStatus;
-        this.causeForTermination = causeForTermination;
+    private final int considered;
+    private final int added;
+    private final int consumed;
+    private final int numberHandledExceptions;
+    private final double secondsElapsed;
+    private final int exitStatus;
+    private final String causeForTermination;
+
+    public ParallelFileProcessingResult(int considered, int added,
+                                        int consumed, int numberHandledExceptions,
+                                        double secondsElapsed,
+                                        int exitStatus,
+                                        String causeForTermination) {
+        this.considered = considered;
+        this.added = added;
+        this.consumed = consumed;
+        this.numberHandledExceptions = numberHandledExceptions;
+        this.secondsElapsed = secondsElapsed;
+        this.exitStatus = exitStatus;
+        this.causeForTermination = causeForTermination;
     }
 
     /**
@@ -75,12 +79,16 @@ public class ParallelFileProcessingResul
      * @return seconds elapsed since the start of the batch processing
      */
     public double secondsElapsed() {
-        return secondsElapsed;
-    }
-
-    /**
-     *
-     * @return intendedExitStatus
+        return secondsElapsed;
+    }
+
+    public int getNumberHandledExceptions() {
+        return numberHandledExceptions;
+    }
+
+    /**
+     *
+     * @return intendedExitStatus
      */
     public int getExitStatus() {
         return exitStatus;
@@ -89,12 +97,13 @@ public class ParallelFileProcessingResul
     @Override
     public String toString() {
         return "ParallelFileProcessingResult{" +
-                "considered=" + considered +
-                ", added=" + added +
-                ", consumed=" + consumed +
-                ", secondsElapsed=" + secondsElapsed +
-                ", exitStatus=" + exitStatus +
-                ", causeForTermination='" + causeForTermination + '\'' +
+                "considered=" + considered +
+                ", added=" + added +
+                ", consumed=" + consumed +
+                ", numberHandledExceptions=" + numberHandledExceptions +
+                ", secondsElapsed=" + secondsElapsed +
+                ", exitStatus=" + exitStatus +
+                ", causeForTermination='" + causeForTermination + '\'' +
                 '}';
     }
 }
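
Note: a minimal sketch of the widened constructor and the new accessor (all argument values and the cause string are illustrative, not real batch output):

    import org.apache.tika.batch.ParallelFileProcessingResult;

    // Hypothetical demo of the new numberHandledExceptions field.
    public class ResultSketch {
        public static void main(String[] args) {
            // considered, added, consumed, numberHandledExceptions,
            // secondsElapsed, exitStatus, causeForTermination
            ParallelFileProcessingResult result =
                    new ParallelFileProcessingResult(100, 100, 98, 2,
                            12.5, 0, "completed");
            System.out.println(result);
            System.out.println("handled exceptions: " + result.getNumberHandledExceptions());
        }
    }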

Modified: 
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/AbstractFSConsumer.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/AbstractFSConsumer.java?rev=1670749&r1=1670748&r2=1670749&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/AbstractFSConsumer.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/AbstractFSConsumer.java Wed Apr  1 18:27:23 2015
@@ -19,13 +19,12 @@ package org.apache.tika.batch.fs;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.concurrent.ArrayBlockingQueue;
-
-import org.apache.log4j.Level;
-import org.apache.tika.batch.BatchNoRestartError;
-import org.apache.tika.batch.FileResource;
-import org.apache.tika.batch.FileResourceConsumer;
+import java.io.OutputStream;
+import java.util.concurrent.ArrayBlockingQueue;
+
+import org.apache.tika.batch.BatchNoRestartError;
+import org.apache.tika.batch.FileResource;
+import org.apache.tika.batch.FileResourceConsumer;
 import org.apache.tika.batch.OutputStreamFactory;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
@@ -52,54 +51,32 @@ public abstract class AbstractFSConsumer
         OutputStream os = null;
         try {
             os = fsOSFactory.getOutputStream(fileResource.getMetadata());
-        } catch (IOException e) {
-            //This can happen if the disk has run out of space,
-            //or if there was a failure with mkdirs in fsOSFactory
-            logWithResourceId(Level.FATAL, "ioe_opening_os",
-                    fileResource.getResourceId(), e);
-            throw new BatchNoRestartError("IOException trying to open output 
stream for " +
-                    fileResource.getResourceId() + " :: " + e.getMessage());
-        }
-        return os;
-    }
-
-    protected InputStream getInputStream(FileResource fileResource) {
-        InputStream is = null;
+        } catch (IOException e) {
+            //This can happen if the disk has run out of space,
+            //or if there was a failure with mkdirs in fsOSFactory
+            logger.error("{}", getXMLifiedLogMsg(IO_OS,
+                    fileResource.getResourceId(), e));
+            throw new BatchNoRestartError("IOException trying to open output stream for " +
+                    fileResource.getResourceId() + " :: " + e.getMessage());
+        }
+        return os;
+    }
+
+    /**
+     *
+     * @param fileResource
+     * @return inputStream, can be null if there is an exception opening IS
+     */
+    protected InputStream getInputStream(FileResource fileResource) {
+        InputStream is = null;
         try {
             is = fileResource.openInputStream();
         } catch (IOException e) {
-            logWithResourceId(Level.FATAL, "ioe_opening_is",
-                    fileResource.getResourceId(), e);
+            logger.warn("{}", getXMLifiedLogMsg(IO_IS,
+                    fileResource.getResourceId(), e));
             flushAndClose(is);
         }
-        return is;
-    }
-
-    protected void parse(final String resourceId, final Parser parser, 
InputStream is,
-                         final ContentHandler handler,
-                         final Metadata m, final ParseContext parseContext) 
throws Throwable {
-
-        Throwable thrown = null;
-        try {
-            parser.parse(is, handler, m, parseContext);
-        } catch (Throwable t) {
-            if (t instanceof OutOfMemoryError) {
-                logWithResourceId(Level.ERROR, "oom",
-                        resourceId, t);
-            } else if (t instanceof Error) {
-                logWithResourceId(Level.ERROR, "parse_err",
-                        resourceId, t);
-            } else {
-                logWithResourceId(Level.WARN, "parse_ex",
-                        resourceId, t);
-                incrementHandledExceptions();
-            }
-            thrown = t;
-        } finally {
-            close(is);
-        }
-        if (thrown != null) {
-            throw thrown;
-        }
-    }
-}
+        return is;
+    }
+
+}

Modified: 
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/BasicTikaFSConsumer.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/BasicTikaFSConsumer.java?rev=1670749&r1=1670748&r2=1670749&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/BasicTikaFSConsumer.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/BasicTikaFSConsumer.java Wed Apr  1 18:27:23 2015
@@ -19,13 +19,12 @@ package org.apache.tika.batch.fs;
 
 import java.io.InputStream;
 import java.io.OutputStream;
-import java.io.UnsupportedEncodingException;
-import java.util.concurrent.ArrayBlockingQueue;
-
-import org.apache.log4j.Level;
-import org.apache.tika.batch.FileResource;
-import org.apache.tika.batch.OutputStreamFactory;
-import org.apache.tika.batch.ParserFactory;
+import java.io.UnsupportedEncodingException;
+import java.util.concurrent.ArrayBlockingQueue;
+
+import org.apache.tika.batch.FileResource;
+import org.apache.tika.batch.OutputStreamFactory;
+import org.apache.tika.batch.ParserFactory;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.io.IOUtils;
 import org.apache.tika.parser.ParseContext;
@@ -87,14 +86,14 @@ public class BasicTikaFSConsumer extends
         }
         ContentHandler handler;
         try {
-            handler = contentHandlerFactory.getNewContentHandler(os, 
getOutputEncoding());
-        } catch (UnsupportedEncodingException e) {
-            incrementHandledExceptions();
-            logWithResourceId(Level.FATAL, "output_encoding_ex",
-                    fileResource.getResourceId(), e);
-            flushAndClose(os);
-            throw new RuntimeException(e.getMessage());
-        }
+            handler = contentHandlerFactory.getNewContentHandler(os, getOutputEncoding());
+        } catch (UnsupportedEncodingException e) {
+            incrementHandledExceptions();
+            logger.error(getXMLifiedLogMsg("output_encoding_ex",
+                    fileResource.getResourceId(), e));
+            flushAndClose(os);
+            throw new RuntimeException(e.getMessage());
+        }
 
         //now actually call parse!
         Throwable thrown = null;

Modified: 
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java?rev=1670749&r1=1670748&r2=1670749&view=diff
==============================================================================
--- 
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java
 (original)
+++ 
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java
 Wed Apr  1 18:27:23 2015
@@ -16,29 +16,24 @@ package org.apache.tika.batch.fs;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-import java.io.File;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.concurrent.ExecutorService;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.CommandLineParser;
 import org.apache.commons.cli.GnuParser;
-import org.apache.commons.cli.HelpFormatter;
-import org.apache.commons.cli.Option;
-import org.apache.commons.cli.Options;
-import org.apache.log4j.BasicConfigurator;
-import org.apache.log4j.ConsoleAppender;
-import org.apache.log4j.Level;
-import org.apache.log4j.PatternLayout;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
 import org.apache.tika.batch.BatchProcess;
 import org.apache.tika.batch.BatchProcessDriverCLI;
-import org.apache.tika.batch.ParallelFileProcessingResult;
+import org.apache.tika.batch.ParallelFileProcessingResult;
 import org.apache.tika.batch.builders.BatchProcessBuilder;
 import org.apache.tika.batch.builders.CommandLineParserBuilder;
 import org.apache.tika.io.IOUtils;
@@ -48,39 +43,42 @@ import org.slf4j.LoggerFactory;
 import org.slf4j.MarkerFactory;
 
 public class FSBatchProcessCLI {
+
     public static String FINISHED_STRING = "Main thread in TikaFSBatchCLI has 
finished processing.";
 
     private static Logger logger = 
LoggerFactory.getLogger(FSBatchProcessCLI.class);
     private final Options options;
 
     public FSBatchProcessCLI(String[] args) throws IOException {
-        TikaInputStream configIs = null;
-        try {
-            configIs = getConfigInputStream(args);
-            CommandLineParserBuilder builder = new CommandLineParserBuilder();
-            options = builder.build(configIs);
-        } finally {
+        TikaInputStream configIs = null;
+        try {
+            configIs = getConfigInputStream(args, true);
+            CommandLineParserBuilder builder = new CommandLineParserBuilder();
+            options = builder.build(configIs);
+        } finally {
             IOUtils.closeQuietly(configIs);
         }
     }
 
     public void usage() {
         HelpFormatter helpFormatter = new HelpFormatter();
-        helpFormatter.printHelp("tika filesystem batch", options);
-    }
-
-    private TikaInputStream getConfigInputStream(String[] args) throws 
IOException {
-        TikaInputStream is = null;
-        File batchConfigFile = getConfigFile(args);
-        if (batchConfigFile != null) {
+        helpFormatter.printHelp("tika filesystem batch", options);
+    }
+
+    private TikaInputStream getConfigInputStream(String[] args, boolean 
logDefault) throws IOException {
+        TikaInputStream is = null;
+        File batchConfigFile = getConfigFile(args);
+        if (batchConfigFile != null) {
             //this will throw IOException if it can't find a specified config 
file
-            //better to throw an exception than silently back off to default.
-            is = TikaInputStream.get(batchConfigFile);
-        } else {
-            logger.info("No config file set via -bc, relying on 
default-tika-batch-config.xml");
-            is = TikaInputStream.get(
-                    
FSBatchProcessCLI.class.getResourceAsStream("default-tika-batch-config.xml"));
-        }
+            //better to throw an exception than silently back off to default.
+            is = TikaInputStream.get(batchConfigFile);
+        } else {
+            if (logDefault) {
+                logger.info("No config file set via -bc, relying on 
default-tika-batch-config.xml");
+            }
+            is = TikaInputStream.get(
+                    
FSBatchProcessCLI.class.getResourceAsStream("default-tika-batch-config.xml"));
+        }
         return is;
     }
 
@@ -104,13 +102,13 @@ public class FSBatchProcessCLI {
         }
 
         BatchProcessBuilder b = new BatchProcessBuilder();
-        TikaInputStream is = null;
-        BatchProcess process = null;
-        try {
-            is = getConfigInputStream(args);
-            process = b.build(is, mapArgs);
-        } finally {
-            IOUtils.closeQuietly(is);
+        TikaInputStream is = null;
+        BatchProcess process = null;
+        try {
+            is = getConfigInputStream(args, false);
+            process = b.build(is, mapArgs);
+        } finally {
+            IOUtils.closeQuietly(is);
         }
         final Thread mainThread = Thread.currentThread();
 
@@ -134,22 +132,11 @@ public class FSBatchProcessCLI {
                 }
             }
         }
-        return configFile;
-    }
-
-
+        return configFile;
+    }
+
     public static void main(String[] args) throws Exception {
-        //if no log4j config file has been set via
-        //sysprops, use BasicConfigurator
-        //TODO: figure out if this can cleanly be moved to pure slf4j?
-        String log4jFile = System.getProperty("log4j.configuration");
-        if (log4jFile == null || log4jFile.trim().length()==0) {
-            ConsoleAppender appender = new ConsoleAppender();
-            appender.setLayout(new PatternLayout("%m%n"));
-            appender.setWriter(new OutputStreamWriter(System.out, 
IOUtils.UTF_8.name()));
-            BasicConfigurator.configure(appender);
-            org.apache.log4j.Logger.getRootLogger().setLevel(Level.INFO);
-        }
+
         try{
             FSBatchProcessCLI cli = new FSBatchProcessCLI(args);
             cli.execute(args);
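
With the BasicConfigurator fallback removed from main(), the batch process no longer self-configures log4j; the removed code read the log4j.configuration system property, and that remains the way to point the process at a config file. A hypothetical launcher sketch, not part of this commit, showing the kind of child-JVM command line this implies (the properties file name and classpath handling are illustrative):

    import java.util.ArrayList;
    import java.util.List;

    public class BatchLauncherSketch {
        public static void main(String[] args) throws Exception {
            List<String> cmd = new ArrayList<String>();
            cmd.add("java");
            // Point log4j at an explicit config now that the programmatic
            // BasicConfigurator fallback is gone.
            cmd.add("-Dlog4j.configuration=log4j_batch_process.properties");
            cmd.add("-cp");
            cmd.add(System.getProperty("java.class.path"));
            cmd.add("org.apache.tika.batch.fs.FSBatchProcessCLI");
            int exit = new ProcessBuilder(cmd).inheritIO().start().waitFor();
            System.exit(exit);
        }
    }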

Modified: 
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/RecursiveParserWrapperFSConsumer.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/RecursiveParserWrapperFSConsumer.java?rev=1670749&r1=1670748&r2=1670749&view=diff
==============================================================================
--- 
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/RecursiveParserWrapperFSConsumer.java
 (original)
+++ 
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/RecursiveParserWrapperFSConsumer.java
 Wed Apr  1 18:27:23 2015
@@ -22,10 +22,9 @@ import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
 import java.util.LinkedList;
-import java.util.List;
+import java.util.List;
 import java.util.concurrent.ArrayBlockingQueue;
 
-import org.apache.log4j.Level;
 import org.apache.tika.batch.FileResource;
 import org.apache.tika.batch.OutputStreamFactory;
 import org.apache.tika.batch.ParserFactory;
@@ -130,14 +129,16 @@ public class RecursiveParserWrapperFSCon
         Writer writer = null;
 
         try {
-            writer = new OutputStreamWriter(os, getOutputEncoding());
-            JsonMetadataList.toJson(metadataList, writer);
-        } catch (Exception e) {
-            logWithResourceId(Level.ERROR, "json_ex",
-                    fileResource.getResourceId(), e);
-        } finally {
-            flushAndClose(writer);
-        }
+            writer = new OutputStreamWriter(os, getOutputEncoding());
+            JsonMetadataList.toJson(metadataList, writer);
+        } catch (Exception e) {
+            //this is a stop the world kind of thing
+            logger.error("{}", getXMLifiedLogMsg(IO_OS+"json",
+                    fileResource.getResourceId(), e));
+            throw new RuntimeException(e);
+        } finally {
+            flushAndClose(writer);
+        }
 
         if (thrown != null) {
             if (thrown instanceof Error) {
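
The json_ex handler above changes from log-and-continue to log-and-rethrow. A minimal, self-contained sketch of that fail-fast behavior (an assumed simplification of the consumer's write path; JsonMetadataList is the Tika serializer already used in this hunk):

    import java.io.StringWriter;
    import java.io.Writer;
    import java.util.LinkedList;
    import java.util.List;

    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.metadata.serialization.JsonMetadataList;

    public class JsonWriteSketch {
        public static void main(String[] args) {
            List<Metadata> metadataList = new LinkedList<Metadata>();
            metadataList.add(new Metadata());
            Writer writer = new StringWriter();
            try {
                JsonMetadataList.toJson(metadataList, writer);
            } catch (Exception e) {
                // A failure to serialize the metadata list is treated as a
                // stop-the-world condition: rethrow instead of quietly continuing.
                throw new RuntimeException(e);
            }
            System.out.println(writer.toString());
        }
    }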

Modified: tika/trunk/tika-batch/src/test/resources/log4j.properties
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/log4j.properties?rev=1670749&r1=1670748&r2=1670749&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/resources/log4j.properties (original)
+++ tika/trunk/tika-batch/src/test/resources/log4j.properties Wed Apr  1 
18:27:23 2015
@@ -1,8 +1,22 @@
-
-log4j.rootLogger=OFF,A1
-
-#for debugging
-#log4j.rootLogger=TRACE,A1
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+log4j.rootLogger=OFF
+
+#for debugging
+#log4j.rootLogger=TRACE,A1
 
 log4j.appender.A1=org.apache.log4j.ConsoleAppender
 

Modified: tika/trunk/tika-batch/src/test/resources/log4j_process.properties
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/log4j_process.properties?rev=1670749&r1=1670748&r2=1670749&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/resources/log4j_process.properties (original)
+++ tika/trunk/tika-batch/src/test/resources/log4j_process.properties Wed Apr  
1 18:27:23 2015
@@ -1,8 +1,24 @@
-
-log4j.rootLogger=OFF,A1
-
-#for debugging
-#log4j.rootLogger=TRACE,A1
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#This is used by the batch process; see log4j.properties for the driver
+
+log4j.rootLogger=OFF
+
+#for debugging
+#log4j.rootLogger=TRACE,A1
 
 log4j.appender.A1=org.apache.log4j.ConsoleAppender
 

