Author: tallison
Date: Wed May 13 13:49:36 2015
New Revision: 1679211
URL: http://svn.apache.org/r1679211
Log:
TIKA-1629 fix eol-style to LF in *.java *.properties and select *.xml
Modified:
tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java
tika/trunk/tika-app/src/main/resources/log4j.properties
tika/trunk/tika-app/src/main/resources/log4j_batch_process.properties
tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java
tika/trunk/tika-app/src/test/resources/log4j_batch_process_test.properties
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/FileResourceCrawler.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/Interrupter.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/ParallelFileProcessingResult.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/StatusReporter.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/builders/SimpleLogReporterBuilder.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/AbstractFSConsumer.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/BasicTikaFSConsumer.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/RecursiveParserWrapperFSConsumer.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java
tika/trunk/tika-batch/src/test/resources/log4j.properties
tika/trunk/tika-batch/src/test/resources/log4j_process.properties
tika/trunk/tika-batch/src/test/resources/tika-batch-config-MockConsumersBuilder.xml
tika/trunk/tika-batch/src/test/resources/tika-batch-config-broken.xml
tika/trunk/tika-batch/src/test/resources/tika-batch-config-test.xml
tika/trunk/tika-core/src/main/java/org/apache/tika/exception/AccessPermissionException.java
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/AccessPermissions.java
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToHTMLContentHandler.java
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java
tika/trunk/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java
tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTextExtractorTest.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/AccessChecker.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java
tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/pdf/PDFParser.properties
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/AccessCheckerTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/RarParserTest.java
tika/trunk/tika-parsers/src/test/resources/test-documents/mock/example.xml
tika/trunk/tika-parsers/src/test/resources/test-documents/mock/fake_oom.xml
tika/trunk/tika-parsers/src/test/resources/test-documents/mock/heavy_hang.xml
tika/trunk/tika-parsers/src/test/resources/test-documents/mock/nothing_bad.xml
tika/trunk/tika-parsers/src/test/resources/test-documents/mock/null_pointer.xml
tika/trunk/tika-parsers/src/test/resources/test-documents/mock/null_pointer_no_msg.xml
tika/trunk/tika-parsers/src/test/resources/test-documents/mock/real_oom.xml
tika/trunk/tika-parsers/src/test/resources/test-documents/mock/sleep.xml
tika/trunk/tika-parsers/src/test/resources/test-documents/mock/sleep_interruptible.xml
tika/trunk/tika-parsers/src/test/resources/test-documents/mock/sleep_not_interruptible.xml
tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataBase.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/HTMLHelper.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/RichTextContentHandler.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaDetectors.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaMimeTypes.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaParsers.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/writer/TarWriter.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/writer/TextMessageBodyWriter.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/writer/ZipWriter.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/LanguageResourceTest.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/StackTraceOffTest.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaDetectorsTest.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaMimeTypesTest.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaParsersTest.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaVersionTest.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaWelcomeTest.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/TranslateResourceTest.java
Modified:
tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java (original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/BatchCommandLineBuilder.java Wed May 13 13:49:36 2015
@@ -39,13 +39,13 @@ class BatchCommandLineBuilder {
static Pattern JVM_OPTS_PATTERN = Pattern.compile("^(--?)J(.+)");
protected static String[] build(String[] args) throws IOException {
- Map<String, String> processArgs = new LinkedHashMap<String, String>();
- Map<String, String> jvmOpts = new LinkedHashMap<String,String>();
- //take the args, and divide them into process args and options for
- //the child jvm process (i.e. log files, etc)
- mapifyArgs(args, processArgs, jvmOpts);
-
- //now modify processArgs in place
+ Map<String, String> processArgs = new LinkedHashMap<String, String>();
+ Map<String, String> jvmOpts = new LinkedHashMap<String,String>();
+ //take the args, and divide them into process args and options for
+ //the child jvm process (i.e. log files, etc)
+ mapifyArgs(args, processArgs, jvmOpts);
+
+ //now modify processArgs in place
translateCommandLine(args, processArgs);
//maybe the user specified a different classpath?!
@@ -56,23 +56,23 @@ class BatchCommandLineBuilder {
if (cp.contains(" ")){
cp = "\""+cp+"\"";
}
- jvmOpts.put("-cp", cp);
- }
-
- boolean hasLog4j = false;
- for (String k : jvmOpts.keySet()) {
- if (k.startsWith("-Dlog4j.configuration=")) {
- hasLog4j = true;
- break;
- }
- }
- //use the log4j config file inside the app /resources/log4j_batch_process.properties
- if (! hasLog4j) {
- jvmOpts.put("-Dlog4j.configuration=\"log4j_batch_process.properties\"", "");
- }
- //now build the full command line
- List<String> fullCommand = new ArrayList<String>();
- fullCommand.add("java");
+ jvmOpts.put("-cp", cp);
+ }
+
+ boolean hasLog4j = false;
+ for (String k : jvmOpts.keySet()) {
+ if (k.startsWith("-Dlog4j.configuration=")) {
+ hasLog4j = true;
+ break;
+ }
+ }
+ //use the log4j config file inside the app /resources/log4j_batch_process.properties
+ if (! hasLog4j) {
+ jvmOpts.put("-Dlog4j.configuration=\"log4j_batch_process.properties\"", "");
+ }
+ //now build the full command line
+ List<String> fullCommand = new ArrayList<String>();
+ fullCommand.add("java");
for (Map.Entry<String, String> e : jvmOpts.entrySet()) {
fullCommand.add(e.getKey());
if (e.getValue().length() > 0) {
@@ -90,16 +90,16 @@ class BatchCommandLineBuilder {
return fullCommand.toArray(new String[fullCommand.size()]);
}
-
- /**
- * Take the input args and separate them into args that belong on the commandline
- * and those that belong as jvm args for the child process.
- * @param args -- literal args from TikaCLI commandline
- * @param commandLine args that should be part of the batch commandline
- * @param jvmArgs args that belong as jvm arguments for the child process
- */
- private static void mapifyArgs(final String[] args,
- final Map<String, String> commandLine,
+
+ /**
+ * Take the input args and separate them into args that belong on the commandline
+ * and those that belong as jvm args for the child process.
+ * @param args -- literal args from TikaCLI commandline
+ * @param commandLine args that should be part of the batch commandline
+ * @param jvmArgs args that belong as jvm arguments for the child process
+ */
+ private static void mapifyArgs(final String[] args,
+ final Map<String, String> commandLine,
final Map<String, String> jvmArgs) {
if (args.length == 0) {
@@ -198,9 +198,9 @@ class BatchCommandLineBuilder {
if (map.containsKey("--outputDir") || map.containsKey("-o")) {
String v1 = map.remove("--outputDir");
String v2 = map.remove("-o");
- String v = (v1 == null) ? v2 : v1;
- map.put("-outputDir", v);
- }
-
- }
-}
+ String v = (v1 == null) ? v2 : v1;
+ map.put("-outputDir", v);
+ }
+
+ }
+}
Modified: tika/trunk/tika-app/src/main/resources/log4j.properties
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/resources/log4j.properties?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/resources/log4j.properties (original)
+++ tika/trunk/tika-app/src/main/resources/log4j.properties Wed May 13 13:49:36 2015
@@ -1,24 +1,24 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#info,debug, error,fatal ...
-log4j.rootLogger=info,stderr
-
-#console
-log4j.appender.stderr=org.apache.log4j.ConsoleAppender
-log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
-log4j.appender.stderr.Target=System.err
-
-log4j.appender.stderr.layout.ConversionPattern= %-5p %m%n
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#info,debug, error,fatal ...
+log4j.rootLogger=info,stderr
+
+#console
+log4j.appender.stderr=org.apache.log4j.ConsoleAppender
+log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
+log4j.appender.stderr.Target=System.err
+
+log4j.appender.stderr.layout.ConversionPattern= %-5p %m%n
Modified: tika/trunk/tika-app/src/main/resources/log4j_batch_process.properties
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/resources/log4j_batch_process.properties?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/resources/log4j_batch_process.properties (original)
+++ tika/trunk/tika-app/src/main/resources/log4j_batch_process.properties Wed May 13 13:49:36 2015
@@ -1,24 +1,24 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#info,debug, error,fatal ...
-log4j.rootLogger=info,stdout
-
-#console
-log4j.appender.stdout=org.apache.log4j.ConsoleAppender
-log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
-
-
-log4j.appender.stdout.layout.ConversionPattern=%m%n
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#info,debug, error,fatal ...
+log4j.rootLogger=info,stdout
+
+#console
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+
+
+log4j.appender.stdout.layout.ConversionPattern=%m%n
Modified:
tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java (original)
+++ tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java Wed May 13 13:49:36 2015
@@ -1,136 +1,136 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.cli;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStream;
-import java.io.PrintStream;
-import java.io.Reader;
-import java.util.List;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.tika.io.IOUtils;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.serialization.JsonMetadataList;
-import org.apache.tika.parser.RecursiveParserWrapper;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
-public class TikaCLIBatchIntegrationTest {
-
- private File testDataFile = new File("src/test/resources/test-data");
-
- private File tempDir;
- private OutputStream out = null;
- private OutputStream err = null;
- private ByteArrayOutputStream outBuffer = null;
-
- @Before
- public void setup() throws Exception {
- tempDir = File.createTempFile("tika-cli-test-batch-", "");
- tempDir.delete();
- tempDir.mkdir();
- outBuffer = new ByteArrayOutputStream();
- PrintStream outWriter = new PrintStream(outBuffer, true, IOUtils.UTF_8.name());
- ByteArrayOutputStream errBuffer = new ByteArrayOutputStream();
- PrintStream errWriter = new PrintStream(errBuffer, true, IOUtils.UTF_8.name());
- out = System.out;
- err = System.err;
- System.setOut(outWriter);
- System.setErr(errWriter);
- }
-
- @After
- public void tearDown() throws Exception {
- System.setOut(new PrintStream(out, true, IOUtils.UTF_8.name()));
- System.setErr(new PrintStream(err, true, IOUtils.UTF_8.name()));
- FileUtils.deleteDirectory(tempDir);
- }
-
- @Test
- public void testSimplestBatchIntegration() throws Exception {
- String[] params = {escape(testDataFile.getAbsolutePath()),
- escape(tempDir.getAbsolutePath())};
- TikaCLI.main(params);
-
- assertTrue("bad_xml.xml.xml", new File(tempDir, "bad_xml.xml.xml").isFile());
- assertTrue("coffee.xls.xml", new File(tempDir, "coffee.xls.xml").exists());
- }
-
- @Test
- public void testBasicBatchIntegration() throws Exception {
- String[] params = {"-i", escape(testDataFile.getAbsolutePath()),
- "-o", escape(tempDir.getAbsolutePath()),
- "-numConsumers", "2"
- };
- TikaCLI.main(params);
-
- assertTrue("bad_xml.xml.xml", new File(tempDir, "bad_xml.xml.xml").isFile());
- assertTrue("coffee.xls.xml", new File(tempDir, "coffee.xls.xml").exists());
- }
-
- @Test
- public void testJsonRecursiveBatchIntegration() throws Exception {
- Reader reader = null;
- try {
- String[] params = {"-i", escape(testDataFile.getAbsolutePath()),
- "-o", escape(tempDir.getAbsolutePath()),
- "-numConsumers", "10",
- "-J", //recursive Json
- "-t" //plain text in content
- };
- TikaCLI.main(params);
- reader = new InputStreamReader(
- new FileInputStream(new File(tempDir, "test_recursive_embedded.docx.json")), IOUtils.UTF_8);
- List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
- assertEquals(12, metadataList.size());
- assertTrue(metadataList.get(6).get(RecursiveParserWrapper.TIKA_CONTENT).contains("human events"));
- } finally {
- IOUtils.closeQuietly(reader);
- }
- }
-
- @Test
- public void testProcessLogFileConfig() throws Exception {
- String[] params = {"-i", escape(testDataFile.getAbsolutePath()),
- "-o", escape(tempDir.getAbsolutePath()),
- "-numConsumers", "2",
- "-JDlog4j.configuration=log4j_batch_process_test.properties"};
- TikaCLI.main(params);
-
- assertTrue("bad_xml.xml.xml", new File(tempDir, "bad_xml.xml.xml").isFile());
- assertTrue("coffee.xls.xml", new File(tempDir, "coffee.xls.xml").exists());
- String sysOutString = new String(outBuffer.toByteArray(), IOUtils.UTF_8);
- assertTrue(sysOutString.contains("MY_CUSTOM_LOG_CONFIG"));
- }
-
- public static String escape(String path) {
- if (path.indexOf(' ') > -1) {
- return '"' + path + '"';
- }
- return path;
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.cli;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.io.Reader;
+import java.util.List;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.serialization.JsonMetadataList;
+import org.apache.tika.parser.RecursiveParserWrapper;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TikaCLIBatchIntegrationTest {
+
+ private File testDataFile = new File("src/test/resources/test-data");
+
+ private File tempDir;
+ private OutputStream out = null;
+ private OutputStream err = null;
+ private ByteArrayOutputStream outBuffer = null;
+
+ @Before
+ public void setup() throws Exception {
+ tempDir = File.createTempFile("tika-cli-test-batch-", "");
+ tempDir.delete();
+ tempDir.mkdir();
+ outBuffer = new ByteArrayOutputStream();
+ PrintStream outWriter = new PrintStream(outBuffer, true, IOUtils.UTF_8.name());
+ ByteArrayOutputStream errBuffer = new ByteArrayOutputStream();
+ PrintStream errWriter = new PrintStream(errBuffer, true, IOUtils.UTF_8.name());
+ out = System.out;
+ err = System.err;
+ System.setOut(outWriter);
+ System.setErr(errWriter);
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ System.setOut(new PrintStream(out, true, IOUtils.UTF_8.name()));
+ System.setErr(new PrintStream(err, true, IOUtils.UTF_8.name()));
+ FileUtils.deleteDirectory(tempDir);
+ }
+
+ @Test
+ public void testSimplestBatchIntegration() throws Exception {
+ String[] params = {escape(testDataFile.getAbsolutePath()),
+ escape(tempDir.getAbsolutePath())};
+ TikaCLI.main(params);
+
+ assertTrue("bad_xml.xml.xml", new File(tempDir, "bad_xml.xml.xml").isFile());
+ assertTrue("coffee.xls.xml", new File(tempDir, "coffee.xls.xml").exists());
+ }
+
+ @Test
+ public void testBasicBatchIntegration() throws Exception {
+ String[] params = {"-i", escape(testDataFile.getAbsolutePath()),
+ "-o", escape(tempDir.getAbsolutePath()),
+ "-numConsumers", "2"
+ };
+ TikaCLI.main(params);
+
+ assertTrue("bad_xml.xml.xml", new File(tempDir, "bad_xml.xml.xml").isFile());
+ assertTrue("coffee.xls.xml", new File(tempDir, "coffee.xls.xml").exists());
+ }
+
+ @Test
+ public void testJsonRecursiveBatchIntegration() throws Exception {
+ Reader reader = null;
+ try {
+ String[] params = {"-i", escape(testDataFile.getAbsolutePath()),
+ "-o", escape(tempDir.getAbsolutePath()),
+ "-numConsumers", "10",
+ "-J", //recursive Json
+ "-t" //plain text in content
+ };
+ TikaCLI.main(params);
+ reader = new InputStreamReader(
+ new FileInputStream(new File(tempDir, "test_recursive_embedded.docx.json")), IOUtils.UTF_8);
+ List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
+ assertEquals(12, metadataList.size());
+ assertTrue(metadataList.get(6).get(RecursiveParserWrapper.TIKA_CONTENT).contains("human events"));
+ } finally {
+ IOUtils.closeQuietly(reader);
+ }
+ }
+
+ @Test
+ public void testProcessLogFileConfig() throws Exception {
+ String[] params = {"-i", escape(testDataFile.getAbsolutePath()),
+ "-o", escape(tempDir.getAbsolutePath()),
+ "-numConsumers", "2",
+ "-JDlog4j.configuration=log4j_batch_process_test.properties"};
+ TikaCLI.main(params);
+
+ assertTrue("bad_xml.xml.xml", new File(tempDir, "bad_xml.xml.xml").isFile());
+ assertTrue("coffee.xls.xml", new File(tempDir, "coffee.xls.xml").exists());
+ String sysOutString = new String(outBuffer.toByteArray(), IOUtils.UTF_8);
+ assertTrue(sysOutString.contains("MY_CUSTOM_LOG_CONFIG"));
+ }
+
+ public static String escape(String path) {
+ if (path.indexOf(' ') > -1) {
+ return '"' + path + '"';
+ }
+ return path;
+ }
+
+}
Modified:
tika/trunk/tika-app/src/test/resources/log4j_batch_process_test.properties
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/test/resources/log4j_batch_process_test.properties?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-app/src/test/resources/log4j_batch_process_test.properties (original)
+++ tika/trunk/tika-app/src/test/resources/log4j_batch_process_test.properties Wed May 13 13:49:36 2015
@@ -1,24 +1,24 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#info,debug, error,fatal ...
-log4j.rootLogger=info,stdout
-
-#console
-log4j.appender.stdout=org.apache.log4j.ConsoleAppender
-log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
-
-
-log4j.appender.stdout.layout.ConversionPattern=MY_CUSTOM_LOG_CONFIG %m%n
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#info,debug, error,fatal ...
+log4j.rootLogger=info,stdout
+
+#console
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+
+
+log4j.appender.stdout.layout.ConversionPattern=MY_CUSTOM_LOG_CONFIG %m%n
Modified:
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java (original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java Wed May 13 13:49:36 2015
@@ -28,15 +28,15 @@ import java.util.concurrent.ExecutionExc
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.tika.io.IOUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-
-/**
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.tika.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
* This is the main processor class for a single process.
* This class can only be run once.
* <p/>
@@ -65,15 +65,15 @@ public class BatchProcess implements Cal
CRAWLER_TIMED_OUT,
TIMED_OUT_CONSUMER,
USER_INTERRUPTION,
- BATCH_PROCESS_ALIVE_TOO_LONG,
- }
-
- private static final Logger logger;
- static {
- logger = LoggerFactory.getLogger(BatchProcess.class);
- }
-
- private PrintStream outputStreamWriter;
+ BATCH_PROCESS_ALIVE_TOO_LONG,
+ }
+
+ private static final Logger logger;
+ static {
+ logger = LoggerFactory.getLogger(BatchProcess.class);
+ }
+
+ private PrintStream outputStreamWriter;
// If a file hasn't been processed in this amount of time,
// report it to the console. When the directory crawler has stopped, the thread will
@@ -127,17 +127,17 @@ public class BatchProcess implements Cal
*/
public ParallelFileProcessingResult call()
throws InterruptedException {
- if (alreadyExecuted) {
- throw new IllegalStateException("Can only execute BatchRunner
once.");
- }
- //redirect streams; all organic warnings should go to System.err;
- //System.err should be redirected to System.out
- PrintStream sysErr = System.err;
- try {
- outputStreamWriter = new PrintStream(sysErr, true,
IOUtils.UTF_8.toString());
- } catch (IOException e) {
- throw new RuntimeException("Can't redirect streams");
- }
+ if (alreadyExecuted) {
+ throw new IllegalStateException("Can only execute BatchRunner
once.");
+ }
+ //redirect streams; all organic warnings should go to System.err;
+ //System.err should be redirected to System.out
+ PrintStream sysErr = System.err;
+ try {
+ outputStreamWriter = new PrintStream(sysErr, true,
IOUtils.UTF_8.toString());
+ } catch (IOException e) {
+ throw new RuntimeException("Can't redirect streams");
+ }
System.setErr(System.out);
ParallelFileProcessingResult result = null;
@@ -154,13 +154,13 @@ public class BatchProcess implements Cal
TimeoutChecker timeoutChecker = new TimeoutChecker();
try {
- startConsumersManager();
- } catch (BatchNoRestartError e) {
- return new
- ParallelFileProcessingResult(0, 0, 0, 0,
- 0, BatchProcessDriverCLI.PROCESS_NO_RESTART_EXIT_CODE,
-
CAUSE_FOR_TERMINATION.CONSUMERS_MANAGER_DIDNT_INIT_IN_TIME_NO_RESTART.toString());
-
+ startConsumersManager();
+ } catch (BatchNoRestartError e) {
+ return new
+ ParallelFileProcessingResult(0, 0, 0, 0,
+ 0, BatchProcessDriverCLI.PROCESS_NO_RESTART_EXIT_CODE,
+
CAUSE_FOR_TERMINATION.CONSUMERS_MANAGER_DIDNT_INIT_IN_TIME_NO_RESTART.toString());
+
}
State state = mainLoop(completionService, timeoutChecker);
@@ -200,12 +200,12 @@ public class BatchProcess implements Cal
if (futureResult != null) {
state.removed++;
- IFileProcessorFutureResult result = futureResult.get();
- if (result instanceof FileConsumerFutureResult) {
- state.consumersRemoved++;
- } else if (result instanceof
FileResourceCrawlerFutureResult) {
- state.crawlersRemoved++;
- if (fileResourceCrawler.wasTimedOut()) {
+ IFileProcessorFutureResult result = futureResult.get();
+ if (result instanceof FileConsumerFutureResult) {
+ state.consumersRemoved++;
+ } else if (result instanceof
FileResourceCrawlerFutureResult) {
+ state.crawlersRemoved++;
+ if (fileResourceCrawler.wasTimedOut()) {
causeForTermination =
CAUSE_FOR_TERMINATION.CRAWLER_TIMED_OUT;
break;
}
@@ -229,13 +229,13 @@ public class BatchProcess implements Cal
} catch (Throwable e) {
if (isNonRestart(e)) {
causeForTermination =
CAUSE_FOR_TERMINATION.MAIN_LOOP_EXCEPTION_NO_RESTART;
- } else {
- causeForTermination =
CAUSE_FOR_TERMINATION.MAIN_LOOP_EXCEPTION;
- }
- logger.error("Main loop execution exception: " +
e.getMessage());
- break;
- }
- }
+ } else {
+ causeForTermination =
CAUSE_FOR_TERMINATION.MAIN_LOOP_EXCEPTION;
+ }
+ logger.error("Main loop execution exception: " +
e.getMessage());
+ break;
+ }
+ }
state.causeForTermination = causeForTermination;
return state;
}
@@ -291,12 +291,12 @@ public class BatchProcess implements Cal
break;
}
try {
- IFileProcessorFutureResult result = future.get();
- if (result instanceof FileConsumerFutureResult) {
- FileConsumerFutureResult consumerResult =
(FileConsumerFutureResult) result;
- FileStarted fileStarted = consumerResult.getFileStarted();
- if (fileStarted != null
- && fileStarted.getElapsedMillis() >
timeoutThresholdMillis) {
+ IFileProcessorFutureResult result = future.get();
+ if (result instanceof FileConsumerFutureResult) {
+ FileConsumerFutureResult consumerResult =
(FileConsumerFutureResult) result;
+ FileStarted fileStarted = consumerResult.getFileStarted();
+ if (fileStarted != null
+ && fileStarted.getElapsedMillis() >
timeoutThresholdMillis) {
logger.warn(fileStarted.getResourceId()
+ "\t caused a file processor to hang or
crash. You may need to remove "
+ "this file from your input set and rerun.");
@@ -338,30 +338,30 @@ public class BatchProcess implements Cal
int exitStatus = getExitStatus(state.causeForTermination, restartMsg);
//need to re-check, report, mark timed out consumers
- timeoutChecker.checkForTimedOutConsumers();
-
- for (FileStarted fs : timedOuts) {
- logger.warn("A parser was still working on >" + fs.getResourceId()
+
- "< for " + fs.getElapsedMillis() + " milliseconds after it
started." +
- " This exceeds the maxTimeoutMillis parameter");
- }
- double elapsed = ((double) new Date().getTime() - (double)
state.start) / 1000.0;
- int processed = 0;
- int numExceptions = 0;
- for (FileResourceConsumer c : consumersManager.getConsumers()) {
- processed += c.getNumResourcesConsumed();
- numExceptions += c.getNumHandledExceptions();
- }
- return new
- ParallelFileProcessingResult(considered, added, processed,
numExceptions,
- elapsed, exitStatus, state.causeForTermination.toString());
- }
-
- private class State {
- long start = -1;
- int numConsumers = 0;
- int numNonConsumers = 0;
- int removed = 0;
+ timeoutChecker.checkForTimedOutConsumers();
+
+ for (FileStarted fs : timedOuts) {
+ logger.warn("A parser was still working on >" + fs.getResourceId()
+
+ "< for " + fs.getElapsedMillis() + " milliseconds after it
started." +
+ " This exceeds the maxTimeoutMillis parameter");
+ }
+ double elapsed = ((double) new Date().getTime() - (double)
state.start) / 1000.0;
+ int processed = 0;
+ int numExceptions = 0;
+ for (FileResourceConsumer c : consumersManager.getConsumers()) {
+ processed += c.getNumResourcesConsumed();
+ numExceptions += c.getNumHandledExceptions();
+ }
+ return new
+ ParallelFileProcessingResult(considered, added, processed,
numExceptions,
+ elapsed, exitStatus, state.causeForTermination.toString());
+ }
+
+ private class State {
+ long start = -1;
+ int numConsumers = 0;
+ int numNonConsumers = 0;
+ int removed = 0;
int consumersRemoved = 0;
int crawlersRemoved = 0;
CAUSE_FOR_TERMINATION causeForTermination = null;
@@ -385,13 +385,13 @@ public class BatchProcess implements Cal
try {
timed.join(consumersManagerMaxMillis);
} catch (InterruptedException e) {
- logger.warn("interruption exception during consumers manager
shutdown");
- }
- if (timed.isAlive()) {
- logger.error("ConsumersManager did not start within " +
consumersManagerMaxMillis + "ms");
- throw new BatchNoRestartError("ConsumersManager did not start
within "+consumersManagerMaxMillis+"ms");
- }
- }
+ logger.warn("interruption exception during consumers manager
shutdown");
+ }
+ if (timed.isAlive()) {
+ logger.error("ConsumersManager did not start within " +
consumersManagerMaxMillis + "ms");
+ throw new BatchNoRestartError("ConsumersManager did not start
within "+consumersManagerMaxMillis+"ms");
+ }
+ }
private void shutdownConsumersManager() {
if (consumersManagerMaxMillis < 0) {
@@ -454,21 +454,21 @@ public class BatchProcess implements Cal
Throwable cause = e.getCause();
return cause != null && isNonRestart(cause);
}
-
- private int getExitStatus(CAUSE_FOR_TERMINATION causeForTermination,
String restartMsg) {
- if (causeForTermination ==
CAUSE_FOR_TERMINATION.MAIN_LOOP_EXCEPTION_NO_RESTART) {
-
logger.info(CAUSE_FOR_TERMINATION.MAIN_LOOP_EXCEPTION_NO_RESTART.name());
- return BatchProcessDriverCLI.PROCESS_NO_RESTART_EXIT_CODE;
- }
-
+
+ private int getExitStatus(CAUSE_FOR_TERMINATION causeForTermination,
String restartMsg) {
+ if (causeForTermination ==
CAUSE_FOR_TERMINATION.MAIN_LOOP_EXCEPTION_NO_RESTART) {
+
logger.info(CAUSE_FOR_TERMINATION.MAIN_LOOP_EXCEPTION_NO_RESTART.name());
+ return BatchProcessDriverCLI.PROCESS_NO_RESTART_EXIT_CODE;
+ }
+
if (restartMsg != null) {
- if
(restartMsg.equals(BATCH_CONSTANTS.BATCH_PROCESS_EXCEEDED_MAX_ALIVE_TIME.toString()))
{
- logger.warn(restartMsg);
- } else {
- logger.error(restartMsg);
- }
-
- //send over stdout wrapped in outputStreamWriter
+ if
(restartMsg.equals(BATCH_CONSTANTS.BATCH_PROCESS_EXCEEDED_MAX_ALIVE_TIME.toString()))
{
+ logger.warn(restartMsg);
+ } else {
+ logger.error(restartMsg);
+ }
+
+ //send over stdout wrapped in outputStreamWriter
outputStreamWriter.println(
BATCH_CONSTANTS.BATCH_PROCESS_FATAL_MUST_RESTART.toString() +
" >> " + restartMsg);
@@ -579,14 +579,14 @@ public class BatchProcess implements Cal
}
}
}
- }
-
- private class TimeoutFutureResult implements IFileProcessorFutureResult {
- //used to be used when more than one timeout was allowed
- //TODO: get rid of this?
- private final int timedOutCount;
-
- private TimeoutFutureResult(final int timedOutCount) {
+ }
+
+ private class TimeoutFutureResult implements IFileProcessorFutureResult {
+ //used to be used when more than one timeout was allowed
+ //TODO: get rid of this?
+ private final int timedOutCount;
+
+ private TimeoutFutureResult(final int timedOutCount) {
this.timedOutCount = timedOutCount;
}
Modified:
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
---
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java
(original)
+++
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java
Wed May 13 13:49:36 2015
@@ -26,15 +26,15 @@ import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
-import java.util.List;
-import java.util.Locale;
-
-import org.apache.tika.io.IOUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class BatchProcessDriverCLI {
-
+import java.util.List;
+import java.util.Locale;
+
+import org.apache.tika.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class BatchProcessDriverCLI {
+
/**
* This relies on an special exit values of 254 (do not restart),
* 0 ended correctly, 253 ended with exception (do restart)
@@ -42,13 +42,13 @@ public class BatchProcessDriverCLI {
public static final int PROCESS_RESTART_EXIT_CODE = 253;
//make sure this is above 255 to avoid stopping on system errors
//that is, if there is a system error (e.g. 143), you
- //should restart the process.
- public static final int PROCESS_NO_RESTART_EXIT_CODE = 254;
- public static final int PROCESS_COMPLETED_SUCCESSFULLY = 0;
- private static Logger logger =
LoggerFactory.getLogger(BatchProcessDriverCLI.class);
-
- private int maxProcessRestarts = -1;
- private long pulseMillis = 1000;
+ //should restart the process.
+ public static final int PROCESS_NO_RESTART_EXIT_CODE = 254;
+ public static final int PROCESS_COMPLETED_SUCCESSFULLY = 0;
+ private static Logger logger =
LoggerFactory.getLogger(BatchProcessDriverCLI.class);
+
+ private int maxProcessRestarts = -1;
+ private long pulseMillis = 1000;
//how many times to wait pulseMillis milliseconds if a restart
//message has been received through stdout, but the
@@ -102,21 +102,21 @@ public class BatchProcessDriverCLI {
}
public void execute() throws Exception {
-
- interruptWatcherThread.setDaemon(true);
- interruptWatcherThread.start();
- logger.info("about to start driver");
- start();
- int loopsAfterRestartMessageReceived = 0;
- while (!userInterrupted) {
+
+ interruptWatcherThread.setDaemon(true);
+ interruptWatcherThread.start();
+ logger.info("about to start driver");
+ start();
+ int loopsAfterRestartMessageReceived = 0;
+ while (!userInterrupted) {
Integer exit = null;
- try {
- logger.trace("about to check exit value");
- exit = process.exitValue();
- logger.info("The child process has finished with an exit value
of: "+exit);
- stop();
- } catch (IllegalThreadStateException e) {
- //hasn't exited
+ try {
+ logger.trace("about to check exit value");
+ exit = process.exitValue();
+ logger.info("The child process has finished with an exit value
of: "+exit);
+ stop();
+ } catch (IllegalThreadStateException e) {
+ //hasn't exited
logger.trace("process has not exited;
IllegalThreadStateException");
}
@@ -135,13 +135,13 @@ public class BatchProcessDriverCLI {
" exit=" + exit + " receivedRestartMsg=" +
receivedRestartMsg);
//if we've gotten the message via stdout to restart
//but the process hasn't exited yet, give it another
- //chance
- if (receivedRestartMsg && exit == null) {
- loopsAfterRestartMessageReceived++;
- logger.warn("Must restart, still not exited; loops after
restart: " +
- loopsAfterRestartMessageReceived);
- continue;
- }
+ //chance
+ if (receivedRestartMsg && exit == null) {
+ loopsAfterRestartMessageReceived++;
+ logger.warn("Must restart, still not exited; loops after
restart: " +
+ loopsAfterRestartMessageReceived);
+ continue;
+ }
if (loopsAfterRestartMessageReceived >
waitNumLoopsAfterRestartmessage) {
logger.trace("About to try to restart because:" +
" exit=" + exit + " receivedRestartMsg=" +
receivedRestartMsg);
@@ -153,13 +153,13 @@ public class BatchProcessDriverCLI {
}
} else if (exit != null && exit !=
BatchProcessDriverCLI.PROCESS_NO_RESTART_EXIT_CODE
&& exit !=
BatchProcessDriverCLI.PROCESS_COMPLETED_SUCCESSFULLY) {
- logger.trace("About to try to restart because:" +
- " exit=" + exit + " receivedRestartMsg=" +
receivedRestartMsg);
-
- if (exit == BatchProcessDriverCLI.PROCESS_RESTART_EXIT_CODE) {
- logger.info("Restarting on expected restart code");
- } else {
- logger.warn("Restarting on unexpected restart code:
"+exit);
+ logger.trace("About to try to restart because:" +
+ " exit=" + exit + " receivedRestartMsg=" +
receivedRestartMsg);
+
+ if (exit == BatchProcessDriverCLI.PROCESS_RESTART_EXIT_CODE) {
+ logger.info("Restarting on expected restart code");
+ } else {
+ logger.warn("Restarting on unexpected restart code:
"+exit);
}
boolean restarted = restart(exit, receivedRestartMsg);
if (!restarted) {
@@ -170,18 +170,18 @@ public class BatchProcessDriverCLI {
logger.trace("Will not restart: "+exit);
break;
}
- }
- logger.trace("about to call shutdown driver now");
- shutdownDriverNow();
- logger.info("Process driver has completed");
- }
-
- private void shutdownDriverNow() {
- if (process != null) {
- for (int i = 0; i < 60; i++) {
-
- logger.trace("trying to shut down: "+i);
- try {
+ }
+ logger.trace("about to call shutdown driver now");
+ shutdownDriverNow();
+ logger.info("Process driver has completed");
+ }
+
+ private void shutdownDriverNow() {
+ if (process != null) {
+ for (int i = 0; i < 60; i++) {
+
+ logger.trace("trying to shut down: "+i);
+ try {
int exit = process.exitValue();
logger.trace("trying to stop:"+exit);
stop();
@@ -193,13 +193,13 @@ public class BatchProcessDriverCLI {
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
- //swallow
- }
- }
- logger.error("Process didn't stop after 60 seconds after shutdown.
" +
- "I am forcefully killing it.");
- }
- interruptWatcherThread.interrupt();
+ //swallow
+ }
+ }
+ logger.error("Process didn't stop after 60 seconds after shutdown.
" +
+ "I am forcefully killing it.");
+ }
+ interruptWatcherThread.interrupt();
}
public int getNumRestarts() {
@@ -261,17 +261,17 @@ public class BatchProcessDriverCLI {
interruptWriter = new InterruptWriter(process.getOutputStream());
interruptWriterThread = new Thread(interruptWriter);
interruptWriterThread.start();
-
- }
-
- /**
- * Typically only used for testing. This determines whether or not
- * to redirect child process's stdOut to driver's stdout
- * @param redirectChildProcessToStdOut should the driver redirect the
child's stdout
- */
- public void setRedirectChildProcessToStdOut(boolean
redirectChildProcessToStdOut) {
- this.redirectChildProcessToStdOut = redirectChildProcessToStdOut;
- }
+
+ }
+
+ /**
+ * Typically only used for testing. This determines whether or not
+ * to redirect child process's stdOut to driver's stdout
+ * @param redirectChildProcessToStdOut should the driver redirect the
child's stdout
+ */
+ public void setRedirectChildProcessToStdOut(boolean
redirectChildProcessToStdOut) {
+ this.redirectChildProcessToStdOut = redirectChildProcessToStdOut;
+ }
/**
* Class to watch stdin from the driver for anything that is typed.
Modified:
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/FileResourceCrawler.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/FileResourceCrawler.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
---
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/FileResourceCrawler.java
(original)
+++
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/FileResourceCrawler.java
Wed May 13 13:49:36 2015
@@ -19,16 +19,16 @@ package org.apache.tika.batch;
import java.util.Date;
import java.util.concurrent.ArrayBlockingQueue;
-import java.util.concurrent.Callable;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.tika.extractor.DocumentSelector;
-import org.apache.tika.metadata.Metadata;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public abstract class FileResourceCrawler implements
Callable<IFileProcessorFutureResult> {
-
+import java.util.concurrent.Callable;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.tika.extractor.DocumentSelector;
+import org.apache.tika.metadata.Metadata;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public abstract class FileResourceCrawler implements
Callable<IFileProcessorFutureResult> {
+
protected final static int SKIPPED = 0;
protected final static int ADDED = 1;
protected final static int STOP_NOW = 2;
@@ -38,13 +38,13 @@ public abstract class FileResourceCrawle
private volatile boolean isActive = true;
private volatile boolean timedOut = false;
- //how long to pause if can't add to queue
- private static final long PAUSE_INCREMENT_MILLIS = 1000;
-
- protected static Logger logger =
LoggerFactory.getLogger(FileResourceCrawler.class.toString());
-
- private int maxFilesToAdd = -1;
- private int maxFilesToConsider = -1;
+ //how long to pause if can't add to queue
+ private static final long PAUSE_INCREMENT_MILLIS = 1000;
+
+ protected static Logger logger =
LoggerFactory.getLogger(FileResourceCrawler.class.toString());
+
+ private int maxFilesToAdd = -1;
+ private int maxFilesToConsider = -1;
private final ArrayBlockingQueue<FileResource> queue;
private final int numConsumers;
Modified:
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/Interrupter.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/Interrupter.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/Interrupter.java
(original)
+++ tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/Interrupter.java
Wed May 13 13:49:36 2015
@@ -19,27 +19,27 @@ package org.apache.tika.batch;
import java.io.BufferedReader;
import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.concurrent.Callable;
-
-import org.apache.tika.io.IOUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-
-/**
+import java.io.InputStreamReader;
+import java.util.concurrent.Callable;
+
+import org.apache.tika.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
* Class that waits for input on System.in. If the user enters a keystroke on
* System.in, this will send a signal to the FileResourceRunner to shutdown
gracefully.
*
* <p>
* In the future, this may implement a common IInterrupter interface for more
flexibility.
- */
-public class Interrupter implements Callable<IFileProcessorFutureResult> {
-
- private Logger logger = LoggerFactory.getLogger(Interrupter.class);
- public IFileProcessorFutureResult call(){
- try{
- BufferedReader reader = new BufferedReader(new
InputStreamReader(System.in, IOUtils.UTF_8));
+ */
+public class Interrupter implements Callable<IFileProcessorFutureResult> {
+
+ private Logger logger = LoggerFactory.getLogger(Interrupter.class);
+ public IFileProcessorFutureResult call(){
+ try{
+ BufferedReader reader = new BufferedReader(new
InputStreamReader(System.in, IOUtils.UTF_8));
while (true){
if (reader.ready()){
reader.readLine();
Modified:
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/ParallelFileProcessingResult.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/ParallelFileProcessingResult.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
---
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/ParallelFileProcessingResult.java
(original)
+++
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/ParallelFileProcessingResult.java
Wed May 13 13:49:36 2015
@@ -18,26 +18,26 @@ package org.apache.tika.batch;
*/
public class ParallelFileProcessingResult {
- private final int considered;
- private final int added;
- private final int consumed;
- private final int numberHandledExceptions;
- private final double secondsElapsed;
- private final int exitStatus;
- private final String causeForTermination;
-
- public ParallelFileProcessingResult(int considered, int added,
- int consumed, int
numberHandledExceptions,
- double secondsElapsed,
- int exitStatus,
- String causeForTermination) {
- this.considered = considered;
- this.added = added;
- this.consumed = consumed;
- this.numberHandledExceptions = numberHandledExceptions;
- this.secondsElapsed = secondsElapsed;
- this.exitStatus = exitStatus;
- this.causeForTermination = causeForTermination;
+ private final int considered;
+ private final int added;
+ private final int consumed;
+ private final int numberHandledExceptions;
+ private final double secondsElapsed;
+ private final int exitStatus;
+ private final String causeForTermination;
+
+ public ParallelFileProcessingResult(int considered, int added,
+ int consumed, int
numberHandledExceptions,
+ double secondsElapsed,
+ int exitStatus,
+ String causeForTermination) {
+ this.considered = considered;
+ this.added = added;
+ this.consumed = consumed;
+ this.numberHandledExceptions = numberHandledExceptions;
+ this.secondsElapsed = secondsElapsed;
+ this.exitStatus = exitStatus;
+ this.causeForTermination = causeForTermination;
}
/**
@@ -79,16 +79,16 @@ public class ParallelFileProcessingResul
* @return seconds elapsed since the start of the batch processing
*/
public double secondsElapsed() {
- return secondsElapsed;
- }
-
- public int getNumberHandledExceptions() {
- return numberHandledExceptions;
- }
-
- /**
- *
- * @return intendedExitStatus
+ return secondsElapsed;
+ }
+
+ public int getNumberHandledExceptions() {
+ return numberHandledExceptions;
+ }
+
+ /**
+ *
+ * @return intendedExitStatus
*/
public int getExitStatus() {
return exitStatus;
@@ -97,13 +97,13 @@ public class ParallelFileProcessingResul
@Override
public String toString() {
return "ParallelFileProcessingResult{" +
- "considered=" + considered +
- ", added=" + added +
- ", consumed=" + consumed +
- ", numberHandledExceptions=" + numberHandledExceptions +
- ", secondsElapsed=" + secondsElapsed +
- ", exitStatus=" + exitStatus +
- ", causeForTermination='" + causeForTermination + '\'' +
+ "considered=" + considered +
+ ", added=" + added +
+ ", consumed=" + consumed +
+ ", numberHandledExceptions=" + numberHandledExceptions +
+ ", secondsElapsed=" + secondsElapsed +
+ ", exitStatus=" + exitStatus +
+ ", causeForTermination='" + causeForTermination + '\'' +
'}';
}
}
Modified:
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/StatusReporter.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/StatusReporter.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
---
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/StatusReporter.java
(original)
+++
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/StatusReporter.java
Wed May 13 13:49:36 2015
@@ -19,24 +19,24 @@ package org.apache.tika.batch;
import java.text.NumberFormat;
import java.util.Date;
-import java.util.Locale;
-import java.util.concurrent.Callable;
-
-import org.apache.tika.util.DurationFormatUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Basic class to use for reporting status from both the crawler and the
consumers.
+import java.util.Locale;
+import java.util.concurrent.Callable;
+
+import org.apache.tika.util.DurationFormatUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Basic class to use for reporting status from both the crawler and the
consumers.
* This wakes up roughly every {@link #sleepMillis} and log.info's a status
report.
*/
-
-public class StatusReporter implements Callable<IFileProcessorFutureResult> {
-
- private final Logger logger =
LoggerFactory.getLogger(StatusReporter.class);
-
- //require references to these so that the
- //StatusReporter can query them when it wakes up
+
+public class StatusReporter implements Callable<IFileProcessorFutureResult> {
+
+ private final Logger logger =
LoggerFactory.getLogger(StatusReporter.class);
+
+ //require references to these so that the
+ //StatusReporter can query them when it wakes up
private final ConsumersManager consumersManager;
private final FileResourceCrawler crawler;
Modified:
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/builders/SimpleLogReporterBuilder.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/builders/SimpleLogReporterBuilder.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
---
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/builders/SimpleLogReporterBuilder.java
(original)
+++
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/builders/SimpleLogReporterBuilder.java
Wed May 13 13:49:36 2015
@@ -30,13 +30,13 @@ public class SimpleLogReporterBuilder im
@Override
public StatusReporter build(FileResourceCrawler crawler, ConsumersManager
consumersManager,
- Node n, Map<String, String>
commandlineArguments) {
-
- Map<String, String> attributes = XMLDOMUtil.mapifyAttrs(n,
commandlineArguments);
- long sleepMillis =
PropsUtil.getLong(attributes.get("reporterSleepMillis"), 1000L);
- long staleThresholdMillis =
PropsUtil.getLong(attributes.get("reporterStaleThresholdMillis"), 500000L);
- StatusReporter reporter = new StatusReporter(crawler,
consumersManager);
- reporter.setSleepMillis(sleepMillis);
+ Node n, Map<String, String>
commandlineArguments) {
+
+ Map<String, String> attributes = XMLDOMUtil.mapifyAttrs(n,
commandlineArguments);
+ long sleepMillis =
PropsUtil.getLong(attributes.get("reporterSleepMillis"), 1000L);
+ long staleThresholdMillis =
PropsUtil.getLong(attributes.get("reporterStaleThresholdMillis"), 500000L);
+ StatusReporter reporter = new StatusReporter(crawler,
consumersManager);
+ reporter.setSleepMillis(sleepMillis);
reporter.setStaleThresholdMillis(staleThresholdMillis);
return reporter;
}
Modified:
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/AbstractFSConsumer.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/AbstractFSConsumer.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
---
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/AbstractFSConsumer.java
(original)
+++
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/AbstractFSConsumer.java
Wed May 13 13:49:36 2015
@@ -19,12 +19,12 @@ package org.apache.tika.batch.fs;
import java.io.IOException;
import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.concurrent.ArrayBlockingQueue;
-
-import org.apache.tika.batch.BatchNoRestartError;
-import org.apache.tika.batch.FileResource;
-import org.apache.tika.batch.FileResourceConsumer;
+import java.io.OutputStream;
+import java.util.concurrent.ArrayBlockingQueue;
+
+import org.apache.tika.batch.BatchNoRestartError;
+import org.apache.tika.batch.FileResource;
+import org.apache.tika.batch.FileResourceConsumer;
import org.apache.tika.batch.OutputStreamFactory;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
@@ -51,32 +51,32 @@ public abstract class AbstractFSConsumer
OutputStream os = null;
try {
os = fsOSFactory.getOutputStream(fileResource.getMetadata());
- } catch (IOException e) {
- //This can happen if the disk has run out of space,
- //or if there was a failure with mkdirs in fsOSFactory
- logger.error("{}", getXMLifiedLogMsg(IO_OS,
- fileResource.getResourceId(), e));
- throw new BatchNoRestartError("IOException trying to open output
stream for " +
- fileResource.getResourceId() + " :: " + e.getMessage());
- }
- return os;
- }
-
- /**
- *
- * @param fileResource
- * @return inputStream, can be null if there is an exception opening IS
- */
- protected InputStream getInputStream(FileResource fileResource) {
- InputStream is = null;
- try {
- is = fileResource.openInputStream();
- } catch (IOException e) {
- logger.warn("{}", getXMLifiedLogMsg(IO_IS,
- fileResource.getResourceId(), e));
- flushAndClose(is);
- }
- return is;
- }
-
-}
+ } catch (IOException e) {
+ //This can happen if the disk has run out of space,
+ //or if there was a failure with mkdirs in fsOSFactory
+ logger.error("{}", getXMLifiedLogMsg(IO_OS,
+ fileResource.getResourceId(), e));
+ throw new BatchNoRestartError("IOException trying to open output
stream for " +
+ fileResource.getResourceId() + " :: " + e.getMessage());
+ }
+ return os;
+ }
+
+ /**
+ *
+ * @param fileResource
+ * @return inputStream, can be null if there is an exception opening IS
+ */
+ protected InputStream getInputStream(FileResource fileResource) {
+ InputStream is = null;
+ try {
+ is = fileResource.openInputStream();
+ } catch (IOException e) {
+ logger.warn("{}", getXMLifiedLogMsg(IO_IS,
+ fileResource.getResourceId(), e));
+ flushAndClose(is);
+ }
+ return is;
+ }
+
+}
Modified:
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/BasicTikaFSConsumer.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/BasicTikaFSConsumer.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
---
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/BasicTikaFSConsumer.java
(original)
+++
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/BasicTikaFSConsumer.java
Wed May 13 13:49:36 2015
@@ -19,12 +19,12 @@ package org.apache.tika.batch.fs;
import java.io.InputStream;
import java.io.OutputStream;
-import java.io.UnsupportedEncodingException;
-import java.util.concurrent.ArrayBlockingQueue;
-
-import org.apache.tika.batch.FileResource;
-import org.apache.tika.batch.OutputStreamFactory;
-import org.apache.tika.batch.ParserFactory;
+import java.io.UnsupportedEncodingException;
+import java.util.concurrent.ArrayBlockingQueue;
+
+import org.apache.tika.batch.FileResource;
+import org.apache.tika.batch.OutputStreamFactory;
+import org.apache.tika.batch.ParserFactory;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.io.IOUtils;
import org.apache.tika.parser.ParseContext;
@@ -86,14 +86,14 @@ public class BasicTikaFSConsumer extends
}
ContentHandler handler;
try {
- handler = contentHandlerFactory.getNewContentHandler(os,
getOutputEncoding());
- } catch (UnsupportedEncodingException e) {
- incrementHandledExceptions();
- logger.error(getXMLifiedLogMsg("output_encoding_ex",
- fileResource.getResourceId(), e));
- flushAndClose(os);
- throw new RuntimeException(e.getMessage());
- }
+ handler = contentHandlerFactory.getNewContentHandler(os,
getOutputEncoding());
+ } catch (UnsupportedEncodingException e) {
+ incrementHandledExceptions();
+ logger.error(getXMLifiedLogMsg("output_encoding_ex",
+ fileResource.getResourceId(), e));
+ flushAndClose(os);
+ throw new RuntimeException(e.getMessage());
+ }
//now actually call parse!
Throwable thrown = null;
Modified:
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
---
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java
(original)
+++
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/FSBatchProcessCLI.java
Wed May 13 13:49:36 2015
@@ -16,69 +16,69 @@ package org.apache.tika.batch.fs;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-import java.io.File;
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.concurrent.ExecutorService;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
-import org.apache.commons.cli.HelpFormatter;
-import org.apache.commons.cli.Option;
-import org.apache.commons.cli.Options;
-import org.apache.tika.batch.BatchProcess;
-import org.apache.tika.batch.BatchProcessDriverCLI;
-import org.apache.tika.batch.ParallelFileProcessingResult;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.tika.batch.BatchProcess;
+import org.apache.tika.batch.BatchProcessDriverCLI;
+import org.apache.tika.batch.ParallelFileProcessingResult;
import org.apache.tika.batch.builders.BatchProcessBuilder;
-import org.apache.tika.batch.builders.CommandLineParserBuilder;
-import org.apache.tika.io.IOUtils;
-import org.apache.tika.io.TikaInputStream;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.slf4j.MarkerFactory;
-
-public class FSBatchProcessCLI {
-
- public static String FINISHED_STRING = "Main thread in TikaFSBatchCLI has
finished processing.";
-
- private static Logger logger =
LoggerFactory.getLogger(FSBatchProcessCLI.class);
- private final Options options;
-
- public FSBatchProcessCLI(String[] args) throws IOException {
- TikaInputStream configIs = null;
- try {
- configIs = getConfigInputStream(args, true);
- CommandLineParserBuilder builder = new CommandLineParserBuilder();
- options = builder.build(configIs);
- } finally {
+import org.apache.tika.batch.builders.CommandLineParserBuilder;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.io.TikaInputStream;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.slf4j.MarkerFactory;
+
+public class FSBatchProcessCLI {
+
+ public static String FINISHED_STRING = "Main thread in TikaFSBatchCLI has
finished processing.";
+
+ private static Logger logger =
LoggerFactory.getLogger(FSBatchProcessCLI.class);
+ private final Options options;
+
+ public FSBatchProcessCLI(String[] args) throws IOException {
+ TikaInputStream configIs = null;
+ try {
+ configIs = getConfigInputStream(args, true);
+ CommandLineParserBuilder builder = new CommandLineParserBuilder();
+ options = builder.build(configIs);
+ } finally {
IOUtils.closeQuietly(configIs);
}
}
public void usage() {
HelpFormatter helpFormatter = new HelpFormatter();
- helpFormatter.printHelp("tika filesystem batch", options);
- }
-
- private TikaInputStream getConfigInputStream(String[] args, boolean
logDefault) throws IOException {
- TikaInputStream is = null;
- File batchConfigFile = getConfigFile(args);
- if (batchConfigFile != null) {
+ helpFormatter.printHelp("tika filesystem batch", options);
+ }
+
+ private TikaInputStream getConfigInputStream(String[] args, boolean
logDefault) throws IOException {
+ TikaInputStream is = null;
+ File batchConfigFile = getConfigFile(args);
+ if (batchConfigFile != null) {
//this will throw IOException if it can't find a specified config
file
- //better to throw an exception than silently back off to default.
- is = TikaInputStream.get(batchConfigFile);
- } else {
- if (logDefault) {
- logger.info("No config file set via -bc, relying on
default-tika-batch-config.xml");
- }
- is = TikaInputStream.get(
-
FSBatchProcessCLI.class.getResourceAsStream("default-tika-batch-config.xml"));
- }
+ //better to throw an exception than silently back off to default.
+ is = TikaInputStream.get(batchConfigFile);
+ } else {
+ if (logDefault) {
+ logger.info("No config file set via -bc, relying on
default-tika-batch-config.xml");
+ }
+ is = TikaInputStream.get(
+
FSBatchProcessCLI.class.getResourceAsStream("default-tika-batch-config.xml"));
+ }
return is;
}
@@ -102,13 +102,13 @@ public class FSBatchProcessCLI {
}
BatchProcessBuilder b = new BatchProcessBuilder();
- TikaInputStream is = null;
- BatchProcess process = null;
- try {
- is = getConfigInputStream(args, false);
- process = b.build(is, mapArgs);
- } finally {
- IOUtils.closeQuietly(is);
+ TikaInputStream is = null;
+ BatchProcess process = null;
+ try {
+ is = getConfigInputStream(args, false);
+ process = b.build(is, mapArgs);
+ } finally {
+ IOUtils.closeQuietly(is);
}
final Thread mainThread = Thread.currentThread();
@@ -132,20 +132,20 @@ public class FSBatchProcessCLI {
}
}
}
- return configFile;
- }
-
- public static void main(String[] args) throws Exception {
-
- try{
- FSBatchProcessCLI cli = new FSBatchProcessCLI(args);
- cli.execute(args);
- } catch (Throwable t) {
- t.printStackTrace();
- logger.error(MarkerFactory.getMarker("FATAL"),
- "Fatal exception from FSBatchProcessCLI: " +
t.getMessage(), t);
- System.exit(BatchProcessDriverCLI.PROCESS_NO_RESTART_EXIT_CODE);
- }
- }
+ return configFile;
+ }
+
+ public static void main(String[] args) throws Exception {
+
+ try{
+ FSBatchProcessCLI cli = new FSBatchProcessCLI(args);
+ cli.execute(args);
+ } catch (Throwable t) {
+ t.printStackTrace();
+ logger.error(MarkerFactory.getMarker("FATAL"),
+ "Fatal exception from FSBatchProcessCLI: " +
t.getMessage(), t);
+ System.exit(BatchProcessDriverCLI.PROCESS_NO_RESTART_EXIT_CODE);
+ }
+ }
}
Modified:
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/RecursiveParserWrapperFSConsumer.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/RecursiveParserWrapperFSConsumer.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
---
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/RecursiveParserWrapperFSConsumer.java
(original)
+++
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/RecursiveParserWrapperFSConsumer.java
Wed May 13 13:49:36 2015
@@ -14,21 +14,21 @@ package org.apache.tika.batch.fs;
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
+ * limitations under the License.
+ */
+
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.LinkedList;
-import java.util.List;
-import java.util.concurrent.ArrayBlockingQueue;
-
-import org.apache.tika.batch.FileResource;
-import org.apache.tika.batch.OutputStreamFactory;
-import org.apache.tika.batch.ParserFactory;
-import org.apache.tika.config.TikaConfig;
+import java.util.List;
+import java.util.concurrent.ArrayBlockingQueue;
+
+import org.apache.tika.batch.FileResource;
+import org.apache.tika.batch.OutputStreamFactory;
+import org.apache.tika.batch.ParserFactory;
+import org.apache.tika.config.TikaConfig;
import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
@@ -127,16 +127,16 @@ public class RecursiveParserWrapperFSCon
Writer writer = null;
try {
- writer = new OutputStreamWriter(os, getOutputEncoding());
- JsonMetadataList.toJson(metadataList, writer);
- } catch (Exception e) {
- //this is a stop the world kind of thing
- logger.error("{}", getXMLifiedLogMsg(IO_OS+"json",
- fileResource.getResourceId(), e));
- throw new RuntimeException(e);
- } finally {
- flushAndClose(writer);
- }
+ writer = new OutputStreamWriter(os, getOutputEncoding());
+ JsonMetadataList.toJson(metadataList, writer);
+ } catch (Exception e) {
+ //this is a stop the world kind of thing
+ logger.error("{}", getXMLifiedLogMsg(IO_OS+"json",
+ fileResource.getResourceId(), e));
+ throw new RuntimeException(e);
+ } finally {
+ flushAndClose(writer);
+ }
if (thrown != null) {
if (thrown instanceof Error) {
Modified:
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
---
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
(original)
+++
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
Wed May 13 13:49:36 2015
@@ -66,13 +66,13 @@ public class BasicTikaFSConsumersBuilder
Long consumersManagerMaxMillis = null;
String consumersManagerMaxMillisString =
runtimeAttributes.get("consumersManagerMaxMillis");
if (consumersManagerMaxMillisString != null){
- consumersManagerMaxMillis =
PropsUtil.getLong(consumersManagerMaxMillisString, null);
- } else {
- Node consumersManagerMaxMillisNode =
node.getAttributes().getNamedItem("consumersManagerMaxMillis");
- if (consumersManagerMaxMillis == null &&
consumersManagerMaxMillisNode != null) {
- consumersManagerMaxMillis =
PropsUtil.getLong(consumersManagerMaxMillisNode.getNodeValue(),
- null);
- }
+ consumersManagerMaxMillis =
PropsUtil.getLong(consumersManagerMaxMillisString, null);
+ } else {
+ Node consumersManagerMaxMillisNode =
node.getAttributes().getNamedItem("consumersManagerMaxMillis");
+ if (consumersManagerMaxMillis == null &&
consumersManagerMaxMillisNode != null) {
+ consumersManagerMaxMillis =
PropsUtil.getLong(consumersManagerMaxMillisNode.getNodeValue(),
+ null);
+ }
}
TikaConfig config = null;
Modified:
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
---
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java
(original)
+++
tika/trunk/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java
Wed May 13 13:49:36 2015
@@ -31,16 +31,16 @@ import java.util.concurrent.ExecutionExc
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import org.apache.tika.io.IOUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.slf4j.MarkerFactory;
-
-/**
- * Simple single-threaded class that calls tika-app against every file in a
directory.
+import java.util.concurrent.Future;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.slf4j.MarkerFactory;
+
+/**
+ * Simple single-threaded class that calls tika-app against every file in a
directory.
*
* This is exceedingly robust. One file per process.
*
@@ -54,13 +54,13 @@ public class StrawManTikaAppDriver imple
private final int totalThreads;
private final int threadNum;
private int rootLen = -1;
- private File inputDir = null;
- private File outputDir = null;
- private String[] args = null;
- private Logger logger =
LoggerFactory.getLogger(StrawManTikaAppDriver.class);
-
-
- public StrawManTikaAppDriver(File inputDir, File outputDir, int
totalThreads, String[] args) {
+ private File inputDir = null;
+ private File outputDir = null;
+ private String[] args = null;
+ private Logger logger =
LoggerFactory.getLogger(StrawManTikaAppDriver.class);
+
+
+ public StrawManTikaAppDriver(File inputDir, File outputDir, int
totalThreads, String[] args) {
rootLen = inputDir.getAbsolutePath().length()+1;
this.inputDir = inputDir;
this.outputDir = outputDir;
@@ -97,14 +97,14 @@ public class StrawManTikaAppDriver imple
return 0;
}
}
- File outputFile = new File(outputDir,
f.getAbsolutePath().substring(rootLen)+".txt");
- outputFile.getAbsoluteFile().getParentFile().mkdirs();
- if (! outputFile.getParentFile().exists()) {
- logger.error(MarkerFactory.getMarker("FATAL"),
- "parent directory for "+ outputFile + " was not made!");
- throw new RuntimeException("couldn't make parent file for " +
outputFile);
- }
- List<String> commandLine = new ArrayList<String>();
+ File outputFile = new File(outputDir,
f.getAbsolutePath().substring(rootLen)+".txt");
+ outputFile.getAbsoluteFile().getParentFile().mkdirs();
+ if (! outputFile.getParentFile().exists()) {
+ logger.error(MarkerFactory.getMarker("FATAL"),
+ "parent directory for "+ outputFile + " was not made!");
+ throw new RuntimeException("couldn't make parent file for " +
outputFile);
+ }
+ List<String> commandLine = new ArrayList<String>();
for (String arg : args) {
commandLine.add(arg);
}