Author: tallison
Date: Wed Jun 24 19:49:50 2015
New Revision: 1687353
URL: http://svn.apache.org/r1687353
Log:
Add a test to ensure that the list reader for tika-batch properly creates
subdirectories.
Added:
tika/trunk/tika-batch/src/test/resources/test-input/hierarchical/
tika/trunk/tika-batch/src/test/resources/test-input/hierarchical/sub1a/
tika/trunk/tika-batch/src/test/resources/test-input/hierarchical/sub1a/sub2a/
tika/trunk/tika-batch/src/test/resources/test-input/hierarchical/sub1a/sub2a/test3.xml
tika/trunk/tika-batch/src/test/resources/test-input/hierarchical/sub1a/test2.xml
tika/trunk/tika-batch/src/test/resources/test-input/hierarchical/test1.xml
tika/trunk/tika-batch/src/test/resources/testFileList.txt
Modified:
tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchProcessTest.java
Modified:
tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchProcessTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchProcessTest.java?rev=1687353&r1=1687352&r2=1687353&view=diff
==============================================================================
---
tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchProcessTest.java
(original)
+++
tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/fs/BatchProcessTest.java
Wed Jun 24 19:49:50 2015
@@ -265,6 +265,33 @@ public class BatchProcessTest extends FS
assertContains("ConsumersManager did not shutdown within",
streamStrings.getOutString());
}
+ @Test
+ public void testHierarchicalWFileList() throws Exception {
+ //tests to make sure that hierarchy is maintained when reading from
+ //file list
+ //also tests that list actually works.
+ File outputDir = getNewOutputDir("hierarchical_file_list");
+
+ Map<String, String> args = getDefaultArgs("hierarchical", outputDir);
+ args.put("numConsumers", "1");
+ args.put("fileList", this.getClass().getResource("/testFileList.txt").getPath());
+ args.put("recursiveParserWrapper", "true");
+ args.put("basicHandlerType", "text");
+ args.put("outputSuffix", "json");
+ BatchProcessTestExecutor ex = new BatchProcessTestExecutor(args, "/tika-batch-config-MockConsumersBuilder.xml");
+ ex.execute();
+ File test1 = new File(outputDir, "test1.xml.json");
+ File test2 = new File(outputDir, "sub1a/test2.xml.json");
+ File test3 = new File(outputDir, "sub1a/sub2a/test3.xml.json");
+ assertTrue("test1 exists", test1.exists());
+ assertTrue("test1 length > 10", test1.length() > 10);
+ assertTrue(test3.exists() && test3.length() > 10);
+ File test2Dir = new File(outputDir, "sub1a");
+ //should be just the subdirectory, no actual test2 file
+ assertEquals(1, test2Dir.listFiles().length);
+ assertFalse(test2.exists());
+ }
+
private class BatchProcessTestExecutor {
private final Map<String, String> args;
private final String configPath;
Added:
tika/trunk/tika-batch/src/test/resources/test-input/hierarchical/sub1a/sub2a/test3.xml
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/test-input/hierarchical/sub1a/sub2a/test3.xml?rev=1687353&view=auto
==============================================================================
---
tika/trunk/tika-batch/src/test/resources/test-input/hierarchical/sub1a/sub2a/test3.xml
(added)
+++
tika/trunk/tika-batch/src/test/resources/test-input/hierarchical/sub1a/sub2a/test3.xml
Wed Jun 24 19:49:50 2015
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<mock>
+ <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
+ <write element="p">test_3</write>
+</mock>
\ No newline at end of file
Added:
tika/trunk/tika-batch/src/test/resources/test-input/hierarchical/sub1a/test2.xml
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/test-input/hierarchical/sub1a/test2.xml?rev=1687353&view=auto
==============================================================================
---
tika/trunk/tika-batch/src/test/resources/test-input/hierarchical/sub1a/test2.xml
(added)
+++
tika/trunk/tika-batch/src/test/resources/test-input/hierarchical/sub1a/test2.xml
Wed Jun 24 19:49:50 2015
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<mock>
+ <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
+ <write element="p">test_2</write>
+</mock>
\ No newline at end of file
Added:
tika/trunk/tika-batch/src/test/resources/test-input/hierarchical/test1.xml
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/test-input/hierarchical/test1.xml?rev=1687353&view=auto
==============================================================================
--- tika/trunk/tika-batch/src/test/resources/test-input/hierarchical/test1.xml
(added)
+++ tika/trunk/tika-batch/src/test/resources/test-input/hierarchical/test1.xml
Wed Jun 24 19:49:50 2015
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<mock>
+ <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
+ <write element="p">test_1</write>
+</mock>
\ No newline at end of file
Added: tika/trunk/tika-batch/src/test/resources/testFileList.txt
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/testFileList.txt?rev=1687353&view=auto
==============================================================================
--- tika/trunk/tika-batch/src/test/resources/testFileList.txt (added)
+++ tika/trunk/tika-batch/src/test/resources/testFileList.txt Wed Jun 24
19:49:50 2015
@@ -0,0 +1,2 @@
+test1.xml
+sub1a/sub2a/test3.xml
\ No newline at end of file