This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4519
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/TIKA-4519 by this push:
     new af7b25209 TIKA-4519 -- tika-app tests work
af7b25209 is described below

commit af7b25209b282e034abd7622168b40e68a8bbf18
Author: tallison <[email protected]>
AuthorDate: Mon Nov 3 14:39:32 2025 -0500

    TIKA-4519 -- tika-app tests work
---
 tika-app/pom.xml                                   |  7 +++
 .../java/org/apache/tika/cli/TikaCLIAsyncTest.java | 26 ++++++----
 .../test/java/org/apache/tika/cli/TikaCLITest.java | 14 +++---
 tika-pipes/tika-async-cli/pom.xml                  |  6 +++
 .../org/apache/tika/async/cli/PluginsWriter.java   | 14 ++++--
 .../org/apache/tika/async/cli/TikaAsyncCLI.java    |  1 +
 .../tika/async/cli/TikaConfigAsyncWriter.java      | 17 +------
 .../apache/tika/async/cli/AsyncProcessorTest.java  | 55 ++++++++++------------
 .../tika/async/cli/TikaConfigAsyncWriterTest.java  |  2 +-
 .../test/resources/configs/TIKA-4207-emitter.xml   | 29 ------------
 .../test/resources/configs/tika-config-default.xml | 21 +++++++++
 .../tika/pipes/emitter/fs/FileSystemEmitter.java   | 25 +++++++++-
 .../apache/tika/pipes/core/async/AsyncConfig.java  |  6 ++-
 .../tika/pipes/core/async/AsyncProcessor.java      |  2 +-
 14 files changed, 126 insertions(+), 99 deletions(-)

diff --git a/tika-app/pom.xml b/tika-app/pom.xml
index 97b0b6e16..28b558495 100644
--- a/tika-app/pom.xml
+++ b/tika-app/pom.xml
@@ -153,6 +153,13 @@
                   <type>jar</type>
                   <overWrite>true</overWrite>
                 </artifactItem>
+                <artifactItem>
+                  <groupId>org.apache.tika</groupId>
+                  <artifactId>tika-emitter-file-system</artifactId>
+                  <version>${project.version}</version>
+                  <type>jar</type>
+                  <overWrite>true</overWrite>
+                </artifactItem>
               </artifactItems>
             </configuration>
           </execution>
diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIAsyncTest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIAsyncTest.java
index b0f879d80..7ea7982c5 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIAsyncTest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIAsyncTest.java
@@ -43,13 +43,21 @@ public class TikaCLIAsyncTest {
 
     final static String JSON_TEMPLATE = """
             {
-              "pipesPluginsConfig" : {
+              "plugins" : {
                 "fetchers": {
                   "file-system-fetcher": {
-                    "basePath": "BASE_PATH",
+                    "basePath": "FETCHER_BASE_PATH",
                     "extractFileSystemMetadata": false
                   }
                 },
+                "emitters": {
+                  "file-system-emitter": {
+                    "basePath": "EMITTER_BASE_PATH",
+                    "fileExtension": "jsn",
+                    "onExists":"EXCEPTION",
+                    "prettyPrint": true
+                  }
+                },
                 "pf4j.pluginsDir": "PLUGINS_DIR"
               }
             }
@@ -73,15 +81,15 @@ public class TikaCLIAsyncTest {
     public static void setUpClass() throws Exception {
         ASYNC_CONFIG = Files.createTempFile(ASYNC_OUTPUT_DIR, "async-config-", ".xml");
         String xml = "<properties>" + "<async>" + "<numClients>3</numClients>" + "<tikaConfig>" + ASYNC_CONFIG.toAbsolutePath() + "</tikaConfig>" + "</async>" +
-                "<emitters>" + "<emitter class=\"org.apache.tika.pipes.emitter.fs.FileSystemEmitter\">" + "<name>fse</name>" + "<basePath>" +
-                ASYNC_OUTPUT_DIR.toAbsolutePath() + "</basePath>" + "<prettyPrint>true</prettyPrint>" + "</emitter>" + "</emitters>" +
                 "<pipesIterator class=\"org.apache.tika.pipes.pipesiterator.fs.FileSystemPipesIterator\">" + "<basePath>" + TEST_DATA_FILE.getAbsolutePath() + "</basePath>" +
-                "<fetcherName>file-system-fetcher</fetcherName>" + "<emitterName>fse</emitterName>" + "</pipesIterator>" + "</properties>";
+                "<fetcherPluginId>file-system-fetcher</fetcherPluginId>" + "<emitterPluginId>file-system-emitter</emitterPluginId>" + "</pipesIterator>" + "</properties>";
         Files.write(ASYNC_CONFIG, xml.getBytes(UTF_8));
         ASYNC_PLUGINS_CONFIG = Files.createTempFile(ASYNC_OUTPUT_DIR, "plugins-", ".json");
 
         Path pluginsDir = Paths.get("target/plugins");
-        String json = JSON_TEMPLATE.replace("BASE_PATH", TEST_DATA_FILE.getAbsolutePath().toString()).replace("PLUGINS_DIR", pluginsDir.toAbsolutePath().toString());
+        String json = JSON_TEMPLATE.replace("FETCHER_BASE_PATH", TEST_DATA_FILE.getAbsolutePath().toString())
+                                   .replace("EMITTER_BASE_PATH", ASYNC_OUTPUT_DIR.toAbsolutePath().toString())
+                                   .replace("PLUGINS_DIR", pluginsDir.toAbsolutePath().toString());
         Files.writeString(ASYNC_PLUGINS_CONFIG, json, UTF_8);
     }
 
@@ -126,6 +134,8 @@ public class TikaCLIAsyncTest {
 
     @Test
     public void testAsync() throws Exception {
+        //extension is "jsn" to avoid conflict with json config
+
         String content = getParamOutContent("-c", ASYNC_CONFIG.toAbsolutePath().toString(),
                 "-a", ASYNC_PLUGINS_CONFIG.toAbsolutePath().toString());
 
@@ -135,11 +145,11 @@ public class TikaCLIAsyncTest {
                 .listFiles()) {
             if (f
                     .getName()
-                    .endsWith(".json")) {
+                    .endsWith(".jsn")) {
                 //check one file for pretty print
                 if (f
                         .getName()
-                        .equals("coffee.xls.json")) {
+                        .equals("coffee.xls.jsn")) {
                     checkForPrettyPrint(f);
                 }
                 json++;
diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
index b4f6301a7..cd10f4566 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
@@ -282,8 +282,10 @@ public class TikaCLITest {
 
     @Test
     public void testRUnpack() throws Exception {
+        //TODO -- rework this to use two separate emitters
+        //one for bytes and one for json
         String[] expectedChildren = new String[]{
-                "testPDFPackage.pdf.json",
+                "testPDFPackage.pdf.jsn",
                 //the first two test that the default single file config is working
                 "testPDFPackage.pdf-embed/00000001-embedded-1",
                 "testPDFPackage.pdf-embed/00000002-image0.jpg",
@@ -294,7 +296,7 @@ public class TikaCLITest {
 
     @Test
     public void testPSTRUnpack() throws Exception {
-        String[] expectedChildren = new String[]{"testPST.pst.json",
+        String[] expectedChildren = new String[]{"testPST.pst.jsn",
                 "testPST.pst-embed/00000007-First email.msg",
                 "testPST.pst-embed/00000001-Feature Generators.msg",
                 "testPST.pst-embed/00000008-First email.msg",
@@ -305,7 +307,7 @@ public class TikaCLITest {
                 "testPST.pst-embed/00000009-attachment.docx",
                 "testPST.pst-embed/00000006-[WEBINAR] - %22Introducing Couchbase Server 2.5%22.msg"};
         testRecursiveUnpack("testPST.pst", expectedChildren, 2);
-        try (Reader reader = Files.newBufferedReader(extractDir.resolve("testPST.pst.json"))) {
+        try (Reader reader = Files.newBufferedReader(extractDir.resolve("testPST.pst.jsn"))) {
             List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
             for (Metadata m : metadataList) {
                 String content = m.get(TikaCoreProperties.TIKA_CONTENT);
@@ -400,13 +402,13 @@ public class TikaCLITest {
         Path asyncConfig = Files.createTempFile("async-config-", ".json");
         Path pluginsDir = Paths.get("target/plugins");
 
-        String json = JSON_TEMPLATE.replace("BASE_PATH", TEST_DATA_FILE.getAbsolutePath().toString())
+        String json = JSON_TEMPLATE.replace("FETCHER_BASE_PATH", TEST_DATA_FILE.getAbsolutePath().toString())
+                                   .replace("EMITTER_BASE_PATH", extractDir.toAbsolutePath().toString())
                                    .replace("PLUGINS_DIR", pluginsDir.toAbsolutePath().toString());
         Files.writeString(asyncConfig, json, UTF_8);
 
         String[] params = {"-Z",
                 "-a", asyncConfig.toAbsolutePath().toString(),
-
                 ProcessUtils.escapeCommandLine(input.toAbsolutePath().toString()),
                 ProcessUtils.escapeCommandLine(extractDir
                 .toAbsolutePath()
@@ -424,7 +426,7 @@ public class TikaCLITest {
         assertEquals(expectedLength, jsonFile.length);
 
         for (String expectedChildName : expectedChildrenFileNames) {
-            assertTrue(fileNames.contains(expectedChildName));
+            assertTrue(fileNames.contains(expectedChildName), expectedChildName);
         }
     }
 
diff --git a/tika-pipes/tika-async-cli/pom.xml b/tika-pipes/tika-async-cli/pom.xml
index 4fe950200..e1c02d5f1 100644
--- a/tika-pipes/tika-async-cli/pom.xml
+++ b/tika-pipes/tika-async-cli/pom.xml
@@ -50,6 +50,12 @@
       <groupId>org.apache.logging.log4j</groupId>
       <artifactId>log4j-slf4j2-impl</artifactId>
     </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-emitter-file-system</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>${project.groupId}</groupId>
       <artifactId>tika-fetcher-file-system</artifactId>
diff --git a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java
index 80d7d46f0..308ad4ae1 100644
--- a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java
+++ b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java
@@ -25,13 +25,19 @@ public class PluginsWriter {
 
     final static String JSON_TEMPLATE = """
             {
-              "pipesPluginsConfig" : {
+              "plugins" : {
                 "fetchers": {
                   "file-system-fetcher": {
-                    "basePath": "BASE_PATH",
+                    "basePath": "FETCHER_BASE_PATH",
                     "extractFileSystemMetadata": false
                   }
                 },
+                "emitters": {
+                  "file-system-emitter": {
+                    "basePath": "EMITTER_BASE_PATH",
+                    "fileExtension": "json"
+                  }
+                },
                 "pf4j.pluginsDir": "PLUGINS_DIR"
               }
             }
@@ -52,8 +58,8 @@ public class PluginsWriter {
             }
         }
         try {
-            String json = JSON_TEMPLATE.replace("BASE_PATH", baseInput.toAbsolutePath().toString());
-            System.out.println("PWD: " + Paths.get("").toAbsolutePath());
+            String json = JSON_TEMPLATE.replace("FETCHER_BASE_PATH", baseInput.toAbsolutePath().toString());
+            json = json.replace("EMITTER_BASE_PATH", baseOutput.toAbsolutePath().toString());
             String pluginString = "plugins";
             Path plugins = Paths.get(pluginString);
             if (Files.isDirectory(plugins)) {
diff --git a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
index 3eb29bb39..40ca2b2bd 100644
--- a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
+++ b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
@@ -103,6 +103,7 @@ public class TikaAsyncCLI {
             try {
                 pipesIterator = PipesIterator.build(tikaConfig);
             } catch (IOException | TikaException e) {
+                e.printStackTrace();
                 //swallow
             }
         }
diff --git a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaConfigAsyncWriter.java b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaConfigAsyncWriter.java
index e8306c9da..4fa8af1fb 100644
--- a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaConfigAsyncWriter.java
+++ b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaConfigAsyncWriter.java
@@ -50,7 +50,7 @@ class TikaConfigAsyncWriter {
     private static final Logger LOG = LoggerFactory.getLogger(TikaAsyncCLI.class);
 
     protected static final String FETCHER_NAME = "file-system-fetcher";
-    protected static final String EMITTER_NAME = "fse";
+    protected static final String EMITTER_NAME = "file-system-emitter";
 
     private final SimpleAsyncConfig simpleAsyncConfig;
 
@@ -92,7 +92,6 @@ class TikaConfigAsyncWriter {
         }
 
         writePipesIterator(document, properties, baseInput);
-        writeEmitters(document, properties, baseOutput);
         writeAsync(document, properties, output);
         Transformer transformer = TransformerFactory
                 .newInstance().newTransformer();
@@ -135,20 +134,6 @@ class TikaConfigAsyncWriter {
         appendTextElement(document, pipesIterator, "hasHeader", "false");
     }
 
-    private void writeEmitters(Document document, Element properties, Path baseOutput) {
-        Element emitters = findChild("emitters", properties);
-        if (emitters != null) {
-            LOG.info("emitters already exist in tika-config. Not overwriting with commandline");
-            return;
-        }
-
-        emitters = createAndGetElement(document, properties, "emitters");
-        Element emitter = createAndGetElement( document, emitters, "emitter",
-                "class", "org.apache.tika.pipes.emitter.fs.FileSystemEmitter");
-        appendTextElement(document, emitter, "name", EMITTER_NAME);
-        appendTextElement(document, emitter, "basePath", baseOutput.toAbsolutePath().toString());
-    }
-
     private void writeAsync(Document document, Element properties, Path thisTikaConfig) {
         Element async = findChild("async", properties);
         if (async != null) {
diff --git a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncProcessorTest.java b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncProcessorTest.java
index 771c02e87..49e85ba64 100644
--- a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncProcessorTest.java
+++ b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncProcessorTest.java
@@ -52,12 +52,19 @@ public class AsyncProcessorTest extends TikaTest {
 
     final static String JSON_TEMPLATE_TEST = """
             {
-              "pipesPluginsConfig" : {
+              "plugins" : {
                 "fetchers": {
                   "file-system-fetcher": {
-                    "basePath": "BASE_PATH",
+                    "basePath": "FETCHER_BASE_PATH",
                     "extractFileSystemMetadata": false
                   }
+                },
+                "emitters": {
+                  "file-system-emitter": {
+                    "basePath": "EMITTER_BASE_PATH",
+                    "fileExtension": "",
+                    "onExists":"EXCEPTION"
+                  }
                 }
               }
             }
@@ -68,44 +75,32 @@ public class AsyncProcessorTest extends TikaTest {
     private Path basedir;
     private Path inputDir;
 
-    private Path bytesDir;
-
-    private Path jsonDir;
+    private Path outputDir;
 
     private Path configDir;
 
+    private Path tikaConfigPath;
+
     @BeforeEach
     public void setUp() throws IOException {
         inputDir = basedir.resolve("input");
 
-        bytesDir = basedir.resolve("bytes");
-
-        jsonDir = basedir.resolve("json");
+        outputDir = basedir.resolve("output");
 
         configDir = basedir.resolve("config");
-        Path tikaConfig = configDir.resolve("tika-config.xml");
 
         Files.createDirectories(basedir);
         Files.createDirectories(configDir);
         Files.createDirectories(inputDir);
 
-        String xml = IOUtils.toString(AsyncProcessorTest.class.getResourceAsStream("/configs/TIKA-4207-emitter.xml"), StandardCharsets.UTF_8);
-        //do stuff to xml
-        xml = xml.replace("BASE_PATH", inputDir
-                .toAbsolutePath()
-                .toString());
-        xml = xml.replace("JSON_PATH", jsonDir
-                .toAbsolutePath()
-                .toString());
-        xml = xml.replace("BYTES_PATH", bytesDir
-                .toAbsolutePath()
-                .toString());
-
-        Files.writeString(tikaConfig, xml, StandardCharsets.UTF_8);
-
+        tikaConfigPath = configDir.resolve("tika-config.xml");
+        Files.copy(AsyncProcessorTest.class.getResourceAsStream("/configs/tika-config-default.xml"), tikaConfigPath);
         Path pipesConfig = configDir.resolve("tika-pipes.json");
         String jsonTemp = JSON_TEMPLATE_TEST
-                .replace("BASE_PATH", inputDir.toAbsolutePath().toString());
+                .replace("FETCHER_BASE_PATH", inputDir.toAbsolutePath().toString())
+                .replace("EMITTER_BASE_PATH", outputDir.toAbsolutePath().toString());
+
+
         Files.writeString(pipesConfig, jsonTemp, StandardCharsets.UTF_8);
 
         Path mock = inputDir.resolve("mock.xml");
@@ -118,11 +113,11 @@ public class AsyncProcessorTest extends TikaTest {
     public void testBasic() throws Exception {
 //        TikaAsyncCLI cli = new TikaAsyncCLI();
         //      cli.main(new String[]{ configDir.resolve("tika-config.xml").toAbsolutePath().toString()});
-        AsyncProcessor processor = new AsyncProcessor(configDir.resolve("tika-config.xml"), configDir.resolve("tika-pipes.json"));
+        AsyncProcessor processor = new AsyncProcessor(tikaConfigPath, configDir.resolve("tika-pipes.json"));
 
         EmbeddedDocumentBytesConfig embeddedDocumentBytesConfig = new EmbeddedDocumentBytesConfig(true);
         embeddedDocumentBytesConfig.setIncludeOriginal(true);
-        embeddedDocumentBytesConfig.setEmitter("bytes");
+        embeddedDocumentBytesConfig.setEmitter("file-system-emitter");
         embeddedDocumentBytesConfig.setSuffixStrategy(EmbeddedDocumentBytesConfig.SUFFIX_STRATEGY.NONE);
         embeddedDocumentBytesConfig.setEmbeddedIdPrefix("-");
         ParseContext parseContext = new ParseContext();
@@ -130,7 +125,7 @@ public class AsyncProcessorTest extends TikaTest {
         parseContext.set(EmbeddedDocumentBytesConfig.class, embeddedDocumentBytesConfig);
         FetchEmitTuple t =
                 new FetchEmitTuple("myId-1", new FetchKey("file-system-fetcher", "mock.xml"),
-                        new EmitKey("json", "emit-1"), new Metadata(), parseContext, FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT);
+                        new EmitKey("file-system-emitter", "emit-1"), new Metadata(), parseContext, FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT);
 
         processor.offer(t, 1000);
 
@@ -143,15 +138,15 @@ public class AsyncProcessorTest extends TikaTest {
         }
         processor.close();
 
-        String container = Files.readString(bytesDir.resolve("emit-1-embed/emit-1-0"));
+        String container = Files.readString(outputDir.resolve("emit-1-embed/emit-1-0"));
         assertContains("\"dc:creator\">Nikolai Lobachevsky", container);
 
-        String xmlEmbedded = Files.readString(bytesDir.resolve("emit-1-embed/emit-1-1"));
+        String xmlEmbedded = Files.readString(outputDir.resolve("emit-1-embed/emit-1-1"));
         assertContains("name=\"dc:creator\"", xmlEmbedded);
         assertContains(">embeddedAuthor</metadata>", xmlEmbedded);
 
         List<Metadata> metadataList;
-        try (BufferedReader reader = Files.newBufferedReader(jsonDir.resolve("emit-1.json"))) {
+        try (BufferedReader reader = Files.newBufferedReader(outputDir.resolve("emit-1"))) {
             metadataList = JsonMetadataList.fromJson(reader);
         }
         assertEquals(2, metadataList.size());
diff --git a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/TikaConfigAsyncWriterTest.java b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/TikaConfigAsyncWriterTest.java
index 3dd105926..44608e1ee 100644
--- a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/TikaConfigAsyncWriterTest.java
+++ b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/TikaConfigAsyncWriterTest.java
@@ -49,7 +49,7 @@ public class TikaConfigAsyncWriterTest {
         TikaConfigAsyncWriter writer = new TikaConfigAsyncWriter(simpleAsyncConfig);
         writer.write(target);
 
-        Set<String> expected = Set.of("service-loader", "parsers", "pipesIterator", "emitters", "async");
+        Set<String> expected = Set.of("service-loader", "parsers", "pipesIterator", "async");
         Set<String> properties = loadProperties(target);
         assertEquals(expected, properties);
     }
diff --git a/tika-pipes/tika-async-cli/src/test/resources/configs/TIKA-4207-emitter.xml b/tika-pipes/tika-async-cli/src/test/resources/configs/TIKA-4207-emitter.xml
deleted file mode 100644
index 1f5229480..000000000
--- a/tika-pipes/tika-async-cli/src/test/resources/configs/TIKA-4207-emitter.xml
+++ /dev/null
@@ -1,29 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<properties>
-  <emitters>
-    <emitter class="org.apache.tika.pipes.emitter.fs.FileSystemEmitter">
-      <name>json</name>
-      <basePath>JSON_PATH</basePath>
-    </emitter>
-    <emitter class="org.apache.tika.pipes.emitter.fs.FileSystemEmitter">
-      <name>bytes</name>
-      <basePath>BYTES_PATH</basePath>
-    </emitter>
-  </emitters>
-</properties>
\ No newline at end of file
diff --git a/tika-pipes/tika-async-cli/src/test/resources/configs/tika-config-default.xml b/tika-pipes/tika-async-cli/src/test/resources/configs/tika-config-default.xml
new file mode 100644
index 000000000..008a36dfd
--- /dev/null
+++ b/tika-pipes/tika-async-cli/src/test/resources/configs/tika-config-default.xml
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<properties>
+</properties>
\ No newline at end of file
diff --git a/tika-pipes/tika-emitters/tika-emitter-file-system/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java b/tika-pipes/tika-emitters/tika-emitter-file-system/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
index cb03a1e26..87aed84d2 100644
--- a/tika-pipes/tika-emitters/tika-emitter-file-system/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
+++ b/tika-pipes/tika-emitters/tika-emitter-file-system/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
@@ -29,6 +29,8 @@ import java.util.List;
 import java.util.Optional;
 
 import org.pf4j.Extension;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.metadata.Metadata;
@@ -52,6 +54,9 @@ import org.apache.tika.utils.StringUtils;
 @Extension
 public class FileSystemEmitter extends AbstractStreamEmitter {
 
+    private static final Logger LOG = LoggerFactory.getLogger(FileSystemEmitter.class);
+
+
     private FileSystemEmitterConfig fileSystemEmitterConfig;
 
     public FileSystemEmitter() throws IOException {
@@ -62,12 +67,18 @@ public class FileSystemEmitter extends AbstractStreamEmitter {
     public void configure(PluginConfig pluginConfig) throws TikaConfigException, IOException {
         checkPluginId(pluginConfig.pluginId());
         fileSystemEmitterConfig = FileSystemEmitterConfig.load(pluginConfig.jsonConfig());
-        //checkConfig(fileSystemEmitterConfig);
+        checkConfig(fileSystemEmitterConfig);
+    }
+
+    private void checkConfig(FileSystemEmitterConfig fileSystemEmitterConfig) {
+        if (fileSystemEmitterConfig.onExists() == null) {
+            throw new IllegalArgumentException("Must configure 'onExists' as 'skip', 'exception' or 'replace'");
+        }
     }
 
     @Override
     public void emit(String emitKey, List<Metadata> metadataList, ParseContext parseContext) throws IOException {
-
+        LOG.warn("about to emit: {}", emitKey);
         if (metadataList == null || metadataList.isEmpty()) {
             throw new IOException("metadata list must not be null or of size 0");
         }
@@ -100,6 +111,8 @@ public class FileSystemEmitter extends AbstractStreamEmitter {
 
     @Override
     public void emit(String emitKey, InputStream inputStream, Metadata userMetadata, ParseContext parseContext) throws IOException {
+        LOG.warn("about to stream emit: {}", emitKey);
+
         FileSystemEmitterConfig config = getConfig(parseContext);
 
         Path output;
@@ -114,18 +127,25 @@ public class FileSystemEmitter extends AbstractStreamEmitter {
         }
 
         if (!Files.isDirectory(output.getParent())) {
+            LOG.warn("creating parent directory: {}", output);
             Files.createDirectories(output.getParent());
         }
+        LOG.warn("on exists: {}", config.onExists());
         if (config.onExists() == ON_EXISTS.REPLACE) {
+            LOG.warn("copying {}", output);
             Files.copy(inputStream, output, StandardCopyOption.REPLACE_EXISTING);
         } else if (config.onExists() == ON_EXISTS.EXCEPTION) {
+            LOG.warn("copying 2 {}", output);
             Files.copy(inputStream, output);
         } else if (config.onExists() == ON_EXISTS.SKIP) {
             if (!Files.isRegularFile(output)) {
                 try {
+                    LOG.warn("copying 3 {}", output);
+
                     Files.copy(inputStream, output);
                 } catch (FileAlreadyExistsException e) {
                     //swallow
+                    LOG.warn("file exists");
                 }
             }
         }
@@ -138,6 +158,7 @@ public class FileSystemEmitter extends AbstractStreamEmitter {
             Optional<PluginConfig> pluginConfigOpt = pluginConfigs.get(getPluginId());
             if (pluginConfigOpt.isPresent()) {
                 config = FileSystemEmitterConfig.load(pluginConfigOpt.get().jsonConfig());
+                checkConfig(config);
             }
         }
         return config;
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncConfig.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncConfig.java
index b0808f7fa..2bde10515 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncConfig.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncConfig.java
@@ -39,8 +39,10 @@ public class AsyncConfig extends PipesConfigBase {
 
     public static AsyncConfig load(Path tikaConfig, Path pipesPluginsConfig) throws IOException, TikaConfigException {
         AsyncConfig asyncConfig = new AsyncConfig();
-        try (InputStream is = Files.newInputStream(tikaConfig)) {
-            asyncConfig.configure("async", is);
+        if (tikaConfig != null) {
+            try (InputStream is = Files.newInputStream(tikaConfig)) {
+                asyncConfig.configure("async", is);
+            }
         }
         if (asyncConfig.getTikaConfig() == null) {
             asyncConfig.setTikaConfig(tikaConfig);
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncProcessor.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncProcessor.java
index 2c15d05ef..6abce6093 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncProcessor.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncProcessor.java
@@ -83,7 +83,7 @@ public class AsyncProcessor implements Closeable {
         this.executorCompletionService =
                 new ExecutorCompletionService<>(executorService);
         try {
-            if (!tikaConfigPath.toAbsolutePath().equals(asyncConfig.getTikaConfig().toAbsolutePath())) {
+            if (asyncConfig.getTikaConfig() != null && !tikaConfigPath.toAbsolutePath().equals(asyncConfig.getTikaConfig().toAbsolutePath())) {
                 LOG.warn("TikaConfig for AsyncProcessor ({}) is different " +
                                 "from TikaConfig for workers ({}). If this is intended," +
                                 " please ignore this warning.", tikaConfigPath.toAbsolutePath(),

Reply via email to