This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4252
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 63fcc02887eb8e8733cdfad7d614d375efa0d55c
Author: tallison <[email protected]>
AuthorDate: Thu May 9 12:20:13 2024 -0400

    TIKA-4252 - revert and add user test to confirm user metadata is in results
---
 .../java/org/apache/tika/pipes/PipesServer.java    | 22 ++++----
 .../org/apache/tika/pipes/PipesClientTest.java     | 66 ++++++++++++++++++++++
 .../resources/org/apache/tika/pipes/TIKA-4252.xml  | 30 ++++++++++
 3 files changed, 106 insertions(+), 12 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java b/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
index 20a5def59..98192d694 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
@@ -371,8 +371,13 @@ public class PipesServer implements Runnable {
         MetadataListAndEmbeddedBytes parseData = null;
 
         try {
-            //this can be null if there is a fetch exception
-            parseData = parseFromTuple(t, fetcher);
+            try {
+                parseData = parseFromTuple(t, fetcher);
+            } catch (IOException | TikaException e) {
+                LOG.warn("fetch exception " + t.getId(), e);
+                write(STATUS.FETCH_EXCEPTION, ExceptionUtils.getStackTrace(e));
+                return;
+            }
 
             if (LOG.isTraceEnabled()) {
                 LOG.trace("timer -- to parse: {} ms", System.currentTimeMillis() - start);
@@ -455,37 +460,30 @@ public class PipesServer implements Runnable {
         }
     }
 
-    protected MetadataListAndEmbeddedBytes parseFromTuple(FetchEmitTuple t, Fetcher fetcher) {
+    protected MetadataListAndEmbeddedBytes parseFromTuple(FetchEmitTuple t, Fetcher fetcher) throws TikaException, IOException {
         FetchKey fetchKey = t.getFetchKey();
         if (fetchKey.hasRange()) {
             if (!(fetcher instanceof RangeFetcher)) {
                 throw new IllegalArgumentException(
                         "fetch key has a range, but the fetcher is not a range fetcher");
             }
-            Metadata metadata = t.getMetadata() == null ? new Metadata() : t.getMetadata();
+            Metadata metadata = new Metadata();
             try (InputStream stream = ((RangeFetcher) fetcher).fetch(fetchKey.getFetchKey(),
                     fetchKey.getRangeStart(), fetchKey.getRangeEnd(), metadata)) {
                 return parseWithStream(t, stream, metadata);
             } catch (SecurityException e) {
                 LOG.error("security exception " + t.getId(), e);
                 throw e;
-            } catch (TikaException | IOException e) {
-                LOG.warn("fetch exception " + t.getId(), e);
-                write(STATUS.FETCH_EXCEPTION, ExceptionUtils.getStackTrace(e));
             }
         } else {
-            Metadata metadata = t.getMetadata() == null ? new Metadata() : t.getMetadata();
+            Metadata metadata = new Metadata();
             try (InputStream stream = fetcher.fetch(t.getFetchKey().getFetchKey(), metadata)) {
                 return parseWithStream(t, stream, metadata);
             } catch (SecurityException e) {
                 LOG.error("security exception " + t.getId(), e);
                 throw e;
-            } catch (TikaException | IOException e) {
-                LOG.warn("fetch exception " + t.getId(), e);
-                write(STATUS.FETCH_EXCEPTION, ExceptionUtils.getStackTrace(e));
             }
         }
-        return null;
     }
 
     private String getNoFetcherMsg(String fetcherName) {
diff --git a/tika-core/src/test/java/org/apache/tika/pipes/PipesClientTest.java b/tika-core/src/test/java/org/apache/tika/pipes/PipesClientTest.java
new file mode 100644
index 000000000..a8c182ddc
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/pipes/PipesClientTest.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes;
+
+import static org.apache.tika.TikaTest.assertContains;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.pipes.emitter.EmitKey;
+import org.apache.tika.pipes.fetcher.FetchKey;
+
+public class PipesClientTest {
+
+    @Test
+    public void testUserMetadataAndNoEmitter(@TempDir Path tmp) throws Exception {
+        Path tikaConfigTemplate = Paths.get(PipesClientTest.class.getResource("TIKA-4252.xml").toURI());
+        Path tikaConfig = tmp.resolve("tika-config.xml");
+        String xml = Files.readString(tikaConfigTemplate, StandardCharsets.UTF_8);
+        xml = xml.replace("BASE_PATH",
+                Paths.get(PipesClientTest.class.getResource("/test-documents").toURI()).toAbsolutePath().toString());
+        Files.writeString(tikaConfig, xml);
+
+        List<Metadata> metadataList;
+        try (PipesClient pipesClient = new PipesClient(PipesConfig.load(tikaConfig))) {
+            FetchKey fetchKey = new FetchKey("fs", "mock_times.xml");
+            Metadata userMetadata = new Metadata();
+            userMetadata.set("k1", "v1");
+            userMetadata.add("k2", "v2a");
+            userMetadata.add("k2", "v2b");
+            PipesResult pipesResult = pipesClient.process(
+                    new FetchEmitTuple("my-id", fetchKey, new EmitKey(), userMetadata, HandlerConfig.DEFAULT_HANDLER_CONFIG, FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+            metadataList = pipesResult
+                    .getEmitData()
+                    .getMetadataList();
+        }
+        assertEquals("application/mock+xml", metadataList.get(0).get(Metadata.CONTENT_TYPE));
+        assertContains("hello", metadataList.get(0).get(TikaCoreProperties.TIKA_CONTENT));
+        assertEquals("v1", metadataList.get(0).get("k1"));
+        assertEquals("v2a", metadataList.get(0).getValues("k2")[0]);
+        assertEquals("v2b", metadataList.get(0).getValues("k2")[1]);
+    }
+}
diff --git a/tika-core/src/test/resources/org/apache/tika/pipes/TIKA-4252.xml b/tika-core/src/test/resources/org/apache/tika/pipes/TIKA-4252.xml
new file mode 100644
index 000000000..036f0f2a5
--- /dev/null
+++ b/tika-core/src/test/resources/org/apache/tika/pipes/TIKA-4252.xml
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <fetchers>
+    <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher">
+      <name>fs</name>
+      <basePath>BASE_PATH</basePath>
+    </fetcher>
+  </fetchers>
+  <emitters>
+    <emitter class="org.apache.tika.pipes.async.MockEmitter">
+      <name>e</name>
+    </emitter>
+  </emitters>
+</properties>
\ No newline at end of file

Reply via email to