This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tika.git
commit a8293fee09ca7c5b922d50fe9d6b6a00696d074e Author: tallison <[email protected]> AuthorDate: Wed Dec 16 15:52:40 2020 -0500 TIKA-3242 -- allow users to pass metadata via httpheaders to tika-server --- .../tika/server/classic/DetectorResourceTest.java | 2 +- .../classic/RecursiveMetadataResourceTest.java | 20 +++++++++++++++++ .../tika/server/core/resource/TikaResource.java | 13 ++++++++++- .../apache/tika/server/core/TikaResourceTest.java | 25 +++++++++++++++++----- 4 files changed, 53 insertions(+), 7 deletions(-) diff --git a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/DetectorResourceTest.java b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/DetectorResourceTest.java index d531c2e..8f1afb5 100644 --- a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/DetectorResourceTest.java +++ b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/DetectorResourceTest.java @@ -80,7 +80,7 @@ public class DetectorResourceTest extends CXFTestBase { @Test public void testDetectCsvNoExt() throws Exception { - String url = endPoint + DETECT_STREAM_PATH; + Response response = WebClient .create(endPoint + DETECT_STREAM_PATH) .type("text/csv") diff --git a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/RecursiveMetadataResourceTest.java b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/RecursiveMetadataResourceTest.java index 3a32f13..a9be9a6 100644 --- a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/RecursiveMetadataResourceTest.java +++ b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/RecursiveMetadataResourceTest.java @@ -19,11 +19,14 @@ package org.apache.tika.server.classic; import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.tika.TikaTest.assertNotContained; +import static org.apache.tika.TikaTest.debug; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +import javax.ws.rs.core.MultivaluedHashMap; +import javax.ws.rs.core.MultivaluedMap; import javax.ws.rs.core.Response; import java.io.InputStream; @@ -138,6 +141,23 @@ public class RecursiveMetadataResourceTest extends CXFTestBase { } @Test + public void testHeaders() throws Exception { + MultivaluedMap<String, String> map = new MultivaluedHashMap<>(); + map.addAll("meta_mymeta", "first", "second", "third"); + + Response response = WebClient + .create(endPoint + META_PATH) + .headers(map) + .accept("application/json") + .put(ClassLoader + .getSystemResourceAsStream(TEST_RECURSIVE_DOC)); + + Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8); + List<Metadata> metadataList = JsonMetadataList.fromJson(reader); + assertEquals("first,second,third", metadataList.get(0).get("mymeta")); + } + + @Test public void testPasswordProtected() throws Exception { Response response = WebClient .create(endPoint + META_PATH) diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java index 627a12a..c8ee4da 100644 --- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java +++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java @@ -72,7 +72,9 @@ import java.io.OutputStreamWriter; import java.io.Writer; import java.lang.reflect.Field; import java.lang.reflect.Method; +import java.util.List; import java.util.Locale; +import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -82,7 +84,7 @@ import static java.nio.charset.StandardCharsets.UTF_8; public class TikaResource { private static Pattern ALLOWABLE_HEADER_CHARS = Pattern.compile("(?i)^[-/_+\\.A-Z0-9 ]+$"); - + private static final String META_PREFIX = "meta_"; public static final String GREETING = "This is Tika Server (" + new Tika().toString() + "). Please PUT\n"; @@ -309,6 +311,15 @@ public class TikaResource { } }); } + + for (Map.Entry<String, List<String>> e : httpHeaders.entrySet()) { + if (e.getKey().startsWith(META_PREFIX)) { + String tikaKey = e.getKey().substring(META_PREFIX.length()); + for (String value: e.getValue()) { + metadata.add(tikaKey, value); + } + } + } } public static void setDetector(Parser p, Detector detector) { diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java index d8f93ce..18b859a 100644 --- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java +++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java @@ -22,19 +22,16 @@ import org.apache.cxf.jaxrs.JAXRSServerFactoryBean; import org.apache.cxf.jaxrs.client.WebClient; import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider; import org.apache.tika.server.core.resource.TikaResource; -import org.junit.Ignore; import org.junit.Test; -import javax.ws.rs.ProcessingException; +import javax.ws.rs.core.MultivaluedHashMap; +import javax.ws.rs.core.MultivaluedMap; import javax.ws.rs.core.Response; import java.io.InputStream; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; -import static org.apache.cxf.helpers.HttpHeaderHelper.CONTENT_ENCODING; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; public class TikaResourceTest extends CXFTestBase { @@ -68,6 +65,24 @@ public class TikaResourceTest extends CXFTestBase { getStringFromInputStream((InputStream) response.getEntity())); } + @Test + public void testHeaders() throws Exception { + MultivaluedMap<String, String> map = new MultivaluedHashMap<>(); + map.addAll("meta_mymeta", "first", "second", "third"); + Response response = WebClient + .create(endPoint + TIKA_PATH) + .headers(map) + .accept("text/xml") + .put(ClassLoader + .getSystemResourceAsStream(TEST_HELLO_WORLD)); + String xml = getStringFromInputStream((InputStream) response.getEntity()); + //can't figure out why these values are comma-delimited, rather + //than a true list...is this really the expected behavior? + //this at least tests that the pass-through, basically works... + //except for multi-values... :D + assertContains("<meta name=\"mymeta\" content=\"first,second,third\"/>", + xml); + } @Test public void testJAXBAndActivationDependency() {
