This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch TIKA-3226
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/TIKA-3226 by this push:
new 83217e5 TIKA-3226 -- WIP do not merge -- add put and post tests for
EmitterResource
83217e5 is described below
commit 83217e580200f1a574f9acf465e800f421eb4c21
Author: tballison <[email protected]>
AuthorDate: Wed Jan 20 14:50:48 2021 -0500
TIKA-3226 -- WIP do not merge -- add put and post tests for EmitterResource
---
.../org/apache/tika/emitter/DefaultEmitter.java | 2 +-
.../org/apache/tika/fetcher/DefaultFetcher.java | 8 +-
.../apache/tika/fetcher/FetchPrefixKeyPair.java | 8 +-
.../main/java/org/apache/tika/fetcher/Fetcher.java | 4 +-
.../org/apache/tika/fetcher/FileSystemFetcher.java | 4 +-
.../org/apache/tika/fetcher/SimpleUrlFetcher.java | 4 +-
.../apache/tika/emitter/fs/FileSystemEmitter.java | 5 +-
.../org/apache/tika/fetcher/jdbc/JDBCFetcher.java | 2 +-
.../java/org/apache/tika/fetcher/s3/S3Fetcher.java | 4 +-
.../tika/server/core/resource/EmitterResource.java | 94 +++++++++++++++++-----
.../apache/tika/server/core/TikaEmitterTest.java | 86 +++++++++++++++++++-
11 files changed, 178 insertions(+), 43 deletions(-)
diff --git
a/tika-core/src/main/java/org/apache/tika/emitter/DefaultEmitter.java
b/tika-core/src/main/java/org/apache/tika/emitter/DefaultEmitter.java
index e221505..f95977b 100644
--- a/tika-core/src/main/java/org/apache/tika/emitter/DefaultEmitter.java
+++ b/tika-core/src/main/java/org/apache/tika/emitter/DefaultEmitter.java
@@ -31,7 +31,7 @@ import java.util.concurrent.ConcurrentHashMap;
/**
* Utility class that will apply the appropriate fetcher
- * to the fetchString based on the prefix.
+ * to the fetcherString based on the prefix.
*
* This does not allow multiple fetchers supporting the same prefix.
*/
diff --git
a/tika-core/src/main/java/org/apache/tika/fetcher/DefaultFetcher.java
b/tika-core/src/main/java/org/apache/tika/fetcher/DefaultFetcher.java
index ac539c0..085e021 100644
--- a/tika-core/src/main/java/org/apache/tika/fetcher/DefaultFetcher.java
+++ b/tika-core/src/main/java/org/apache/tika/fetcher/DefaultFetcher.java
@@ -28,7 +28,7 @@ import java.util.concurrent.ConcurrentHashMap;
/**
* Utility class that will apply the appropriate fetcher
- * to the fetchString based on the prefix.
+ * to the fetcherString based on the prefix.
*
* This forbids multiple fetchers supporting the same prefix.
*/
@@ -55,15 +55,15 @@ public class DefaultFetcher implements Fetcher {
}
@Override
- public InputStream fetch(String fetchString, Metadata metadata)
+ public InputStream fetch(String fetcherString, Metadata metadata)
throws IOException, TikaException {
- FetchPrefixKeyPair fetchPrefixKeyPair =
FetchPrefixKeyPair.create(fetchString);
+ FetchPrefixKeyPair fetchPrefixKeyPair =
FetchPrefixKeyPair.create(fetcherString);
Fetcher fetcher = fetcherMap.get(fetchPrefixKeyPair.getPrefix());
if (fetcher == null) {
throw new IllegalArgumentException("Can't find fetcher for prefix:
"+
fetchPrefixKeyPair.getPrefix());
}
- return fetcher.fetch(fetchString, metadata);
+ return fetcher.fetch(fetcherString, metadata);
}
}
diff --git
a/tika-core/src/main/java/org/apache/tika/fetcher/FetchPrefixKeyPair.java
b/tika-core/src/main/java/org/apache/tika/fetcher/FetchPrefixKeyPair.java
index 9b263ae..2a068c5 100644
--- a/tika-core/src/main/java/org/apache/tika/fetcher/FetchPrefixKeyPair.java
+++ b/tika-core/src/main/java/org/apache/tika/fetcher/FetchPrefixKeyPair.java
@@ -25,13 +25,13 @@ public class FetchPrefixKeyPair {
this.key = key;
}
- public static FetchPrefixKeyPair create(String fetchString) throws
FetcherStringException {
- int prefixIndex = fetchString.indexOf(":");
+ public static FetchPrefixKeyPair create(String fetcherString) throws
FetcherStringException {
+ int prefixIndex = fetcherString.indexOf(":");
if (prefixIndex < 0) {
throw new FetcherStringException("Can't find fetcher prefix, e.g.
the 's3' in s3:/myfile");
}
- String prefix = fetchString.substring(0, prefixIndex);
- String key = fetchString.substring(prefixIndex+1);
+ String prefix = fetcherString.substring(0, prefixIndex);
+ String key = fetcherString.substring(prefixIndex+1);
return new FetchPrefixKeyPair(prefix, key);
}
diff --git a/tika-core/src/main/java/org/apache/tika/fetcher/Fetcher.java
b/tika-core/src/main/java/org/apache/tika/fetcher/Fetcher.java
index 12c6a5b..95a795c 100644
--- a/tika-core/src/main/java/org/apache/tika/fetcher/Fetcher.java
+++ b/tika-core/src/main/java/org/apache/tika/fetcher/Fetcher.java
@@ -30,12 +30,12 @@ import java.util.Set;
*
* Implementations of Fetcher must be thread safe.
*
- * The fetchString must start with a prefix that can be
+ * The fetcherString must start with a prefix that can be
* used to uniquely select the fetcher, e.g. file:my_file.pdf,
s3:bucket/path/to/my_file
*
* Each fetcher must specify which prefixes it can handle.
*/
public interface Fetcher {
Set<String> getSupportedPrefixes();
- InputStream fetch(String fetchString, Metadata metadata) throws
TikaException, IOException;
+ InputStream fetch(String fetcherString, Metadata metadata) throws
TikaException, IOException;
}
diff --git
a/tika-core/src/main/java/org/apache/tika/fetcher/FileSystemFetcher.java
b/tika-core/src/main/java/org/apache/tika/fetcher/FileSystemFetcher.java
index b93f202..63c4cba 100644
--- a/tika-core/src/main/java/org/apache/tika/fetcher/FileSystemFetcher.java
+++ b/tika-core/src/main/java/org/apache/tika/fetcher/FileSystemFetcher.java
@@ -42,9 +42,9 @@ public class FileSystemFetcher implements Fetcher {
}
@Override
- public InputStream fetch(String fetchString, Metadata metadata)
+ public InputStream fetch(String fetcherString, Metadata metadata)
throws IOException, TikaException {
- FetchPrefixKeyPair fetchPrefixKeyPair =
FetchPrefixKeyPair.create(fetchString);
+ FetchPrefixKeyPair fetchPrefixKeyPair =
FetchPrefixKeyPair.create(fetcherString);
metadata.set(TikaCoreProperties.SOURCE_PATH,
fetchPrefixKeyPair.getKey());
Path p = null;
if (basePath != null) {
diff --git
a/tika-core/src/main/java/org/apache/tika/fetcher/SimpleUrlFetcher.java
b/tika-core/src/main/java/org/apache/tika/fetcher/SimpleUrlFetcher.java
index a68d004..a15b78c 100644
--- a/tika-core/src/main/java/org/apache/tika/fetcher/SimpleUrlFetcher.java
+++ b/tika-core/src/main/java/org/apache/tika/fetcher/SimpleUrlFetcher.java
@@ -46,9 +46,9 @@ public class SimpleUrlFetcher implements Fetcher {
}
@Override
- public InputStream fetch(String fetchString, Metadata metadata)
+ public InputStream fetch(String fetcherString, Metadata metadata)
throws IOException, TikaException {
- FetchPrefixKeyPair fetchPrefixKeyPair =
FetchPrefixKeyPair.create(fetchString);
+ FetchPrefixKeyPair fetchPrefixKeyPair =
FetchPrefixKeyPair.create(fetcherString);
URL url = new URL(fetchPrefixKeyPair.getKey());
if (! url.getProtocol().equals("http") &&
! url.getProtocol().equals("https") &&
diff --git
a/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/emitter/fs/FileSystemEmitter.java
b/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/emitter/fs/FileSystemEmitter.java
index 54219a8..0acae53 100644
---
a/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/emitter/fs/FileSystemEmitter.java
+++
b/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/emitter/fs/FileSystemEmitter.java
@@ -38,7 +38,10 @@ public class FileSystemEmitter implements Emitter {
String relPath = metadataList.get(0)
.get(TikaCoreProperties.SOURCE_PATH);
-
+ if (relPath == null) {
+ throw new TikaEmitterException("Must specify a
"+TikaCoreProperties.SOURCE_PATH.getName() +
+ " in the metadata in order for this emitter to generate
the output file path.");
+ }
if (fileExtension != null && fileExtension.length() > 0) {
relPath += "." + fileExtension;
}
diff --git
a/tika-fetchers/tika-fetcher-jdbc/src/main/java/org/apache/tika/fetcher/jdbc/JDBCFetcher.java
b/tika-fetchers/tika-fetcher-jdbc/src/main/java/org/apache/tika/fetcher/jdbc/JDBCFetcher.java
index b6dd4f0..25152a9 100644
---
a/tika-fetchers/tika-fetcher-jdbc/src/main/java/org/apache/tika/fetcher/jdbc/JDBCFetcher.java
+++
b/tika-fetchers/tika-fetcher-jdbc/src/main/java/org/apache/tika/fetcher/jdbc/JDBCFetcher.java
@@ -57,7 +57,7 @@ public class JDBCFetcher implements Fetcher, Initializable {
}
@Override
- public InputStream fetch(String fetchString, Metadata metadata) throws
TikaException, IOException {
+ public InputStream fetch(String fetcherString, Metadata metadata) throws
TikaException, IOException {
return null;
}
diff --git
a/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/fetcher/s3/S3Fetcher.java
b/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/fetcher/s3/S3Fetcher.java
index 4e592c7..2757f17 100644
---
a/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/fetcher/s3/S3Fetcher.java
+++
b/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/fetcher/s3/S3Fetcher.java
@@ -59,9 +59,9 @@ public class S3Fetcher implements Fetcher, Initializable {
}
@Override
- public InputStream fetch(String fetchString, Metadata metadata)
+ public InputStream fetch(String fetcherString, Metadata metadata)
throws TikaException, IOException {
- FetchPrefixKeyPair fetchPrefixKeyPair =
FetchPrefixKeyPair.create(fetchString);
+ FetchPrefixKeyPair fetchPrefixKeyPair =
FetchPrefixKeyPair.create(fetcherString);
String bucketKey = fetchPrefixKeyPair.getKey();
if (bucketKey.startsWith("//")) {
bucketKey = bucketKey.substring(2);
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
index b706420..bad2983 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
@@ -17,11 +17,16 @@
package org.apache.tika.server.core.resource;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParser;
+import org.apache.commons.io.IOUtils;
import org.apache.tika.emitter.Emitter;
import org.apache.tika.emitter.TikaEmitterException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.fetcher.Fetcher;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.utils.ExceptionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -38,6 +43,9 @@ import javax.ws.rs.core.HttpHeaders;
import javax.ws.rs.core.UriInfo;
import java.io.IOException;
import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -46,7 +54,14 @@ import java.util.Map;
public class EmitterResource {
private static final String EMITTER_PARAM = "emitter";
- private static final String FETCH_STRING = "fetchString";
+ private static final String FETCH_STRING = "fetcherString";
+ private static final String FETCH_STRING_ABBREV = "f";
+
+ /**
+ * Key for the source path that is safe to pass through an HTTP header (':' is replaced with '-').
+ * The user _must_ specify this header for the fsemitter when calling 'put'.
+ */
+ public static final String PATH_KEY_FOR_HTTP_HEADER =
TikaCoreProperties.SOURCE_PATH.getName().replaceAll(":", "-");
private static final Logger LOG =
LoggerFactory.getLogger(EmitterResource.class);
@@ -55,7 +70,7 @@ public class EmitterResource {
* @param httpHeaders
* @param info
* @param emitterName
- * @param fetchString specify the fetch string in the url's query section
+ * @param fetcherString specify the fetcher string in the url's query section
* @return
* @throws Exception
*/
@@ -65,12 +80,12 @@ public class EmitterResource {
public Map<String, String> getMetadata(InputStream is, @Context
HttpHeaders httpHeaders,
@Context UriInfo info,
@PathParam(EMITTER_PARAM) String
emitterName,
- @QueryParam(FETCH_STRING) String
fetchString) throws Exception {
+ @QueryParam(FETCH_STRING) String
fetcherString) throws Exception {
Metadata metadata = new Metadata();
Fetcher fetcher = TikaResource.getConfig().getFetcher();
List<Metadata> metadataList;
- try (InputStream fetchedIs = fetcher.fetch(fetchString, metadata)) {
+ try (InputStream fetchedIs = fetcher.fetch(fetcherString, metadata)) {
metadataList =
RecursiveMetadataResource.parseMetadata(fetchedIs,
metadata,
@@ -79,14 +94,29 @@ public class EmitterResource {
return emit(emitterName, metadataList);
}
+ /**
+ * Same as getMetadata, but reads the fetcher string from the abbreviated 'f' query parameter.
+ * @param httpHeaders
+ * @param info
+ * @param emitterName
+ * @param fetcherString specify the fetcher string as the 'f' parameter in the url's query section
+ * @return
+ * @throws Exception
+ */
+ @GET
+ @Produces("application/json")
+ @Path("{" + EMITTER_PARAM + " : (\\w+)?}")
+ public Map<String, String> getMetadataAbbrev(InputStream is, @Context
HttpHeaders httpHeaders,
+ @Context UriInfo info,
+ @PathParam(EMITTER_PARAM) String
emitterName,
+ @QueryParam(FETCH_STRING_ABBREV)
String fetcherString) throws Exception {
+ return getMetadata(is, httpHeaders, info, emitterName, fetcherString);
+ }
/**
- * Returns an InputStream that can be deserialized as a list of
- * {@link Metadata} objects.
- * The first in the list represents the main document, and the
- * rest represent metadata for the embedded objects. This works
- * recursively through all descendants of the main document, not
- * just the immediate children.
+ * The client puts the raw bytes of the file and specifies the emitter
+ * in the same way as for get and post. This does not trigger a fetcher;
+ * if you want a fetcher, use the get or post options.
* <p>
* The extracted text content is stored with the key
* {@link
org.apache.tika.sax.AbstractRecursiveParserWrapperHandler#TIKA_CONTENT}
@@ -108,6 +138,8 @@ public class EmitterResource {
) throws Exception {
Metadata metadata = new Metadata();
+ String path = httpHeaders.getHeaderString(PATH_KEY_FOR_HTTP_HEADER);
+ metadata.set(TikaCoreProperties.SOURCE_PATH, path);
List<Metadata> metadataList =
RecursiveMetadataResource.parseMetadata(is,
metadata,
@@ -116,12 +148,11 @@ public class EmitterResource {
}
/**
- * Returns an InputStream that can be deserialized as a list of
- * {@link Metadata} objects.
- * The first in the list represents the main document, and the
- * rest represent metadata for the embedded objects. This works
- * recursively through all descendants of the main document, not
- * just the immediate children.
+ * The client posts a JSON request. At a minimum, this must be a
+ * JSON object with a "fetcherString" entry whose value is the string
+ * used to fetch the input stream. Optionally, it may contain a
+ * "metadata" object whose entries (single values or arrays of values)
+ * are passed through from the client into the metadata for the parse.
* <p>
* The extracted text content is stored with the key
* {@link
org.apache.tika.sax.AbstractRecursiveParserWrapperHandler#TIKA_CONTENT}
@@ -141,13 +172,32 @@ public class EmitterResource {
@Context UriInfo info,
@PathParam(EMITTER_PARAM) String emitterName
) throws Exception {
-
+ JsonElement root = null;
+ try (Reader reader = new InputStreamReader(is,
StandardCharsets.UTF_8)) {
+ root = JsonParser.parseReader(reader);
+ }
+ String fetcherString =
root.getAsJsonObject().get("fetcherString").getAsString();
Metadata metadata = new Metadata();
- List<Metadata> metadataList =
-
RecursiveMetadataResource.parseMetadata(TikaResource.getInputStream(is,
metadata,
- httpHeaders),
- metadata,
- httpHeaders.getRequestHeaders(), info, "text");
+ if (root.getAsJsonObject().has("metadata")) {
+ JsonObject meta =
root.getAsJsonObject().getAsJsonObject("metadata");
+ for (String k : meta.keySet()) {
+ JsonElement val = meta.get(k);
+ if (val.isJsonArray()) {
+ for (JsonElement v : val.getAsJsonArray()) {
+ metadata.add(k, v.getAsString());
+ }
+ } else {
+ metadata.set(k, val.getAsString());
+ }
+ }
+ }
+ List<Metadata> metadataList;
+ try (InputStream stream =
TikaResource.getConfig().getFetcher().fetch(fetcherString, metadata)) {
+ metadataList = RecursiveMetadataResource.parseMetadata(
+ stream,
+ metadata,
+ httpHeaders.getRequestHeaders(), info, "text");
+ }
return emit(emitterName, metadataList);
}
diff --git
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaEmitterTest.java
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaEmitterTest.java
index ec26812..2155680 100644
---
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaEmitterTest.java
+++
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaEmitterTest.java
@@ -16,7 +16,12 @@
*/
package org.apache.tika.server.core;
+import com.google.gson.Gson;
+import com.google.gson.JsonArray;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonPrimitive;
import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.ResourceProvider;
@@ -24,6 +29,7 @@ import
org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.TikaTest;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.serialization.JsonMetadataList;
import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
import org.apache.tika.server.core.resource.EmitterResource;
@@ -44,7 +50,9 @@ import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
+import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
/**
* This offers basic integration tests with fetchers and emitters.
@@ -68,6 +76,7 @@ public class TikaEmitterTest extends CXFTestBase {
Files.createDirectories(TMP_OUTPUT_DIR);
Files.copy(TikaEmitterTest.class.getResourceAsStream("/test-documents/mock/hello_world.xml"),
inputDir.resolve("hello_world.xml"));
+
TIKA_CONFIG_XML = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"+
"<properties>"+
"<fetchers>"+
@@ -125,13 +134,55 @@ public class TikaEmitterTest extends CXFTestBase {
@Test
public void testGet() throws Exception {
- String q = "?fetchString="+ URLEncoder.encode("fs:hello_world.xml",
StandardCharsets.UTF_8.name());
+ Path targetFile = TMP_OUTPUT_DIR.resolve("hello_world.xml.json");
+ assertFalse(Files.isRegularFile(targetFile));
+
+ String q = "?fetcherString="+
+ URLEncoder.encode("fs:hello_world.xml",
StandardCharsets.UTF_8.name());
String getUrl = endPoint+EMITTER_PATH+q;
Response response = WebClient
.create(getUrl)
.accept("application/json").get();
assertEquals(200, response.getStatus());
+ List<Metadata> metadataList = null;
+ try (Reader reader = Files.newBufferedReader(targetFile)) {
+ metadataList = JsonMetadataList.fromJson(reader);
+ }
+ assertEquals(1, metadataList.size());
+ Metadata metadata = metadataList.get(0);
+ assertEquals("hello world",
+
metadata.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT).trim());
+ assertEquals("Nikolai Lobachevsky", metadata.get("author"));
+ assertEquals("你好,世界", metadata.get("title"));
+ assertEquals("application/mock+xml",
metadata.get(Metadata.CONTENT_TYPE));
+ }
+
+ @Test
+ public void testPost() throws Exception {
Path targetFile = TMP_OUTPUT_DIR.resolve("hello_world.xml.json");
+ assertFalse(Files.isRegularFile(targetFile));
+
+ JsonObject root = new JsonObject();
+ root.add("fetcherString", new JsonPrimitive("fs:hello_world.xml"));
+ JsonObject userMetadata = new JsonObject();
+ String[] valueArray = new String[] {"my-value-1", "my-value-2",
"my-value-3"};
+ JsonArray arr = new JsonArray();
+ for (int i = 0; i < valueArray.length; i++) {
+ arr.add(valueArray[i]);
+ }
+
+ userMetadata.add("my-key", new JsonPrimitive("my-value"));
+ userMetadata.add("my-key-multi", arr);
+ root.add("metadata", userMetadata);
+ String jsonPost = new Gson().toJson(root);
+
+ String getUrl = endPoint+EMITTER_PATH;
+ Response response = WebClient
+ .create(getUrl)
+ .accept("application/json")
+ .post(jsonPost);
+ assertEquals(200, response.getStatus());
+
List<Metadata> metadataList = null;
try (Reader reader = Files.newBufferedReader(targetFile)) {
metadataList = JsonMetadataList.fromJson(reader);
@@ -143,8 +194,39 @@ public class TikaEmitterTest extends CXFTestBase {
assertEquals("Nikolai Lobachevsky", metadata.get("author"));
assertEquals("你好,世界", metadata.get("title"));
assertEquals("application/mock+xml",
metadata.get(Metadata.CONTENT_TYPE));
+ assertEquals("my-value", metadata.get("my-key"));
+ assertArrayEquals(valueArray, metadata.getValues("my-key-multi"));
}
- //TODO: add put and post
+ @Test
+ public void testPut() throws Exception {
+ Path targetFile = TMP_OUTPUT_DIR.resolve("hello_world.xml.json");
+ assertFalse(Files.isRegularFile(targetFile));
+
+ String getUrl = endPoint+EMITTER_PATH;
+ String metaPathKey = EmitterResource.PATH_KEY_FOR_HTTP_HEADER;
+
+ Response response = WebClient
+ .create(getUrl)
+ .accept("application/json")
+ .header(metaPathKey, "hello_world.xml")
+ .put(
+ ClassLoader
+
.getSystemResourceAsStream("test-documents/mock/hello_world.xml")
+ );
+ System.out.println(IOUtils.toString((InputStream)
response.getEntity(), StandardCharsets.UTF_8));
+ assertEquals(200, response.getStatus());
+ List<Metadata> metadataList = null;
+ try (Reader reader = Files.newBufferedReader(targetFile)) {
+ metadataList = JsonMetadataList.fromJson(reader);
+ }
+ assertEquals(1, metadataList.size());
+ Metadata metadata = metadataList.get(0);
+ assertEquals("hello world",
+
metadata.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT).trim());
+ assertEquals("Nikolai Lobachevsky", metadata.get("author"));
+ assertEquals("你好,世界", metadata.get("title"));
+ assertEquals("application/mock+xml",
metadata.get(Metadata.CONTENT_TYPE));
+ }
}