This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch TIKA-3288
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/TIKA-3288 by this push:
new 4a6785e TIKA-3288 -- WIP do not merge.
4a6785e is described below
commit 4a6785e1738f0a1f96ddc1106f6995b3d8ab2f75
Author: tballison <[email protected]>
AuthorDate: Tue Feb 2 12:05:27 2021 -0500
TIKA-3288 -- WIP do not merge.
---
.../src/main/java/org/apache/tika/cli/TikaCLI.java | 8 +-
.../test/java/org/apache/tika/cli/TikaCLITest.java | 18 ++-
.../apache/tika/batch/fs/HandlerBuilderTest.java | 2 +-
.../org/apache/tika/pipes/emitter/EmitKey.java | 19 ++++
.../tika/pipes/fetchiterator/FetchEmitTuple.java | 20 ++++
.../org/apache/tika/eval/app/io/ExtractReader.java | 2 -
.../resources/test-dirs/extractsA/file1.pdf.json | 2 +-
tika-eval/tika-eval-core/pom.xml | 5 +
.../tika/pipes/emitter/fs/FileSystemEmitter.java | 10 +-
.../apache/tika/pipes/emitter/s3/S3Emitter.java | 4 +-
.../tika/pipes/emitter/solr/SolrEmitter.java | 2 +-
tika-serialization/pom.xml | 6 +-
.../metadata/serialization/JsonFetchEmitTuple.java | 112 ++++++++++++++++++
.../tika/metadata/serialization/JsonMetadata.java | 126 +++++++++++++++------
.../metadata/serialization/JsonMetadataBase.java | 52 ---------
.../serialization/JsonMetadataDeserializer.java | 73 ------------
.../metadata/serialization/JsonMetadataList.java | 78 ++++++-------
.../serialization/JsonMetadataSerializer.java | 97 ----------------
.../serialization/JsonStreamingSerializer.java | 34 ++----
.../serialization/PrettyMetadataKeyComparator.java | 11 +-
.../serialization/JsonFetchEmitTupleTest.java | 53 +++++++++
.../serialization/JsonMetadataListTest.java | 18 ++-
.../metadata/serialization/JsonMetadataTest.java | 23 ++--
.../tika/server/classic/TikaDetectorsTest.java | 10 +-
.../tika/server/classic/TikaMimeTypesTest.java | 8 +-
.../tika/server/classic/TikaParsersTest.java | 9 +-
.../tika/server/core/resource/EmitterResource.java | 20 ++--
.../core/resource/RecursiveMetadataResource.java | 19 ++--
.../tika/server/core/resource/TikaDetectors.java | 10 +-
.../tika/server/core/resource/TikaMimeTypes.java | 11 +-
.../tika/server/core/resource/TikaParsers.java | 17 +--
.../server/core/writer/JSONMessageBodyWriter.java | 10 +-
.../tika/server/core/writer/JSONObjWriter.java | 9 +-
.../core/writer/MetadataListMessageBodyWriter.java | 10 +-
.../apache/tika/server/core/TikaEmitterTest.java | 79 ++++++-------
.../apache/tika/server/core/TikaMimeTypesTest.java | 3 -
.../core/TikaServerEmitterIntegrationTest.java | 40 ++++---
.../server/core/TikaServerIntegrationTest.java | 19 +---
.../tika/server/core/TikaServerStatusTest.java | 16 ++-
39 files changed, 532 insertions(+), 533 deletions(-)
diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 2849f1a..47ba493 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -531,11 +531,7 @@ public class TikaCLI {
}
JsonMetadataList.setPrettyPrinting(prettyPrint);
Writer writer = getOutputWriter(output, encoding);
- try {
- JsonMetadataList.toJson(handler.getMetadataList(), writer);
- } finally {
- writer.flush();
- }
+ JsonMetadataList.toJson(handler.getMetadataList(), writer);
}
private ContentHandlerFactory getContentHandlerFactory(OutputType type) {
@@ -1233,7 +1229,7 @@ public class TikaCLI {
JsonMetadata.setPrettyPrinting(prettyPrint);
JsonMetadata.toJson(metadata, writer);
writer.flush();
- } catch (TikaException e) {
+ } catch (IOException e) {
throw new SAXException(e);
}
}
diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
index c7d4f52..df0bbc8 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
@@ -220,10 +220,8 @@ public class TikaCLITest {
public void testJsonMetadataPrettyPrintOutput() throws Exception {
String json = getParamOutContent("--json", "-r", resourcePrefix +
"testJsonMultipleInts.html");
- assertTrue(json.contains(" \"X-TIKA:Parsed-By\": [\n" +
- " \"org.apache.tika.parser.DefaultParser\",\n" +
- " \"org.apache.tika.parser.html.HtmlParser\"\n" +
- " ],\n"));
+ assertTrue(json.contains("\"X-TIKA:Parsed-By\" : [
\"org.apache.tika.parser.DefaultParser\", " +
+ "\"org.apache.tika.parser.html.HtmlParser\" ],"));
//test legacy alphabetic sort of keys
int enc = json.indexOf("\"Content-Encoding\"");
int fb = json.indexOf("fb:admins");
@@ -478,17 +476,17 @@ public class TikaCLITest {
public void testJsonRecursiveMetadataParserMetadataOnly() throws Exception
{
String content = getParamOutContent("-m", "-J", "-r",
resourcePrefix+"test_recursive_embedded.docx");
assertTrue(content.contains(
- "\"extended-properties:AppVersion\": \"15.0000\","));
+ "\"extended-properties:AppVersion\" : \"15.0000\","));
assertTrue(content.contains(
- "\"extended-properties:Application\": \"Microsoft Office
Word\","));
- assertTrue(content.contains("\"X-TIKA:embedded_resource_path\":
\"/embed1.zip\""));
+ "\"extended-properties:Application\" : \"Microsoft Office
Word\","));
+ assertTrue(content.contains("\"X-TIKA:embedded_resource_path\" :
\"/embed1.zip\""));
assertFalse(content.contains("X-TIKA:content"));
}
@Test
public void testJsonRecursiveMetadataParserDefault() throws Exception {
String content = getParamOutContent("-J", "-r",
resourcePrefix+"test_recursive_embedded.docx");
- assertTrue(content.contains("\"X-TIKA:content\": \"\\u003chtml
xmlns\\u003d\\\"http://www.w3.org/1999/xhtml"));
+ assertTrue(content.contains("\"X-TIKA:content\" : \"<html
xmlns=\\\"http://www.w3.org/1999/xhtml"));
}
@Test
@@ -502,8 +500,8 @@ public class TikaCLITest {
public void testDigestInJson() throws Exception {
String content = getParamOutContent("-J", "-r", "-t", "--digest=MD5",
resourcePrefix+"test_recursive_embedded.docx");
- assertTrue(content.contains("\"X-TIKA:digest:MD5\":
\"59f626e09a8c16ab6dbc2800c685f772\","));
- assertTrue(content.contains("\"X-TIKA:digest:MD5\":
\"f9627095ef86c482e61d99f0cc1cf87d\""));
+ assertTrue(content.contains("\"X-TIKA:digest:MD5\" :
\"59f626e09a8c16ab6dbc2800c685f772\","));
+ assertTrue(content.contains("\"X-TIKA:digest:MD5\" :
\"f9627095ef86c482e61d99f0cc1cf87d\""));
}
@Test
diff --git a/tika-batch/src/test/java/org/apache/tika/batch/fs/HandlerBuilderTest.java b/tika-batch/src/test/java/org/apache/tika/batch/fs/HandlerBuilderTest.java
index 6e3648a..82a1f2b 100644
--- a/tika-batch/src/test/java/org/apache/tika/batch/fs/HandlerBuilderTest.java
+++ b/tika-batch/src/test/java/org/apache/tika/batch/fs/HandlerBuilderTest.java
@@ -109,7 +109,7 @@ public class HandlerBuilderTest extends FSBatchTestBase {
Path outputFile = outputDir.resolve("test0.xml.json");
String resultString = readFileToString(outputFile, UTF_8);
assertTrue(resultString.contains("\"author\":\"Nikolai
Lobachevsky\""));
- assertTrue(resultString.contains("tika-batch\\u0027s first test
file"));
+ assertTrue(resultString.contains("tika-batch's first test file"));
}
diff --git a/tika-core/src/main/java/org/apache/tika/pipes/emitter/EmitKey.java b/tika-core/src/main/java/org/apache/tika/pipes/emitter/EmitKey.java
index aa53dfd..eb4c2c2 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/emitter/EmitKey.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/emitter/EmitKey.java
@@ -41,4 +41,23 @@ public class EmitKey {
", emitterKey='" + emitKey + '\'' +
'}';
}
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ EmitKey emitKey1 = (EmitKey) o;
+
+ if (emitterName != null ? !emitterName.equals(emitKey1.emitterName) :
emitKey1.emitterName != null)
+ return false;
+ return emitKey != null ? emitKey.equals(emitKey1.emitKey) :
emitKey1.emitKey == null;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = emitterName != null ? emitterName.hashCode() : 0;
+ result = 31 * result + (emitKey != null ? emitKey.hashCode() : 0);
+ return result;
+ }
}
diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetchiterator/FetchEmitTuple.java b/tika-core/src/main/java/org/apache/tika/pipes/fetchiterator/FetchEmitTuple.java
index 9023378..2792d9b 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/fetchiterator/FetchEmitTuple.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/fetchiterator/FetchEmitTuple.java
@@ -52,4 +52,24 @@ public class FetchEmitTuple {
", metadata=" + metadata +
'}';
}
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ FetchEmitTuple that = (FetchEmitTuple) o;
+
+ if (fetchKey != null ? !fetchKey.equals(that.fetchKey) : that.fetchKey
!= null) return false;
+ if (emitKey != null ? !emitKey.equals(that.emitKey) : that.emitKey !=
null) return false;
+ return metadata != null ? metadata.equals(that.metadata) :
that.metadata == null;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = fetchKey != null ? fetchKey.hashCode() : 0;
+ result = 31 * result + (emitKey != null ? emitKey.hashCode() : 0);
+ result = 31 * result + (metadata != null ? metadata.hashCode() : 0);
+ return result;
+ }
}
diff --git a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/ExtractReader.java b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/ExtractReader.java
index 40ef751..cca4c03 100644
--- a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/ExtractReader.java
+++ b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/ExtractReader.java
@@ -166,8 +166,6 @@ public class ExtractReader {
}
} catch (IOException e) {
throw new
ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION);
- } catch (TikaException e) {
- throw new
ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_PARSE_EXCEPTION);
} finally {
IOUtils.closeQuietly(reader);
IOUtils.closeQuietly(is);
diff --git a/tika-eval/tika-eval-app/src/test/resources/test-dirs/extractsA/file1.pdf.json b/tika-eval/tika-eval-app/src/test/resources/test-dirs/extractsA/file1.pdf.json
index 6ef09de..8e6ae43 100644
--- a/tika-eval/tika-eval-app/src/test/resources/test-dirs/extractsA/file1.pdf.json
+++ b/tika-eval/tika-eval-app/src/test/resources/test-dirs/extractsA/file1.pdf.json
@@ -1,5 +1,5 @@
[{
"Content-Type":"text/plain",
"X-TIKA:content":"the quick brown fox fox fox jumped over the lazy lazy dog
1,200 120000",
- "xmpTPg:NPages":2
+ "xmpTPg:NPages":"2"
}]
\ No newline at end of file
diff --git a/tika-eval/tika-eval-core/pom.xml b/tika-eval/tika-eval-core/pom.xml
index 26c087e..664ef2d 100644
--- a/tika-eval/tika-eval-core/pom.xml
+++ b/tika-eval/tika-eval-core/pom.xml
@@ -44,6 +44,11 @@
<version>${project.version}</version>
</dependency>
<dependency>
+ <groupId>com.google.code.gson</groupId>
+ <artifactId>gson</artifactId>
+ <version>${gson.version}</version>
+ </dependency>
+ <dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>${commons.codec.version}</version>
diff --git a/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java b/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
index f58a21c..d4a0448 100644
--- a/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
+++ b/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
@@ -41,7 +41,7 @@ import java.util.Set;
/**
* Emitter to write to a file system.
- *
+ * <p>
* This calculates the path to write to based on the {@link #basePath}
* and the value of the {@link TikaCoreProperties#SOURCE_PATH} value.
*
@@ -95,11 +95,7 @@ public class FileSystemEmitter extends AbstractEmitter
implements StreamEmitter
Files.createDirectories(output.getParent());
}
try (Writer writer = Files.newBufferedWriter(output,
StandardCharsets.UTF_8)) {
- try {
- JsonMetadataList.toJson(metadataList, writer);
- } catch (TikaException e) {
- throw new TikaEmitterException("can't create json", e);
- }
+ JsonMetadataList.toJson(metadataList, writer);
}
}
@@ -111,6 +107,7 @@ public class FileSystemEmitter extends AbstractEmitter
implements StreamEmitter
/**
* If you want to customize the output file's file extension.
* Do not include the "."
+ *
* @param fileExtension
*/
@Field
@@ -132,6 +129,7 @@ public class FileSystemEmitter extends AbstractEmitter
implements StreamEmitter
"'; must be one of: 'skip', 'replace',
'exception'");
}
}
+
@Override
public void emit(String path, InputStream inputStream, Metadata
userMetadata) throws IOException,
TikaEmitterException {
diff --git a/tika-pipes/tika-emitters/tika-emitter-s3/src/main/java/org/apache/tika/pipes/emitter/s3/S3Emitter.java b/tika-pipes/tika-emitters/tika-emitter-s3/src/main/java/org/apache/tika/pipes/emitter/s3/S3Emitter.java
index d72d8fb..cb97d20 100644
--- a/tika-pipes/tika-emitters/tika-emitter-s3/src/main/java/org/apache/tika/pipes/emitter/s3/S3Emitter.java
+++ b/tika-pipes/tika-emitters/tika-emitter-s3/src/main/java/org/apache/tika/pipes/emitter/s3/S3Emitter.java
@@ -111,7 +111,7 @@ public class S3Emitter extends AbstractEmitter implements
Initializable, StreamE
try (Writer writer =
new BufferedWriter(new OutputStreamWriter(bos,
StandardCharsets.UTF_8))) {
JsonMetadataList.toJson(metadataList, writer);
- } catch (TikaException e) {
+ } catch (IOException e) {
throw new TikaEmitterException("can't jsonify", e);
}
byte[] bytes = bos.toByteArray();
@@ -125,7 +125,7 @@ public class S3Emitter extends AbstractEmitter implements
Initializable, StreamE
try (Writer writer = Files.newBufferedWriter(tmpPath,
StandardCharsets.UTF_8, StandardOpenOption.CREATE)) {
JsonMetadataList.toJson(metadataList, writer);
- } catch (TikaException e) {
+ } catch (IOException e) {
throw new TikaEmitterException("can't jsonify", e);
}
try (InputStream is = TikaInputStream.get(tmpPath)) {
diff --git a/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java b/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java
index 09ec607..2e70ebf 100644
--- a/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java
+++ b/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java
@@ -88,7 +88,7 @@ public class SolrEmitter extends AbstractEmitter implements
Initializable {
new BufferedWriter(
new OutputStreamWriter(bos, StandardCharsets.UTF_8));
try (
- JsonGenerator jsonGenerator = new
JsonFactory().createGenerator(writer)) {
+ JsonGenerator jsonGenerator = new
JsonFactory().createGenerator(writer)) {
jsonGenerator.writeStartArray();
jsonify(jsonGenerator, emitKey, metadataList);
jsonGenerator.writeEndArray();
diff --git a/tika-serialization/pom.xml b/tika-serialization/pom.xml
index bc2ff93..a8a7e09 100644
--- a/tika-serialization/pom.xml
+++ b/tika-serialization/pom.xml
@@ -51,9 +51,9 @@
</dependency>
<dependency>
- <groupId>com.google.code.gson</groupId>
- <artifactId>gson</artifactId>
- <version>${gson.version}</version>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-core</artifactId>
+ <version>${jackson.version}</version>
</dependency>
<!-- Test dependencies -->
diff --git a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonFetchEmitTuple.java b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonFetchEmitTuple.java
new file mode 100644
index 0000000..2dfcd3b
--- /dev/null
+++ b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonFetchEmitTuple.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata.serialization;
+
+import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonToken;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.pipes.emitter.EmitKey;
+import org.apache.tika.pipes.fetcher.FetchKey;
+import org.apache.tika.pipes.fetchiterator.FetchEmitTuple;
+import org.apache.tika.utils.StringUtils;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.Writer;
+
+public class JsonFetchEmitTuple {
+
+ private static final String FETCHER = "fetcher";
+ private static final String FETCHKEY = "fetchKey";
+ private static final String EMITTER = "emitter";
+ private static final String EMITKEY = "emitKey";
+ private static final String METADATAKEY = "metadata";
+
+ public static FetchEmitTuple fromJson(Reader reader) throws IOException {
+ JsonParser jParser = new JsonFactory().createParser(reader);
+ JsonToken token =jParser.nextToken();
+ if (token != JsonToken.START_OBJECT) {
+ throw new IOException("require start object, but see:
"+token.name());
+ }
+ return parseFetchEmitTuple(jParser);
+ }
+
+
+ private static FetchEmitTuple parseFetchEmitTuple(JsonParser jParser)
throws IOException {
+ JsonToken token = jParser.nextToken();
+ String fetcherName = null;
+ String fetchKey = null;
+ String emitterName = null;
+ String emitKey = null;
+ Metadata metadata = new Metadata();
+ while (token != JsonToken.END_OBJECT) {
+ if (token != JsonToken.FIELD_NAME) {
+ throw new IOException("required field name, but see: " +
token.name());
+ }
+ String name = jParser.getCurrentName();
+ if (FETCHER.equals(name)) {
+ fetcherName = getValue(jParser);
+ } else if (FETCHKEY.equals(name)) {
+ fetchKey = getValue(jParser);
+ } else if (EMITTER.equals(name)) {
+ emitterName = getValue(jParser);
+ } else if (EMITKEY.equals(name)) {
+ emitKey = getValue(jParser);
+ } else if (METADATAKEY.equals(name)) {
+ token = jParser.nextToken();
+ if (token != JsonToken.START_OBJECT) {
+ throw new IOException("required start object, but see: " +
token.name());
+ }
+ metadata = JsonMetadata.readMetadataObject(jParser);
+ }
+ token = jParser.nextToken();
+ }
+
+ return new FetchEmitTuple(
+ new FetchKey(fetcherName, fetchKey),
+ new EmitKey(emitterName, emitKey), metadata
+ );
+ }
+
+ private static String getValue(JsonParser jParser) throws IOException {
+ JsonToken token = jParser.nextToken();
+ if (token != JsonToken.VALUE_STRING) {
+ throw new IOException("required value string, but see:
"+token.name());
+ }
+ return jParser.getValueAsString();
+ }
+
+ public static void toJson(FetchEmitTuple t, Writer writer) throws
IOException {
+
+ try (JsonGenerator jsonGenerator = new
JsonFactory().createGenerator(writer)) {
+ jsonGenerator.writeStartObject();
+ jsonGenerator.writeStringField(FETCHER,
t.getFetchKey().getFetcherName());
+ jsonGenerator.writeStringField(FETCHKEY, t.getFetchKey().getKey());
+ jsonGenerator.writeStringField(EMITTER,
t.getEmitKey().getEmitterName());
+ if (!StringUtils.isBlank(t.getEmitKey().getKey())) {
+ jsonGenerator.writeStringField(EMITKEY,
t.getEmitKey().getKey());
+ }
+ if (t.getMetadata().size() > 0) {
+ jsonGenerator.writeFieldName(METADATAKEY);
+ JsonMetadata.writeMetadataObject(t.getMetadata(),
jsonGenerator, false);
+ }
+ jsonGenerator.writeEndObject();
+ }
+ }
+}
diff --git a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadata.java b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadata.java
index 50bbbe5..63de2a5 100644
--- a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadata.java
+++ b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadata.java
@@ -1,5 +1,3 @@
-package org.apache.tika.metadata.serialization;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,37 +14,69 @@ package org.apache.tika.metadata.serialization;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+package org.apache.tika.metadata.serialization;
+import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
+import java.util.Arrays;
-import com.google.gson.Gson;
-import com.google.gson.JsonIOException;
+import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonToken;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
-public class JsonMetadata extends JsonMetadataBase{
- private static Gson GSON;
+public class JsonMetadata {
+
+ static volatile boolean PRETTY_PRINT = false;
- static {
- GSON = defaultInit();
- }
/**
* Serializes a Metadata object to Json. This does not flush or close the
writer.
- *
+ *
* @param metadata metadata to write
- * @param writer writer
+ * @param writer writer
* @throws TikaException if there is an IOException during writing
*/
- public static void toJson(Metadata metadata, Writer writer) throws
TikaException {
- try {
- GSON.toJson(metadata, writer);
- } catch (JsonIOException e) {
- throw new TikaException(e.getMessage());
+ public static void toJson(Metadata metadata, Writer writer) throws
IOException {
+ if (metadata == null) {
+ writer.write("null");
+ return;
+ }
+ try (JsonGenerator jsonGenerator = new
JsonFactory().createGenerator(writer)) {
+ if (PRETTY_PRINT) {
+ jsonGenerator.useDefaultPrettyPrinter();
+ }
+ writeMetadataObject(metadata, jsonGenerator, PRETTY_PRINT);
}
}
-
+
+ static void writeMetadataObject(Metadata metadata,
+ JsonGenerator jsonGenerator, boolean
prettyPrint) throws IOException {
+ jsonGenerator.writeStartObject();
+ String[] names = metadata.names();
+ if (prettyPrint) {
+ Arrays.sort(names, new PrettyMetadataKeyComparator());
+ }
+ for (String n : names) {
+ String[] vals = metadata.getValues(n);
+ if (vals.length == 0) {
+ continue;
+ } else if (vals.length == 1) {
+ jsonGenerator.writeStringField(n, vals[0]);
+ } else if (vals.length > 1) {
+ jsonGenerator.writeArrayFieldStart(n);
+ for (String val : vals) {
+ jsonGenerator.writeString(val);
+ }
+ jsonGenerator.writeEndArray();
+ }
+ }
+ jsonGenerator.writeEndObject();
+ }
+
/**
* Read metadata from reader.
*
@@ -54,34 +84,60 @@ public class JsonMetadata extends JsonMetadataBase{
* @return Metadata or null if nothing could be read from the reader
* @throws TikaException in case of parse failure by Gson or IO failure
with Reader
*/
- public static Metadata fromJson(Reader reader) throws TikaException {
+ public static Metadata fromJson(Reader reader) throws IOException {
Metadata m = null;
- try {
- m = GSON.fromJson(reader, Metadata.class);
- } catch (com.google.gson.JsonParseException e){
- //covers both io and parse exceptions
- throw new TikaException(e.getMessage());
+ try (JsonParser jParser = new JsonFactory().createParser(reader)) {
+ m = readMetadataObject(jParser);
}
return m;
}
/**
- * Enables setting custom configurations on Gson. Remember to register
- * a serializer and a deserializer for Metadata. This does a literal set
- * and does not add the default serializer and deserializers.
- *
- * @param gson
+ * expects that jParser has not yet started on object or
+ * for jParser to be pointing to the start object.
+ * @param jParser
+ * @return
+ * @throws IOException
*/
- public static void setGson(Gson gson) {
- GSON = gson;
+ public static Metadata readMetadataObject(JsonParser jParser) throws
IOException {
+ Metadata metadata = new Metadata();
+ JsonToken token = jParser.currentToken();
+ if (token == null) {
+ token = jParser.nextToken();
+ if (token != JsonToken.START_OBJECT) {
+ throw new IOException("expected start object, but got: " +
token.name());
+ }
+ token = jParser.nextToken();
+ } else if (token == JsonToken.START_OBJECT) {
+ token = jParser.nextToken();
+ }
+
+ while (token != JsonToken.END_OBJECT) {
+ token = jParser.currentToken();
+ if (token != JsonToken.FIELD_NAME) {
+ throw new IOException("expected field name, but got: "
+ + token.name());
+ }
+ String key = jParser.getCurrentName();
+ token = jParser.nextToken();
+ if (token == JsonToken.START_ARRAY) {
+ while (jParser.nextToken() != JsonToken.END_ARRAY) {
+ metadata.add(key, jParser.getText());
+ }
+ } else {
+ if (token != JsonToken.VALUE_STRING) {
+ throw new IOException("expected string value, but found:
"+token.name());
+ }
+ String value = jParser.getValueAsString();
+ metadata.set(key, value);
+ }
+ token = jParser.nextToken();
+ }
+ return metadata;
}
public static void setPrettyPrinting(boolean prettyPrint) {
- if (prettyPrint) {
- GSON = prettyInit();
- } else {
- GSON = defaultInit();
- }
+ PRETTY_PRINT = prettyPrint;
}
}
diff --git a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataBase.java b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataBase.java
deleted file mode 100644
index 90bfca8..0000000
--- a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataBase.java
+++ /dev/null
@@ -1,52 +0,0 @@
-package org.apache.tika.metadata.serialization;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.Arrays;
-
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
-import org.apache.tika.metadata.Metadata;
-
-public class JsonMetadataBase {
-
-
- static Gson defaultInit() {
- GsonBuilder builder = new GsonBuilder();
- builder.registerTypeHierarchyAdapter(Metadata.class, new
JsonMetadataSerializer());
- builder.registerTypeHierarchyAdapter(Metadata.class, new
JsonMetadataDeserializer());
- return builder.create();
- }
-
- static Gson prettyInit() {
- GsonBuilder builder = new GsonBuilder();
- builder.registerTypeHierarchyAdapter(Metadata.class, new
SortedJsonMetadataSerializer());
- builder.registerTypeHierarchyAdapter(Metadata.class, new
JsonMetadataDeserializer());
- builder.setPrettyPrinting();
- return builder.create();
- }
-
- private static class SortedJsonMetadataSerializer extends
JsonMetadataSerializer {
- @Override
- public String[] getNames(Metadata m) {
- String[] names = m.names();
- Arrays.sort(names, new PrettyMetadataKeyComparator());
- return names;
- }
- }
-}
diff --git a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataDeserializer.java b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataDeserializer.java
deleted file mode 100644
index fae0141..0000000
--- a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataDeserializer.java
+++ /dev/null
@@ -1,73 +0,0 @@
-package org.apache.tika.metadata.serialization;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.lang.reflect.Type;
-import java.util.Iterator;
-import java.util.Map;
-
-import org.apache.tika.metadata.Metadata;
-
-import com.google.gson.JsonArray;
-import com.google.gson.JsonDeserializationContext;
-import com.google.gson.JsonDeserializer;
-import com.google.gson.JsonElement;
-import com.google.gson.JsonObject;
-import com.google.gson.JsonParseException;
-
-
-/**
- * Deserializer for Metadata
- *
- * If overriding this, remember that this is called from a static context.
- * Share state only with great caution.
- */
-public class JsonMetadataDeserializer implements JsonDeserializer<Metadata> {
-
- /**
- * Deserializes a json object (equivalent to: Map<String, String[]>)
- * into a Metadata object.
- *
- * @param element to serialize
- * @param type (ignored)
- * @param context (ignored)
- * @return Metadata
- * @throws JsonParseException if element is not able to be parsed
- */
- @Override
- public Metadata deserialize(JsonElement element, Type type,
- JsonDeserializationContext context) throws JsonParseException {
-
- final JsonObject obj = element.getAsJsonObject();
- Metadata m = new Metadata();
- for (Map.Entry<String, JsonElement> entry : obj.entrySet()){
- String key = entry.getKey();
- JsonElement v = entry.getValue();
- if (v.isJsonPrimitive()){
- m.set(key, v.getAsString());
- } else if (v.isJsonArray()){
- JsonArray vArr = v.getAsJsonArray();
- for (JsonElement valueItem : vArr) {
- m.add(key, valueItem.getAsString());
- }
-
- }
- }
- return m;
- }
-}
diff --git a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataList.java b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataList.java
index dea6faf..9aa9be3 100644
--- a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataList.java
+++ b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataList.java
@@ -18,25 +18,23 @@ package org.apache.tika.metadata.serialization;
*/
+import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
-import java.lang.reflect.Type;
+import java.util.ArrayList;
import java.util.List;
-import com.google.gson.Gson;
-import com.google.gson.JsonIOException;
-import com.google.gson.reflect.TypeToken;
+import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonToken;
+
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
-public class JsonMetadataList extends JsonMetadataBase {
-
- private final static Type listType = new
TypeToken<List<Metadata>>(){}.getType();
- private static Gson GSON;
- static {
- GSON = defaultInit();
- }
+public class JsonMetadataList {
+ static volatile boolean PRETTY_PRINT = false;
/**
* Serializes a list of Metadata objects to Json. This does not flush or close
the writer.
@@ -45,11 +43,20 @@ public class JsonMetadataList extends JsonMetadataBase {
* @param writer writer
* @throws java.io.IOException if there is an IOException during writing
*/
- public static void toJson(List<Metadata> metadataList, Writer writer)
throws TikaException {
- try {
- GSON.toJson(metadataList, writer);
- } catch (JsonIOException e) {
- throw new TikaException(e.getMessage());
+ public static void toJson(List<Metadata> metadataList, Writer writer)
throws IOException {
+ if (metadataList == null) {
+ writer.write("null");
+ return;
+ }
+ try (JsonGenerator jsonGenerator = new
JsonFactory().createGenerator(writer)) {
+ if (PRETTY_PRINT) {
+ jsonGenerator.useDefaultPrettyPrinter();
+ }
+ jsonGenerator.writeStartArray();
+ for (Metadata m : metadataList) {
+ JsonMetadata.writeMetadataObject(m, jsonGenerator,
PRETTY_PRINT);
+ }
+ jsonGenerator.writeEndArray();
}
}
@@ -60,16 +67,26 @@ public class JsonMetadataList extends JsonMetadataBase {
* @return list of Metadata objects, or null if the reader is null
* @throws java.io.IOException in case of parse failure or IO failure with
Reader
*/
- public static List<Metadata> fromJson(Reader reader) throws TikaException {
+ public static List<Metadata> fromJson(Reader reader) throws IOException {
List<Metadata> ms = null;
if (reader == null) {
return ms;
}
- try {
- ms = GSON.fromJson(reader, listType);
- } catch (com.google.gson.JsonParseException e){
- //covers both io and parse exceptions
- throw new TikaException(e.getMessage());
+ ms = new ArrayList<>();
+ try (JsonParser jParser = new JsonFactory().createParser(reader)) {
+
+ JsonToken token = jParser.nextToken();
+ if (token != JsonToken.START_ARRAY) {
+ throw new IOException(
+ "metadata list must start with an array, but I see:
"+token.name());
+ }
+ token = jParser.nextToken();
+ while (token != JsonToken.END_ARRAY) {
+ Metadata m = JsonMetadata.readMetadataObject(jParser);
+ ms.add(m);
+ token = jParser.nextToken();
+ }
+
}
if (ms == null) {
return null;
@@ -88,23 +105,8 @@ public class JsonMetadataList extends JsonMetadataBase {
return ms;
}
- /**
- * Enables setting custom configurations on Gson. Remember to register
- * a serializer and a deserializer for Metadata. This does a literal set
- * and does not add the default serializer and deserializers.
- *
- * @param gson
- */
- public static void setGson(Gson gson) {
- GSON = gson;
- }
-
public static void setPrettyPrinting(boolean prettyPrint) {
- if (prettyPrint) {
- GSON = prettyInit();
- } else {
- GSON = defaultInit();
- }
+ PRETTY_PRINT = prettyPrint;
}
diff --git
a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataSerializer.java
b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataSerializer.java
deleted file mode 100644
index 602c346..0000000
---
a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataSerializer.java
+++ /dev/null
@@ -1,97 +0,0 @@
-package org.apache.tika.metadata.serialization;
-
-/*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-import java.lang.reflect.Type;
-import java.util.Arrays;
-
-import org.apache.tika.metadata.Metadata;
-
-import com.google.gson.JsonArray;
-import com.google.gson.JsonElement;
-import com.google.gson.JsonNull;
-import com.google.gson.JsonObject;
-import com.google.gson.JsonPrimitive;
-import com.google.gson.JsonSerializationContext;
-import com.google.gson.JsonSerializer;
-
-
-/**
- * Serializer for Metadata
- *
- * If overriding this, remember that this is called from a static context.
- * Share state only with great caution.
- *
- */
-public class JsonMetadataSerializer implements JsonSerializer<Metadata> {
-
-
- /**
- * Serializes a Metadata object into effectively Map<String, String[]>.
- *
- * @param metadata object to serialize
- * @param type (ignored)
- * @param context (ignored)
- * @return JsonElement with key/value(s) pairs or JsonNull if metadata is
null.
- */
- @Override
- public JsonElement serialize(Metadata metadata, Type type,
JsonSerializationContext context) {
- if (metadata == null){
- return JsonNull.INSTANCE;
- }
- String[] names = getNames(metadata);
- if (names == null) {
- return JsonNull.INSTANCE;
- }
-
- JsonObject root = new JsonObject();
-
- for (String n : names) {
-
- String[] vals = metadata.getValues(n);
- if (vals == null) {
- //silently skip
- continue;
- }
-
- if (vals.length == 1){
- root.addProperty(n, vals[0]);
- } else {
- JsonArray jArr = new JsonArray();
- for (String val : vals) {
- jArr.add(new JsonPrimitive(val));
- }
- root.add(n, jArr);
- }
- }
- return root;
- }
-
- /**
- * Override to get a custom sort order
- * or to filter names.
- *
- * @param metadata metadata from which to grab names
- * @return list of names in the order in which they should be serialized
- */
- protected String[] getNames(Metadata metadata) {
- String[] names = metadata.names();
- Arrays.sort(names);
- return names;
- }
-}
diff --git
a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonStreamingSerializer.java
b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonStreamingSerializer.java
index f01df6f..fb29a5e 100644
---
a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonStreamingSerializer.java
+++
b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonStreamingSerializer.java
@@ -16,7 +16,9 @@
*/
package org.apache.tika.metadata.serialization;
-import com.google.gson.stream.JsonWriter;
+
+import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonGenerator;
import org.apache.tika.metadata.Metadata;
import java.io.IOException;
@@ -26,40 +28,28 @@ import java.util.Arrays;
public class JsonStreamingSerializer implements AutoCloseable {
- private final JsonWriter jsonWriter;
+ private final Writer writer;
+ private JsonGenerator jsonGenerator;
boolean hasStartedArray = false;
public JsonStreamingSerializer(Writer writer) {
- this.jsonWriter = new JsonWriter(writer);
+ this.writer = writer;
}
public void add(Metadata metadata) throws IOException {
if (!hasStartedArray) {
- jsonWriter.beginArray();
+ jsonGenerator = new JsonFactory().createGenerator(writer);
+ jsonGenerator.writeStartArray();
hasStartedArray = true;
}
String[] names = metadata.names();
Arrays.sort(names);
- jsonWriter.beginObject();
- for (String n : names) {
- jsonWriter.name(n);
- String[] values = metadata.getValues(n);
- if (values.length == 1) {
- jsonWriter.value(values[0]);
- } else {
- jsonWriter.beginArray();
- for (String v : values) {
- jsonWriter.value(v);
- }
- jsonWriter.endArray();
- }
- }
- jsonWriter.endObject();
+ JsonMetadata.writeMetadataObject(metadata, jsonGenerator, false);
}
@Override
public void close() throws IOException {
- jsonWriter.endArray();
- jsonWriter.flush();
- jsonWriter.close();
+ jsonGenerator.writeEndArray();
+ jsonGenerator.flush();
+ jsonGenerator.close();
}
}
diff --git
a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/PrettyMetadataKeyComparator.java
b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/PrettyMetadataKeyComparator.java
index 5516c1d..35e76b7 100644
---
a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/PrettyMetadataKeyComparator.java
+++
b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/PrettyMetadataKeyComparator.java
@@ -17,6 +17,8 @@ package org.apache.tika.metadata.serialization;
* limitations under the License.
*/
+import org.apache.tika.metadata.TikaCoreProperties;
+
public class PrettyMetadataKeyComparator implements
java.util.Comparator<String> {
@Override
public int compare(String s1, String s2) {
@@ -26,15 +28,12 @@ public class PrettyMetadataKeyComparator implements
java.util.Comparator<String>
return -1;
}
- //this is stinky. This should reference
AbstractRecursiveParserWrapperHandler.TIKA_CONTENT
- //but that would require making core a dependency of serialization...
- //do we want to do that?
- if (s1.equals("tika:content")) {
- if (s2.equals("tika:content")) {
+ if (s1.equals(TikaCoreProperties.TIKA_CONTENT.getName())) {
+ if (s2.equals(TikaCoreProperties.TIKA_CONTENT.getName())) {
return 0;
}
return 2;
- } else if (s2.equals("tika:content")) {
+ } else if (s2.equals(TikaCoreProperties.TIKA_CONTENT.getName())) {
return -2;
}
//do we want to lowercase?
diff --git
a/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonFetchEmitTupleTest.java
b/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonFetchEmitTupleTest.java
new file mode 100644
index 0000000..67bbbde
--- /dev/null
+++
b/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonFetchEmitTupleTest.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata.serialization;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.pipes.emitter.EmitKey;
+import org.apache.tika.pipes.fetcher.FetchKey;
+import org.apache.tika.pipes.fetchiterator.FetchEmitTuple;
+import org.junit.Test;
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.StringWriter;
+
+import static org.junit.Assert.assertEquals;
+
+public class JsonFetchEmitTupleTest {
+
+ @Test
+ public void testBasic() throws Exception {
+ Metadata m = new Metadata();
+ m.add("m1", "v1");
+ m.add("m1", "v1");
+ m.add("m2", "v2");
+ m.add("m2", "v3");
+ m.add("m3", "v4");
+
+ FetchEmitTuple t = new FetchEmitTuple(
+ new FetchKey("fetcher1", "fetchkey1"),
+ new EmitKey("emitter1", "emitKey1"),
+ m);
+ StringWriter writer = new StringWriter();
+ JsonFetchEmitTuple.toJson(t, writer);
+
+ Reader reader = new StringReader(writer.toString());
+ FetchEmitTuple deserialized = JsonFetchEmitTuple.fromJson(reader);
+ assertEquals(t, deserialized);
+ }
+}
diff --git
a/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataListTest.java
b/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataListTest.java
index 522807e..768cd35 100644
---
a/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataListTest.java
+++
b/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataListTest.java
@@ -114,24 +114,22 @@ public class JsonMetadataListTest {
metadataList.add(m2);
StringWriter writer = new StringWriter();
JsonMetadataList.toJson(metadataList, writer);
- assertTrue(writer.toString().startsWith("[{\"tika:content\":\"this is
the content\",\"zk1\":[\"v1\",\"v2\","));
+ assertTrue(writer.toString().startsWith("["));
writer = new StringWriter();
JsonMetadataList.setPrettyPrinting(true);
JsonMetadataList.toJson(metadataList, writer);
- assertTrue(writer.toString().startsWith("[\n" +
- " {\n" +
- " \"zk1\": [\n" +
- " \"v1\",\n" +
- " \"v2\","));
- assertTrue(writer.toString().contains(" \"zk2\": \"v1\",\n" +
- " \"tika:content\": \"this is the content\"\n" +
- " },"));
+ assertTrue(writer.toString().startsWith("[ {\n" +
+ " \"zk1\" : [ \"v1\", \"v2\", \"v3\", \"v4\", \"v4\" ],\n" +
+ " \"zk2\" : \"v1\",\n" +
+ " \"tika:content\" : \"this is the content\"\n" +
+ "},"));
+
//now set it back to false
JsonMetadataList.setPrettyPrinting(false);
writer = new StringWriter();
JsonMetadataList.toJson(metadataList, writer);
- assertTrue(writer.toString().startsWith("[{\"tika:content\":\"this is
the content\",\"zk1\":[\"v1\",\"v2\","));
+ assertTrue(writer.toString().startsWith("["));
}
@Test
diff --git
a/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataTest.java
b/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataTest.java
index 4cf5367..3abeaa6 100644
---
a/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataTest.java
+++
b/tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonMetadataTest.java
@@ -17,6 +17,7 @@ package org.apache.tika.metadata.serialization;
* limitations under the License.
*/
+import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
@@ -63,18 +64,12 @@ public class JsonMetadataTest {
JsonMetadata.setPrettyPrinting(true);
JsonMetadata.toJson(metadata, writer);
assertTrue(writer.toString().contains(
- " \"json_escapes\": \"the: \\\"quick\\\" brown, fox\",\n" +
- " \"k1\": [\n" +
- " \"v1\",\n" +
- " \"v2\"\n" +
- " ],\n" +
- " \"k3\": [\n" +
- " \"v3\",\n" +
- " \"v3\"\n" +
- " ],\n" +
- " \"k4\": \"500,000\",\n" +
- " \"url\":
\"/myApp/myAction.html?method\\u003drouter\\u0026cmd\\u003d1\"\n" +
- "}"));
+ "\"json_escapes\" : \"the: \\\"quick\\\" brown, fox\",\n" +
+ " \"k1\" : [ \"v1\", \"v2\" ],\n" +
+ " \"k3\" : [ \"v3\", \"v3\" ],\n" +
+ " \"k4\" : \"500,000\",\n" +
+ " \"url\" :
\"/myApp/myAction.html?method=router&cmd=1\"\n" +
+ "}"));
}
@Test
@@ -84,7 +79,7 @@ public class JsonMetadataTest {
boolean ex = false;
try {
Metadata deserialized = JsonMetadata.fromJson(new
StringReader(json));
- } catch (TikaException e) {
+ } catch (IOException e) {
ex = true;
}
assertTrue(ex);
@@ -96,7 +91,7 @@ public class JsonMetadataTest {
boolean ex = false;
try {
JsonMetadata.toJson(null, writer);
- } catch (TikaException e) {
+ } catch (IOException e) {
ex = true;
}
assertFalse(ex);
diff --git
a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaDetectorsTest.java
b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaDetectorsTest.java
index 8d78ba5..dd5fd5c 100644
---
a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaDetectorsTest.java
+++
b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaDetectorsTest.java
@@ -26,8 +26,8 @@ import java.io.InputStream;
import java.util.List;
import java.util.Map;
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
+import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
@@ -42,9 +42,6 @@ import org.junit.Test;
public class TikaDetectorsTest extends CXFTestBase {
- private static final Gson GSON = new GsonBuilder().create();
-
-
private static final String DETECTORS_PATH = "/detectors";
@Override
@@ -108,7 +105,8 @@ public class TikaDetectorsTest extends CXFTestBase {
.get();
String jsonStr = getStringFromInputStream((InputStream)
response.getEntity());
- Map<String, Object> json = (Map<String, Object>)
GSON.fromJson(jsonStr, Map.class);
+ Map<String, Object> json =
+ new ObjectMapper().readerFor(Map.class).readValue(jsonStr);
// Should have a nested structure
assertTrue(json.containsKey("name"));
diff --git
a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaMimeTypesTest.java
b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaMimeTypesTest.java
index 028c68c..b727176 100644
---
a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaMimeTypesTest.java
+++
b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaMimeTypesTest.java
@@ -16,8 +16,8 @@
*/
package org.apache.tika.server.classic;
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
@@ -34,7 +34,6 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
public class TikaMimeTypesTest extends CXFTestBase {
- private static final Gson GSON = new GsonBuilder().create();
private static final String MIMETYPES_PATH = "/mime-types";
@@ -62,8 +61,7 @@ public class TikaMimeTypesTest extends CXFTestBase {
.get();
String jsonStr = getStringFromInputStream((InputStream)
response.getEntity());
- Map<String, Map<String, Object>> json = (Map<String, Map<String,
Object>>)
- GSON.fromJson(jsonStr, Map.class);
+ Map<String, Map<String, Object>> json = new
ObjectMapper().readerFor(Map.class).readValue(jsonStr);
assertEquals(true, json.containsKey("text/plain"));
assertEquals(true, json.containsKey("application/xml"));
diff --git
a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaParsersTest.java
b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaParsersTest.java
index 9b065e5..771cca6 100644
---
a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaParsersTest.java
+++
b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/TikaParsersTest.java
@@ -26,8 +26,7 @@ import java.io.InputStream;
import java.util.List;
import java.util.Map;
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
+import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
@@ -40,8 +39,6 @@ import org.junit.Test;
public class TikaParsersTest extends CXFTestBase {
- private static final Gson GSON = new GsonBuilder().create();
-
private static final String PARSERS_SUMMARY_PATH = "/parsers";
private static final String PARSERS_DETAILS_PATH = "/parsers/details";
@@ -142,8 +139,8 @@ public class TikaParsersTest extends CXFTestBase {
.get();
String jsonStr = getStringFromInputStream((InputStream)
response.getEntity());
- Map<String, Map<String, Object>> json = (Map<String, Map<String,
Object>>)
- GSON.fromJson(jsonStr, Map.class);
+ Map<String, Map<String, Object>> json =
+ new ObjectMapper().readerFor(Map.class).readValue(jsonStr);
// Should have a nested structure
assertEquals(true, json.containsKey("name"));
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
index 3e5263d..871af8b 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
@@ -17,9 +17,9 @@
package org.apache.tika.server.core.resource;
-import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
+import org.apache.tika.metadata.serialization.JsonFetchEmitTuple;
import org.apache.tika.pipes.emitter.EmitKey;
import org.apache.tika.pipes.emitter.Emitter;
import org.apache.tika.pipes.emitter.TikaEmitterException;
@@ -158,13 +158,12 @@ public class EmitterResource {
@Context HttpHeaders httpHeaders,
@Context UriInfo info
) throws Exception {
-
- JsonFactory jfactory = new JsonFactory();
- JsonParser jParser = jfactory.createParser(is);
- FetchEmitTuple t = deserializeTuple(jParser);
+ FetchEmitTuple t = null;
+ try (Reader reader = new InputStreamReader(is,
StandardCharsets.UTF_8)) {
+ t = JsonFetchEmitTuple.fromJson(reader);
+ }
Metadata metadata = new Metadata();
-
List<Metadata> metadataList = null;
try (InputStream stream =
TikaResource.getConfig().getFetcherManager()
@@ -185,7 +184,13 @@ public class EmitterResource {
LOG.debug("post parse/pre emit metadata {}: {}",
n, metadataList.get(0).get(n));
}
- return emit(t.getEmitKey(), metadataList);
+ //use fetch key if emitter key is not specified
+ //clean this up?
+ EmitKey emitKey = t.getEmitKey();
+ if (StringUtils.isBlank(emitKey.getKey())) {
+ emitKey = new EmitKey(emitKey.getEmitterName(),
t.getFetchKey().getKey());
+ }
+ return emit(emitKey, metadataList);
}
private FetchEmitTuple deserializeTuple(JsonParser jParser) throws
IOException {
@@ -220,7 +225,6 @@ public class EmitterResource {
String key = jParser.getCurrentName();
JsonToken token = jParser.nextToken();
if (jParser.isExpectedStartArrayToken()) {
- List<String> vals = new ArrayList<>();
while (jParser.nextToken() != JsonToken.END_ARRAY) {
metadata.add(key, jParser.getText());
}
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
index 505a5e2..e01a4fa 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
@@ -40,6 +40,7 @@ import org.apache.tika.parser.RecursiveParserWrapper;
import org.apache.tika.sax.BasicContentHandlerFactory;
import org.apache.tika.sax.RecursiveParserWrapperHandler;
import org.apache.tika.server.core.MetadataList;
+import org.apache.tika.server.core.TikaServerParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -135,15 +136,15 @@ public class RecursiveMetadataResource {
UriInfo info, String
handlerTypeName)
throws Exception {
- final ParseContext context = new ParseContext();
- Parser parser = TikaResource.createParser();
- // TODO: parameterize choice of max chars/max embedded
attachments
- RecursiveParserWrapper wrapper = new
RecursiveParserWrapper(parser);
+ final ParseContext context = new ParseContext();
+ Parser parser = TikaResource.createParser();
+ // TODO: parameterize choice of max chars/max embedded attachments
+ RecursiveParserWrapper wrapper = new RecursiveParserWrapper(parser);
fillMetadata(parser, metadata, httpHeaders);
fillParseContext(httpHeaders, metadata, context);
- TikaResource.logRequest(LOG, info, metadata);
+ TikaResource.logRequest(LOG, info, metadata);
int writeLimit = -1;
if (httpHeaders.containsKey("writeLimit")) {
@@ -157,11 +158,13 @@ public class RecursiveMetadataResource {
BasicContentHandlerFactory.HANDLER_TYPE type =
BasicContentHandlerFactory.parseHandlerType(handlerTypeName,
DEFAULT_HANDLER_TYPE);
- RecursiveParserWrapperHandler handler = new
RecursiveParserWrapperHandler(
- new BasicContentHandlerFactory(type, writeLimit),
maxEmbeddedResources,
+ RecursiveParserWrapperHandler handler = new
RecursiveParserWrapperHandler(
+ new BasicContentHandlerFactory(type, writeLimit),
maxEmbeddedResources,
TikaResource.getConfig().getMetadataFilter());
- try {
+ try {
TikaResource.parse(wrapper, LOG, info.getPath(), is, handler,
metadata, context);
+ } catch (TikaServerParseException e) {
+ //do nothing
} catch (SecurityException|WebApplicationException e) {
throw e;
} catch (Exception e) {
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaDetectors.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaDetectors.java
index ce163d5..5b738fb 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaDetectors.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaDetectors.java
@@ -19,13 +19,13 @@ package org.apache.tika.server.core.resource;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
+import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.tika.detect.CompositeDetector;
import org.apache.tika.detect.Detector;
import org.apache.tika.server.core.HTMLHelper;
@@ -36,8 +36,6 @@ import org.apache.tika.server.core.HTMLHelper;
*/
@Path("/detectors")
public class TikaDetectors {
- private static final Gson GSON = new
GsonBuilder().disableHtmlEscaping().create();
-
private HTMLHelper html;
@@ -77,10 +75,10 @@ public class TikaDetectors {
@GET
@Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON)
- public String getDetectorsJSON() {
+ public String getDetectorsJSON() throws IOException {
Map<String, Object> details = new HashMap<String, Object>();
detectorAsMap(TikaResource.getConfig().getDetector(), details);
- return GSON.toJson(details);
+ return new ObjectMapper().writeValueAsString(details);
}
private void detectorAsMap(Detector d, Map<String, Object> details) {
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaMimeTypes.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaMimeTypes.java
index be1d84b..c45fb25 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaMimeTypes.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaMimeTypes.java
@@ -19,6 +19,7 @@ package org.apache.tika.server.core.resource;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -26,8 +27,7 @@ import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
+import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
import org.apache.tika.parser.CompositeParser;
@@ -41,8 +41,6 @@ import org.apache.tika.server.core.HTMLHelper;
@Path("/mime-types")
public class TikaMimeTypes {
- private static final Gson GSON = new
GsonBuilder().disableHtmlEscaping().create();
-
private HTMLHelper html;
public TikaMimeTypes() {
@@ -94,7 +92,7 @@ public class TikaMimeTypes {
@GET
@Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON)
- public String getMimeTypesJSON() {
+ public String getMimeTypesJSON() throws IOException {
Map<String, Object> details = new HashMap<String, Object>();
for (MediaTypeDetails type : getMediaTypes()) {
@@ -111,7 +109,8 @@ public class TikaMimeTypes {
details.put(type.type.toString(), typeDets);
}
- return GSON.toJson(details);
+ return new ObjectMapper().writerWithDefaultPrettyPrinter()
+ .writeValueAsString(details);
}
private static String[] copyToStringArray(MediaType[] aliases) {
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaParsers.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaParsers.java
index f1c4abb..400230c 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaParsers.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaParsers.java
@@ -19,6 +19,7 @@ package org.apache.tika.server.core.resource;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
@@ -28,8 +29,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
+import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
@@ -45,7 +45,6 @@ import org.apache.tika.server.core.HTMLHelper;
@Path("/parsers")
public class TikaParsers {
private static final ParseContext EMPTY_PC = new ParseContext();
- private static final Gson GSON = new
GsonBuilder().disableHtmlEscaping().create();
private HTMLHelper html;
public TikaParsers() {
@@ -116,21 +115,23 @@ public class TikaParsers {
@GET
@Path("/details")
@Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON)
- public String getParserDetailsJSON() {
+ public String getParserDetailsJSON() throws IOException {
return getParsersJSON(true);
}
@GET
@Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON)
- public String getParsersJSON() {
+ public String getParsersJSON() throws IOException {
return getParsersJSON(false);
}
- protected String getParsersJSON(boolean withMimeTypes) {
+ protected String getParsersJSON(boolean withMimeTypes) throws IOException
{
Map<String, Object> details = new HashMap<String, Object>();
parserAsMap(new ParserDetails(TikaResource.getConfig().getParser()),
withMimeTypes, details);
-
- return GSON.toJson(details);
+ ObjectMapper objectMapper = new ObjectMapper();
+ return objectMapper
+ .writerWithDefaultPrettyPrinter()
+ .writeValueAsString(details);
}
private void parserAsMap(ParserDetails p, boolean withMimeTypes,
Map<String, Object> details) {
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONMessageBodyWriter.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONMessageBodyWriter.java
index d3ff0a8..a0438af 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONMessageBodyWriter.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONMessageBodyWriter.java
@@ -53,13 +53,9 @@ public class JSONMessageBodyWriter implements
MessageBodyWriter<Metadata> {
public void writeTo(Metadata metadata, Class<?> type, Type genericType,
Annotation[] annotations,
MediaType mediaType, MultivaluedMap<String, Object>
httpHeaders, OutputStream entityStream) throws IOException,
WebApplicationException {
- try {
- Writer writer = new OutputStreamWriter(entityStream, UTF_8);
- JsonMetadata.toJson(metadata, writer);
- writer.flush();
- } catch (TikaException e) {
- throw new IOException(e);
- }
+ Writer writer = new OutputStreamWriter(entityStream, UTF_8);
+ JsonMetadata.toJson(metadata, writer);
+ writer.flush();
entityStream.flush();
}
}
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONObjWriter.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONObjWriter.java
index f71fde3..c79ac48 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONObjWriter.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONObjWriter.java
@@ -17,8 +17,7 @@
package org.apache.tika.server.core.writer;
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
+import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.tika.metadata.Metadata;
import javax.ws.rs.Produces;
@@ -40,7 +39,8 @@ import static java.nio.charset.StandardCharsets.UTF_8;
@Provider
@Produces(MediaType.APPLICATION_JSON)
public class JSONObjWriter implements MessageBodyWriter<Map<String, Object>> {
- private static Gson GSON = new GsonBuilder().setPrettyPrinting().create();
+ // ObjectMapper is thread-safe once configured; share one instead of building one per request.
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public boolean isWriteable(Class<?> type, Type genericType, Annotation[]
annotations, MediaType mediaType) {
return Map.class.isAssignableFrom(type);
@@ -55,8 +55,11 @@ public class JSONObjWriter implements
MessageBodyWriter<Map<String, Object>> {
MediaType mediaType, MultivaluedMap<String, Object>
httpHeaders, OutputStream entityStream)
throws IOException, WebApplicationException {
Writer writer = new OutputStreamWriter(entityStream, UTF_8);
- GSON.toJson(map, writer);
- writer.flush();
+ // Serialize to a String first: ObjectWriter.writeValue(Writer, ...) closes its
+ // target by default (JsonGenerator.Feature.AUTO_CLOSE_TARGET), which would close
+ // the container-owned entityStream through the OutputStreamWriter.
+ writer.write(OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(map));
+ writer.flush();
entityStream.flush();
}
}
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/MetadataListMessageBodyWriter.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/MetadataListMessageBodyWriter.java
index cedfcba..647da2d 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/MetadataListMessageBodyWriter.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/MetadataListMessageBodyWriter.java
@@ -56,13 +56,9 @@ public class MetadataListMessageBodyWriter implements
MessageBodyWriter<Metadata
public void writeTo(MetadataList list, Class<?> type, Type genericType,
Annotation[] annotations,
MediaType mediaType, MultivaluedMap<String, Object>
httpHeaders, OutputStream entityStream) throws IOException,
WebApplicationException {
- try {
- Writer writer = new OutputStreamWriter(entityStream, UTF_8);
- JsonMetadataList.toJson(list.getMetadata(), writer);
- writer.flush();
- } catch (TikaException e) {
- throw new IOException(e);
- }
+ Writer writer = new OutputStreamWriter(entityStream, UTF_8);
+ JsonMetadataList.toJson(list.getMetadata(), writer);
+ writer.flush();
entityStream.flush();
}
}
diff --git
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaEmitterTest.java
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaEmitterTest.java
index 31a3cd0..3c92674 100644
---
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaEmitterTest.java
+++
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaEmitterTest.java
@@ -16,11 +16,8 @@
*/
package org.apache.tika.server.core;
-import com.google.gson.Gson;
-import com.google.gson.JsonArray;
-import com.google.gson.JsonObject;
-import com.google.gson.JsonParser;
-import com.google.gson.JsonPrimitive;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.io.FileUtils;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
@@ -29,7 +26,11 @@ import
org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.serialization.JsonFetchEmitTuple;
import org.apache.tika.metadata.serialization.JsonMetadataList;
+import org.apache.tika.pipes.emitter.EmitKey;
+import org.apache.tika.pipes.fetcher.FetchKey;
+import org.apache.tika.pipes.fetchiterator.FetchEmitTuple;
import org.apache.tika.server.core.resource.EmitterResource;
import org.apache.tika.server.core.writer.JSONObjWriter;
import org.junit.AfterClass;
@@ -42,6 +43,7 @@ import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
+import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
@@ -68,6 +70,11 @@ public class TikaEmitterTest extends CXFTestBase {
private static String HELLO_WORLD = "hello_world.xml";
private static String HELLO_WORLD_JSON = "hello_world.xml.json";
+ private static final String[] VALUE_ARRAY = new String[]{
+ "my-value-1",
+ "my-value-2",
+ "my-value-3"
+ };
@BeforeClass
public static void setUpBeforeClass() throws Exception {
TMP_DIR = Files.createTempDirectory("tika-emitter-test-");
@@ -128,6 +135,7 @@ public class TikaEmitterTest extends CXFTestBase {
@Override
protected void setUpProviders(JAXRSServerFactoryBean sf) {
List<Object> providers = new ArrayList<>();
+ providers.add(new TikaServerParseExceptionMapper(true));
providers.add(new JSONObjWriter());
sf.setProviders(providers);
}
@@ -167,27 +175,24 @@ public class TikaEmitterTest extends CXFTestBase {
@Test
public void testPost() throws Exception {
- JsonObject root = new JsonObject();
- root.add("fetcher", new JsonPrimitive("fsf"));
- root.add("fetchKey", new JsonPrimitive("hello_world.xml"));
- root.add("emitter", new JsonPrimitive("fse"));
- JsonObject userMetadata = new JsonObject();
- String[] valueArray = new String[] {"my-value-1", "my-value-2",
"my-value-3"};
- JsonArray arr = new JsonArray();
- for (int i = 0; i < valueArray.length; i++) {
- arr.add(valueArray[i]);
+ Metadata userMetadata = new Metadata();
+ userMetadata.set("my-key", "my-value");
+ for (int i = 0; i < VALUE_ARRAY.length; i++) {
+ userMetadata.add("my-key-multi", VALUE_ARRAY[i]);
}
- userMetadata.add("my-key", new JsonPrimitive("my-value"));
- userMetadata.add("my-key-multi", arr);
- root.add("metadata", userMetadata);
- String jsonPost = new Gson().toJson(root);
+ FetchEmitTuple t = new FetchEmitTuple(
+ new FetchKey("fsf", "hello_world.xml"),
+ new EmitKey("fse", ""),
+ userMetadata);
+ StringWriter writer = new StringWriter();
+ JsonFetchEmitTuple.toJson(t, writer);
String getUrl = endPoint+EMITTER_PATH;
Response response = WebClient
.create(getUrl)
.accept("application/json")
- .post(jsonPost);
+ .post(writer.toString());
assertEquals(200, response.getStatus());
List<Metadata> metadataList = null;
@@ -202,7 +207,7 @@ public class TikaEmitterTest extends CXFTestBase {
assertEquals("你好,世界", metadata.get("title"));
assertEquals("application/mock+xml",
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("my-value", metadata.get("my-key"));
- assertArrayEquals(valueArray, metadata.getValues("my-key-multi"));
+ assertArrayEquals(VALUE_ARRAY, metadata.getValues("my-key-multi"));
}
@Test
@@ -235,36 +240,32 @@ public class TikaEmitterTest extends CXFTestBase {
@Test
public void testPostNPE() throws Exception {
-
- JsonObject root = new JsonObject();
- root.add("fetcher", new JsonPrimitive("fsf"));
- root.add("fetchKey", new JsonPrimitive("null_pointer.xml"));
- root.add("emitter", new JsonPrimitive("fse"));
- JsonObject userMetadata = new JsonObject();
- String[] valueArray = new String[] {"my-value-1", "my-value-2",
"my-value-3"};
- JsonArray arr = new JsonArray();
- for (int i = 0; i < valueArray.length; i++) {
- arr.add(valueArray[i]);
+ Metadata userMetadata = new Metadata();
+ userMetadata.set("my-key", "my-value");
+ for (int i = 0; i < VALUE_ARRAY.length; i++) {
+ userMetadata.add("my-key-multi", VALUE_ARRAY[i]);
}
- userMetadata.add("my-key", new JsonPrimitive("my-value"));
- userMetadata.add("my-key-multi", arr);
- root.add("metadata", userMetadata);
- String jsonPost = new Gson().toJson(root);
+ FetchEmitTuple t = new FetchEmitTuple(
+ new FetchKey("fsf", "null_pointer.xml"),
+ new EmitKey("fse", ""),
+ userMetadata);
+ StringWriter writer = new StringWriter();
+ JsonFetchEmitTuple.toJson(t, writer);
String getUrl = endPoint+EMITTER_PATH;
Response response = WebClient
.create(getUrl)
.accept("application/json")
- .post(jsonPost);
+ .post(writer.toString());
assertEquals(200, response.getStatus());
- JsonObject jsonResponse;
+ JsonNode jsonResponse;
try (Reader reader = new InputStreamReader(
(InputStream)response.getEntity(), StandardCharsets.UTF_8)) {
- jsonResponse = JsonParser.parseReader(reader).getAsJsonObject();
+ jsonResponse = new ObjectMapper().readTree(reader);
};
- String parseException =
jsonResponse.get("parse_exception").getAsString();
+ String parseException = jsonResponse.get("parse_exception").asText();
assertNotNull(parseException);
assertContains("NullPointerException", parseException);
@@ -276,7 +277,7 @@ public class TikaEmitterTest extends CXFTestBase {
Metadata metadata = metadataList.get(0);
assertEquals("application/mock+xml",
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("my-value", metadata.get("my-key"));
- assertArrayEquals(valueArray, metadata.getValues("my-key-multi"));
+ assertArrayEquals(VALUE_ARRAY, metadata.getValues("my-key-multi"));
assertContains("NullPointerException",
metadata.get(TikaCoreProperties.CONTAINER_EXCEPTION));
}
diff --git
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaMimeTypesTest.java
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaMimeTypesTest.java
index 567e681..2f76124 100644
---
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaMimeTypesTest.java
+++
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaMimeTypesTest.java
@@ -26,8 +26,6 @@ import java.io.InputStream;
import java.util.List;
import java.util.Map;
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
@@ -37,7 +35,6 @@ import org.junit.Test;
public class TikaMimeTypesTest extends CXFTestBase {
- private static final Gson GSON = new GsonBuilder().create();
private static final String MIMETYPES_PATH = "/mime-types";
diff --git
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerEmitterIntegrationTest.java
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerEmitterIntegrationTest.java
index 554aee1..ecf02de 100644
---
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerEmitterIntegrationTest.java
+++
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerEmitterIntegrationTest.java
@@ -16,11 +16,11 @@
*/
package org.apache.tika.server.core;
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
-import com.google.gson.JsonObject;
-import com.google.gson.JsonParser;
-import com.google.gson.JsonPrimitive;
+
+import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.io.FileUtils;
import org.apache.cxf.jaxrs.client.WebClient;
import org.junit.AfterClass;
@@ -32,9 +32,11 @@ import org.slf4j.LoggerFactory;
import javax.ws.rs.ProcessingException;
import javax.ws.rs.core.Response;
+import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
+import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
@@ -58,7 +60,6 @@ public class TikaServerEmitterIntegrationTest extends
IntegrationTestBase {
private static Path TMP_OUTPUT_DIR;
private static String TIKA_CONFIG_XML;
private static Path TIKA_CONFIG;
- private static Gson GSON = new GsonBuilder().create();
private static final String EMITTER_NAME = "fse";
private static final String FETCHER_NAME = "fsf";
@@ -192,8 +193,8 @@ public class TikaServerEmitterIntegrationTest extends
IntegrationTestBase {
};
serverThread.start();
try {
- JsonObject response = testOne("real_oom.xml", false);
- assertContains("heap space",
response.get("parse_error").getAsString());
+ JsonNode response = testOne("real_oom.xml", false);
+ assertContains("heap space", response.get("parse_error").asText());
} finally {
serverThread.interrupt();
}
@@ -218,8 +219,8 @@ public class TikaServerEmitterIntegrationTest extends
IntegrationTestBase {
};
serverThread.start();
try {
- JsonObject response = testOne("heavy_hang_30000.xml", false);
- assertContains("heap space",
response.get("parse_error").getAsString());
+ JsonNode response = testOne("heavy_hang_30000.xml", false);
+ assertContains("heap space", response.get("parse_error").asText());
} finally {
serverThread.interrupt();
}
@@ -251,7 +252,7 @@ public class TikaServerEmitterIntegrationTest extends
IntegrationTestBase {
elapsed + " ms");
}
- private JsonObject testOne(String fileName, boolean shouldFileExist)
throws Exception {
+ private JsonNode testOne(String fileName, boolean shouldFileExist) throws
Exception {
awaitServerStartup();
Response response = WebClient
.create(endPoint + "/emit")
@@ -262,14 +263,19 @@ public class TikaServerEmitterIntegrationTest extends
IntegrationTestBase {
assertTrue(Files.size(targFile) > 1);
}
Reader reader = new InputStreamReader((InputStream)
response.getEntity(), UTF_8);
- return JsonParser.parseReader(reader).getAsJsonObject();
+ return new ObjectMapper().readTree(reader);
}
- private String getJsonString(String fileName) {
- JsonObject root = new JsonObject();
- root.add("fetcher", new JsonPrimitive(FETCHER_NAME));
- root.add("fetchKey", new JsonPrimitive(fileName));
- root.add("emitter", new JsonPrimitive(EMITTER_NAME));
- return GSON.toJson(root);
+ private String getJsonString(String fileName) throws IOException {
+ StringWriter writer = new StringWriter();
+ try (JsonGenerator generator = new
JsonFactory().createGenerator(writer)) {
+ generator.writeStartObject();
+ generator.writeStringField("fetcher", FETCHER_NAME);
+ generator.writeStringField("fetchKey", fileName);
+ generator.writeStringField("emitter", EMITTER_NAME);
+ // Close the object explicitly instead of relying on AUTO_CLOSE_JSON_CONTENT at close().
+ generator.writeEndObject();
+ }
+ return writer.toString();
}
}
diff --git
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java
index 1e3d4cc..9e2d7d5 100644
---
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java
+++
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java
@@ -16,18 +16,11 @@
*/
package org.apache.tika.server.core;
-import com.google.gson.JsonObject;
-import com.google.gson.JsonParser;
-import org.apache.commons.io.IOUtils;
-import org.apache.cxf.common.logging.LogUtils;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.cxf.jaxrs.client.WebClient;
-import org.apache.tika.TikaTest;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.serialization.JsonMetadataList;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.slf4j.Logger;
@@ -212,8 +205,8 @@ public class TikaServerIntegrationTest extends
IntegrationTestBase {
.get();
String jsonString =
CXFTestBase.getStringFromInputStream((InputStream)
response.getEntity());
- JsonObject root = JsonParser.parseString(jsonString).getAsJsonObject();
- return root.get("server_id").getAsJsonPrimitive().getAsString();
+ JsonNode root = new ObjectMapper().readTree(jsonString);
+ return root.get("server_id").asText();
}
private int getNumRestarts() throws Exception {
@@ -223,8 +216,8 @@ public class TikaServerIntegrationTest extends
IntegrationTestBase {
.get();
String jsonString =
CXFTestBase.getStringFromInputStream((InputStream)
response.getEntity());
- JsonObject root = JsonParser.parseString(jsonString).getAsJsonObject();
- return root.get("num_restarts").getAsJsonPrimitive().getAsInt();
+ JsonNode root = new ObjectMapper().readTree(jsonString);
+ return root.get("num_restarts").intValue();
}
@Test
diff --git
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerStatusTest.java
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerStatusTest.java
index 556ac01..d4b6c6d 100644
---
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerStatusTest.java
+++
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerStatusTest.java
@@ -16,13 +16,11 @@
*/
package org.apache.tika.server.core;
-import com.google.gson.JsonObject;
-import com.google.gson.JsonParser;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
-import org.apache.tika.server.core.CXFTestBase;
-import org.apache.tika.server.core.ServerStatus;
import org.apache.tika.server.core.resource.TikaServerStatus;
import org.apache.tika.server.core.writer.JSONObjWriter;
import org.junit.Test;
@@ -59,15 +57,15 @@ public class TikaServerStatusTest extends CXFTestBase {
Response response = WebClient.create(endPoint + STATUS_PATH).get();
String jsonString =
getStringFromInputStream((InputStream) response.getEntity());
- JsonObject root = JsonParser.parseString(jsonString).getAsJsonObject();
+ JsonNode root = new ObjectMapper().readTree(jsonString);
assertTrue(root.has("server_id"));
assertTrue(root.has("status"));
assertTrue(root.has("millis_since_last_parse_started"));
assertTrue(root.has("files_processed"));
- assertEquals("OPERATING",
root.getAsJsonPrimitive("status").getAsString());
- assertEquals(0, root.getAsJsonPrimitive("files_processed").getAsInt());
- long millis =
root.getAsJsonPrimitive("millis_since_last_parse_started").getAsInt();
+ assertEquals("OPERATING", root.get("status").asText());
+ assertEquals(0, root.get("files_processed").intValue());
+ long millis = root.get("millis_since_last_parse_started").longValue();
assertTrue(millis > 0 && millis < 360000);
- assertEquals(SERVER_ID,
root.getAsJsonPrimitive("server_id").getAsString());
+ assertEquals(SERVER_ID, root.get("server_id").asText());
}
}