This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4503
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 6e63d30115e7a80a9703b45aef6fb6dd7ce39139
Author: tallison <[email protected]>
AuthorDate: Fri Oct 3 11:06:27 2025 -0400

    TIKA-4503 -- refactor serialization
---
 ...tTuple.java => FetchEmitTupleDeserializer.java} | 113 +++-----------
 .../serialization/FetchEmitTupleSerializer.java    |  65 +++++++++
 .../pipes/core/serialization/JsonEmitData.java     |  45 ++----
 .../core/serialization/JsonFetchEmitTuple.java     | 162 ++-------------------
 .../core/serialization/JsonFetchEmitTupleList.java |  52 +++----
 .../apache/tika/serialization/JsonMetadata.java    | 140 ++++++------------
 .../tika/serialization/JsonMetadataList.java       | 102 +++++--------
 .../serialization/JsonStreamingSerializer.java     |  64 --------
 .../tika/serialization/MetadataDeserializer.java   |  71 +++++++++
 .../tika/serialization/MetadataSerializer.java     |  76 ++++++++++
 .../tika/serialization/ParseContextSerializer.java |   1 -
 .../tika/serialization/JsonMetadataListTest.java   |  77 ++++------
 .../tika/serialization/JsonMetadataTest.java       |  14 +-
 .../TestParseContextSerialization.java             |   2 -
 14 files changed, 406 insertions(+), 578 deletions(-)
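For orientation before the diffs: the hand-rolled JsonGenerator/JsonParser code removed below is replaced by Jackson JsonSerializer/JsonDeserializer implementations that get registered on an ObjectMapper through a SimpleModule. A minimal sketch of that pattern using the Metadata (de)serializers added in this commit; the sample keys and values are illustrative only:

    import com.fasterxml.jackson.databind.ObjectMapper;
    import com.fasterxml.jackson.databind.module.SimpleModule;

    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.serialization.MetadataDeserializer;
    import org.apache.tika.serialization.MetadataSerializer;

    public class MetadataJsonSketch {
        public static void main(String[] args) throws Exception {
            ObjectMapper mapper = new ObjectMapper();
            SimpleModule module = new SimpleModule();
            module.addSerializer(Metadata.class, new MetadataSerializer());
            module.addDeserializer(Metadata.class, new MetadataDeserializer());
            mapper.registerModule(module);

            Metadata metadata = new Metadata();
            metadata.add("k1", "v1");
            metadata.add("k1", "v2");

            // multi-valued keys serialize as JSON arrays, e.g. {"k1":["v1","v2"]}
            String json = mapper.writeValueAsString(metadata);
            Metadata roundTripped = mapper.readValue(json, Metadata.class);
        }
    }

Each Json* facade below builds such a mapper once in a static initializer and delegates to it.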

diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/JsonFetchEmitTuple.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/FetchEmitTupleDeserializer.java
similarity index 53%
copy from tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/JsonFetchEmitTuple.java
copy to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/FetchEmitTupleDeserializer.java
index b92685521..23b2d36e5 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/JsonFetchEmitTuple.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/FetchEmitTupleDeserializer.java
@@ -16,47 +16,39 @@
  */
 package org.apache.tika.pipes.core.serialization;
 
+import static org.apache.tika.pipes.core.serialization.FetchEmitTupleSerializer.EMITKEY;
+import static org.apache.tika.pipes.core.serialization.FetchEmitTupleSerializer.EMITTER;
+import static org.apache.tika.pipes.core.serialization.FetchEmitTupleSerializer.FETCHER;
+import static org.apache.tika.pipes.core.serialization.FetchEmitTupleSerializer.FETCHKEY;
+import static org.apache.tika.pipes.core.serialization.FetchEmitTupleSerializer.FETCH_RANGE_END;
+import static org.apache.tika.pipes.core.serialization.FetchEmitTupleSerializer.FETCH_RANGE_START;
+import static org.apache.tika.pipes.core.serialization.FetchEmitTupleSerializer.ID;
+import static org.apache.tika.pipes.core.serialization.FetchEmitTupleSerializer.METADATAKEY;
+import static org.apache.tika.pipes.core.serialization.FetchEmitTupleSerializer.ON_PARSE_EXCEPTION;
+import static org.apache.tika.serialization.ParseContextSerializer.PARSE_CONTEXT;
+
 import java.io.IOException;
-import java.io.Reader;
-import java.io.StringWriter;
-import java.io.Writer;
-import java.util.Locale;
 import java.util.Map;
 
-import com.fasterxml.jackson.core.JsonFactory;
-import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.JacksonException;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.databind.DeserializationContext;
+import com.fasterxml.jackson.databind.JsonDeserializer;
 import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
 
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.pipes.core.FetchEmitTuple;
 import org.apache.tika.pipes.core.emitter.EmitKey;
 import org.apache.tika.pipes.core.fetcher.FetchKey;
-import org.apache.tika.serialization.JsonMetadata;
 import org.apache.tika.serialization.ParseContextDeserializer;
-import org.apache.tika.serialization.ParseContextSerializer;
-import org.apache.tika.utils.StringUtils;
-
-public class JsonFetchEmitTuple {
 
-    public static final String ID = "id";
-    public static final String FETCHER = "fetcher";
-    public static final String FETCHKEY = "fetchKey";
-    public static final String FETCH_RANGE_START = "fetchRangeStart";
-    public static final String FETCH_RANGE_END = "fetchRangeEnd";
-    public static final String EMITTER = "emitter";
-    public static final String EMITKEY = "emitKey";
-    public static final String METADATAKEY = "metadata";
-    public static final String ON_PARSE_EXCEPTION = "onParseException";
+public class FetchEmitTupleDeserializer extends JsonDeserializer<FetchEmitTuple> {
 
-    public static FetchEmitTuple fromJson(Reader reader) throws IOException {
-        JsonNode root = new ObjectMapper().readTree(reader);
-        return parseFetchEmitTuple(root);
-    }
+    @Override
+    public FetchEmitTuple deserialize(JsonParser jsonParser, DeserializationContext deserializationContext) throws IOException, JacksonException {
+        JsonNode root = jsonParser.readValueAsTree();
 
-
-    static FetchEmitTuple parseFetchEmitTuple(JsonNode root) throws IOException {
         String id = readVal(ID, root, null, true);
         String fetcherName = readVal(FETCHER, root, null, true);
         String fetchKey = readVal(FETCHKEY, root, null, true);
@@ -65,11 +57,12 @@ public class JsonFetchEmitTuple {
         long fetchRangeStart = readLong(FETCH_RANGE_START, root, -1l, false);
         long fetchRangeEnd = readLong(FETCH_RANGE_END, root, -1l, false);
         Metadata metadata = readMetadata(root);
-        JsonNode parseContextNode = root.get(ParseContextSerializer.PARSE_CONTEXT);
+        JsonNode parseContextNode = root.get(PARSE_CONTEXT);
         ParseContext parseContext = parseContextNode == null ? new ParseContext() : ParseContextDeserializer.readParseContext(parseContextNode);
         FetchEmitTuple.ON_PARSE_EXCEPTION onParseException = readOnParseException(root);
 
-        return new FetchEmitTuple(id, new FetchKey(fetcherName, fetchKey, fetchRangeStart, fetchRangeEnd), new EmitKey(emitterName, emitKey), metadata, parseContext,
+        return new FetchEmitTuple(id, new FetchKey(fetcherName, fetchKey, fetchRangeStart, fetchRangeEnd),
+                new EmitKey(emitterName, emitKey), metadata, parseContext,
                 onParseException);
     }
 
@@ -130,66 +123,4 @@ public class JsonFetchEmitTuple {
         return val.longValue();
     }
 
-    public static String toJson(FetchEmitTuple t) throws IOException {
-        StringWriter writer = new StringWriter();
-        toJson(t, writer);
-        return writer.toString();
-    }
-
-    public static void toJson(FetchEmitTuple t, Writer writer) throws 
IOException {
-
-        try (JsonGenerator jsonGenerator = new 
JsonFactory().createGenerator(writer)) {
-            writeTuple(t, jsonGenerator);
-        }
-    }
-
-    static void writeTuple(FetchEmitTuple t, JsonGenerator jsonGenerator) 
throws IOException {
-        jsonGenerator.writeStartObject();
-        jsonGenerator.writeStringField(ID, t.getId());
-        jsonGenerator.writeStringField(FETCHER, t
-                .getFetchKey()
-                .getFetcherName());
-        jsonGenerator.writeStringField(FETCHKEY, t
-                .getFetchKey()
-                .getFetchKey());
-        if (t
-                .getFetchKey()
-                .hasRange()) {
-            jsonGenerator.writeNumberField(FETCH_RANGE_START, t
-                    .getFetchKey()
-                    .getRangeStart());
-            jsonGenerator.writeNumberField(FETCH_RANGE_END, t
-                    .getFetchKey()
-                    .getRangeEnd());
-        }
-        jsonGenerator.writeStringField(EMITTER, t
-                .getEmitKey()
-                .getEmitterName());
-        if (!StringUtils.isBlank(t
-                .getEmitKey()
-                .getEmitKey())) {
-            jsonGenerator.writeStringField(EMITKEY, t
-                    .getEmitKey()
-                    .getEmitKey());
-        }
-        if (t
-                .getMetadata()
-                .size() > 0) {
-            jsonGenerator.writeFieldName(METADATAKEY);
-            JsonMetadata.writeMetadataObject(t.getMetadata(), jsonGenerator, 
false);
-        }
-
-        jsonGenerator.writeStringField(ON_PARSE_EXCEPTION, t
-                .getOnParseException()
-                .name()
-                .toLowerCase(Locale.US));
-        if (!t
-                .getParseContext()
-                .isEmpty()) {
-            ParseContextSerializer s = new ParseContextSerializer();
-            s.serialize(t.getParseContext(), jsonGenerator, null);
-        }
-        jsonGenerator.writeEndObject();
-
-    }
 }
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/FetchEmitTupleSerializer.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/FetchEmitTupleSerializer.java
new file mode 100644
index 000000000..3f203ae25
--- /dev/null
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/FetchEmitTupleSerializer.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.core.serialization;
+
+import static org.apache.tika.serialization.ParseContextSerializer.PARSE_CONTEXT;
+
+import java.io.IOException;
+import java.util.Locale;
+
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.JsonSerializer;
+import com.fasterxml.jackson.databind.SerializerProvider;
+
+import org.apache.tika.pipes.core.FetchEmitTuple;
+import org.apache.tika.utils.StringUtils;
+
+public class FetchEmitTupleSerializer extends JsonSerializer<FetchEmitTuple> {
+    public static final String ID = "id";
+    public static final String FETCHER = "fetcher";
+    public static final String FETCHKEY = "fetchKey";
+    public static final String FETCH_RANGE_START = "fetchRangeStart";
+    public static final String FETCH_RANGE_END = "fetchRangeEnd";
+    public static final String EMITTER = "emitter";
+    public static final String EMITKEY = "emitKey";
+    public static final String METADATAKEY = "metadata";
+    public static final String ON_PARSE_EXCEPTION = "onParseException";
+
+    public void serialize(FetchEmitTuple t, JsonGenerator jsonGenerator, SerializerProvider serializerProvider) throws IOException {
+
+        jsonGenerator.writeStartObject();
+        jsonGenerator.writeStringField(ID, t.getId());
+        jsonGenerator.writeStringField(FETCHER, t.getFetchKey().getFetcherName());
+        jsonGenerator.writeStringField(FETCHKEY, t.getFetchKey().getFetchKey());
+        if (t.getFetchKey().hasRange()) {
+            jsonGenerator.writeNumberField(FETCH_RANGE_START, t.getFetchKey().getRangeStart());
+            jsonGenerator.writeNumberField(FETCH_RANGE_END, t.getFetchKey().getRangeEnd());
+        }
+        jsonGenerator.writeStringField(EMITTER, t.getEmitKey().getEmitterName());
+        if (!StringUtils.isBlank(t.getEmitKey().getEmitKey())) {
+            jsonGenerator.writeStringField(EMITKEY, t.getEmitKey().getEmitKey());
+        }
+        if (t.getMetadata().size() > 0) {
+            jsonGenerator.writeObjectField(METADATAKEY, t.getMetadata());
+        }
+        jsonGenerator.writeStringField(ON_PARSE_EXCEPTION, t.getOnParseException().name().toLowerCase(Locale.US));
+        if (!t.getParseContext().isEmpty()) {
+            jsonGenerator.writeObjectField(PARSE_CONTEXT, t.getParseContext());
+        }
+        jsonGenerator.writeEndObject();
+    }
+}
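The writeObjectField calls above hand the nested Metadata and ParseContext values back to the owning ObjectMapper, so this serializer assumes a generator created by a mapper that also has those serializers registered. A sketch of that wiring, mirroring the static initializers in JsonEmitData and JsonFetchEmitTuple below; the class and method names here are made up for illustration:

    import java.io.StringWriter;

    import com.fasterxml.jackson.databind.ObjectMapper;
    import com.fasterxml.jackson.databind.module.SimpleModule;

    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.parser.ParseContext;
    import org.apache.tika.pipes.core.FetchEmitTuple;
    import org.apache.tika.pipes.core.serialization.FetchEmitTupleSerializer;
    import org.apache.tika.serialization.MetadataSerializer;
    import org.apache.tika.serialization.ParseContextSerializer;

    public class TupleJsonSketch {
        static String toJson(FetchEmitTuple tuple) throws Exception {
            ObjectMapper mapper = new ObjectMapper();
            SimpleModule module = new SimpleModule();
            module.addSerializer(FetchEmitTuple.class, new FetchEmitTupleSerializer());
            module.addSerializer(Metadata.class, new MetadataSerializer());
            module.addSerializer(ParseContext.class, new ParseContextSerializer());
            mapper.registerModule(module);

            StringWriter writer = new StringWriter();
            // nested Metadata/ParseContext fields are written by their registered serializers
            mapper.writeValue(writer, tuple);
            return writer.toString();
        }
    }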
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/JsonEmitData.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/JsonEmitData.java
index 8eb03c51d..2ec5f9343 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/JsonEmitData.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/JsonEmitData.java
@@ -19,41 +19,28 @@ package org.apache.tika.pipes.core.serialization;
 import java.io.IOException;
 import java.io.Writer;
 
-import com.fasterxml.jackson.core.JsonFactory;
-import com.fasterxml.jackson.core.JsonGenerator;
-import com.fasterxml.jackson.core.StreamReadConstraints;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.module.SimpleModule;
 
-import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.pipes.core.FetchEmitTuple;
 import org.apache.tika.pipes.core.emitter.EmitData;
-import org.apache.tika.pipes.core.emitter.EmitKey;
-import org.apache.tika.serialization.JsonMetadata;
+import org.apache.tika.serialization.MetadataSerializer;
+import org.apache.tika.serialization.ParseContextSerializer;
 
 public class JsonEmitData {
+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+    static {
+        SimpleModule module = new SimpleModule();
+        module.addSerializer(FetchEmitTuple.class, new FetchEmitTupleSerializer());
+        module.addSerializer(ParseContext.class, new ParseContextSerializer());
+        module.addSerializer(Metadata.class, new MetadataSerializer());
+        OBJECT_MAPPER.registerModule(module);
+    }
 
     public static void toJson(EmitData emitData, Writer writer) throws IOException {
-        try (JsonGenerator jsonGenerator = new JsonFactory()
-                .setStreamReadConstraints(StreamReadConstraints
-                        .builder()
-                        
.maxStringLength(TikaConfig.getMaxJsonStringFieldLength())
-                        .build())
-                .createGenerator(writer)) {
-            jsonGenerator.writeStartObject();
-            EmitKey key = emitData.getEmitKey();
-            jsonGenerator.writeStringField(JsonFetchEmitTuple.EMITTER, 
key.getEmitterName());
-            jsonGenerator.writeStringField(JsonFetchEmitTuple.EMITKEY, 
key.getEmitKey());
-            if (!emitData
-                    .getParseContext()
-                    .isEmpty()) {
-                jsonGenerator.writeObject(emitData.getParseContext());
-            }
-            jsonGenerator.writeFieldName("data");
-            jsonGenerator.writeStartArray();
-            for (Metadata m : emitData.getMetadataList()) {
-                JsonMetadata.writeMetadataObject(m, jsonGenerator, false);
-            }
-            jsonGenerator.writeEndArray();
-            jsonGenerator.writeEndObject();
-        }
+        OBJECT_MAPPER.writeValue(writer, emitData);
     }
 }
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/JsonFetchEmitTuple.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/JsonFetchEmitTuple.java
index b92685521..6841379a0 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/JsonFetchEmitTuple.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/JsonFetchEmitTuple.java
@@ -20,114 +20,31 @@ import java.io.IOException;
 import java.io.Reader;
 import java.io.StringWriter;
 import java.io.Writer;
-import java.util.Locale;
-import java.util.Map;
 
-import com.fasterxml.jackson.core.JsonFactory;
-import com.fasterxml.jackson.core.JsonGenerator;
-import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.module.SimpleModule;
 
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.pipes.core.FetchEmitTuple;
-import org.apache.tika.pipes.core.emitter.EmitKey;
-import org.apache.tika.pipes.core.fetcher.FetchKey;
-import org.apache.tika.serialization.JsonMetadata;
-import org.apache.tika.serialization.ParseContextDeserializer;
+import org.apache.tika.serialization.MetadataSerializer;
 import org.apache.tika.serialization.ParseContextSerializer;
-import org.apache.tika.utils.StringUtils;
 
 public class JsonFetchEmitTuple {
 
-    public static final String ID = "id";
-    public static final String FETCHER = "fetcher";
-    public static final String FETCHKEY = "fetchKey";
-    public static final String FETCH_RANGE_START = "fetchRangeStart";
-    public static final String FETCH_RANGE_END = "fetchRangeEnd";
-    public static final String EMITTER = "emitter";
-    public static final String EMITKEY = "emitKey";
-    public static final String METADATAKEY = "metadata";
-    public static final String ON_PARSE_EXCEPTION = "onParseException";
+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 
-    public static FetchEmitTuple fromJson(Reader reader) throws IOException {
-        JsonNode root = new ObjectMapper().readTree(reader);
-        return parseFetchEmitTuple(root);
-    }
-
-
-    static FetchEmitTuple parseFetchEmitTuple(JsonNode root) throws 
IOException {
-        String id = readVal(ID, root, null, true);
-        String fetcherName = readVal(FETCHER, root, null, true);
-        String fetchKey = readVal(FETCHKEY, root, null, true);
-        String emitterName = readVal(EMITTER, root, "", false);
-        String emitKey = readVal(EMITKEY, root, "", false);
-        long fetchRangeStart = readLong(FETCH_RANGE_START, root, -1l, false);
-        long fetchRangeEnd = readLong(FETCH_RANGE_END, root, -1l, false);
-        Metadata metadata = readMetadata(root);
-        JsonNode parseContextNode = 
root.get(ParseContextSerializer.PARSE_CONTEXT);
-        ParseContext parseContext = parseContextNode == null ? new 
ParseContext() : ParseContextDeserializer.readParseContext(parseContextNode);
-        FetchEmitTuple.ON_PARSE_EXCEPTION onParseException = 
readOnParseException(root);
-
-        return new FetchEmitTuple(id, new FetchKey(fetcherName, fetchKey, 
fetchRangeStart, fetchRangeEnd), new EmitKey(emitterName, emitKey), metadata, 
parseContext,
-                onParseException);
-    }
-
-    private static FetchEmitTuple.ON_PARSE_EXCEPTION 
readOnParseException(JsonNode root) throws IOException {
-        JsonNode onParseExNode = root.get(ON_PARSE_EXCEPTION);
-        if (onParseExNode == null) {
-            return FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT;
-        }
-        String txt = onParseExNode.asText();
-        if ("skip".equalsIgnoreCase(txt)) {
-            return FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP;
-        } else if ("emit".equalsIgnoreCase(txt)) {
-            return FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT;
-        } else {
-            throw new IOException(ON_PARSE_EXCEPTION + " must be either 'skip' 
or 'emit'");
-        }
-    }
-
-    private static Metadata readMetadata(JsonNode root) {
-        JsonNode metadataNode = root.get(METADATAKEY);
-        if (metadataNode == null) {
-            return new Metadata();
-        }
-        Metadata metadata = new Metadata();
-        for (Map.Entry<String, JsonNode> e : metadataNode.properties()) {
-            JsonNode vals = e.getValue();
-            String k = e.getKey();
-            if (vals.isArray()) {
-                for (JsonNode arrVal : vals) {
-                    metadata.add(k, arrVal.textValue());
-                }
-            } else {
-                metadata.set(k, vals.asText());
-            }
-        }
-        return metadata;
+    static {
+        SimpleModule module = new SimpleModule();
+        module.addDeserializer(FetchEmitTuple.class, new FetchEmitTupleDeserializer());
+        module.addSerializer(FetchEmitTuple.class, new FetchEmitTupleSerializer());
+        module.addSerializer(Metadata.class, new MetadataSerializer());
+        module.addSerializer(ParseContext.class, new ParseContextSerializer());
+        OBJECT_MAPPER.registerModule(module);
     }
 
-    private static String readVal(String key, JsonNode jsonObj, String 
defaultRet, boolean isRequired) throws IOException {
-        JsonNode valNode = jsonObj.get(key);
-        if (valNode == null) {
-            if (isRequired) {
-                throw new IOException("required value string, but see: " + 
key);
-            }
-            return defaultRet;
-        }
-        return valNode.asText();
-    }
-
-    private static long readLong(String key, JsonNode jsonObj, long 
defaultVal, boolean isRequired) throws IOException {
-        JsonNode val = jsonObj.get(key);
-        if (val == null) {
-            if (isRequired) {
-                throw new IOException("required value long, but see: " + key);
-            }
-            return defaultVal;
-        }
-        return val.longValue();
+    public static FetchEmitTuple fromJson(Reader reader) throws IOException {
+        return OBJECT_MAPPER.readValue(reader, FetchEmitTuple.class);
     }
 
     public static String toJson(FetchEmitTuple t) throws IOException {
@@ -137,59 +54,6 @@ public class JsonFetchEmitTuple {
     }
 
     public static void toJson(FetchEmitTuple t, Writer writer) throws IOException {
-
-        try (JsonGenerator jsonGenerator = new 
JsonFactory().createGenerator(writer)) {
-            writeTuple(t, jsonGenerator);
-        }
-    }
-
-    static void writeTuple(FetchEmitTuple t, JsonGenerator jsonGenerator) 
throws IOException {
-        jsonGenerator.writeStartObject();
-        jsonGenerator.writeStringField(ID, t.getId());
-        jsonGenerator.writeStringField(FETCHER, t
-                .getFetchKey()
-                .getFetcherName());
-        jsonGenerator.writeStringField(FETCHKEY, t
-                .getFetchKey()
-                .getFetchKey());
-        if (t
-                .getFetchKey()
-                .hasRange()) {
-            jsonGenerator.writeNumberField(FETCH_RANGE_START, t
-                    .getFetchKey()
-                    .getRangeStart());
-            jsonGenerator.writeNumberField(FETCH_RANGE_END, t
-                    .getFetchKey()
-                    .getRangeEnd());
-        }
-        jsonGenerator.writeStringField(EMITTER, t
-                .getEmitKey()
-                .getEmitterName());
-        if (!StringUtils.isBlank(t
-                .getEmitKey()
-                .getEmitKey())) {
-            jsonGenerator.writeStringField(EMITKEY, t
-                    .getEmitKey()
-                    .getEmitKey());
-        }
-        if (t
-                .getMetadata()
-                .size() > 0) {
-            jsonGenerator.writeFieldName(METADATAKEY);
-            JsonMetadata.writeMetadataObject(t.getMetadata(), jsonGenerator, 
false);
-        }
-
-        jsonGenerator.writeStringField(ON_PARSE_EXCEPTION, t
-                .getOnParseException()
-                .name()
-                .toLowerCase(Locale.US));
-        if (!t
-                .getParseContext()
-                .isEmpty()) {
-            ParseContextSerializer s = new ParseContextSerializer();
-            s.serialize(t.getParseContext(), jsonGenerator, null);
-        }
-        jsonGenerator.writeEndObject();
-
+        OBJECT_MAPPER.writeValue(writer, t);
     }
 }
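Callers keep the same one-line API now that the mapper is configured once in the static initializer. A minimal round-trip sketch; the two-argument FetchKey and EmitKey constructors are an assumption and are not part of this diff:

    import java.io.StringReader;

    import org.apache.tika.pipes.core.FetchEmitTuple;
    import org.apache.tika.pipes.core.emitter.EmitKey;
    import org.apache.tika.pipes.core.fetcher.FetchKey;
    import org.apache.tika.pipes.core.serialization.JsonFetchEmitTuple;

    public class JsonFetchEmitTupleSketch {
        public static void main(String[] args) throws Exception {
            // assumed two-argument FetchKey/EmitKey constructors; values are illustrative
            FetchEmitTuple tuple = new FetchEmitTuple("doc-1",
                    new FetchKey("fs", "docs/report.pdf"),
                    new EmitKey("es", "doc-1"));

            String json = JsonFetchEmitTuple.toJson(tuple);
            FetchEmitTuple roundTripped = JsonFetchEmitTuple.fromJson(new StringReader(json));
        }
    }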
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/JsonFetchEmitTupleList.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/JsonFetchEmitTupleList.java
index 26a1cc501..8f53c8a87 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/JsonFetchEmitTupleList.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/JsonFetchEmitTupleList.java
@@ -20,35 +20,33 @@ import java.io.IOException;
 import java.io.Reader;
 import java.io.StringWriter;
 import java.io.Writer;
-import java.util.ArrayList;
-import java.util.Iterator;
 import java.util.List;
 
-import com.fasterxml.jackson.core.JsonFactory;
-import com.fasterxml.jackson.core.JsonGenerator;
-import com.fasterxml.jackson.core.StreamReadConstraints;
-import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.core.type.TypeReference;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.module.SimpleModule;
 
-import org.apache.tika.config.TikaConfig;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
 import org.apache.tika.pipes.core.FetchEmitTuple;
+import org.apache.tika.serialization.MetadataSerializer;
+import org.apache.tika.serialization.ParseContextSerializer;
 
 public class JsonFetchEmitTupleList {
 
-    public static List<FetchEmitTuple> fromJson(Reader reader) throws 
IOException {
-        JsonNode root = new ObjectMapper().readTree(reader);
+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 
-        if (!root.isArray()) {
-            throw new IOException("FetchEmitTupleList must be an array");
-        }
-        List<FetchEmitTuple> list = new ArrayList<>();
-        Iterator<JsonNode> it = root.iterator();
-        while (it.hasNext()) {
-            JsonNode n = it.next();
-            FetchEmitTuple t = JsonFetchEmitTuple.parseFetchEmitTuple(n);
-            list.add(t);
-        }
-        return list;
+    static {
+        SimpleModule module = new SimpleModule();
+        module.addDeserializer(FetchEmitTuple.class, new FetchEmitTupleDeserializer());
+        module.addSerializer(FetchEmitTuple.class, new FetchEmitTupleSerializer());
+        module.addSerializer(Metadata.class, new MetadataSerializer());
+        module.addSerializer(ParseContext.class, new ParseContextSerializer());
+        OBJECT_MAPPER.registerModule(module);
+    }
+
+    public static List<FetchEmitTuple> fromJson(Reader reader) throws IOException {
+        return OBJECT_MAPPER.readValue(reader, new TypeReference<List<FetchEmitTuple>>() {});
     }
 
     public static String toJson(List<FetchEmitTuple> list) throws IOException {
@@ -58,18 +56,6 @@ public class JsonFetchEmitTupleList {
     }
 
     public static void toJson(List<FetchEmitTuple> list, Writer writer) throws IOException {
-
-        try (JsonGenerator jsonGenerator = new JsonFactory()
-                .setStreamReadConstraints(StreamReadConstraints
-                        .builder()
-                        
.maxStringLength(TikaConfig.getMaxJsonStringFieldLength())
-                        .build())
-                .createGenerator(writer)) {
-            jsonGenerator.writeStartArray();
-            for (FetchEmitTuple t : list) {
-                JsonFetchEmitTuple.writeTuple(t, jsonGenerator);
-            }
-            jsonGenerator.writeEndArray();
-        }
+        OBJECT_MAPPER.writeValue(writer, list);
     }
 }
diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java b/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java
index 72a9c3f9a..b47acde2e 100644
--- a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java
+++ b/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java
@@ -20,15 +20,11 @@ package org.apache.tika.serialization;
 import java.io.IOException;
 import java.io.Reader;
 import java.io.Writer;
-import java.util.Arrays;
 
 import com.fasterxml.jackson.core.JsonFactory;
-import com.fasterxml.jackson.core.JsonGenerator;
-import com.fasterxml.jackson.core.JsonParser;
-import com.fasterxml.jackson.core.JsonToken;
 import com.fasterxml.jackson.core.StreamReadConstraints;
-import org.apache.commons.io.input.CloseShieldReader;
-import org.apache.commons.io.output.CloseShieldWriter;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.module.SimpleModule;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
@@ -37,6 +33,17 @@ public class JsonMetadata {
 
     static volatile boolean PRETTY_PRINT = false;
 
+    private static ObjectMapper OBJECT_MAPPER;
+    private static final ObjectMapper PRETTY_SERIALIZER;
+
+    static {
+        OBJECT_MAPPER = buildObjectMapper(StreamReadConstraints.DEFAULT_MAX_STRING_LEN);
+        PRETTY_SERIALIZER = new ObjectMapper();
+        SimpleModule prettySerializerModule = new SimpleModule();
+        prettySerializerModule.addSerializer(Metadata.class, new MetadataSerializer(true));
+        PRETTY_SERIALIZER.registerModule(prettySerializerModule);
+    }
+
     /**
      * Serializes a Metadata object to Json.  This does not flush or close the 
writer.
      *
@@ -45,115 +52,56 @@ public class JsonMetadata {
      * @throws java.io.IOException if there is an IOException during writing
      */
     public static void toJson(Metadata metadata, Writer writer) throws 
IOException {
-        if (metadata == null) {
-            writer.write("null");
-            return;
-        }
-        long max = TikaConfig.getMaxJsonStringFieldLength();
-        try (JsonGenerator jsonGenerator = new JsonFactory()
-                .setStreamReadConstraints(StreamReadConstraints
-                        .builder()
-                        
.maxStringLength(TikaConfig.getMaxJsonStringFieldLength())
-                        .build())
-                .createGenerator(CloseShieldWriter.wrap(writer))) {
-            if (PRETTY_PRINT) {
-                jsonGenerator.useDefaultPrettyPrinter();
-            }
-            writeMetadataObject(metadata, jsonGenerator, PRETTY_PRINT);
+        if (PRETTY_PRINT) {
+            PRETTY_SERIALIZER
+                    .writerWithDefaultPrettyPrinter()
+                    .writeValue(writer, metadata);
+        } else {
+            OBJECT_MAPPER.writeValue(writer, metadata);
         }
     }
 
-    public static void writeMetadataObject(Metadata metadata, JsonGenerator 
jsonGenerator, boolean prettyPrint) throws IOException {
-        jsonGenerator.writeStartObject();
-        String[] names = metadata.names();
-        if (prettyPrint) {
-            Arrays.sort(names, new PrettyMetadataKeyComparator());
-        }
-        for (String n : names) {
-            String[] vals = metadata.getValues(n);
-            if (vals.length == 0) {
-                continue;
-            } else if (vals.length == 1) {
-                jsonGenerator.writeStringField(n, vals[0]);
-            } else if (vals.length > 1) {
-                jsonGenerator.writeArrayFieldStart(n);
-                for (String val : vals) {
-                    jsonGenerator.writeString(val);
-                }
-                jsonGenerator.writeEndArray();
-            }
-        }
-        jsonGenerator.writeEndObject();
-    }
-
     /**
      * Read metadata from reader.
      * <p>
      * This does not close the reader.
+     * <p>
+     * This will reset the OBJECT_MAPPER if the max string length differs from that in TikaConfig.
      *
      * @param reader reader to read from
      * @return Metadata or null if nothing could be read from the reader
      * @throws IOException in case of parse failure or IO failure with Reader
      */
     public static Metadata fromJson(Reader reader) throws IOException {
-        Metadata m = null;
-        try (JsonParser jParser = new JsonFactory()
-                .setStreamReadConstraints(StreamReadConstraints
-                        .builder()
-                        
.maxStringLength(TikaConfig.getMaxJsonStringFieldLength())
-                        .build())
-                .createParser(CloseShieldReader.wrap(reader))) {
-            m = readMetadataObject(jParser);
+        if (reader == null) {
+            return null;
         }
-        return m;
-    }
-
-    /**
-     * expects that jParser has not yet started on object or
-     * for jParser to be pointing to the start object.
-     *
-     * @param jParser
-     * @return
-     * @throws IOException
-     */
-    public static Metadata readMetadataObject(JsonParser jParser) throws 
IOException {
-        Metadata metadata = new Metadata();
-        JsonToken token = jParser.currentToken();
-        if (token == null) {
-            token = jParser.nextToken();
-            if (token != JsonToken.START_OBJECT) {
-                throw new IOException("expected start object, but got: " + 
token.name());
-            }
-            token = jParser.nextToken();
-        } else if (token == JsonToken.START_OBJECT) {
-            token = jParser.nextToken();
-        }
-
-        while (token != JsonToken.END_OBJECT) {
-            token = jParser.currentToken();
-            if (token != JsonToken.FIELD_NAME) {
-                throw new IOException("expected field name, but got: " + 
token.name());
-            }
-            String key = jParser.currentName();
-            token = jParser.nextToken();
-            if (token == JsonToken.START_ARRAY) {
-                while (jParser.nextToken() != JsonToken.END_ARRAY) {
-                    metadata.add(key, jParser.getText());
-                }
-            } else {
-                if (token != JsonToken.VALUE_STRING) {
-                    throw new IOException("expected string value, but found: " 
+ token.name());
-                }
-                String value = jParser.getValueAsString();
-                metadata.set(key, value);
-            }
-            token = jParser.nextToken();
+        if (OBJECT_MAPPER
+                .getFactory()
+                .streamReadConstraints()
+                .getMaxStringLength() != TikaConfig.getMaxJsonStringFieldLength()) {
+            OBJECT_MAPPER = buildObjectMapper(TikaConfig.getMaxJsonStringFieldLength());
         }
-        return metadata;
+        return OBJECT_MAPPER.readValue(reader, Metadata.class);
     }
 
     public static void setPrettyPrinting(boolean prettyPrint) {
         PRETTY_PRINT = prettyPrint;
     }
 
+    static ObjectMapper buildObjectMapper(int maxStringLen) {
+        JsonFactory factory = new JsonFactory();
+        factory.setStreamReadConstraints(StreamReadConstraints
+                .builder()
+                .maxNestingDepth(10)
+                .maxStringLength(maxStringLen)
+                .maxNumberLength(500)
+                .build());
+        ObjectMapper objectMapper = new ObjectMapper(factory);
+        SimpleModule baseModule = new SimpleModule();
+        baseModule.addDeserializer(Metadata.class, new MetadataDeserializer());
+        baseModule.addSerializer(Metadata.class, new MetadataSerializer());
+        objectMapper.registerModule(baseModule);
+        return objectMapper;
+    }
 }
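buildObjectMapper caps string length, number length, and nesting depth up front, and fromJson rebuilds the mapper if TikaConfig.getMaxJsonStringFieldLength() has changed since construction. A minimal round-trip sketch against the public JsonMetadata API (values are illustrative):

    import java.io.StringReader;
    import java.io.StringWriter;

    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.serialization.JsonMetadata;

    public class JsonMetadataSketch {
        public static void main(String[] args) throws Exception {
            Metadata metadata = new Metadata();
            metadata.add("dc:title", "a title");

            StringWriter writer = new StringWriter();
            JsonMetadata.toJson(metadata, writer);   // does not flush or close the writer
            Metadata roundTripped = JsonMetadata.fromJson(new StringReader(writer.toString()));
        }
    }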
diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java b/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java
index a7a3803e5..4b84e9f3a 100644
--- a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java
+++ b/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java
@@ -16,27 +16,51 @@
  */
 package org.apache.tika.serialization;
 
+import static org.apache.tika.serialization.JsonMetadata.buildObjectMapper;
+
 import java.io.IOException;
 import java.io.Reader;
 import java.io.Writer;
-import java.util.ArrayList;
 import java.util.List;
 
 import com.fasterxml.jackson.core.JsonFactory;
-import com.fasterxml.jackson.core.JsonGenerator;
-import com.fasterxml.jackson.core.JsonParser;
-import com.fasterxml.jackson.core.JsonToken;
 import com.fasterxml.jackson.core.StreamReadConstraints;
-import org.apache.commons.io.input.CloseShieldReader;
-import org.apache.commons.io.output.CloseShieldWriter;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.module.SimpleModule;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
 
 public class JsonMetadataList {
+
     static volatile boolean PRETTY_PRINT = false;
 
+    private static ObjectMapper OBJECT_MAPPER;
+    private static final ObjectMapper PRETTY_SERIALIZER;
+
+    static {
+        JsonFactory factory = new JsonFactory();
+        factory.setStreamReadConstraints(StreamReadConstraints
+                .builder()
+                .maxNestingDepth(10)
+                .maxStringLength(TikaConfig.getMaxJsonStringFieldLength())
+                .maxNumberLength(500)
+//                .maxDocumentLength(1000000)
+                .build());
+        OBJECT_MAPPER = new ObjectMapper(factory);
+        SimpleModule baseModule = new SimpleModule();
+        baseModule.addDeserializer(Metadata.class, new MetadataDeserializer());
+        baseModule.addSerializer(Metadata.class, new MetadataSerializer());
+        OBJECT_MAPPER.registerModule(baseModule);
+
+        PRETTY_SERIALIZER = new ObjectMapper(factory);
+        SimpleModule prettySerializerModule = new SimpleModule();
+        prettySerializerModule.addSerializer(Metadata.class, new MetadataSerializer(true));
+        PRETTY_SERIALIZER.registerModule(prettySerializerModule);
+
+    }
+
     /**
      * Serializes a Metadata object to Json.  This does not flush or close the 
writer.
      *
@@ -46,24 +70,10 @@ public class JsonMetadataList {
      * @throws org.apache.tika.exception.TikaException if there is an 
IOException during writing
      */
     public static void toJson(List<Metadata> metadataList, Writer writer, 
boolean prettyPrint) throws IOException {
-        if (metadataList == null) {
-            writer.write("null");
-            return;
-        }
-        try (JsonGenerator jsonGenerator = new JsonFactory()
-                .setStreamReadConstraints(StreamReadConstraints
-                        .builder()
-                        
.maxStringLength(TikaConfig.getMaxJsonStringFieldLength())
-                        .build())
-                .createGenerator(CloseShieldWriter.wrap(writer))) {
-            if (prettyPrint) {
-                jsonGenerator.useDefaultPrettyPrinter();
-            }
-            jsonGenerator.writeStartArray();
-            for (Metadata m : metadataList) {
-                JsonMetadata.writeMetadataObject(m, jsonGenerator, 
prettyPrint);
-            }
-            jsonGenerator.writeEndArray();
+        if (prettyPrint) {
+            PRETTY_SERIALIZER.writerWithDefaultPrettyPrinter().writeValue(writer, metadataList);
+        } else {
+            OBJECT_MAPPER.writeValue(writer, metadataList);
         }
     }
 
@@ -86,51 +96,19 @@ public class JsonMetadataList {
      * @throws IOException in case of parse failure or IO failure with Reader
      */
     public static List<Metadata> fromJson(Reader reader) throws IOException {
-        List<Metadata> ms = null;
         if (reader == null) {
-            return ms;
-        }
-        ms = new ArrayList<>();
-        try (JsonParser jParser = new JsonFactory()
-                .setStreamReadConstraints(StreamReadConstraints
-                        .builder()
-                        
.maxStringLength(TikaConfig.getMaxJsonStringFieldLength())
-                        .build())
-                .createParser(CloseShieldReader.wrap(reader))) {
-
-            JsonToken token = jParser.nextToken();
-            if (token != JsonToken.START_ARRAY) {
-                throw new IOException("metadata list must start with an array, 
but I see: " + token.name());
-            }
-            token = jParser.nextToken();
-            while (token != JsonToken.END_ARRAY) {
-                Metadata m = JsonMetadata.readMetadataObject(jParser);
-                ms.add(m);
-                token = jParser.nextToken();
-            }
-
-        }
-        if (ms == null) {
             return null;
         }
-        //if the last object is the main document,
-        //as happens with the streaming serializer,
-        //flip it to be the first element.
-        if (ms.size() > 1) {
-            Metadata last = ms.get(ms.size() - 1);
-            String embResourcePath = 
last.get(TikaCoreProperties.EMBEDDED_RESOURCE_PATH);
-            if (embResourcePath == null && ms
-                    .get(0)
-                    .get(TikaCoreProperties.EMBEDDED_RESOURCE_PATH) != null) {
-                ms.add(0, ms.remove(ms.size() - 1));
-            }
+        if (OBJECT_MAPPER.getFactory().streamReadConstraints().getMaxStringLength()
+                != TikaConfig.getMaxJsonStringFieldLength()) {
+            OBJECT_MAPPER = buildObjectMapper(TikaConfig.getMaxJsonStringFieldLength());
         }
-        return ms;
+
+        return OBJECT_MAPPER.readValue(reader, new TypeReference<List<Metadata>>(){});
     }
 
     public static void setPrettyPrinting(boolean prettyPrint) {
         PRETTY_PRINT = prettyPrint;
     }
 
-
 }
diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonStreamingSerializer.java b/tika-serialization/src/main/java/org/apache/tika/serialization/JsonStreamingSerializer.java
deleted file mode 100644
index 259695ada..000000000
--- a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonStreamingSerializer.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.serialization;
-
-
-import java.io.IOException;
-import java.io.Writer;
-import java.util.Arrays;
-
-import com.fasterxml.jackson.core.JsonFactory;
-import com.fasterxml.jackson.core.JsonGenerator;
-import com.fasterxml.jackson.core.StreamReadConstraints;
-
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.metadata.Metadata;
-
-
-public class JsonStreamingSerializer implements AutoCloseable {
-
-    private final Writer writer;
-    boolean hasStartedArray = false;
-    private JsonGenerator jsonGenerator;
-
-    public JsonStreamingSerializer(Writer writer) {
-        this.writer = writer;
-    }
-
-    public void add(Metadata metadata) throws IOException {
-        if (!hasStartedArray) {
-            jsonGenerator = new JsonFactory()
-                    .setStreamReadConstraints(StreamReadConstraints
-                            .builder()
-                            
.maxStringLength(TikaConfig.getMaxJsonStringFieldLength())
-                            .build())
-                    .createGenerator(writer);
-            jsonGenerator.writeStartArray();
-            hasStartedArray = true;
-        }
-        String[] names = metadata.names();
-        Arrays.sort(names);
-        JsonMetadata.writeMetadataObject(metadata, jsonGenerator, false);
-    }
-
-    @Override
-    public void close() throws IOException {
-        jsonGenerator.writeEndArray();
-        jsonGenerator.flush();
-        jsonGenerator.close();
-    }
-}
diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/MetadataDeserializer.java b/tika-serialization/src/main/java/org/apache/tika/serialization/MetadataDeserializer.java
new file mode 100644
index 000000000..4dc7c3a7e
--- /dev/null
+++ b/tika-serialization/src/main/java/org/apache/tika/serialization/MetadataDeserializer.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.serialization;
+
+import java.io.IOException;
+
+import com.fasterxml.jackson.core.JacksonException;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonToken;
+import com.fasterxml.jackson.databind.DeserializationContext;
+import com.fasterxml.jackson.databind.JsonDeserializer;
+
+import org.apache.tika.metadata.Metadata;
+
+public class MetadataDeserializer extends JsonDeserializer<Metadata> {
+
+    @Override
+    public Metadata deserialize(JsonParser jsonParser, DeserializationContext deserializationContext) throws IOException, JacksonException {
+        Metadata metadata = new Metadata();
+        boolean keepGoing = true;
+        while (keepGoing) {
+            keepGoing = addField(jsonParser, metadata);
+        }
+        return metadata;
+    }
+
+    private boolean addField(JsonParser jsonParser, Metadata metadata) throws IOException {
+        String field = jsonParser.nextFieldName();
+        if (field == null) {
+            return false;
+        }
+        JsonToken token = jsonParser.nextValue();
+
+        if (token == null) {
+            return false;
+        }
+
+        if (token.isScalarValue()) {
+            metadata.set(field, jsonParser.getText());
+        } else if (jsonParser.isExpectedStartArrayToken()) {
+            token = jsonParser.nextToken();
+            while (token != null) {
+                if (token == JsonToken.END_ARRAY) {
+                    return true;
+                } else if (token.isScalarValue()) {
+                    metadata.add(field, jsonParser.getText());
+                } else {
+                    break;
+                }
+                token = jsonParser.nextToken();
+            }
+        } else {
+            return false;
+        }
+        return true;
+    }
+}
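Per field, the deserializer accepts either a single scalar (stored with Metadata.set) or an array of scalars (each added with Metadata.add). A small sketch feeding it hand-written JSON through a mapper with the deserializer registered; the JSON literal is illustrative:

    import com.fasterxml.jackson.databind.ObjectMapper;
    import com.fasterxml.jackson.databind.module.SimpleModule;

    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.serialization.MetadataDeserializer;

    public class MetadataDeserializerSketch {
        public static void main(String[] args) throws Exception {
            ObjectMapper mapper = new ObjectMapper();
            SimpleModule module = new SimpleModule();
            module.addDeserializer(Metadata.class, new MetadataDeserializer());
            mapper.registerModule(module);

            // "k1" arrives as a scalar (set), "k2" as an array (add per element)
            String json = "{\"k1\":\"v1\",\"k2\":[\"a\",\"b\"]}";
            Metadata metadata = mapper.readValue(json, Metadata.class);
        }
    }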
diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/MetadataSerializer.java b/tika-serialization/src/main/java/org/apache/tika/serialization/MetadataSerializer.java
new file mode 100644
index 000000000..68e7d6593
--- /dev/null
+++ b/tika-serialization/src/main/java/org/apache/tika/serialization/MetadataSerializer.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.serialization;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Comparator;
+
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.JsonSerializer;
+import com.fasterxml.jackson.databind.SerializerProvider;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+
+public class MetadataSerializer extends JsonSerializer<Metadata> {
+    private static final String TIKA_CONTENT_KEY = TikaCoreProperties.TIKA_CONTENT.getName();
+
+    //always sort the content at the end
+    private static final Comparator<String> METADATA_KEY_COMPARATOR = new Comparator<String>() {
+        @Override
+        public int compare(String o1, String o2) {
+            if (o1.equals(TIKA_CONTENT_KEY)) {
+                return 1;
+            }
+            if (o2.equals(TIKA_CONTENT_KEY)) {
+                return -1;
+            }
+            return o1.compareTo(o2);
+        }
+    };
+
+    private boolean prettyPrint = false;
+
+    public MetadataSerializer() {
+
+    }
+
+    public MetadataSerializer(boolean prettyPrint) {
+        this.prettyPrint = prettyPrint;
+    }
+    @Override
+    public void serialize(Metadata metadata, JsonGenerator jsonGenerator, SerializerProvider serializerProvider) throws IOException {
+        jsonGenerator.writeStartObject();
+        String[] names = metadata.names();
+        if (prettyPrint) {
+            Arrays.sort(names, METADATA_KEY_COMPARATOR);
+        }
+        for (String n : names) {
+            String[] v = metadata.getValues(n);
+            if (v.length == 0) {
+                continue;
+            } else if (v.length == 1) {
+                jsonGenerator.writeStringField(n, v[0]);
+            } else {
+                jsonGenerator.writeFieldName(n);
+                jsonGenerator.writeArray(v, 0, v.length);
+            }
+        }
+        jsonGenerator.writeEndObject();
+    }
+}
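Sorting only happens in the prettyPrint variant, and the comparator always pushes X-TIKA:content to the end so large extracted text lands last in the object. This mirrors how JsonMetadata and JsonMetadataList build their PRETTY_SERIALIZER; a standalone sketch with illustrative values:

    import java.io.StringWriter;

    import com.fasterxml.jackson.databind.ObjectMapper;
    import com.fasterxml.jackson.databind.module.SimpleModule;

    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.metadata.TikaCoreProperties;
    import org.apache.tika.serialization.MetadataSerializer;

    public class PrettyMetadataSketch {
        public static void main(String[] args) throws Exception {
            ObjectMapper pretty = new ObjectMapper();
            SimpleModule module = new SimpleModule();
            module.addSerializer(Metadata.class, new MetadataSerializer(true));
            pretty.registerModule(module);

            Metadata metadata = new Metadata();
            metadata.add(TikaCoreProperties.TIKA_CONTENT, "body text");
            metadata.add("dc:title", "a title");

            StringWriter writer = new StringWriter();
            // keys are sorted, with X-TIKA:content forced to the end
            pretty.writerWithDefaultPrettyPrinter().writeValue(writer, metadata);
        }
    }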
diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextSerializer.java b/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextSerializer.java
index 584920e5c..adc0c4691 100644
--- a/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextSerializer.java
+++ b/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextSerializer.java
@@ -30,7 +30,6 @@ public class ParseContextSerializer extends JsonSerializer<ParseContext> {
 
     @Override
     public void serialize(ParseContext parseContext, JsonGenerator jsonGenerator, SerializerProvider serializerProvider) throws IOException {
-        jsonGenerator.writeFieldName(PARSE_CONTEXT);
         jsonGenerator.writeStartObject();
         for (String className : parseContext.keySet()) {
             try {
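Dropping writeFieldName(PARSE_CONTEXT) makes this serializer emit a bare JSON object, leaving the wrapping field name to the caller, as FetchEmitTupleSerializer does above with writeObjectField. A caller-side sketch; jsonGenerator and parseContext are assumed to be in scope:

    // Option 1: name the wrapping field explicitly and let the registered serializer run
    jsonGenerator.writeObjectField(ParseContextSerializer.PARSE_CONTEXT, parseContext);

    // Option 2: call the serializer directly, as the updated test at the end of this commit does;
    // the result is a bare object with no surrounding "parseContext" field
    new ParseContextSerializer().serialize(parseContext, jsonGenerator, null);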
diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/JsonMetadataListTest.java b/tika-serialization/src/test/java/org/apache/tika/serialization/JsonMetadataListTest.java
index 79ecf8df4..8c1c45a27 100644
--- a/tika-serialization/src/test/java/org/apache/tika/serialization/JsonMetadataListTest.java
+++ b/tika-serialization/src/test/java/org/apache/tika/serialization/JsonMetadataListTest.java
@@ -18,17 +18,20 @@ package org.apache.tika.serialization;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
-import java.io.Reader;
+import java.io.InputStream;
 import java.io.StringReader;
 import java.io.StringWriter;
 import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.List;
 
+import com.fasterxml.jackson.databind.JsonMappingException;
 import org.junit.jupiter.api.Test;
 
+import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 
@@ -60,16 +63,6 @@ public class JsonMetadataListTest {
         JsonMetadataList.toJson(metadataList, writer);
         List<Metadata> deserialized = JsonMetadataList.fromJson(new StringReader(writer.toString()));
         assertEquals(metadataList, deserialized);
-
-        //now test streaming serializer
-        writer = new StringWriter();
-        try (JsonStreamingSerializer streamingSerializer = new 
JsonStreamingSerializer(writer)) {
-            streamingSerializer.add(m1);
-            streamingSerializer.add(m2);
-        }
-        deserialized = JsonMetadataList.fromJson(new 
StringReader(writer.toString()));
-        assertEquals(metadataList, deserialized);
-
     }
 
     @Test
@@ -87,8 +80,7 @@ public class JsonMetadataListTest {
     @Test
     public void testListCorrupted() throws Exception {
         String json = 
"[{\"k1\":[\"v1\",\"v2\",\"v3\",\"v4\",\"v4\"],\"k2\":\"v1\"}," + 
"\"k3\":[\"v1\",\"v2\",\"v3\",\"v4\",\"v4\"],\"k4\":\"v1\"}]";
-        List<Metadata> m = JsonMetadataList.fromJson(null);
-        assertNull(m);
+        Exception ex = assertThrows(JsonMappingException.class, () -> JsonMetadataList.fromJson(new StringReader(json)));
     }
 
     @Test
@@ -119,13 +111,17 @@ public class JsonMetadataListTest {
                 .toString()
                 .startsWith("["));
         writer = new StringWriter();
+        JsonMetadata.setPrettyPrinting(true);
+
+
         JsonMetadataList.setPrettyPrinting(true);
         JsonMetadataList.toJson(metadataList, writer);
-        assertTrue(writer
+        String expected = "[ {[NEWLINE]  \"zk1\" : [ \"v1\", \"v2\", \"v3\", \"v4\", \"v4\" ],[NEWLINE]  \"zk2\" : \"v1\",[NEWLINE]" +
+                "  \"X-TIKA:content\" : \"this is the content\"[NEWLINE]}, " +
+                "{[NEWLINE]  \"k3\" : [ \"v1\", \"v2\", \"v3\", \"v4\", \"v4\" ],[NEWLINE]  \"k4\" : \"v1\"[NEWLINE]} ]";
+        assertEquals(expected, writer
                 .toString()
-                .replaceAll("\r\n", "\n")
-                .startsWith("[ {\n" + "  \"zk1\" : [ \"v1\", \"v2\", \"v3\", 
\"v4\", \"v4\" ],\n" + "  \"zk2\" : \"v1\",\n" + "  \"X-TIKA:content\" : \"this 
is the content\"\n" +
-                        "},"));
+                .replaceAll("[\r\n]+", "[NEWLINE]"));
 
 
         //now set it back to false
@@ -138,35 +134,24 @@ public class JsonMetadataListTest {
     }
 
     @Test
-    public void testSwitchingOrderOfMainDoc() throws Exception {
-        Metadata m1 = new Metadata();
-        m1.add("k1", "v1");
-        m1.add("k1", "v2");
-        m1.add("k1", "v3");
-        m1.add("k1", "v4");
-        m1.add("k1", "v4");
-        m1.add("k2", "v1");
-        m1.add(TikaCoreProperties.EMBEDDED_RESOURCE_PATH, "/embedded-1");
-
-        Metadata m2 = new Metadata();
-        m2.add("k3", "v1");
-        m2.add("k3", "v2");
-        m2.add("k3", "v3");
-        m2.add("k3", "v4");
-        m2.add("k3", "v4");
-        m2.add("k4", "v1");
-
-        List<Metadata> truth = new ArrayList<>();
-        truth.add(m2);
-        truth.add(m1);
-        StringWriter stringWriter = new StringWriter();
-        try (JsonStreamingSerializer serializer = new 
JsonStreamingSerializer(stringWriter)) {
-            serializer.add(m1);
-            serializer.add(m2);
+    public void testLargeValues() throws Exception {
+        //TIKA-4154
+        TikaConfig tikaConfig = null;
+        try (InputStream is = JsonMetadata.class.getResourceAsStream("/config/tika-config-json.xml")) {
+            tikaConfig = new TikaConfig(is);
         }
-        Reader reader = new StringReader(stringWriter.toString());
-        List<Metadata> deserialized = JsonMetadataList.fromJson(reader);
-        assertEquals(truth, deserialized);
-
+        StringBuilder sb = new StringBuilder();
+        for (int i = 0; i < 30000000; i++) {
+            sb.append("v");
+        }
+        Metadata m = new Metadata();
+        m.add("large_value", sb.toString());
+        List<Metadata> list = new ArrayList<>();
+        list.add(m);
+        list.add(m);
+        StringWriter writer = new StringWriter();
+        JsonMetadataList.toJson(list, writer);
+        List<Metadata> deserialized = JsonMetadataList.fromJson(new StringReader(writer.toString()));
+        assertEquals(list, deserialized);
     }
 }
diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/JsonMetadataTest.java b/tika-serialization/src/test/java/org/apache/tika/serialization/JsonMetadataTest.java
index aa9b8ccad..80d32bdc4 100644
--- a/tika-serialization/src/test/java/org/apache/tika/serialization/JsonMetadataTest.java
+++ b/tika-serialization/src/test/java/org/apache/tika/serialization/JsonMetadataTest.java
@@ -29,12 +29,14 @@ import org.junit.jupiter.api.Test;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
 
 public class JsonMetadataTest {
 
     @Test
     public void testBasicSerializationAndDeserialization() throws Exception {
         Metadata metadata = new Metadata();
+        metadata.add(TikaCoreProperties.TIKA_CONTENT, "this is the content");
         metadata.add("k1", "v1");
         metadata.add("k1", "v2");
         //test duplicate value
@@ -54,7 +56,7 @@ public class JsonMetadataTest {
         StringWriter writer = new StringWriter();
         JsonMetadata.toJson(metadata, writer);
         Metadata deserialized = JsonMetadata.fromJson(new 
StringReader(writer.toString()));
-        assertEquals(7, deserialized.names().length);
+        assertEquals(8, deserialized.names().length);
         assertEquals(metadata, deserialized);
 
         //test that this really is 6 Chinese characters
@@ -66,11 +68,13 @@ public class JsonMetadataTest {
         writer = new StringWriter();
         JsonMetadata.setPrettyPrinting(true);
         JsonMetadata.toJson(metadata, writer);
-        assertTrue(writer
+        String expected = "{[NEWLINE]  \"alma_mater\" : \"普林斯顿大学\",[NEWLINE]  \"html\" : \"<html><body>&amp;&nbsp;</body></html>\"," +
+                "[NEWLINE]  \"json_escapes\" : \"the: \\\"quick\\\" brown, fox\"," +
+                "[NEWLINE]  \"k1\" : [ \"v1\", \"v2\" ],[NEWLINE]  \"k3\" : [ \"v3\", \"v3\" ],[NEWLINE]  \"k4\" : \"500,000\"," +
+                "[NEWLINE]  \"url\" : \"/myApp/myAction.html?method=router&cmd=1\",[NEWLINE]  \"X-TIKA:content\" : \"this is the content\"[NEWLINE]}";
+        assertEquals(expected, writer
                 .toString()
-                .replaceAll("\r\n", "\n")
-                .contains("\"json_escapes\" : \"the: \\\"quick\\\" brown, 
fox\",\n" + "  \"k1\" : [ \"v1\", \"v2\" ],\n" + "  \"k3\" : [ \"v3\", \"v3\" 
],\n" +
-                        "  \"k4\" : \"500,000\",\n" + "  \"url\" : 
\"/myApp/myAction.html?method=router&cmd=1\"\n" + "}"));
+                .replaceAll("[\r\n]+", "[NEWLINE]"));
     }
 
     @Test
diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java b/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java
index f6bf0a95f..89913d4b6 100644
--- a/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java
+++ b/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java
@@ -45,10 +45,8 @@ public class TestParseContextSerialization {
         String json;
         try (Writer writer = new StringWriter()) {
             try (JsonGenerator jsonGenerator = new JsonFactory().createGenerator(writer)) {
-                jsonGenerator.writeStartObject();
                 ParseContextSerializer serializer = new ParseContextSerializer();
                 serializer.serialize(pc, jsonGenerator, null);
-                jsonGenerator.writeEndObject();
             }
             json = writer.toString();
         }
