This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch clean-up-metadata-list-json-settings
in repository https://gitbox.apache.org/repos/asf/tika.git

commit bc52af0cabcc541cc6c995f6a64a5fa48f1ac00f
Author: tallison <[email protected]>
AuthorDate: Wed Dec 17 09:55:25 2025 -0500

    Clean up jackson settings on metadata list serialization/deserialization
---
 .../org/apache/tika/config/GlobalSettings.java     | 57 --------------
 .../apache/tika/config/loader/TikaJsonConfig.java  |  2 +-
 .../org/apache/tika/config/loader/TikaLoader.java  | 60 ++++++++++++---
 .../apache/tika/serialization/JsonMetadata.java    | 89 +++++++++++++---------
 .../tika/serialization/JsonMetadataList.java       | 74 ++++++++++++------
 .../test/resources/configs/tika-config-json.json   |  6 +-
 .../test/resources/configs/tika-config-json.json   |  4 +-
 7 files changed, 163 insertions(+), 129 deletions(-)

diff --git a/tika-serialization/src/main/java/org/apache/tika/config/GlobalSettings.java b/tika-serialization/src/main/java/org/apache/tika/config/GlobalSettings.java
index 7d07c3b9e..7493000ae 100644
--- a/tika-serialization/src/main/java/org/apache/tika/config/GlobalSettings.java
+++ b/tika-serialization/src/main/java/org/apache/tika/config/GlobalSettings.java
@@ -25,7 +25,6 @@ import com.fasterxml.jackson.annotation.JsonProperty;
  * <p>Example JSON:
  * <pre>
  * {
- *   "maxJsonStringFieldLength": 50000000,
  *   "xml-reader-utils": {
  *     "maxEntityExpansions": 1000,
  *     "maxNumReuses": 100,
@@ -36,20 +35,6 @@ import com.fasterxml.jackson.annotation.JsonProperty;
  */
 public class GlobalSettings {
 
-    /**
-     * Static maximum length for JSON string fields.
-     * Default: 20,000,000 (Jackson's default)
-     * This is static because it's a global setting that affects all JSON parsing.
-     */
-    private static Integer maxJsonStringFieldLength = 20_000_000;
-
-    /**
-     * Instance field for deserialization from JSON.
-     * The value is copied to the static field when set.
-     */
-    @JsonProperty("maxJsonStringFieldLength")
-    private Integer instanceMaxJsonStringFieldLength = 20_000_000;
-
     /**
      * Service loader configuration for handling initialization problems.
      */
@@ -62,48 +47,6 @@ public class GlobalSettings {
     @JsonProperty("xml-reader-utils")
     private XmlReaderUtilsConfig xmlReaderUtils;
 
-    /**
-     * Gets the static maximum JSON string field length.
-     *
-     * @return the max length, or null if not set
-     */
-    public static Integer getMaxJsonStringFieldLength() {
-        return maxJsonStringFieldLength;
-    }
-
-    /**
-     * Sets the static maximum JSON string field length.
-     * This affects all JSON parsing globally.
-     *
-     * @param length the max length to set
-     */
-    public static void setMaxJsonStringFieldLength(Integer length) {
-        maxJsonStringFieldLength = length;
-    }
-
-    /**
-     * Instance getter for deserialization.
-     * Returns the instance value which may differ from the static value.
-     *
-     * @return the instance max length
-     */
-    public Integer getInstanceMaxJsonStringFieldLength() {
-        return instanceMaxJsonStringFieldLength;
-    }
-
-    /**
-     * Instance setter for deserialization.
-     * Automatically updates the static field when set.
-     *
-     * @param length the max length to set
-     */
-    public void setInstanceMaxJsonStringFieldLength(Integer length) {
-        this.instanceMaxJsonStringFieldLength = length;
-        if (length != null) {
-            setMaxJsonStringFieldLength(length);
-        }
-    }
-
     public ServiceLoaderConfig getServiceLoader() {
         return serviceLoader;
     }
diff --git a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
index 2eeb8bc7a..8ce14a30f 100644
--- a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
+++ b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
@@ -103,7 +103,7 @@ public class TikaJsonConfig {
      */
     private static final Set<String> KNOWN_KEYS = Set.of(
             // Globals
-            "maxJsonStringFieldLength",
+            "metadata-list",
             "service-loader",
             "xml-reader-utils",
             // Core Tika component keys
diff --git a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
index 52e17d9d1..b82beb7d8 100644
--- a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
+++ b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
@@ -21,6 +21,8 @@ import java.nio.file.Path;
 import java.util.Collections;
 import java.util.List;
 
+import com.fasterxml.jackson.core.StreamReadConstraints;
+import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
 
 import org.apache.tika.config.GlobalSettings;
@@ -39,6 +41,8 @@ import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.renderer.CompositeRenderer;
 import org.apache.tika.renderer.Renderer;
+import org.apache.tika.serialization.JsonMetadata;
+import org.apache.tika.serialization.JsonMetadataList;
 
 /**
  * Main entry point for loading Tika components from JSON configuration.
@@ -396,17 +400,18 @@ public class TikaLoader {
      *
      * <p>Settings include:
      * <ul>
-     *   <li>maxJsonStringFieldLength - Maximum JSON string field length (static, affects all JSON parsing)</li>
-     *   <li>service-loader.initializableProblemHandler - How to handle initialization problems</li>
+     *   <li>metadata-list - Jackson StreamReadConstraints for JsonMetadata/JsonMetadataList serialization</li>
+     *   <li>service-loader - Service loader configuration</li>
      *   <li>xml-reader-utils - XML parser security settings</li>
      * </ul>
      *
      * <p>Example JSON:
      * <pre>
      * {
-     *   "maxJsonStringFieldLength": 50000000,
-     *   "service-loader": {
-     *     "initializableProblemHandler": "ignore"
+     *   "metadata-list": {
+     *     "maxStringLength": 50000000,
+     *     "maxNestingDepth": 10,
+     *     "maxNumberLength": 500
      *   },
      *   "xml-reader-utils": {
      *     "maxEntityExpansions": 1000,
@@ -423,11 +428,8 @@ public class TikaLoader {
         if (globalSettings == null) {
             globalSettings = new GlobalSettings();
 
-            // Load maxJsonStringFieldLength from top level and set it statically
-            if (config.getRootNode().has("maxJsonStringFieldLength")) {
-                GlobalSettings.setMaxJsonStringFieldLength(
-                        config.getRootNode().get("maxJsonStringFieldLength").asInt());
-            }
+            // Load metadata-list config for JsonMetadata/JsonMetadataList serialization
+            loadMetadataListConfig();
 
             // Load service-loader config (official Tika config at root level)
             GlobalSettings.ServiceLoaderConfig serviceLoaderConfig =
@@ -446,6 +448,44 @@ public class TikaLoader {
         return globalSettings;
     }
 
+    /**
+     * Loads the metadata-list configuration section and applies it to
+     * JsonMetadata and JsonMetadataList serializers.
+     * <p>
+     * Configuration uses Jackson's StreamReadConstraints property names:
+     * <pre>
+     * {
+     *   "metadata-list": {
+     *     "maxStringLength": 20000000,
+     *     "maxNestingDepth": 10,
+     *     "maxNumberLength": 500
+     *   }
+     * }
+     * </pre>
+     */
+    private void loadMetadataListConfig() {
+        JsonNode metadataListNode = config.getRootNode().get("metadata-list");
+        if (metadataListNode == null) {
+            return;
+        }
+
+        StreamReadConstraints.Builder builder = StreamReadConstraints.builder();
+
+        if (metadataListNode.has("maxStringLength")) {
+            builder.maxStringLength(metadataListNode.get("maxStringLength").asInt());
+        }
+        if (metadataListNode.has("maxNestingDepth")) {
+            builder.maxNestingDepth(metadataListNode.get("maxNestingDepth").asInt());
+        }
+        if (metadataListNode.has("maxNumberLength")) {
+            builder.maxNumberLength(metadataListNode.get("maxNumberLength").asInt());
+        }
+
+        StreamReadConstraints constraints = builder.build();
+        JsonMetadata.setStreamReadConstraints(constraints);
+        JsonMetadataList.setStreamReadConstraints(constraints);
+    }
+
     /**
      * Gets the global settings if they have been loaded.
      *
diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java b/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java
index e9adec234..504fb4f19 100644
--- a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java
+++ b/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java
@@ -26,22 +26,69 @@ import com.fasterxml.jackson.core.StreamReadConstraints;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.module.SimpleModule;
 
-import org.apache.tika.config.GlobalSettings;
 import org.apache.tika.metadata.Metadata;
 
 public class JsonMetadata {
 
     static volatile boolean PRETTY_PRINT = false;
 
-    private static ObjectMapper OBJECT_MAPPER;
-    private static final ObjectMapper PRETTY_SERIALIZER;
+    /**
+     * Default stream read constraints for metadata serialization.
+     */
+    private static final StreamReadConstraints DEFAULT_CONSTRAINTS = StreamReadConstraints
+            .builder()
+            .maxNestingDepth(10)
+            .maxStringLength(20_000_000)
+            .maxNumberLength(500)
+            .build();
+
+    private static volatile StreamReadConstraints streamReadConstraints = DEFAULT_CONSTRAINTS;
+    private static volatile ObjectMapper OBJECT_MAPPER;
+    private static volatile ObjectMapper PRETTY_SERIALIZER;
 
     static {
-        OBJECT_MAPPER = buildObjectMapper(StreamReadConstraints.DEFAULT_MAX_STRING_LEN);
-        PRETTY_SERIALIZER = new ObjectMapper();
+        rebuildObjectMappers();
+    }
+
+    private static void rebuildObjectMappers() {
+        JsonFactory factory = new JsonFactory();
+        factory.setStreamReadConstraints(streamReadConstraints);
+
+        ObjectMapper mapper = new ObjectMapper(factory);
+        SimpleModule baseModule = new SimpleModule();
+        baseModule.addDeserializer(Metadata.class, new MetadataDeserializer());
+        baseModule.addSerializer(Metadata.class, new MetadataSerializer());
+        mapper.registerModule(baseModule);
+        OBJECT_MAPPER = mapper;
+
+        ObjectMapper prettyMapper = new ObjectMapper(factory);
         SimpleModule prettySerializerModule = new SimpleModule();
         prettySerializerModule.addSerializer(Metadata.class, new MetadataSerializer(true));
-        PRETTY_SERIALIZER.registerModule(prettySerializerModule);
+        prettyMapper.registerModule(prettySerializerModule);
+        PRETTY_SERIALIZER = prettyMapper;
+    }
+
+    /**
+     * Sets the stream read constraints for JSON parsing of metadata.
+     * This affects all subsequent calls to {@link #fromJson(Reader)}.
+     * <p>
+     * Typically called by TikaLoader during initialization based on the
+     * "metadata-list" configuration section.
+     *
+     * @param constraints the constraints to use
+     */
+    public static synchronized void setStreamReadConstraints(StreamReadConstraints constraints) {
+        streamReadConstraints = constraints;
+        rebuildObjectMappers();
+    }
+
+    /**
+     * Gets the current stream read constraints.
+     *
+     * @return the current constraints
+     */
+    public static StreamReadConstraints getStreamReadConstraints() {
+        return streamReadConstraints;
     }
 
     /**
@@ -62,46 +109,20 @@ public class JsonMetadata {
     }
 
     /**
-     * Read metadata from reader.
-     * <p>
-     * This does not close the reader.
-     * <p>
-     * This will reset the OBJECT_MAPPER if the max string length differs from that in TikaConfig.
+     * Read metadata from reader. This does not close the reader.
      *
      * @param reader reader to read from
-     * @return Metadata or null if nothing could be read from the reader
+     * @return Metadata or null if reader is null
      * @throws IOException in case of parse failure or IO failure with Reader
      */
     public static Metadata fromJson(Reader reader) throws IOException {
         if (reader == null) {
             return null;
         }
-        if (OBJECT_MAPPER
-                .getFactory()
-                .streamReadConstraints()
-                .getMaxStringLength() != GlobalSettings.getMaxJsonStringFieldLength()) {
-            OBJECT_MAPPER = buildObjectMapper(GlobalSettings.getMaxJsonStringFieldLength());
-        }
         return OBJECT_MAPPER.readValue(reader, Metadata.class);
     }
 
     public static void setPrettyPrinting(boolean prettyPrint) {
         PRETTY_PRINT = prettyPrint;
     }
-
-    static ObjectMapper buildObjectMapper(int maxStringLen) {
-        JsonFactory factory = new JsonFactory();
-        factory.setStreamReadConstraints(StreamReadConstraints
-                .builder()
-                .maxNestingDepth(10)
-                .maxStringLength(maxStringLen)
-                .maxNumberLength(500)
-                .build());
-        ObjectMapper objectMapper = new ObjectMapper(factory);
-        SimpleModule baseModule = new SimpleModule();
-        baseModule.addDeserializer(Metadata.class, new MetadataDeserializer());
-        baseModule.addSerializer(Metadata.class, new MetadataSerializer());
-        objectMapper.registerModule(baseModule);
-        return objectMapper;
-    }
 }
diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java b/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java
index 71427947b..7611cdfea 100644
--- a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java
+++ b/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java
@@ -16,8 +16,6 @@
  */
 package org.apache.tika.serialization;
 
-import static org.apache.tika.serialization.JsonMetadata.buildObjectMapper;
-
 import java.io.IOException;
 import java.io.Reader;
 import java.io.Writer;
@@ -29,36 +27,69 @@ import com.fasterxml.jackson.core.type.TypeReference;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.module.SimpleModule;
 
-import org.apache.tika.config.GlobalSettings;
 import org.apache.tika.metadata.Metadata;
 
 public class JsonMetadataList {
 
     static volatile boolean PRETTY_PRINT = false;
 
-    private static ObjectMapper OBJECT_MAPPER;
-    private static final ObjectMapper PRETTY_SERIALIZER;
+    /**
+     * Default stream read constraints for metadata list serialization.
+     */
+    private static final StreamReadConstraints DEFAULT_CONSTRAINTS = StreamReadConstraints
+            .builder()
+            .maxNestingDepth(10)
+            .maxStringLength(20_000_000)
+            .maxNumberLength(500)
+            .build();
+
+    private static volatile StreamReadConstraints streamReadConstraints = DEFAULT_CONSTRAINTS;
+    private static volatile ObjectMapper OBJECT_MAPPER;
+    private static volatile ObjectMapper PRETTY_SERIALIZER;
 
     static {
+        rebuildObjectMappers();
+    }
+
+    private static void rebuildObjectMappers() {
         JsonFactory factory = new JsonFactory();
-        factory.setStreamReadConstraints(StreamReadConstraints
-                .builder()
-                .maxNestingDepth(10)
-                .maxStringLength(GlobalSettings.getMaxJsonStringFieldLength())
-                .maxNumberLength(500)
-//                                                              
.maxDocumentLength(1000000)
-                .build());
-        OBJECT_MAPPER = new ObjectMapper(factory);
+        factory.setStreamReadConstraints(streamReadConstraints);
+
+        ObjectMapper mapper = new ObjectMapper(factory);
         SimpleModule baseModule = new SimpleModule();
         baseModule.addDeserializer(Metadata.class, new MetadataDeserializer());
         baseModule.addSerializer(Metadata.class, new MetadataSerializer());
-        OBJECT_MAPPER.registerModule(baseModule);
+        mapper.registerModule(baseModule);
+        OBJECT_MAPPER = mapper;
 
-        PRETTY_SERIALIZER = new ObjectMapper(factory);
+        ObjectMapper prettyMapper = new ObjectMapper(factory);
         SimpleModule prettySerializerModule = new SimpleModule();
         prettySerializerModule.addSerializer(Metadata.class, new MetadataSerializer(true));
-        PRETTY_SERIALIZER.registerModule(prettySerializerModule);
+        prettyMapper.registerModule(prettySerializerModule);
+        PRETTY_SERIALIZER = prettyMapper;
+    }
+
+    /**
+     * Sets the stream read constraints for JSON parsing of metadata lists.
+     * This affects all subsequent calls to {@link #fromJson(Reader)}.
+     * <p>
+     * Typically called by TikaLoader during initialization based on the
+     * "metadata-list" configuration section.
+     *
+     * @param constraints the constraints to use
+     */
+    public static synchronized void setStreamReadConstraints(StreamReadConstraints constraints) {
+        streamReadConstraints = constraints;
+        rebuildObjectMappers();
+    }
 
+    /**
+     * Gets the current stream read constraints.
+     *
+     * @return the current constraints
+     */
+    public static StreamReadConstraints getStreamReadConstraints() {
+        return streamReadConstraints;
     }
 
     /**
@@ -89,21 +120,16 @@ public class JsonMetadataList {
     }
 
     /**
-     * Read metadata from reader. This does not close the reader
+     * Read metadata from reader. This does not close the reader.
      *
-     * @param reader
-     * @return Metadata or null if nothing could be read from the reader
+     * @param reader the reader to read from
+     * @return Metadata list or null if reader is null
      * @throws IOException in case of parse failure or IO failure with Reader
      */
     public static List<Metadata> fromJson(Reader reader) throws IOException {
         if (reader == null) {
             return null;
         }
-        if (OBJECT_MAPPER.getFactory().streamReadConstraints().getMaxStringLength()
-                != GlobalSettings.getMaxJsonStringFieldLength()) {
-            OBJECT_MAPPER = buildObjectMapper(GlobalSettings.getMaxJsonStringFieldLength());
-        }
-
         return OBJECT_MAPPER.readValue(reader, new TypeReference<List<Metadata>>(){});
     }
 
diff --git a/tika-serialization/src/test/resources/configs/tika-config-json.json b/tika-serialization/src/test/resources/configs/tika-config-json.json
index 8d1e5feb0..3650aab64 100644
--- a/tika-serialization/src/test/resources/configs/tika-config-json.json
+++ b/tika-serialization/src/test/resources/configs/tika-config-json.json
@@ -1,3 +1,5 @@
 {
-  "maxJsonStringFieldLength": 50000000
-}
\ No newline at end of file
+  "metadata-list": {
+    "maxStringLength": 50000000
+  }
+}
diff --git a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-json.json b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-json.json
index 419a225e6..3650aab64 100644
--- a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-json.json
+++ b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-json.json
@@ -1,3 +1,5 @@
 {
-  "maxJsonStringFieldLength": 50000000
+  "metadata-list": {
+    "maxStringLength": 50000000
+  }
 }

Reply via email to