This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 45a9d21bac TIKA-4669 -- improve serdes (#2617)
45a9d21bac is described below

commit 45a9d21bac63d9aac24aec3990dfc5e9f58cd197
Author: Tim Allison <[email protected]>
AuthorDate: Wed Feb 18 16:25:05 2026 -0500

    TIKA-4669 -- improve serdes (#2617)
---
 .../java/org/apache/tika/parser/ParseContext.java  |  15 ++-
 .../org/apache/tika/parser/ParseContextTest.java   | 104 +++++++++++++++++++++
 .../serialization/FetchEmitTupleDeserializer.java  |   8 --
 3 files changed, 116 insertions(+), 11 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
index 4a3bafa91b..8d0094de11 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
@@ -239,9 +239,18 @@ public class ParseContext implements Serializable {
         }
         // Copy typed objects
         context.putAll(source.context);
-        // Copy JSON configs
-        jsonConfigs.putAll(source.jsonConfigs);
-        // Copy resolved configs (if any)
+        // Copy JSON configs, invalidating stale resolved state for overridden keys.
+        // When a source jsonConfig overrides an existing entry, the previously resolved
+        // object is stale and must be cleared so resolveAll() will re-resolve from the
+        // new JSON config.
+        for (Map.Entry<String, JsonConfig> entry : source.jsonConfigs.entrySet()) {
+            String key = entry.getKey();
+            jsonConfigs.put(key, entry.getValue());
+            if (resolvedConfigs != null) {
+                resolvedConfigs.remove(key);
+            }
+        }
+        // Copy resolved configs from source (if any)
         if (source.resolvedConfigs != null && !source.resolvedConfigs.isEmpty()) {
             if (resolvedConfigs == null) {
                 resolvedConfigs = new HashMap<>();
diff --git a/tika-core/src/test/java/org/apache/tika/parser/ParseContextTest.java b/tika-core/src/test/java/org/apache/tika/parser/ParseContextTest.java
new file mode 100644
index 0000000000..59f24ee73b
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/parser/ParseContextTest.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+import org.junit.jupiter.api.Test;
+
+public class ParseContextTest {
+
+    @Test
+    public void testCopyFromInvalidatesStaleResolvedConfigs() {
+        // Simulate a "default" context that has already resolved a config
+        ParseContext defaults = new ParseContext();
+        defaults.setJsonConfig("my-component", "{\"value\":\"default\"}");
+        Object defaultResolved = new Object();
+        defaults.setResolvedConfig("my-component", defaultResolved);
+
+        // Simulate a request context that overrides only the jsonConfig (no resolvedConfig)
+        ParseContext request = new ParseContext();
+        request.setJsonConfig("my-component", "{\"value\":\"override\"}");
+
+        // Merge: defaults + request overlay
+        defaults.copyFrom(request);
+
+        // The stale resolvedConfig must be cleared so resolveAll() will re-resolve
+        assertNull(defaults.getResolvedConfig("my-component"),
+                "copyFrom must clear stale resolvedConfig when jsonConfig is overridden");
+
+        // The jsonConfig should be the override
+        assertNotNull(defaults.getJsonConfigs().get("my-component"));
+        assertEquals("{\"value\":\"override\"}",
+                defaults.getJsonConfigs().get("my-component").json());
+    }
+
+    @Test
+    public void testCopyFromPreservesResolvedConfigsForUnrelatedKeys() {
+        ParseContext defaults = new ParseContext();
+        defaults.setJsonConfig("component-a", "{\"a\":true}");
+        Object resolvedA = new Object();
+        defaults.setResolvedConfig("component-a", resolvedA);
+
+        // Request overrides a DIFFERENT key
+        ParseContext request = new ParseContext();
+        request.setJsonConfig("component-b", "{\"b\":true}");
+
+        defaults.copyFrom(request);
+
+        // component-a's resolvedConfig should be untouched
+        assertEquals(resolvedA, defaults.getResolvedConfig("component-a"),
+                "copyFrom must not clear resolvedConfigs for keys not overridden by source");
+    }
+
+    @Test
+    public void testCopyFromWithSourceResolvedConfigOverrides() {
+        ParseContext defaults = new ParseContext();
+        defaults.setJsonConfig("my-component", "{\"value\":\"default\"}");
+        Object defaultResolved = new Object();
+        defaults.setResolvedConfig("my-component", defaultResolved);
+
+        // Source has both jsonConfig AND resolvedConfig (e.g., already resolved upstream)
+        ParseContext source = new ParseContext();
+        source.setJsonConfig("my-component", "{\"value\":\"override\"}");
+        Object sourceResolved = new Object();
+        source.setResolvedConfig("my-component", sourceResolved);
+
+        defaults.copyFrom(source);
+
+        // Source's resolvedConfig should win
+        assertEquals(sourceResolved, defaults.getResolvedConfig("my-component"),
+                "copyFrom should use source's resolvedConfig when source has one");
+    }
+
+    @Test
+    public void testCopyFromEmptySourcePreservesDefaults() {
+        ParseContext defaults = new ParseContext();
+        defaults.setJsonConfig("my-component", "{\"value\":\"default\"}");
+        Object defaultResolved = new Object();
+        defaults.setResolvedConfig("my-component", defaultResolved);
+
+        ParseContext emptySource = new ParseContext();
+        defaults.copyFrom(emptySource);
+
+        // Empty source should not disturb existing state
+        assertEquals(defaultResolved, defaults.getResolvedConfig("my-component"),
+                "copyFrom with empty source must preserve existing resolvedConfigs");
+    }
+}
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/FetchEmitTupleDeserializer.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/FetchEmitTupleDeserializer.java
index 7af419c09d..c842d60aa6 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/FetchEmitTupleDeserializer.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/FetchEmitTupleDeserializer.java
@@ -38,13 +38,11 @@ import com.fasterxml.jackson.databind.JsonDeserializer;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
 
-import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.pipes.api.FetchEmitTuple;
 import org.apache.tika.pipes.api.emitter.EmitKey;
 import org.apache.tika.pipes.api.fetcher.FetchKey;
-import org.apache.tika.serialization.ParseContextUtils;
 import org.apache.tika.serialization.serdes.ParseContextDeserializer;
 
 public class FetchEmitTupleDeserializer extends JsonDeserializer<FetchEmitTuple> {
@@ -64,12 +62,6 @@ public class FetchEmitTupleDeserializer extends JsonDeserializer<FetchEmitTuple>
         Metadata metadata = readMetadata(root);
         JsonNode parseContextNode = root.get(PARSE_CONTEXT);
         ParseContext parseContext = parseContextNode == null ? new ParseContext() : ParseContextDeserializer.readParseContext(parseContextNode, mapper);
-        // Resolve all friendly-named components from jsonConfigs to actual objects
-        try {
-            ParseContextUtils.resolveAll(parseContext, FetchEmitTupleDeserializer.class.getClassLoader());
-        } catch (TikaConfigException e) {
-            throw new IOException("Failed to resolve parse-context components", e);
-        }
         FetchEmitTuple.ON_PARSE_EXCEPTION onParseException = readOnParseException(root);
 
         return new FetchEmitTuple(id, new FetchKey(fetcherId, fetchKey, fetchRangeStart, fetchRangeEnd),

Reply via email to