This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new c0266e145b serialization tweaks (#2603)
c0266e145b is described below

commit c0266e145b09e1ce6db5a58edca0c300515bb474
Author: Tim Allison <[email protected]>
AuthorDate: Wed Feb 11 12:56:46 2026 -0500

    serialization tweaks (#2603)
---
 .../tika/parser/AutoDetectParserConfigTest.java    | 13 ++++++----
 .../resources/configs/tika-config-no-names.json    |  6 ++---
 ...a-config-upcasing-custom-handler-decorator.json |  2 +-
 .../resources/configs/tika-config-with-names.json  |  6 ++---
 .../org/apache/tika/pipes/core/PassbackFilter.java | 18 ++++++++++++--
 .../resources/configs/tika-config-truncate.json    |  4 +++-
 .../serdes/ParseContextDeserializer.java           |  9 +++++++
 .../TestParseContextSerialization.java             | 28 ++++++++++++++++++++++
 8 files changed, 71 insertions(+), 15 deletions(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/AutoDetectParserConfigTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/AutoDetectParserConfigTest.java
index 1e21fbef25..f8d86905ce 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/AutoDetectParserConfigTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/AutoDetectParserConfigTest.java
@@ -36,13 +36,16 @@ public class AutoDetectParserConfigTest extends TikaTest {
 
     @Test
     public void testConfiguringEmbeddedDocExtractor() throws Exception {
-
-        Parser p = TikaLoaderHelper.getLoader("tika-config-no-names.json").loadAutoDetectParser();
-        String xml = getXML("testEmbedded.zip", p).xml;
+        TikaLoader noNamesLoader = TikaLoaderHelper.getLoader("tika-config-no-names.json");
+        Parser p = noNamesLoader.loadAutoDetectParser();
+        ParseContext noNamesContext = noNamesLoader.loadParseContext();
+        String xml = getXML("testPPT_EmbeddedPDF.pptx", p, new Metadata(), noNamesContext).xml;
         assertNotContained("<h1>image3.jpg</h1>", xml);
 
-        p = TikaLoaderHelper.getLoader("tika-config-with-names.json").loadAutoDetectParser();
-        xml = getXML("testPPT_EmbeddedPDF.pptx", p).xml;
+        TikaLoader withNamesLoader = TikaLoaderHelper.getLoader("tika-config-with-names.json");
+        p = withNamesLoader.loadAutoDetectParser();
+        ParseContext withNamesContext = withNamesLoader.loadParseContext();
+        xml = getXML("testPPT_EmbeddedPDF.pptx", p, new Metadata(), withNamesContext).xml;
         assertContains("<h1>image3.jpg</h1>", xml);
     }
 
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json
index feaa6f4494..58af2892bb 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json
@@ -1,7 +1,7 @@
 {
   "parse-context": {
-    "standard-extractor-factory": {
-        "writeFileNameToContent": false
-      }
+    "sax-output-config": {
+      "writeFileNameToContent": false
     }
+  }
 }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json
index 66f81f80a7..99d0ed132c 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json
@@ -5,7 +5,7 @@
   },
   "parse-context": {
     "commons-digester-factory": {},
-    "standard-extractor-factory": {
+    "sax-output-config": {
       "writeFileNameToContent": true
     }
   }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json
index 721ee36e35..c935a6d1ce 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json
@@ -1,7 +1,7 @@
 {
   "parse-context": {
-    "standard-extractor-factory": {
-        "writeFileNameToContent": true
-      }
+    "sax-output-config": {
+      "writeFileNameToContent": true
     }
+  }
 }
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PassbackFilter.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PassbackFilter.java
index 5fa033929e..8d0a7968ff 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PassbackFilter.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PassbackFilter.java
@@ -17,12 +17,26 @@
 package org.apache.tika.pipes.core;
 
 import java.io.Serializable;
+import java.util.List;
 
-import org.apache.tika.metadata.filter.MetadataFilter;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
 
 /**
  * Filter/Select some of the emitted output and pass it back to the client parser.
+ * <p>
+ * This is intentionally NOT a MetadataFilter. MetadataFilter is applied before emission
+ * to transform metadata (e.g., remove fields, compute digests). PassbackFilter is applied
+ * after emission to select metadata to pass back from the forked PipesServer to the parent.
+ * They share a method signature but serve entirely different purposes.
  */
-public abstract class PassbackFilter extends MetadataFilter implements Serializable {
+public abstract class PassbackFilter implements Serializable {
 
+    /**
+     * Filters the metadata list in place, selecting which data to pass back to the client.
+     *
+     * @param metadataList the list to filter (must be mutable)
+     * @throws TikaException if filtering fails
+     */
+    public abstract void filter(List<Metadata> metadataList) throws TikaException;
 }
diff --git a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json
index 88b4cc1978..b6303b0cfc 100644
--- a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json
+++ b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json
@@ -48,8 +48,10 @@
   },
   "parse-context": {
     "mock-digester-factory": {},
+    "sax-output-config": {
+      "writeFileNameToContent": false
+    },
     "runpack-extractor-factory": {
-      "writeFileNameToContent": false,
       "maxEmbeddedBytesForExtraction": 10
     }
   },
diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/serdes/ParseContextDeserializer.java b/tika-serialization/src/main/java/org/apache/tika/serialization/serdes/ParseContextDeserializer.java
index bacbb40741..c8141c47d9 100644
--- a/tika-serialization/src/main/java/org/apache/tika/serialization/serdes/ParseContextDeserializer.java
+++ b/tika-serialization/src/main/java/org/apache/tika/serialization/serdes/ParseContextDeserializer.java
@@ -163,6 +163,15 @@ public class ParseContextDeserializer extends JsonDeserializer<ParseContext> {
         }
 
         ComponentInfo info = infoOpt.get();
+
+        // Self-configuring components (e.g., parsers) stay as JSON configs and are
+        // accessed by string key at runtime via ParseContextConfig.getConfig().
+        // They never get resolved to typed objects in the context map, so multiple
+        // self-configuring components with the same context key are not duplicates.
+        if (info.selfConfiguring()) {
+            return;
+        }
+
         Class<?> contextKey = determineContextKey(info);
 
         String existingName = seenContextKeys.get(contextKey);
diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java b/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java
index db9b75ce9c..960f1a2e7a 100644
--- a/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java
+++ b/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java
@@ -464,6 +464,34 @@ public class TestParseContextSerialization {
                 "Exception should mention the conflicting key: " + ex.getMessage());
     }
 
+    /**
+     * Test that multiple self-configuring components (e.g., parsers) with the same
+     * context key are allowed.  Self-configuring components stay as JSON configs and
+     * are accessed by string key at runtime, so they never conflict in the context map.
+     */
+    @Test
+    public void testSelfConfiguringComponentsAllowDuplicateContextKeys() throws Exception {
+        // Both parsers resolve to Parser.class as context key, but Parser extends
+        // SelfConfiguring, so they should be allowed to coexist.
+        String json = """
+                {
+                  "configurable-test-parser": {
+                    "maxItems": 5
+                  },
+                  "minimal-test-parser": {}
+                }
+                """;
+
+        ObjectMapper mapper = createMapper();
+        // Should NOT throw - self-configuring components skip duplicate detection
+        ParseContext deserialized = mapper.readValue(json, ParseContext.class);
+
+        assertTrue(deserialized.hasJsonConfig("configurable-test-parser"),
+                "configurable-test-parser should be stored as JSON config");
+        assertTrue(deserialized.hasJsonConfig("minimal-test-parser"),
+                "minimal-test-parser should be stored as JSON config");
+    }
+
     /**
      * Test that a single component per context key is allowed (no false positives).
      */

Reply via email to