This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch roundtrip-pojos-parse-context
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 69a50f7c85aa46cd8fd04f3f4d1ccf9513e65e26
Author: tallison <[email protected]>
AuthorDate: Wed Dec 17 09:27:34 2025 -0500

    Round trip pojos that exist in the registry in the ParseContext
---
 .../tika/serialization/TikaAbstractTypeMixins.java | 112 +++++++++++++++++++++
 .../TestParseContextSerialization.java             |  23 ++++-
 2 files changed, 134 insertions(+), 1 deletion(-)

diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaAbstractTypeMixins.java b/tika-serialization/src/main/java/org/apache/tika/serialization/TikaAbstractTypeMixins.java
index 7c68042aa..2a11b0e76 100644
--- a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaAbstractTypeMixins.java
+++ b/tika-serialization/src/main/java/org/apache/tika/serialization/TikaAbstractTypeMixins.java
@@ -19,6 +19,7 @@ package org.apache.tika.serialization;
 import java.io.IOException;
 import java.lang.reflect.Modifier;
 
+import com.fasterxml.jackson.core.JsonGenerator;
 import com.fasterxml.jackson.core.JsonParser;
 import com.fasterxml.jackson.databind.BeanDescription;
 import com.fasterxml.jackson.databind.DeserializationConfig;
@@ -26,9 +27,13 @@ import com.fasterxml.jackson.databind.DeserializationContext;
 import com.fasterxml.jackson.databind.JsonDeserializer;
 import com.fasterxml.jackson.databind.JsonMappingException;
 import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.JsonSerializer;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationConfig;
+import com.fasterxml.jackson.databind.SerializerProvider;
 import com.fasterxml.jackson.databind.deser.BeanDeserializerModifier;
 import com.fasterxml.jackson.databind.module.SimpleModule;
+import com.fasterxml.jackson.databind.ser.BeanSerializerModifier;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -64,12 +69,15 @@ public final class TikaAbstractTypeMixins {
 
     /**
      * Registers the abstract type handling module on the given ObjectMapper.
+     * This includes both serializers (to add type wrappers) and deserializers
+     * (to resolve type wrappers).
+     * <p>
+     * Note that, despite the method name, a single {@code SimpleModule} named
+     * "TikaAbstractTypes" is registered carrying both a deserializer modifier
+     * and a serializer modifier; each modifier is handed the same mapper.
      *
      * @param mapper the ObjectMapper to configure
      */
     public static void registerDeserializers(ObjectMapper mapper) {
         SimpleModule module = new SimpleModule("TikaAbstractTypes");
         module.setDeserializerModifier(new 
AbstractTypeDeserializerModifier(mapper));
+        module.setSerializerModifier(new 
AbstractTypeSerializerModifier(mapper));
         mapper.registerModule(module);
     }
 
@@ -198,4 +206,108 @@ public final class TikaAbstractTypeMixins {
             }
         }
     }
+
+    /**
+     * Modifier that intercepts serialization of values declared as abstract 
types
+     * and wraps them with type information.
+     */
+    private static class AbstractTypeSerializerModifier extends 
BeanSerializerModifier {
+
+        private final ObjectMapper mapper;
+
+        AbstractTypeSerializerModifier(ObjectMapper mapper) {
+            this.mapper = mapper;
+        }
+
+        @Override
+        public JsonSerializer<?> modifySerializer(SerializationConfig config,
+                                                   BeanDescription beanDesc,
+                                                   JsonSerializer<?> 
serializer) {
+            Class<?> beanClass = beanDesc.getBeanClass();
+
+            // Skip types that shouldn't use wrapper format
+            if (shouldSkip(beanClass)) {
+                return serializer;
+            }
+
+            // For concrete Tika types, wrap with type name if they 
extend/implement an abstract type
+            // This ensures polymorphic types in lists get properly wrapped
+            if (isTikaPolymorphicType(beanClass)) {
+                LOG.debug("Registering wrapper serializer for polymorphic 
type: {}",
+                        beanClass.getName());
+                return new WrapperObjectSerializer<>(serializer, mapper);
+            }
+
+            return serializer;
+        }
+
+        private boolean shouldSkip(Class<?> beanClass) {
+            // Skip primitives and their wrappers
+            if (beanClass.isPrimitive()) {
+                return true;
+            }
+
+            // Skip common JDK types
+            String name = beanClass.getName();
+            if (name.startsWith("java.") || name.startsWith("javax.")) {
+                return true;
+            }
+
+            // Skip arrays
+            if (beanClass.isArray()) {
+                return true;
+            }
+
+            // Skip abstract types (we want to wrap concrete implementations, 
not the abstract types themselves)
+            if (beanClass.isInterface() || 
Modifier.isAbstract(beanClass.getModifiers())) {
+                return true;
+            }
+
+            return false;
+        }
+
+        /**
+         * Checks if this class should be wrapped with type information during 
serialization.
+         * Only types registered in the component registry are wrapped - this 
excludes
+         * container types (like CompositeMetadataFilter) that are not in the 
registry.
+         */
+        private boolean isTikaPolymorphicType(Class<?> beanClass) {
+            // Only wrap types that have a registered friendly name in the 
registry
+            return ComponentNameResolver.getFriendlyName(beanClass) != null;
+        }
+    }
+
+    /**
+     * Serializer that wraps objects with their type name.
+     * Output format: {"type-name": {...properties...}}
+     */
+    private static class WrapperObjectSerializer<T> extends JsonSerializer<T> {
+
+        private final JsonSerializer<T> delegate;
+        private final ObjectMapper mapper;
+
+        @SuppressWarnings("unchecked")
+        WrapperObjectSerializer(JsonSerializer<?> delegate, ObjectMapper 
mapper) {
+            this.delegate = (JsonSerializer<T>) delegate;
+            this.mapper = mapper;
+        }
+
+        @Override
+        public void serialize(T value, JsonGenerator gen, SerializerProvider 
serializers)
+                throws IOException {
+            if (value == null) {
+                gen.writeNull();
+                return;
+            }
+
+            // Get the friendly name (guaranteed to exist since we only wrap 
registered types)
+            String typeName = 
ComponentNameResolver.getFriendlyName(value.getClass());
+
+            // Write wrapper: {"type-name": {...}}
+            gen.writeStartObject();
+            gen.writeFieldName(typeName);
+            delegate.serialize(value, gen, serializers);
+            gen.writeEndObject();
+        }
+    }
 }
diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java b/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java
index 3b06f4079..5292ece26 100644
--- a/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java
+++ b/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java
@@ -23,6 +23,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.StringWriter;
 import java.io.Writer;
+import java.util.List;
 
 import com.fasterxml.jackson.core.JsonGenerator;
 import com.fasterxml.jackson.databind.JsonNode;
@@ -38,6 +39,7 @@ import org.apache.tika.extractor.SkipEmbeddedDocumentSelector;
 import org.apache.tika.metadata.filter.AttachmentCountingListFilter;
 import org.apache.tika.metadata.filter.CompositeMetadataFilter;
 import org.apache.tika.metadata.filter.MetadataFilter;
+import org.apache.tika.metadata.filter.MockUpperCaseFilter;
 import org.apache.tika.parser.ParseContext;
 
 /**
@@ -300,7 +302,7 @@ public class TestParseContextSerialization {
     }
 
     @Test
-    public void testMetadataList() throws Exception {
+    public void testMetadataListConfigContainer() throws Exception {
         ConfigContainer configContainer = new ConfigContainer();
         configContainer.set("metadata-filters", """
             [
@@ -322,6 +324,25 @@ public class TestParseContextSerialization {
         assertEquals(AttachmentCountingListFilter.class, 
deserFilter.getFilters().get(0).getClass());
     }
 
+
+    /**
+     * Round-trips a ParseContext whose MetadataFilter was built directly from
+     * objects (not from a ConfigContainer) and checks that the composite filter
+     * and each of its children come back with their concrete types.
+     */
+    @Test
+    public void testMetadataListPOJO() throws Exception {
+        CompositeMetadataFilter metadataFilter = new CompositeMetadataFilter(
+                List.of(new AttachmentCountingListFilter(), new MockUpperCaseFilter()));
+
+        ParseContext parseContext = new ParseContext();
+        parseContext.set(MetadataFilter.class, metadataFilter);
+
+        ObjectMapper mapper = createMapper();
+        String json = mapper.writeValueAsString(parseContext);
+
+        ParseContext deser = mapper.readValue(json, ParseContext.class);
+        MetadataFilter resolvedFilter = deser.get(MetadataFilter.class);
+        assertNotNull(resolvedFilter, "MetadataFilter should be resolved");
+        assertEquals(CompositeMetadataFilter.class, resolvedFilter.getClass());
+        CompositeMetadataFilter deserFilter = (CompositeMetadataFilter) resolvedFilter;
+        // Check every child, not just the first: a dropped or mistyped second
+        // element would otherwise go unnoticed.
+        assertEquals(2, deserFilter.getFilters().size());
+        assertEquals(AttachmentCountingListFilter.class,
+                deserFilter.getFilters().get(0).getClass());
+        assertEquals(MockUpperCaseFilter.class,
+                deserFilter.getFilters().get(1).getClass());
+    }
+
     @Test
     public void testContextKeyDeserialization() throws Exception {
         // Test that components with @TikaComponent(contextKey=...) are stored

Reply via email to