This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch roundtrip-pojos-parse-context in repository https://gitbox.apache.org/repos/asf/tika.git
commit 69a50f7c85aa46cd8fd04f3f4d1ccf9513e65e26 Author: tallison <[email protected]> AuthorDate: Wed Dec 17 09:27:34 2025 -0500 Round trip pojos that exist in the registry in the ParseContext --- .../tika/serialization/TikaAbstractTypeMixins.java | 112 +++++++++++++++++++++ .../TestParseContextSerialization.java | 23 ++++- 2 files changed, 134 insertions(+), 1 deletion(-) diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaAbstractTypeMixins.java b/tika-serialization/src/main/java/org/apache/tika/serialization/TikaAbstractTypeMixins.java index 7c68042aa..2a11b0e76 100644 --- a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaAbstractTypeMixins.java +++ b/tika-serialization/src/main/java/org/apache/tika/serialization/TikaAbstractTypeMixins.java @@ -19,6 +19,7 @@ package org.apache.tika.serialization; import java.io.IOException; import java.lang.reflect.Modifier; +import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.databind.BeanDescription; import com.fasterxml.jackson.databind.DeserializationConfig; @@ -26,9 +27,13 @@ import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.JsonDeserializer; import com.fasterxml.jackson.databind.JsonMappingException; import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.JsonSerializer; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationConfig; +import com.fasterxml.jackson.databind.SerializerProvider; import com.fasterxml.jackson.databind.deser.BeanDeserializerModifier; import com.fasterxml.jackson.databind.module.SimpleModule; +import com.fasterxml.jackson.databind.ser.BeanSerializerModifier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -64,12 +69,15 @@ public final class TikaAbstractTypeMixins { /** * Registers the abstract type handling module on the given ObjectMapper. + * This includes both serializers (to add type wrappers) and deserializers + * (to resolve type wrappers). * * @param mapper the ObjectMapper to configure */ public static void registerDeserializers(ObjectMapper mapper) { SimpleModule module = new SimpleModule("TikaAbstractTypes"); module.setDeserializerModifier(new AbstractTypeDeserializerModifier(mapper)); + module.setSerializerModifier(new AbstractTypeSerializerModifier(mapper)); mapper.registerModule(module); } @@ -198,4 +206,108 @@ public final class TikaAbstractTypeMixins { } } } + + /** + * Modifier that intercepts serialization of values declared as abstract types + * and wraps them with type information. + */ + private static class AbstractTypeSerializerModifier extends BeanSerializerModifier { + + private final ObjectMapper mapper; + + AbstractTypeSerializerModifier(ObjectMapper mapper) { + this.mapper = mapper; + } + + @Override + public JsonSerializer<?> modifySerializer(SerializationConfig config, + BeanDescription beanDesc, + JsonSerializer<?> serializer) { + Class<?> beanClass = beanDesc.getBeanClass(); + + // Skip types that shouldn't use wrapper format + if (shouldSkip(beanClass)) { + return serializer; + } + + // For concrete Tika types, wrap with type name if they extend/implement an abstract type + // This ensures polymorphic types in lists get properly wrapped + if (isTikaPolymorphicType(beanClass)) { + LOG.debug("Registering wrapper serializer for polymorphic type: {}", + beanClass.getName()); + return new WrapperObjectSerializer<>(serializer, mapper); + } + + return serializer; + } + + private boolean shouldSkip(Class<?> beanClass) { + // Skip primitives and their wrappers + if (beanClass.isPrimitive()) { + return true; + } + + // Skip common JDK types + String name = beanClass.getName(); + if (name.startsWith("java.") || name.startsWith("javax.")) { + return true; + } + + // Skip arrays + if (beanClass.isArray()) { + return true; + } + + // Skip abstract types (we want to wrap concrete implementations, not the abstract types themselves) + if (beanClass.isInterface() || Modifier.isAbstract(beanClass.getModifiers())) { + return true; + } + + return false; + } + + /** + * Checks if this class should be wrapped with type information during serialization. + * Only types registered in the component registry are wrapped - this excludes + * container types (like CompositeMetadataFilter) that are not in the registry. + */ + private boolean isTikaPolymorphicType(Class<?> beanClass) { + // Only wrap types that have a registered friendly name in the registry + return ComponentNameResolver.getFriendlyName(beanClass) != null; + } + } + + /** + * Serializer that wraps objects with their type name. + * Output format: {"type-name": {...properties...}} + */ + private static class WrapperObjectSerializer<T> extends JsonSerializer<T> { + + private final JsonSerializer<T> delegate; + private final ObjectMapper mapper; + + @SuppressWarnings("unchecked") + WrapperObjectSerializer(JsonSerializer<?> delegate, ObjectMapper mapper) { + this.delegate = (JsonSerializer<T>) delegate; + this.mapper = mapper; + } + + @Override + public void serialize(T value, JsonGenerator gen, SerializerProvider serializers) + throws IOException { + if (value == null) { + gen.writeNull(); + return; + } + + // Get the friendly name (guaranteed to exist since we only wrap registered types) + String typeName = ComponentNameResolver.getFriendlyName(value.getClass()); + + // Write wrapper: {"type-name": {...}} + gen.writeStartObject(); + gen.writeFieldName(typeName); + delegate.serialize(value, gen, serializers); + gen.writeEndObject(); + } + } } diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java b/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java index 3b06f4079..5292ece26 100644 --- a/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java +++ b/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java @@ -23,6 +23,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.StringWriter; import java.io.Writer; +import java.util.List; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.JsonNode; @@ -38,6 +39,7 @@ import org.apache.tika.extractor.SkipEmbeddedDocumentSelector; import org.apache.tika.metadata.filter.AttachmentCountingListFilter; import org.apache.tika.metadata.filter.CompositeMetadataFilter; import org.apache.tika.metadata.filter.MetadataFilter; +import org.apache.tika.metadata.filter.MockUpperCaseFilter; import org.apache.tika.parser.ParseContext; /** @@ -300,7 +302,7 @@ public class TestParseContextSerialization { } @Test - public void testMetadataList() throws Exception { + public void testMetadataListConfigContainer() throws Exception { ConfigContainer configContainer = new ConfigContainer(); configContainer.set("metadata-filters", """ [ @@ -322,6 +324,25 @@ public class TestParseContextSerialization { assertEquals(AttachmentCountingListFilter.class, deserFilter.getFilters().get(0).getClass()); } + + @Test + public void testMetadataListPOJO() throws Exception { + CompositeMetadataFilter metadataFilter = new CompositeMetadataFilter(List.of(new AttachmentCountingListFilter(), new MockUpperCaseFilter())); + + ParseContext parseContext = new ParseContext(); + parseContext.set(MetadataFilter.class, metadataFilter); + + ObjectMapper mapper = createMapper(); + String json = mapper.writeValueAsString(parseContext); + + ParseContext deser = mapper.readValue(json, ParseContext.class); + MetadataFilter resolvedFilter = deser.get(MetadataFilter.class); + assertNotNull(resolvedFilter, "MetadataFilter should be resolved"); + assertEquals(CompositeMetadataFilter.class, resolvedFilter.getClass()); + CompositeMetadataFilter deserFilter = (CompositeMetadataFilter) resolvedFilter; + assertEquals(AttachmentCountingListFilter.class, deserFilter.getFilters().get(0).getClass()); + } + @Test public void testContextKeyDeserialization() throws Exception { // Test that components with @TikaComponent(contextKey=...) are stored
