This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4581-metadata-list-serialization-again in repository https://gitbox.apache.org/repos/asf/tika.git
commit d1f14ce69e5f680eade4df1d6a72f0710439118f Author: tallison <[email protected]> AuthorDate: Wed Dec 17 13:47:17 2025 -0500 TIKA-4581 -- further serialization on metadata list --- .../java/org/apache/tika/config/loader/ComponentRegistry.java | 11 ++++++----- .../tika/metadata/filter/AttachmentCountingListFilter.java | 11 +++++++++++ .../tika/serialization/TestParseContextSerialization.java | 10 ++++++++-- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentRegistry.java b/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentRegistry.java index 700d93761..1ab7014ed 100644 --- a/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentRegistry.java +++ b/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentRegistry.java @@ -49,7 +49,7 @@ import org.apache.tika.exception.TikaConfigException; public class ComponentRegistry { private final Map<String, ComponentInfo> components; - private final Map<Class<?>, String> classToName; // Reverse lookup + private final Map<String, String> classNameToFriendlyName; // Reverse lookup by class name private final ClassLoader classLoader; /** @@ -64,10 +64,10 @@ public class ComponentRegistry { throws TikaConfigException { this.classLoader = classLoader; this.components = loadComponents(indexFileName); - // Build reverse lookup - this.classToName = new HashMap<>(); + // Build reverse lookup by class name (not Class object) to handle classloader differences + this.classNameToFriendlyName = new HashMap<>(); for (Map.Entry<String, ComponentInfo> entry : components.entrySet()) { - classToName.put(entry.getValue().componentClass(), entry.getKey()); + classNameToFriendlyName.put(entry.getValue().componentClass().getName(), entry.getKey()); } } @@ -120,12 +120,13 @@ public class ComponentRegistry { /** * Looks up a component's friendly name by its class. + * Uses class name (not Class object) for lookup to handle classloader differences. * * @param clazz the component class * @return the friendly name, or null if not registered */ public String getFriendlyName(Class<?> clazz) { - return classToName.get(clazz); + return classNameToFriendlyName.get(clazz.getName()); } private Map<String, ComponentInfo> loadComponents(String indexFileName) diff --git a/tika-serialization/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java b/tika-serialization/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java index ffd73db11..e33390a7c 100644 --- a/tika-serialization/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java +++ b/tika-serialization/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java @@ -24,12 +24,23 @@ import org.apache.tika.metadata.Metadata; @TikaComponent public class AttachmentCountingListFilter extends MetadataFilter { + + private Integer count = 0; @Override public List<Metadata> filter(List<Metadata> metadataList) throws TikaException { if (metadataList == null || metadataList.isEmpty()) { return metadataList; } metadataList.get(0).set("X-TIKA:attachment_count", Integer.toString(metadataList.size() - 1)); + count += metadataList.size(); return metadataList; } + + public Integer getCount() { + return count; + } + + public void setCount(Integer count) { + this.count = count; + } } diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java b/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java index 5292ece26..4a300830c 100644 --- a/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java +++ b/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java @@ -24,6 +24,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.StringWriter; import java.io.Writer; import java.util.List; +import java.util.Set; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.JsonNode; @@ -38,6 +39,7 @@ import org.apache.tika.extractor.DocumentSelector; import org.apache.tika.extractor.SkipEmbeddedDocumentSelector; import org.apache.tika.metadata.filter.AttachmentCountingListFilter; import org.apache.tika.metadata.filter.CompositeMetadataFilter; +import org.apache.tika.metadata.filter.IncludeFieldMetadataFilter; import org.apache.tika.metadata.filter.MetadataFilter; import org.apache.tika.metadata.filter.MockUpperCaseFilter; import org.apache.tika.parser.ParseContext; @@ -327,7 +329,9 @@ public class TestParseContextSerialization { @Test public void testMetadataListPOJO() throws Exception { - CompositeMetadataFilter metadataFilter = new CompositeMetadataFilter(List.of(new AttachmentCountingListFilter(), new MockUpperCaseFilter())); + CompositeMetadataFilter metadataFilter = + new CompositeMetadataFilter(List.of(new MockUpperCaseFilter(), new AttachmentCountingListFilter(), + new IncludeFieldMetadataFilter(Set.of("blah", "blah2")))); ParseContext parseContext = new ParseContext(); parseContext.set(MetadataFilter.class, metadataFilter); @@ -340,7 +344,9 @@ public class TestParseContextSerialization { assertNotNull(resolvedFilter, "MetadataFilter should be resolved"); assertEquals(CompositeMetadataFilter.class, resolvedFilter.getClass()); CompositeMetadataFilter deserFilter = (CompositeMetadataFilter) resolvedFilter; - assertEquals(AttachmentCountingListFilter.class, deserFilter.getFilters().get(0).getClass()); + assertEquals(MockUpperCaseFilter.class, deserFilter.getFilters().get(0).getClass()); + assertEquals(AttachmentCountingListFilter.class, deserFilter.getFilters().get(1).getClass()); + assertEquals(IncludeFieldMetadataFilter.class, deserFilter.getFilters().get(2).getClass()); } @Test
