This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new ec02aebb37 TIKA-4641 -- step 2: refactor serialization, further. add 
docs (#2567)
ec02aebb37 is described below

commit ec02aebb37157649457cd56524fea8ff74effcf1
Author: Tim Allison <[email protected]>
AuthorDate: Sun Feb 1 10:37:56 2026 -0500

    TIKA-4641 -- step 2: refactor serialization, further. add docs (#2567)
---
 docs/modules/ROOT/nav.adoc                         |   2 +
 docs/modules/ROOT/pages/developers/index.adoc      |  30 ++
 .../ROOT/pages/developers/serialization.adoc       | 340 +++++++++++++++++++++
 .../tika/annotation/TikaComponentProcessor.java    |  53 +---
 .../tika/serialization/ParseContextUtils.java      |  19 +-
 .../org/apache/tika/serialization/TikaModule.java  |  22 +-
 .../serdes/ParseContextDeserializer.java           |  28 +-
 7 files changed, 445 insertions(+), 49 deletions(-)

diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc
index 3d63a73145..89ea2c0c8c 100644
--- a/docs/modules/ROOT/nav.adoc
+++ b/docs/modules/ROOT/nav.adoc
@@ -36,6 +36,8 @@
 ** xref:advanced/setting-limits.adoc[Setting Limits]
 ** xref:advanced/spooling.adoc[Spooling]
 ** xref:advanced/embedded-documents.adoc[Embedded Document Metadata]
+* xref:developers/index.adoc[Developers]
+** xref:developers/serialization.adoc[Serialization and Configuration]
 * xref:faq.adoc[FAQ]
 * xref:security.adoc[Security]
 * xref:roadmap.adoc[Roadmap]
diff --git a/docs/modules/ROOT/pages/developers/index.adoc 
b/docs/modules/ROOT/pages/developers/index.adoc
new file mode 100644
index 0000000000..08e56a7065
--- /dev/null
+++ b/docs/modules/ROOT/pages/developers/index.adoc
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+= Developer Guide
+
+This section provides documentation for developers who want to extend Tika
+with custom parsers, detectors, and other components.
+
+== Topics
+
+* xref:developers/serialization.adoc[Serialization and Configuration] - JSON 
configuration,
+  @TikaComponent annotation, and creating custom components
+
+== Coming Soon
+
+* Creating Custom Parsers
+* Creating Custom Detectors
+* Plugin Development with PF4J
diff --git a/docs/modules/ROOT/pages/developers/serialization.adoc 
b/docs/modules/ROOT/pages/developers/serialization.adoc
new file mode 100644
index 0000000000..6ec426b061
--- /dev/null
+++ b/docs/modules/ROOT/pages/developers/serialization.adoc
@@ -0,0 +1,340 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+= Serialization and Configuration
+
+Tika 4.x uses JSON-based configuration and serialization throughout the system.
+This document explains how the serialization system works and how to create
+components that integrate with it.
+
+== Overview
+
+Tika's serialization system provides:
+
+* **JSON Configuration**: Configure Tika components using JSON files
+* **Friendly Names**: Reference components by name (e.g., `pdf-parser`) 
instead of class names
+* **ParseContext Serialization**: Send per-request configuration via 
`FetchEmitTuple`
+* **Security**: Only registered components can be instantiated from JSON
+
+The system is built on Jackson with custom serializers/deserializers in the
+`tika-serialization` module.
+
+== JSON Configuration Format
+
+Tika uses a compact format for component configuration:
+
+[source,json]
+----
+{
+  "auto-detect-parser": {
+    "throwOnZeroBytes": false
+  },
+  "parse-context": {
+    "commons-digester-factory": {
+      "digests": [
+        { "algorithm": "MD5" },
+        { "algorithm": "SHA256" }
+      ]
+    }
+  }
+}
+----
+
+Components can be specified as:
+
+* **String**: `"pdf-parser"` - creates instance with defaults
+* **Object**: `{"pdf-parser": {"ocrStrategy": "AUTO"}}` - creates configured 
instance
+
+== The @TikaComponent Annotation
+
+The `@TikaComponent` annotation is required for any class that should be
+configurable via JSON. It serves multiple purposes:
+
+1. **Registration**: Registers the class with a friendly name
+2. **Index Generation**: Creates lookup files for name-to-class resolution
+3. **SPI Registration**: Optionally registers for Java ServiceLoader
+4. **Security**: Acts as an allowlist for deserialization
+
+=== Basic Usage
+
+[source,java]
+----
+@TikaComponent
+public class MyCustomParser implements Parser {
+    // Parser implementation
+}
+----
+
+This automatically:
+
+* Generates friendly name `my-custom-parser` from the class name
+* Adds to `META-INF/tika/parsers.idx` for name lookup
+* Adds to `META-INF/services/org.apache.tika.parser.Parser` for SPI
+
+=== Annotation Attributes
+
+[cols="1,1,3"]
+|===
+| Attribute | Default | Description
+
+| `name`
+| (auto-generated)
+| Custom friendly name instead of deriving from class name
+
+| `spi`
+| `true`
+| Whether to register in `META-INF/services/` for ServiceLoader
+
+| `contextKey`
+| (auto-detected)
+| Class to use as ParseContext key (rarely needed)
+
+| `defaultFor`
+| (none)
+| Marks as default implementation for an interface
+|===
+
+=== Example with Attributes
+
+[source,java]
+----
+@TikaComponent(name = "my-parser", spi = false)
+public class MyInternalParser implements Parser {
+    // Not auto-discovered via SPI, but configurable via JSON
+}
+----
+
+== Context Key Detection
+
+When storing components in `ParseContext`, Tika needs to know which class
+to use as the lookup key. For example, `CommonsDigesterFactory` should be
+retrievable via `parseContext.get(DigesterFactory.class)`.
+
+=== Automatic Detection
+
+Tika automatically detects the context key by checking if your class implements
+one of these known interfaces:
+
+* `Parser`, `Detector`, `EncodingDetector`
+* `MetadataFilter`, `Translator`, `Renderer`
+* `DigesterFactory`, `ContentHandlerFactory`, `ContentHandlerDecoratorFactory`
+* `EmbeddedDocumentExtractorFactory`, `MetadataWriteLimiterFactory`, `UnpackSelector`
+
+[source,java]
+----
+@TikaComponent
+public class CommonsDigesterFactory implements DigesterFactory {
+    // Context key automatically detected as DigesterFactory.class
+}
+----
+
+=== Explicit Context Key
+
+For interfaces not in the auto-detection list, specify explicitly:
+
+[source,java]
+----
+@TikaComponent(contextKey = DocumentSelector.class)
+public class SkipEmbeddedDocumentSelector implements DocumentSelector { }
+----
+
+== Service Interface Categories
+
+=== First-Class Service Interfaces
+
+These are loaded via SPI and have dedicated index files:
+
+[cols="1,1"]
+|===
+| Interface | Index File
+
+| `Parser` | `parsers.idx`
+| `Detector` | `detectors.idx`
+| `EncodingDetector` | `encoding-detectors.idx`
+| `LanguageDetector` | `language-detectors.idx`
+| `Translator` | `translators.idx`
+| `Renderer` | `renderers.idx`
+| `MetadataFilter` | `metadata-filters.idx`
+|===
+
+=== ParseContext Components
+
+Components not implementing first-class interfaces go to `parse-context.idx`:
+
+* `DigesterFactory` - Digest/checksum calculation
+* `ContentHandlerFactory` - SAX content handler creation
+* `EmbeddedDocumentExtractorFactory` - Embedded document handling
+* `MetadataWriteLimiterFactory` - Metadata write limiting
+
+== Self-Configuring Components
+
+Components implementing `SelfConfiguring` handle their own configuration
+at runtime rather than during initial loading:
+
+[source,java]
+----
+@TikaComponent
+public class PDFParser extends AbstractParser implements SelfConfiguring {
+
+    private PDFParserConfig defaultConfig = new PDFParserConfig();
+
+    @Override
+    public void configure(ParseContext parseContext) {
+        PDFParserConfig config = ParseContextConfig.getConfig(
+            parseContext, "pdf-parser", PDFParserConfig.class, defaultConfig);
+        // Use config...
+    }
+}
+----
+
+Benefits:
+
+* Per-request configuration via `ParseContext`
+* Lazy loading - config only parsed when needed
+* Merging with defaults handled automatically
+
+== ParseContext Serialization
+
+`ParseContext` can be serialized to JSON for transmission (e.g., in 
`FetchEmitTuple`):
+
+[source,json]
+----
+{
+  "parseContext": {
+    "pdf-parser": {
+      "ocrStrategy": "AUTO",
+      "extractInlineImages": true
+    },
+    "commons-digester-factory": {
+      "digests": [{"algorithm": "SHA256"}]
+    }
+  }
+}
+----
+
+=== Typed Section
+
+For components that need immediate deserialization (not lazy loading):
+
+[source,json]
+----
+{
+  "parseContext": {
+    "typed": {
+      "handler-config": {
+        "type": "XML",
+        "writeLimit": 100000
+      }
+    }
+  }
+}
+----
+
+== Security Model
+
+The serialization system implements a security allowlist:
+
+1. **@TikaComponent Required**: Only annotated classes are registered
+2. **Registry Lookup**: Deserialization only instantiates registered classes
+3. **No Arbitrary Classes**: Unknown class names cause errors, not 
instantiation
+
+This prevents attacks where malicious JSON specifies dangerous classes
+for instantiation.
+
+[source,json]
+----
+// This will FAIL - class not registered
+{
+  "parse-context": {
+    "java.lang.Runtime": {}  // Error: Unknown component
+  }
+}
+----
+
+== Creating a Custom Component
+
+Complete example of a custom metadata filter:
+
+[source,java]
+----
+package com.example.tika;
+
+import org.apache.tika.config.TikaComponent;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.filter.MetadataFilter;
+
+@TikaComponent
+public class UpperCaseFilter implements MetadataFilter {
+
+    private String fieldName = "title";
+
+    public void setFieldName(String fieldName) {
+        this.fieldName = fieldName;
+    }
+
+    public String getFieldName() {
+        return fieldName;
+    }
+
+    @Override
+    public void filter(Metadata metadata) throws TikaException {
+        String value = metadata.get(fieldName);
+        if (value != null) {
+            metadata.set(fieldName, value.toUpperCase());
+        }
+    }
+}
+----
+
+Configure in JSON:
+
+[source,json]
+----
+{
+  "metadata-filters": [
+    {"upper-case-filter": {"fieldName": "dc:title"}}
+  ]
+}
+----
+
+Or with defaults:
+
+[source,json]
+----
+{
+  "metadata-filters": ["upper-case-filter"]
+}
+----
+
+== Troubleshooting
+
+=== "Unknown component name" Error
+
+* Ensure class has `@TikaComponent` annotation
+* Verify annotation processing ran during compilation
+* Check that `META-INF/tika/*.idx` file exists in JAR
+
+=== Component Not Found in ParseContext
+
+* Verify you're using the correct interface type for lookup
+* Check if explicit `contextKey` is needed
+* For self-configuring components, ensure `configure()` was called
+
+=== SPI Not Loading Component
+
+* Check that `spi = true` (the default)
+* Verify `META-INF/services/` file exists
+* Ensure JAR is on classpath
diff --git 
a/tika-annotation-processor/src/main/java/org/apache/tika/annotation/TikaComponentProcessor.java
 
b/tika-annotation-processor/src/main/java/org/apache/tika/annotation/TikaComponentProcessor.java
index b00be101e3..132e022dfa 100644
--- 
a/tika-annotation-processor/src/main/java/org/apache/tika/annotation/TikaComponentProcessor.java
+++ 
b/tika-annotation-processor/src/main/java/org/apache/tika/annotation/TikaComponentProcessor.java
@@ -65,9 +65,9 @@ public class TikaComponentProcessor extends AbstractProcessor 
{
      * Known Tika service interfaces for SPI generation.
      * Only classes implementing these interfaces will have SPI files 
generated.
      * <p>
-     * Note: DigesterFactory and ContentHandlerFactory are NOT in this map 
because
-     * they are parse-context components, not top-level service interfaces.
-     * Their implementations go to parse-context.idx instead.
+     * Components that don't implement any of these interfaces (e.g., 
DigesterFactory,
+     * ContentHandlerFactory implementations) go to parse-context.idx instead.
+     * These should specify their contextKey explicitly via 
@TikaComponent(contextKey=...).
      */
     private static final Map<String, String> SERVICE_INTERFACES = new 
LinkedHashMap<>();
 
@@ -82,18 +82,6 @@ public class TikaComponentProcessor extends 
AbstractProcessor {
         
SERVICE_INTERFACES.put("org.apache.tika.metadata.filter.MetadataFilter", 
"metadata-filters");
     }
 
-    /**
-     * Interfaces whose implementations should go to parse-context.idx.
-     * These are factory interfaces used via ParseContext, not loaded via SPI.
-     */
-    private static final Set<String> PARSE_CONTEXT_INTERFACES = Set.of(
-            "org.apache.tika.digest.DigesterFactory",
-            "org.apache.tika.sax.ContentHandlerFactory",
-            "org.apache.tika.metadata.writefilter.MetadataWriteLimiterFactory",
-            "org.apache.tika.extractor.EmbeddedDocumentExtractorFactory",
-            "org.apache.tika.extractor.UnpackSelector"
-    );
-
     private Messager messager;
     private Filer filer;
 
@@ -155,13 +143,8 @@ public class TikaComponentProcessor extends 
AbstractProcessor {
                 " (SPI: " + includeSpi + ", contextKey: " + contextKey +
                 ", defaultFor: " + defaultFor + ")");
 
-        // Find all implemented service interfaces (both SPI and parse-context)
+        // Find all implemented service interfaces
         List<String> serviceInterfaces = findServiceInterfaces(element);
-        List<String> parseContextInterfaces = 
findParseContextInterfaces(element);
-
-        // Combine all interfaces for context key detection
-        List<String> allInterfaces = new ArrayList<>(serviceInterfaces);
-        allInterfaces.addAll(parseContextInterfaces);
 
         // Build the index entry value (className or className:key=X[:default])
         // Auto-detect contextKey from service interface if not explicitly 
specified
@@ -169,16 +152,16 @@ public class TikaComponentProcessor extends 
AbstractProcessor {
         if (contextKey != null) {
             // Explicit contextKey specified
             indexValue = className + ":key=" + contextKey;
-        } else if (allInterfaces.size() == 1) {
-            // Auto-detect contextKey from single interface
-            indexValue = className + ":key=" + allInterfaces.get(0);
+        } else if (serviceInterfaces.size() == 1) {
+            // Auto-detect contextKey from single service interface
+            indexValue = className + ":key=" + serviceInterfaces.get(0);
             messager.printMessage(Diagnostic.Kind.NOTE,
-                    "Auto-detected contextKey=" + allInterfaces.get(0) + " for 
" + className);
-        } else if (allInterfaces.size() > 1) {
+                    "Auto-detected contextKey=" + serviceInterfaces.get(0) + " 
for " + className);
+        } else if (serviceInterfaces.size() > 1) {
             // Multiple interfaces - warn that contextKey should be specified
             messager.printMessage(Diagnostic.Kind.WARNING,
                     "Class " + className + " implements multiple interfaces: " 
+
-                    allInterfaces + ". Consider specifying 
@TikaComponent(contextKey=...) " +
+                    serviceInterfaces + ". Consider specifying 
@TikaComponent(contextKey=...) " +
                     "to select which one to use as ParseContext key.", 
element);
         }
 
@@ -187,9 +170,9 @@ public class TikaComponentProcessor extends 
AbstractProcessor {
             indexValue = indexValue + ":default";
         }
 
-        // Check if this is a parse-context component (implements a 
parse-context interface
-        // or doesn't implement any known service interface)
-        if (!parseContextInterfaces.isEmpty() || serviceInterfaces.isEmpty()) {
+        // Components that don't implement any known service interface go to 
parse-context.idx
+        // These should specify their contextKey explicitly via 
@TikaComponent(contextKey=...)
+        if (serviceInterfaces.isEmpty()) {
             // Put in parse-context.idx
             messager.printMessage(Diagnostic.Kind.NOTE,
                     "Class " + className + " is a parse-context component, " +
@@ -292,16 +275,6 @@ public class TikaComponentProcessor extends 
AbstractProcessor {
         return result;
     }
 
-    /**
-     * Finds all parse-context interfaces implemented by the given type 
element.
-     */
-    private List<String> findParseContextInterfaces(TypeElement element) {
-        List<String> result = new ArrayList<>();
-        Set<String> visited = new LinkedHashSet<>();
-        findInterfacesRecursive(element.asType(), result, visited, 
PARSE_CONTEXT_INTERFACES);
-        return result;
-    }
-
     /**
      * Recursively searches for interfaces in the type hierarchy.
      *
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextUtils.java
 
b/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextUtils.java
index 30826befa5..70104c9f26 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextUtils.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextUtils.java
@@ -159,18 +159,29 @@ public class ParseContextUtils {
     /**
      * Determines the ParseContext key for a component.
      * <p>
-     * The contextKey is auto-detected by the annotation processor from the 
service
-     * interface implemented by the component. If not detected (e.g., 
component implements
-     * multiple interfaces), falls back to the component class.
+     * Resolution order:
+     * <ol>
+     *   <li>Explicit contextKey from .idx file (via @TikaComponent 
annotation)</li>
+     *   <li>Auto-detect from implemented interfaces (using 
TikaModule.COMPACT_FORMAT_INTERFACES)</li>
+     *   <li>Fall back to the component class itself</li>
+     * </ol>
+     * <p>
+     * Security note: This only determines the context key - it does NOT 
affect which
+     * classes can be instantiated. Classes must still be registered via 
@TikaComponent.
      *
      * @param info the component info
      * @return the class to use as ParseContext key
      */
     private static Class<?> determineContextKey(ComponentInfo info) {
-        // Use contextKey from .idx file (auto-detected or explicit from 
@TikaComponent)
+        // Use explicit contextKey from .idx file if specified
         if (info.contextKey() != null) {
             return info.contextKey();
         }
+        // Auto-detect from implemented interfaces at runtime
+        Class<?> contextKeyInterface = 
TikaModule.findContextKeyInterface(info.componentClass());
+        if (contextKeyInterface != null) {
+            return contextKeyInterface;
+        }
         // Fall back to the component class itself
         return info.componentClass();
     }
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaModule.java
 
b/tika-serialization/src/main/java/org/apache/tika/serialization/TikaModule.java
index 2fc6406b53..8277632830 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaModule.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/serialization/TikaModule.java
@@ -52,6 +52,7 @@ import org.apache.tika.detect.EncodingDetector;
 import org.apache.tika.digest.DigesterFactory;
 import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.extractor.EmbeddedDocumentExtractorFactory;
+import org.apache.tika.extractor.UnpackSelector;
 import org.apache.tika.language.translate.Translator;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.filter.MetadataFilter;
@@ -116,6 +117,7 @@ public class TikaModule extends SimpleModule {
         COMPACT_FORMAT_INTERFACES.add(MetadataWriteLimiterFactory.class);
         COMPACT_FORMAT_INTERFACES.add(ContentHandlerDecoratorFactory.class);
         COMPACT_FORMAT_INTERFACES.add(ContentHandlerFactory.class);
+        COMPACT_FORMAT_INTERFACES.add(UnpackSelector.class);
     }
 
     /**
@@ -123,12 +125,28 @@ public class TikaModule extends SimpleModule {
      * Returns true if the type implements any of the registered compact 
format interfaces.
      */
     private static boolean usesCompactFormat(Class<?> type) {
+        return findContextKeyInterface(type) != null;
+    }
+
+    /**
+     * Finds the appropriate context key interface for a given type.
+     * This is used to determine which interface should be used as the 
ParseContext key
+     * when storing instances of this type.
+     * <p>
+     * Security note: This method only helps determine the context key - it 
does NOT
+     * affect which classes can be instantiated. Classes must still be 
registered
+     * via @TikaComponent to be deserializable.
+     *
+     * @param type the type to find the context key for
+     * @return the interface to use as context key, or null if none found
+     */
+    public static Class<?> findContextKeyInterface(Class<?> type) {
         for (Class<?> iface : COMPACT_FORMAT_INTERFACES) {
             if (iface.isAssignableFrom(type)) {
-                return true;
+                return iface;
             }
         }
-        return false;
+        return null;
     }
 
     public TikaModule() {
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/serialization/serdes/ParseContextDeserializer.java
 
b/tika-serialization/src/main/java/org/apache/tika/serialization/serdes/ParseContextDeserializer.java
index 639822cee9..bacbb40741 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/serialization/serdes/ParseContextDeserializer.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/serialization/serdes/ParseContextDeserializer.java
@@ -36,6 +36,7 @@ import org.slf4j.LoggerFactory;
 import org.apache.tika.config.loader.ComponentInfo;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.serialization.ComponentNameResolver;
+import org.apache.tika.serialization.TikaModule;
 
 /**
  * Deserializes ParseContext from JSON.
@@ -127,6 +128,21 @@ public class ParseContextDeserializer extends 
JsonDeserializer<ParseContext> {
         return parseContext;
     }
 
+    /**
+     * Determines the context key for a component.
+     * Uses explicit contextKey if available, otherwise auto-detects from 
interfaces.
+     */
+    private static Class<?> determineContextKey(ComponentInfo info) {
+        if (info.contextKey() != null) {
+            return info.contextKey();
+        }
+        Class<?> interfaceKey = 
TikaModule.findContextKeyInterface(info.componentClass());
+        if (interfaceKey != null) {
+            return interfaceKey;
+        }
+        return info.componentClass();
+    }
+
     /**
      * Checks if a JSON config entry would create a duplicate context key.
      * <p>
@@ -147,7 +163,7 @@ public class ParseContextDeserializer extends 
JsonDeserializer<ParseContext> {
         }
 
         ComponentInfo info = infoOpt.get();
-        Class<?> contextKey = info.contextKey() != null ? info.contextKey() : 
info.componentClass();
+        Class<?> contextKey = determineContextKey(info);
 
         String existingName = seenContextKeys.get(contextKey);
         if (existingName != null) {
@@ -205,8 +221,14 @@ public class ParseContextDeserializer extends 
JsonDeserializer<ParseContext> {
                 }
             }
 
-            // Use contextKey if available, otherwise use the config class 
itself
-            Class<?> parseContextKey = (contextKeyClass != null) ? 
contextKeyClass : configClass;
+            // Determine context key: explicit > interface detection > class 
itself
+            Class<?> parseContextKey = contextKeyClass;
+            if (parseContextKey == null) {
+                parseContextKey = 
TikaModule.findContextKeyInterface(configClass);
+            }
+            if (parseContextKey == null) {
+                parseContextKey = configClass;
+            }
 
             // Check for duplicate context key
             String existingName = seenContextKeys.get(parseContextKey);

Reply via email to