This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4545
in repository https://gitbox.apache.org/repos/asf/tika.git

commit dab213d05aa46d84f3a881458bbac969497284fe
Author: tallison <[email protected]>
AuthorDate: Tue Nov 25 11:17:31 2025 -0500

    TIKA-4545 -- baseline improvements to support integration
---
 tika-annotation-processor/pom.xml                  |   9 -
 .../tika/annotation/TikaComponentProcessor.java    |  68 ++-
 .../java/org/apache/tika/config/TikaComponent.java |   5 +-
 tika-core/pom.xml                                  |  21 +
 .../org/apache/tika/parser/AutoDetectParser.java   |  10 +
 .../org/apache/tika/parser/CompositeParser.java    |   4 +
 tika-serialization/pom.xml                         |   7 +
 .../apache/tika/config/loader/ConfigLoader.java    | 356 ++++++++++++
 .../tika/config/loader/KebabCaseConverter.java     |  72 +++
 .../apache/tika/config/loader/ParserLoader.java    |   2 +
 .../apache/tika/config/loader/TikaJsonConfig.java  |  12 +-
 .../org/apache/tika/config/loader/TikaLoader.java  |  37 ++
 .../tika/config/loader/ConfigLoaderTest.java       | 646 +++++++++++++++++++++
 .../test/resources/configs/test-config-loader.json |  32 +
 .../resources/configs/test-interface-no-type.json  |   6 +
 .../test/resources/configs/test-invalid-class.json |   3 +
 .../resources/configs/test-partial-config.json     |  15 +
 .../resources/configs/test-unexpected-field.json   |   8 +
 .../test/resources/configs/test-wrong-type.json    |   3 +
 19 files changed, 1288 insertions(+), 28 deletions(-)

diff --git a/tika-annotation-processor/pom.xml 
b/tika-annotation-processor/pom.xml
index 9c93eb9ac..459dc4ea6 100644
--- a/tika-annotation-processor/pom.xml
+++ b/tika-annotation-processor/pom.xml
@@ -37,15 +37,6 @@
   </description>
   <url>https://tika.apache.org</url>
 
-  <dependencies>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-core</artifactId>
-      <version>${project.version}</version>
-      <scope>provided</scope>
-    </dependency>
-  </dependencies>
-
   <build>
     <plugins>
       <plugin>
diff --git 
a/tika-annotation-processor/src/main/java/org/apache/tika/annotation/TikaComponentProcessor.java
 
b/tika-annotation-processor/src/main/java/org/apache/tika/annotation/TikaComponentProcessor.java
index 3a1800679..02424f862 100644
--- 
a/tika-annotation-processor/src/main/java/org/apache/tika/annotation/TikaComponentProcessor.java
+++ 
b/tika-annotation-processor/src/main/java/org/apache/tika/annotation/TikaComponentProcessor.java
@@ -19,6 +19,7 @@ package org.apache.tika.annotation;
 import java.io.IOException;
 import java.io.Writer;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
@@ -54,7 +55,7 @@ import org.apache.tika.config.TikaComponent;
  * to avoid generating SPI files for utility interfaces like Serializable, 
Initializable, etc.
  */
 @SupportedAnnotationTypes("org.apache.tika.config.TikaComponent")
-@SupportedSourceVersion(SourceVersion.RELEASE_11)
+@SupportedSourceVersion(SourceVersion.RELEASE_17)
 public class TikaComponentProcessor extends AbstractProcessor {
 
     /**
@@ -145,22 +146,24 @@ public class TikaComponentProcessor extends 
AbstractProcessor {
                         .add(className);
             }
 
-            // Always add to index files (regardless of SPI setting)
-            String indexFileName = SERVICE_INTERFACES.get(serviceInterface);
-            if (indexFileName != null) {
-                Map<String, String> index = 
indexFiles.computeIfAbsent(indexFileName,
-                        k -> new LinkedHashMap<>());
-
-                // Check for duplicate names
-                if (index.containsKey(componentName)) {
-                    String existingClass = index.get(componentName);
-                    if (!existingClass.equals(className)) {
-                        messager.printMessage(Diagnostic.Kind.ERROR,
-                                "Duplicate component name '" + componentName + 
"' for classes: " +
-                                existingClass + " and " + className, element);
+            // Add to index files only if spi = true
+            if (includeSpi) {
+                String indexFileName = 
SERVICE_INTERFACES.get(serviceInterface);
+                if (indexFileName != null) {
+                    Map<String, String> index = 
indexFiles.computeIfAbsent(indexFileName,
+                            k -> new LinkedHashMap<>());
+
+                    // Check for duplicate names
+                    if (index.containsKey(componentName)) {
+                        String existingClass = index.get(componentName);
+                        if (!existingClass.equals(className)) {
+                            messager.printMessage(Diagnostic.Kind.ERROR,
+                                    "Duplicate component name '" + 
componentName + "' for classes: " +
+                                    existingClass + " and " + className, 
element);
+                        }
+                    } else {
+                        index.put(componentName, className);
                     }
-                } else {
-                    index.put(componentName, className);
                 }
             }
         }
@@ -219,14 +222,20 @@ public class TikaComponentProcessor extends 
AbstractProcessor {
             String serviceInterface = entry.getKey();
             Set<String> implementations = entry.getValue();
 
+            // Sort implementations alphabetically for deterministic output
+            List<String> sortedImplementations = new 
ArrayList<>(implementations);
+            Collections.sort(sortedImplementations);
+
             try {
                 FileObject file = 
filer.createResource(StandardLocation.CLASS_OUTPUT, "",
                         "META-INF/services/" + serviceInterface);
 
                 try (Writer writer = file.openWriter()) {
+                    writeApacheLicenseHeader(writer);
+                    writer.write("\n\n");
                     writer.write("# Generated by TikaComponentProcessor\n");
                     writer.write("# Do not edit manually\n");
-                    for (String impl : implementations) {
+                    for (String impl : sortedImplementations) {
                         writer.write(impl);
                         writer.write("\n");
                     }
@@ -256,6 +265,7 @@ public class TikaComponentProcessor extends 
AbstractProcessor {
                         "META-INF/tika/" + fileName + ".idx");
 
                 try (Writer writer = file.openWriter()) {
+                    writeApacheLicenseHeader(writer);
                     writer.write("# Generated by TikaComponentProcessor\n");
                     writer.write("# Do not edit manually\n");
                     writer.write("# Format: 
component-name=fully.qualified.ClassName\n");
@@ -277,4 +287,28 @@ public class TikaComponentProcessor extends 
AbstractProcessor {
             }
         }
     }
+
+    /**
+     * Writes the Apache License 2.0 header to a file.
+     */
+    private void writeApacheLicenseHeader(Writer writer) throws IOException {
+        String header = """
+                #  Licensed to the Apache Software Foundation (ASF) under one 
or more
+                #  contributor license agreements.  See the NOTICE file 
distributed with
+                #  this work for additional information regarding copyright 
ownership.
+                #  The ASF licenses this file to You under the Apache License, 
Version 2.0
+                #  (the "License"); you may not use this file except in 
compliance with
+                #  the License.  You may obtain a copy of the License at
+                #
+                #       http://www.apache.org/licenses/LICENSE-2.0
+                #
+                #  Unless required by applicable law or agreed to in writing, 
software
+                #  distributed under the License is distributed on an "AS IS" 
BASIS,
+                #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 
express or implied.
+                #  See the License for the specific language governing 
permissions and
+                #  limitations under the License.
+
+                """;
+        writer.write(header);
+    }
 }
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaComponent.java 
b/tika-annotation-processor/src/main/java/org/apache/tika/config/TikaComponent.java
similarity index 93%
rename from tika-core/src/main/java/org/apache/tika/config/TikaComponent.java
rename to 
tika-annotation-processor/src/main/java/org/apache/tika/config/TikaComponent.java
index 8696ab2db..6632bdeb7 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaComponent.java
+++ 
b/tika-annotation-processor/src/main/java/org/apache/tika/config/TikaComponent.java
@@ -34,6 +34,9 @@ import java.lang.annotation.Target;
  *   <li>Component index files (META-INF/tika/{type}.idx) for name-based 
lookup</li>
  * </ul>
  *
+ * <p>This annotation is only used at compile time by the annotation processor.
+ * It is retained in .class files for tooling but not loaded by the runtime 
JVM.
+ *
  * <p>Example usage:
  * <pre>
  * {@code @TikaComponent}
@@ -54,7 +57,7 @@ import java.lang.annotation.Target;
  *
  * @since 3.1.0
  */
-@Retention(RetentionPolicy.RUNTIME)
+@Retention(RetentionPolicy.CLASS)
 @Target(ElementType.TYPE)
 public @interface TikaComponent {
 
diff --git a/tika-core/pom.xml b/tika-core/pom.xml
index e0c408e34..7e97e367d 100644
--- a/tika-core/pom.xml
+++ b/tika-core/pom.xml
@@ -64,6 +64,14 @@
       <scope>provided</scope>
     </dependency>
 
+    <!-- Annotation processor - contains @TikaComponent and ensures build 
order -->
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-annotation-processor</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+
     <!-- Test dependencies -->
     <dependency>
       <groupId>com.google.guava</groupId>
@@ -130,6 +138,19 @@
           </execution>
         </executions>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <configuration>
+          <annotationProcessorPaths>
+            <path>
+              <groupId>org.apache.tika</groupId>
+              <artifactId>tika-annotation-processor</artifactId>
+              <version>${project.version}</version>
+            </path>
+          </annotationProcessorPaths>
+        </configuration>
+      </plugin>
       <plugin>
         <groupId>org.apache.felix</groupId>
         <artifactId>maven-bundle-plugin</artifactId>
diff --git 
a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java 
b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
index 86eae692a..7fd9e0b0b 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
@@ -90,6 +90,16 @@ public class AutoDetectParser extends CompositeParser {
         setAutoDetectParserConfig(AutoDetectParserConfig.DEFAULT);
     }
 
+    public AutoDetectParser(CompositeParser parser, Detector detector, 
AutoDetectParserConfig autoDetectParserConfig) {
+        super(parser);
+        setDetector(detector);
+        setAutoDetectParserConfig(autoDetectParserConfig);
+    }
+
+    public static Parser build(CompositeParser parser, Detector detector, 
AutoDetectParserConfig autoDetectParserConfig) {
+        return new AutoDetectParser(parser, detector, autoDetectParserConfig);
+    }
+
     public AutoDetectParser(TikaConfig config) {
         super(config.getMediaTypeRegistry(), getParser(config));
         setFallback(buildFallbackParser(config));
diff --git 
a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java 
b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
index 3b50b4da7..b3aaebf7a 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
@@ -97,6 +97,10 @@ public class CompositeParser implements Parser {
         this(new MediaTypeRegistry());
     }
 
+    public CompositeParser(CompositeParser compositeParser) {
+        this(compositeParser.registry, compositeParser);
+    }
+
     public Map<MediaType, Parser> getParsers(ParseContext context) {
         Map<MediaType, Parser> map = new HashMap<>();
         for (Parser parser : parsers) {
diff --git a/tika-serialization/pom.xml b/tika-serialization/pom.xml
index f5eea54f9..38849fdf7 100644
--- a/tika-serialization/pom.xml
+++ b/tika-serialization/pom.xml
@@ -47,6 +47,13 @@
       <version>${project.version}</version>
       <scope>provided</scope>
     </dependency>
+    <!-- Annotation processor - contains @TikaComponent -->
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-annotation-processor</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
     <dependency>
       <groupId>com.fasterxml.jackson.core</groupId>
       <artifactId>jackson-core</artifactId>
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ConfigLoader.java
 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ConfigLoader.java
new file mode 100644
index 000000000..62db2d168
--- /dev/null
+++ 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ConfigLoader.java
@@ -0,0 +1,356 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config.loader;
+
+import java.lang.reflect.Modifier;
+import java.util.Set;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+
+import org.apache.tika.exception.TikaConfigException;
+
+/**
+ * Loader for simple configuration objects from JSON.
+ * <p>
+ * This class handles straightforward POJOs that can be deserialized directly 
from JSON.
+ * For complex components like Parsers, Detectors, etc., use the specific 
methods on
+ * {@link TikaLoader} instead (e.g., {@code loadParsers()}, {@code 
loadDetectors()}).
+ *
+ * <p>Usage:
+ * <pre>
+ * TikaLoader loader = TikaLoader.load(configPath);
+ *
+ * // Load by explicit key
+ * HandlerConfig config = loader.configs().load("handler-config", 
HandlerConfig.class);
+ *
+ * // Load by class name (auto-converts to kebab-case)
+ * HandlerConfig config = loader.configs().load(HandlerConfig.class);
+ * </pre>
+ */
+public class ConfigLoader {
+
+    /**
+     * Reserved keys for complex components that require special handling.
+     * These cannot be loaded via ConfigLoader - use TikaLoader methods 
instead.
+     */
+    private static final Set<String> PROHIBITED_KEYS = Set.of(
+        "parsers",
+        "detectors",
+        "encoding-detectors",
+        "encodingDetectors",
+        "metadata-filters",
+        "metadataFilters",
+        "renderers",
+        "translators"
+    );
+
+    private final TikaJsonConfig config;
+    private final ObjectMapper objectMapper;
+
+    ConfigLoader(TikaJsonConfig config, ObjectMapper objectMapper) {
+        this.config = config;
+        this.objectMapper = objectMapper;
+    }
+
+    /**
+     * Loads a configuration object using the class name converted to 
kebab-case.
+     * <p>
+     * For example, {@code HandlerConfig.class} will look for key 
"handler-config".
+     * The simple class name is converted as-is; suffixes such as "Config" are not stripped.
+     * <p>
+     * For interfaces, the JSON must specify the implementation (see {@link 
#load(String, Class)}).
+     *
+     * @param clazz The class to deserialize into (can be interface, abstract, 
or concrete)
+     * @param <T> The type to load
+     * @return the deserialized object, or null if key not found in config
+     * @throws TikaConfigException if loading fails or class is not 
instantiable
+     */
+    public <T> T load(Class<T> clazz) throws TikaConfigException {
+        String key = deriveKeyFromClass(clazz);
+        return load(key, clazz);
+    }
+
+    /**
+     * Loads a configuration object using the class name, with a default value.
+     *
+     * @param clazz The class to deserialize into
+     * @param defaultValue The value to return if key not found in config
+     * @param <T> The type to load
+     * @return the deserialized object, or defaultValue if not present
+     * @throws TikaConfigException if loading fails or class is not 
instantiable
+     */
+    public <T> T load(Class<T> clazz, T defaultValue) throws 
TikaConfigException {
+        T result = load(clazz);
+        return result != null ? result : defaultValue;
+    }
+
+    /**
+     * Loads a configuration object from the specified JSON key.
+     * <p>
+     * Supports three formats for interfaces:
+     * <ul>
+     *   <li>String value: treated as a fully qualified class name, instantiated via its no-arg constructor</li>
+     *   <li>Object with "@class": explicit type specification</li>
+     *   <li>Object without "@class": attempts direct deserialization (works 
for concrete classes)</li>
+     * </ul>
+     *
+     * @param key The JSON key to load from
+     * @param clazz The class to deserialize into (can be interface, abstract, 
or concrete)
+     * @param <T> The type to load
+     * @return the deserialized object, or null if key not found
+     * @throws TikaConfigException if loading fails or class cannot be 
instantiated
+     */
+    public <T> T load(String key, Class<T> clazz) throws TikaConfigException {
+        validateKey(key);
+        validateClass(clazz);
+
+        JsonNode node = config.getRootNode().get(key);
+        if (node == null || node.isNull()) {
+            return null;
+        }
+
+        try {
+            // Strategy 1: String value - treat as class name
+            if (node.isTextual()) {
+                return loadFromClassName(node.asText(), clazz);
+            }
+
+            // Strategy 2: Object with @class field - explicit type
+            if (node.isObject() && node.has("@class")) {
+                String className = node.get("@class").asText();
+                Class<?> targetClass = Class.forName(className);
+                if (!clazz.isAssignableFrom(targetClass)) {
+                    throw new TikaConfigException(
+                        "Class " + className + " is not assignable to " + 
clazz.getName());
+                }
+                // Remove @class field before deserializing (Jackson doesn't 
recognize it)
+                ObjectNode objectNode = ((ObjectNode) node).deepCopy();
+                objectNode.remove("@class");
+                return objectMapper.treeToValue(objectNode, (Class<T>) 
targetClass);
+            }
+
+            // Strategy 3: Direct deserialization (for concrete classes)
+            if (clazz.isInterface() || 
Modifier.isAbstract(clazz.getModifiers())) {
+                throw new TikaConfigException(
+                    "Cannot deserialize " + clazz.getName() + " - it is " +
+                    (clazz.isInterface() ? "an interface" : "abstract") + ". " 
+
+                    "Specify implementation using:\n" +
+                    "  - String value: \"" + key + "\": 
\"com.example.MyImpl\"\n" +
+                    "  - Object with @class: \"" + key + "\": {\"@class\": 
\"com.example.MyImpl\", ...}");
+            }
+
+            return objectMapper.treeToValue(node, clazz);
+        } catch (ClassNotFoundException e) {
+            throw new TikaConfigException(
+                "Class not found for '" + key + "': " + e.getMessage(), e);
+        } catch (JsonProcessingException e) {
+            throw new TikaConfigException(
+                "Failed to deserialize '" + key + "' into " + clazz.getName(), 
e);
+        }
+    }
+
+    /**
+     * Instantiates the class named by a fully qualified class name via its no-arg constructor.
+     */
+    @SuppressWarnings("unchecked")
+    private <T> T loadFromClassName(String className, Class<T> expectedType)
+            throws TikaConfigException {
+        try {
+            Class<?> clazz = Class.forName(className);
+            if (!expectedType.isAssignableFrom(clazz)) {
+                throw new TikaConfigException(
+                    "Class " + className + " is not assignable to " + 
expectedType.getName());
+            }
+
+            // Try to instantiate with no-arg constructor
+            return (T) clazz.getDeclaredConstructor().newInstance();
+        } catch (ClassNotFoundException e) {
+            throw new TikaConfigException("Class not found: " + className, e);
+        } catch (ReflectiveOperationException e) {
+            throw new TikaConfigException(
+                "Failed to instantiate " + className +
+                ". Ensure it has a public no-argument constructor.", e);
+        }
+    }
+
+    /**
+     * Loads a configuration object from the specified JSON key, with a 
default value.
+     *
+     * @param key The JSON key to load from
+     * @param clazz The class to deserialize into
+     * @param defaultValue The value to return if key not found in config
+     * @param <T> The type to load
+     * @return the deserialized object, or defaultValue if not present
+     * @throws TikaConfigException if loading fails or class is not 
instantiable
+     */
+    public <T> T load(String key, Class<T> clazz, T defaultValue) throws 
TikaConfigException {
+        T result = load(key, clazz);
+        return result != null ? result : defaultValue;
+    }
+
+    /**
+     * Loads a configuration object by merging JSON properties into a copy of 
the default instance.
+     * <p>
+     * This allows partial configuration where only some properties are 
specified in JSON,
+     * and the rest retain their default values. The original defaultValue 
object is NOT modified.
+     *
+     * <p>Example:
+     * <pre>
+     * HandlerConfig defaults = new HandlerConfig();
+     * defaults.setTimeout(30000);
+     * defaults.setRetries(2);
+     * defaults.setEnabled(false);
+     *
+     * // JSON: { "enabled": true }
+     * // Result: timeout=30000, retries=2, enabled=true (merged!)
+     * // Note: 'defaults' object remains unchanged
+     * HandlerConfig config = 
loader.configs().loadWithDefaults("handler-config",
+     *                                                           
HandlerConfig.class,
+     *                                                           defaults);
+     * </pre>
+     *
+     * @param key The JSON key to load from
+     * @param clazz The class type (not used for deserialization, but for type 
safety)
+     * @param defaultValue The object with default values (will NOT be 
modified)
+     * @param <T> The type to load
+     * @return a new object with defaults merged with JSON properties, or the 
original default if key not found
+     * @throws TikaConfigException if loading fails
+     */
+    public <T> T loadWithDefaults(String key, Class<T> clazz, T defaultValue)
+            throws TikaConfigException {
+        validateKey(key);
+        validateClass(clazz);
+
+        JsonNode node = config.getRootNode().get(key);
+        if (node == null || node.isNull()) {
+            return defaultValue;
+        }
+
+        try {
+            // Create a deep copy of defaultValue to avoid mutating the 
original
+            // Using convertValue is efficient and doesn't require serializing 
to bytes
+            @SuppressWarnings("unchecked")
+            T copy = objectMapper.convertValue(defaultValue, (Class<T>) 
defaultValue.getClass());
+
+            // Merge JSON properties into the copy
+            return objectMapper.readerForUpdating(copy).readValue(node);
+        } catch (Exception e) {
+            throw new TikaConfigException(
+                "Failed to merge '" + key + "' into " + clazz.getName(), e);
+        }
+    }
+
+    /**
+     * Loads a configuration object by class name with defaults, merging JSON 
properties.
+     *
+     * @param clazz The class to deserialize into
+     * @param defaultValue The object with default values (will NOT be modified)
+     * @param <T> The type to load
+     * @return a new object with defaults merged with JSON properties, or the original default if key not found
+     * @throws TikaConfigException if loading fails
+     */
+    public <T> T loadWithDefaults(Class<T> clazz, T defaultValue) throws 
TikaConfigException {
+        String key = deriveKeyFromClass(clazz);
+        return loadWithDefaults(key, clazz, defaultValue);
+    }
+
+    /**
+     * Checks if a configuration key exists in the JSON config.
+     *
+     * @param key The JSON key to check
+     * @return true if the key exists and is not null
+     */
+    public boolean hasKey(String key) {
+        JsonNode node = config.getRootNode().get(key);
+        return node != null && !node.isNull();
+    }
+
+    /**
+     * Derives a kebab-case key from a class name.
+     * <p>
+     * Uses the full simple class name (no package, no suffix stripping) converted to kebab-case for consistency with
+     * the annotation processor's component naming.
+     *
+     * @param clazz the class to derive the key from
+     * @return kebab-case version of the class name
+     */
+    private String deriveKeyFromClass(Class<?> clazz) {
+        String simpleName = clazz.getSimpleName();
+        return toKebabCase(simpleName);
+    }
+
+    /**
+     * Converts a camelCase or PascalCase string to kebab-case.
+     * Delegates to {@link KebabCaseConverter} for consistent behavior
+     * with the annotation processor.
+     */
+    private String toKebabCase(String name) {
+        return KebabCaseConverter.toKebabCase(name);
+    }
+
+    /**
+     * Validates that the key is not reserved for complex components.
+     */
+    private void validateKey(String key) throws TikaConfigException {
+        if (PROHIBITED_KEYS.contains(key)) {
+            throw new TikaConfigException(
+                "Cannot load '" + key + "' via ConfigLoader. " +
+                "This is a complex component that requires special handling. " 
+
+                "Use TikaLoader.load" + toPascalCase(key) + "() instead.");
+        }
+    }
+
+    /**
+     * Validates that complex Tika components aren't loaded via this method.
+     * Interfaces and abstract classes are allowed, but require explicit type 
info in JSON.
+     */
+    private void validateClass(Class<?> clazz) throws TikaConfigException {
+        // Check for known complex component types (defense in depth)
+        String className = clazz.getName();
+        if (className.equals("org.apache.tika.parser.Parser") ||
+            className.equals("org.apache.tika.detect.Detector") ||
+            className.equals("org.apache.tika.renderer.Renderer") ||
+            className.equals("org.apache.tika.detect.EncodingDetector") ||
+            
className.equals("org.apache.tika.metadata.filter.MetadataFilter")) {
+            throw new TikaConfigException(
+                clazz.getSimpleName() + " is a Tika component interface. " +
+                "Use the appropriate TikaLoader method (e.g., loadParsers(), 
loadDetectors()).");
+        }
+    }
+
+    /**
+     * Converts kebab-case to PascalCase for error messages.
+     */
+    private String toPascalCase(String kebabCase) {
+        StringBuilder result = new StringBuilder();
+        boolean capitalizeNext = true;
+        for (char c : kebabCase.toCharArray()) {
+            if (c == '-') {
+                capitalizeNext = true;
+            } else if (capitalizeNext) {
+                result.append(Character.toUpperCase(c));
+                capitalizeNext = false;
+            } else {
+                result.append(c);
+            }
+        }
+        return result.toString();
+    }
+}
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/KebabCaseConverter.java
 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/KebabCaseConverter.java
new file mode 100644
index 000000000..8a12a5033
--- /dev/null
+++ 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/KebabCaseConverter.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config.loader;
+
+import java.util.Locale;
+
+/**
+ * Utility for converting Java class names to kebab-case.
+ * Used for automatic component name generation from class names.
+ *
+ * <p><strong>Note:</strong> This is a copy of the implementation in
+ * {@code org.apache.tika.annotation.KebabCaseConverter} to avoid
+ * a runtime dependency on the annotation processor module. The two
+ * implementations must be kept in sync.
+ *
+ * <p>Examples:
+ * <ul>
+ *   <li>PDFParser → pdf-parser</li>
+ *   <li>OCRParser → ocr-parser</li>
+ *   <li>HTMLParser → html-parser</li>
+ *   <li>DefaultParser → default-parser</li>
+ *   <li>TesseractOCRParser → tesseract-ocr-parser</li>
+ * </ul>
+ */
+class KebabCaseConverter {
+
+    private KebabCaseConverter() {
+        // Utility class
+    }
+
+    /**
+     * Converts a Java class name to kebab-case.
+     *
+     * @param className the simple class name (without package)
+     * @return the kebab-case version of the name
+     */
+    static String toKebabCase(String className) {
+        if (className == null || className.isEmpty()) {
+            return className;
+        }
+
+        // Insert hyphen before uppercase letters that follow lowercase letters
+        // or before uppercase letters that are followed by lowercase letters
+        String result = className
+                // Insert hyphen between lowercase and uppercase: "aB" -> "a-B"
+                .replaceAll("([a-z])([A-Z])", "$1-$2")
+                // Insert hyphen before uppercase letter followed by lowercase
+                // in a sequence of uppercase letters: "HTMLParser" -> 
"HTML-Parser"
+                .replaceAll("([A-Z]+)([A-Z][a-z])", "$1-$2")
+                // Insert hyphen between letter and digit: "PDF2Text" -> "PDF-2Text"
+                .replaceAll("([a-zA-Z])(\\d)", "$1-$2")
+                // Insert hyphen between digit and letter: "2Text" -> "2-Text"
+                .replaceAll("(\\d)([a-zA-Z])", "$1-$2")
+                .toLowerCase(Locale.ROOT);
+
+        return result;
+    }
+}
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ParserLoader.java
 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ParserLoader.java
index 96f668af8..5cfc3cd11 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ParserLoader.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ParserLoader.java
@@ -211,12 +211,14 @@ public class ParserLoader {
     @SuppressWarnings("unchecked")
     private Parser instantiateParser(Class<?> parserClass, String configJson)
             throws TikaConfigException {
+
         try {
             // Try constructor with String parameter (JSON config)
             try {
                 Constructor<?> constructor = 
parserClass.getConstructor(String.class);
                 return (Parser) constructor.newInstance(configJson);
             } catch (NoSuchMethodException e) {
+                // TODO -- entrypoint for actual configuration
                 // Fall back to zero-arg constructor
                 return (Parser) ServiceLoaderUtils.newInstance(parserClass,
                         new org.apache.tika.config.ServiceLoader(classLoader));
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
index 7b7bf1c0b..851e6fc2d 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
@@ -26,10 +26,13 @@ import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 
+import com.fasterxml.jackson.databind.DeserializationFeature;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
 
 import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.parser.CompositeParser;
+import org.apache.tika.parser.Parser;
 
 /**
  * Parsed representation of a Tika JSON configuration file.
@@ -57,7 +60,14 @@ import org.apache.tika.exception.TikaConfigException;
  */
 public class TikaJsonConfig {
 
-    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+    private static final ObjectMapper OBJECT_MAPPER = createObjectMapper();
+
+    /**
+     * Creates the shared mapper used by this class.
+     *
+     * @return a new ObjectMapper that fails on unknown properties
+     */
+    private static ObjectMapper createObjectMapper() {
+        // Reject unknown properties so configuration mistakes surface early
+        return new ObjectMapper()
+                .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true);
+    }
 
     private final JsonNode rootNode;
     private final Map<String, Map<String, JsonNode>> componentsByType;
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
index 483596199..6e3a3feab 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
@@ -16,9 +16,12 @@
  */
 package org.apache.tika.config.loader;
 
+import java.io.IOException;
 import java.nio.file.Path;
 import java.util.List;
 
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
 
 import org.apache.tika.detect.CompositeDetector;
@@ -29,6 +32,9 @@ import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.metadata.filter.CompositeMetadataFilter;
 import org.apache.tika.metadata.filter.MetadataFilter;
 import org.apache.tika.mime.MediaTypeRegistry;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.AutoDetectParserConfig;
+import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.renderer.CompositeRenderer;
 import org.apache.tika.renderer.Renderer;
@@ -78,6 +84,7 @@ public class TikaLoader {
     private EncodingDetector encodingDetectors;
     private MetadataFilter metadataFilters;
     private Renderer renderers;
+    private ConfigLoader configLoader;
 
     private TikaLoader(TikaJsonConfig config, ClassLoader classLoader,
                        MediaTypeRegistry mediaTypeRegistry) {
@@ -222,6 +229,36 @@ public class TikaLoader {
         return renderers;
     }
 
+    /**
+     * Builds an {@link AutoDetectParser} from this loader's parsers and detectors.
+     * <p>
+     * The {@link AutoDetectParserConfig} is looked up through {@link #configs()};
+     * when that lookup returns {@code null}, a default-constructed config is used.
+     *
+     * @return the parser produced by {@code AutoDetectParser.build}
+     * @throws TikaConfigException if the loaded parser is not a {@link CompositeParser},
+     *         or if loading a component fails
+     * @throws IOException NOTE(review): presumably raised while reading the
+     *         configuration -- confirm against ConfigLoader
+     */
+    public Parser loadAutoDetectParser() throws TikaConfigException, IOException {
+        AutoDetectParserConfig adpConfig = configs().load(AutoDetectParserConfig.class);
+        if (adpConfig == null) {
+            adpConfig = new AutoDetectParserConfig();
+        }
+
+        // Report an unexpected parser type as a configuration problem instead of
+        // letting the cast below fail with a bare ClassCastException
+        Parser parsers = loadParsers();
+        if (parsers != null && !(parsers instanceof CompositeParser)) {
+            throw new TikaConfigException(
+                    "Cannot build AutoDetectParser: expected a CompositeParser but loaded " +
+                    parsers.getClass().getName());
+        }
+        return AutoDetectParser.build((CompositeParser) parsers, loadDetectors(), adpConfig);
+    }
+
+    /**
+     * Returns a ConfigLoader for loading simple configuration objects.
+     * <p>
+     * Use this for POJOs and simple config classes. For complex components 
like
+     * Parsers, Detectors, etc., use the specific load methods on TikaLoader.
+     *
+     * <p>Usage:
+     * <pre>
+     * HandlerConfig config = loader.configs().load("handler-config", 
HandlerConfig.class);
+     * // Or use kebab-case auto-conversion:
+     * HandlerConfig config = loader.configs().load(HandlerConfig.class);
+     * </pre>
+     *
+     * @return the ConfigLoader instance
+     */
+    public synchronized ConfigLoader configs() {
+        if (configLoader == null) {
+            configLoader = new ConfigLoader(config, objectMapper);
+        }
+        return configLoader;
+    }
+
     /**
      * Gets the underlying JSON configuration.
      *
diff --git 
a/tika-serialization/src/test/java/org/apache/tika/config/loader/ConfigLoaderTest.java
 
b/tika-serialization/src/test/java/org/apache/tika/config/loader/ConfigLoaderTest.java
new file mode 100644
index 000000000..be207dba3
--- /dev/null
+++ 
b/tika-serialization/src/test/java/org/apache/tika/config/loader/ConfigLoaderTest.java
@@ -0,0 +1,646 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config.loader;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import org.apache.tika.exception.TikaConfigException;
+
+/**
+ * Unit tests for {@link ConfigLoader}.
+ */
+public class ConfigLoaderTest {
+
+    private TikaLoader tikaLoader;
+    private ConfigLoader configLoader;
+
+    @BeforeEach
+    public void setUp() throws Exception {
+        Path configPath = Paths.get(
+                
getClass().getResource("/configs/test-config-loader.json").toURI());
+        tikaLoader = TikaLoader.load(configPath);
+        configLoader = tikaLoader.configs();
+    }
+
+    // ==================== Test POJOs ====================
+
+    /**
+     * Simple config POJO with properties.
+     */
+    public static class HandlerConfig {
+        private int timeout;
+        private int retries;
+        private boolean enabled;
+
+        public int getTimeout() {
+            return timeout;
+        }
+
+        public void setTimeout(int timeout) {
+            this.timeout = timeout;
+        }
+
+        public int getRetries() {
+            return retries;
+        }
+
+        public void setRetries(int retries) {
+            this.retries = retries;
+        }
+
+        public boolean isEnabled() {
+            return enabled;
+        }
+
+        public void setEnabled(boolean enabled) {
+            this.enabled = enabled;
+        }
+    }
+
+    /**
+     * Config class whose key is the full kebab-case class name (no suffix stripping).
+     */
+    public static class TikaTaskTimeout {
+        private long millis;
+
+        public long getMillis() {
+            return millis;
+        }
+
+        public void setMillis(long millis) {
+            this.millis = millis;
+        }
+    }
+
+    /**
+     * Config class with "Settings" suffix.
+     */
+    public static class MyFeatureSettings {
+        private String featureName;
+        private int priority;
+
+        public String getFeatureName() {
+            return featureName;
+        }
+
+        public void setFeatureName(String featureName) {
+            this.featureName = featureName;
+        }
+
+        public int getPriority() {
+            return priority;
+        }
+
+        public void setPriority(int priority) {
+            this.priority = priority;
+        }
+    }
+
+    /**
+     * Interface for testing interface handling.
+     */
+    public interface TestHandler {
+        String getName();
+    }
+
+    /**
+     * Simple implementation with no-arg constructor.
+     */
+    public static class SimpleHandlerImpl implements TestHandler {
+        public SimpleHandlerImpl() {
+        }
+
+        @Override
+        public String getName() {
+            return "simple";
+        }
+    }
+
+    /**
+     * Implementation with configuration properties.
+     */
+    public static class ConfiguredHandlerImpl implements TestHandler {
+        private int maxSize;
+        private String prefix;
+
+        public ConfiguredHandlerImpl() {
+        }
+
+        @Override
+        public String getName() {
+            return "configured";
+        }
+
+        public int getMaxSize() {
+            return maxSize;
+        }
+
+        public void setMaxSize(int maxSize) {
+            this.maxSize = maxSize;
+        }
+
+        public String getPrefix() {
+            return prefix;
+        }
+
+        public void setPrefix(String prefix) {
+            this.prefix = prefix;
+        }
+    }
+
+    /**
+     * Abstract class for testing abstract class handling.
+     */
+    public abstract static class AbstractHandler implements TestHandler {
+        public abstract void doSomething();
+    }
+
+    // ==================== Tests ====================
+
+    @Test
+    public void testLoadByExplicitKey() throws Exception {
+        HandlerConfig config = configLoader.load("handler-config", 
HandlerConfig.class);
+
+        assertNotNull(config);
+        assertEquals(5000, config.getTimeout());
+        assertEquals(3, config.getRetries());
+        assertTrue(config.isEnabled());
+    }
+
+    @Test
+    public void testLoadByClassNameKebabCase() throws Exception {
+        HandlerConfig config = configLoader.load(HandlerConfig.class);
+
+        assertNotNull(config);
+        assertEquals(5000, config.getTimeout());
+    }
+
+    @Test
+    public void testLoadByClassNameTikaTaskTimeout() throws Exception {
+        // TikaTaskTimeout -> "tika-task-timeout" (no suffix stripping)
+        // JSON has "tika-task-timeout"
+        TikaTaskTimeout timeout = configLoader.load(TikaTaskTimeout.class);
+
+        assertNotNull(timeout);
+        assertEquals(30000, timeout.getMillis());
+    }
+
+    @Test
+    public void testLoadByClassNameMyFeatureSettings() throws Exception {
+        // MyFeatureSettings -> "my-feature-settings" (full name, no suffix 
stripping)
+        // JSON has "my-feature-settings"
+        MyFeatureSettings settings = 
configLoader.load(MyFeatureSettings.class);
+
+        assertNotNull(settings);
+        assertEquals("test-feature", settings.getFeatureName());
+        assertEquals(10, settings.getPriority());
+    }
+
+    @Test
+    public void testLoadWithDefaultValue() throws Exception {
+        HandlerConfig config = configLoader.load("handler-config", 
HandlerConfig.class);
+        assertNotNull(config);
+
+        // Non-existent key with default
+        HandlerConfig defaultConfig = new HandlerConfig();
+        defaultConfig.setTimeout(9999);
+
+        HandlerConfig result = configLoader.load("non-existent", 
HandlerConfig.class, defaultConfig);
+        assertEquals(9999, result.getTimeout());
+    }
+
+    @Test
+    public void testLoadMissingKeyReturnsNull() throws Exception {
+        HandlerConfig config = configLoader.load("non-existent-key", 
HandlerConfig.class);
+        assertNull(config);
+    }
+
+    @Test
+    public void testLoadInterfaceAsString() throws Exception {
+        // JSON: "simple-handler": 
"org.apache.tika.config.loader.ConfigLoaderTest$SimpleHandlerImpl"
+        TestHandler handler = configLoader.load("simple-handler", 
TestHandler.class);
+
+        assertNotNull(handler);
+        assertTrue(handler instanceof SimpleHandlerImpl);
+        assertEquals("simple", handler.getName());
+    }
+
+    @Test
+    public void testLoadInterfaceWithAtClassAndProperties() throws Exception {
+        // JSON: "configured-handler": { "@class": "...", "maxSize": 100000, 
... }
+        TestHandler handler = configLoader.load("configured-handler", 
TestHandler.class);
+
+        assertNotNull(handler);
+        assertTrue(handler instanceof ConfiguredHandlerImpl);
+        assertEquals("configured", handler.getName());
+
+        ConfiguredHandlerImpl impl = (ConfiguredHandlerImpl) handler;
+        assertEquals(100000, impl.getMaxSize());
+        assertEquals("test-", impl.getPrefix());
+    }
+
+    @Test
+    public void testLoadInterfaceWithoutTypeInfoFails() throws Exception {
+        // Create a minimal config with just properties, no @class
+        Path configPath = Paths.get(
+                
getClass().getResource("/configs/test-interface-no-type.json").toURI());
+        TikaLoader loader = TikaLoader.load(configPath);
+
+        TikaConfigException ex = assertThrows(TikaConfigException.class, () ->
+                loader.configs().load("handler-no-type", TestHandler.class));
+
+        assertTrue(ex.getMessage().contains("interface"));
+        assertTrue(ex.getMessage().contains("@class"));
+    }
+
+    @Test
+    public void testLoadAbstractClassFails() throws Exception {
+        TikaConfigException ex = assertThrows(TikaConfigException.class, () ->
+                configLoader.load("abstract-handler", AbstractHandler.class));
+
+        assertTrue(ex.getMessage().contains("abstract"));
+    }
+
+    @Test
+    public void testLoadProhibitedKeyParsers() throws Exception {
+        TikaConfigException ex = assertThrows(TikaConfigException.class, () ->
+                configLoader.load("parsers", Object.class));
+
+        assertTrue(ex.getMessage().contains("Cannot load 'parsers'"));
+        assertTrue(ex.getMessage().contains("TikaLoader"));
+    }
+
+    @Test
+    public void testLoadProhibitedKeyDetectors() throws Exception {
+        TikaConfigException ex = assertThrows(TikaConfigException.class, () ->
+                configLoader.load("detectors", Object.class));
+
+        assertTrue(ex.getMessage().contains("Cannot load 'detectors'"));
+    }
+
+    @Test
+    public void testLoadProhibitedKeyMetadataFilters() throws Exception {
+        TikaConfigException ex = assertThrows(TikaConfigException.class, () ->
+                configLoader.load("metadata-filters", Object.class));
+
+        assertTrue(ex.getMessage().contains("Cannot load 'metadata-filters'"));
+    }
+
+    @Test
+    public void testHasKey() throws Exception {
+        assertTrue(configLoader.hasKey("handler-config"));
+        assertTrue(configLoader.hasKey("simple-handler"));
+        assertFalse(configLoader.hasKey("non-existent"));
+    }
+
+    @Test
+    public void testLoadInvalidClassName() throws Exception {
+        Path configPath = Paths.get(
+                
getClass().getResource("/configs/test-invalid-class.json").toURI());
+        TikaLoader loader = TikaLoader.load(configPath);
+
+        TikaConfigException ex = assertThrows(TikaConfigException.class, () ->
+                loader.configs().load("handler", TestHandler.class));
+
+        assertTrue(ex.getMessage().contains("Class not found"));
+    }
+
+    @Test
+    public void testLoadWrongTypeAssignment() throws Exception {
+        // String class name that doesn't implement the interface
+        Path configPath = Paths.get(
+                
getClass().getResource("/configs/test-wrong-type.json").toURI());
+        TikaLoader loader = TikaLoader.load(configPath);
+
+        TikaConfigException ex = assertThrows(TikaConfigException.class, () ->
+                loader.configs().load("handler", TestHandler.class));
+
+        assertTrue(ex.getMessage().contains("not assignable"));
+    }
+
+    @Test
+    public void testLoadWithUnexpectedFieldFails() throws Exception {
+        // Verify that unexpected/unrecognized fields cause an exception
+        Path configPath = Paths.get(
+                getClass().getResource("/configs/test-unexpected-field.json").toURI());
+        TikaLoader loader = TikaLoader.load(configPath);
+
+        TikaConfigException ex = assertThrows(TikaConfigException.class, () ->
+                loader.configs().load("handler-config", HandlerConfig.class));
+
+        // Collect the messages of the exception and all of its causes. Either the
+        // cause or a message may be null, so they are never dereferenced directly.
+        StringBuilder collected = new StringBuilder();
+        for (Throwable t = ex; t != null; t = t.getCause()) {
+            if (t.getMessage() != null) {
+                collected.append(t.getMessage()).append('\n');
+            }
+        }
+        String messages = collected.toString();
+
+        // NOTE(review): the "handler-config" alternative is satisfied by any message
+        // that names the key, so this does not strictly pin the unknown field --
+        // tighten once the exact ConfigLoader message format is confirmed.
+        assertTrue(messages.contains("handler-config") ||
+                   messages.contains("Unrecognized") ||
+                   messages.contains("unexpectedField"),
+                   "Exception should mention the unrecognized field");
+    }
+
+    @Test
+    public void testKebabCaseConversion() throws Exception {
+        // Test that kebab-case conversion works correctly
+        // MyFeatureSettings should look for "my-feature-settings" (full 
kebab-case, no stripping)
+        MyFeatureSettings settings = 
configLoader.load(MyFeatureSettings.class);
+        assertNotNull(settings);
+        assertEquals("test-feature", settings.getFeatureName());
+    }
+
+    @Test
+    public void testLoadByClassWithDefault() throws Exception {
+        HandlerConfig config = configLoader.load(HandlerConfig.class);
+        assertNotNull(config);
+
+        // Non-existent class
+        TikaTaskTimeout defaultTimeout = new TikaTaskTimeout();
+        defaultTimeout.setMillis(60000);
+
+        // Use a class name that won't match
+        TikaTaskTimeout result = configLoader.load("NonExistentConfig.class",
+                                                    TikaTaskTimeout.class,
+                                                    defaultTimeout);
+        assertEquals(60000, result.getMillis());
+    }
+
+    // ==================== Tests for loadWithDefaults (Partial Config) 
====================
+
+    @Test
+    public void testLoadWithDefaultsPartialConfig() throws Exception {
+        // Load config that merges defaults with partial JSON
+        Path configPath = Paths.get(
+                
getClass().getResource("/configs/test-partial-config.json").toURI());
+        TikaLoader loader = TikaLoader.load(configPath);
+
+        // Set up defaults
+        HandlerConfig defaults = new HandlerConfig();
+        defaults.setTimeout(30000);
+        defaults.setRetries(2);
+        defaults.setEnabled(false);
+
+        // JSON only has: { "enabled": true }
+        HandlerConfig config = 
loader.configs().loadWithDefaults("handler-config",
+                                                                  
HandlerConfig.class,
+                                                                  defaults);
+
+        assertNotNull(config);
+        assertEquals(30000, config.getTimeout()); // ✅ From defaults
+        assertEquals(2, config.getRetries());      // ✅ From defaults
+        assertTrue(config.isEnabled());            // ✅ From JSON (overridden)
+    }
+
+    @Test
+    public void testLoadWithDefaultsFullOverride() throws Exception {
+        // Test that JSON can override all defaults
+        Path configPath = Paths.get(
+                
getClass().getResource("/configs/test-partial-config.json").toURI());
+        TikaLoader loader = TikaLoader.load(configPath);
+
+        HandlerConfig defaults = new HandlerConfig();
+        defaults.setTimeout(30000);
+        defaults.setRetries(2);
+        defaults.setEnabled(false);
+
+        // JSON has: { "timeout": 10000, "retries": 5, "enabled": false }
+        HandlerConfig config = 
loader.configs().loadWithDefaults("handler-config-full",
+                                                                  
HandlerConfig.class,
+                                                                  defaults);
+
+        assertNotNull(config);
+        assertEquals(10000, config.getTimeout()); // All overridden
+        assertEquals(5, config.getRetries());
+        assertFalse(config.isEnabled());
+    }
+
+    @Test
+    public void testLoadWithDefaultsMissingKey() throws Exception {
+        // When key doesn't exist, should return original defaults unchanged
+        HandlerConfig defaults = new HandlerConfig();
+        defaults.setTimeout(30000);
+        defaults.setRetries(2);
+        defaults.setEnabled(false);
+
+        HandlerConfig config = 
configLoader.loadWithDefaults("non-existent-key",
+                                                              
HandlerConfig.class,
+                                                              defaults);
+
+        assertNotNull(config);
+        assertEquals(30000, config.getTimeout());
+        assertEquals(2, config.getRetries());
+        assertFalse(config.isEnabled());
+    }
+
+    @Test
+    public void testLoadWithDefaultsByClass() throws Exception {
+        // Test the class-name version
+        Path configPath = Paths.get(
+                
getClass().getResource("/configs/test-partial-config.json").toURI());
+        TikaLoader loader = TikaLoader.load(configPath);
+
+        HandlerConfig defaults = new HandlerConfig();
+        defaults.setTimeout(30000);
+        defaults.setRetries(2);
+        defaults.setEnabled(false);
+
+        // Uses kebab-case: HandlerConfig -> "handler-config"
+        HandlerConfig config = 
loader.configs().loadWithDefaults(HandlerConfig.class, defaults);
+
+        assertNotNull(config);
+        assertEquals(30000, config.getTimeout());
+        assertEquals(2, config.getRetries());
+        assertTrue(config.isEnabled()); // Overridden from JSON
+    }
+
+    @Test
+    public void testLoadVsLoadWithDefaults() throws Exception {
+        // Demonstrate difference between load() and loadWithDefaults()
+        Path configPath = Paths.get(
+                
getClass().getResource("/configs/test-partial-config.json").toURI());
+        TikaLoader loader = TikaLoader.load(configPath);
+
+        HandlerConfig defaults = new HandlerConfig();
+        defaults.setTimeout(30000);
+        defaults.setRetries(2);
+        defaults.setEnabled(false);
+
+        // Using load() - creates new object, loses defaults
+        HandlerConfig config1 = loader.configs().load("handler-config", 
HandlerConfig.class);
+        assertEquals(0, config1.getTimeout());  // ❌ Lost default!
+        assertEquals(0, config1.getRetries());  // ❌ Lost default!
+        assertTrue(config1.isEnabled());        // ✅ From JSON
+
+        // Using loadWithDefaults() - merges into defaults
+        HandlerConfig config2 = 
loader.configs().loadWithDefaults("handler-config",
+                                                                   
HandlerConfig.class,
+                                                                   defaults);
+        assertEquals(30000, config2.getTimeout()); // ✅ Kept default!
+        assertEquals(2, config2.getRetries());     // ✅ Kept default!
+        assertTrue(config2.isEnabled());           // ✅ From JSON
+    }
+
+    // ==================== Immutability Tests ====================
+
+    @Test
+    public void testLoadWithDefaultsDoesNotMutateOriginal() throws Exception {
+        // Verify that the original defaults object is NOT modified
+        Path configPath = Paths.get(
+                getClass().getResource("/configs/test-partial-config.json").toURI());
+        TikaLoader loader = TikaLoader.load(configPath);
+
+        HandlerConfig defaults = new HandlerConfig();
+        defaults.setTimeout(30000);
+        defaults.setRetries(2);
+        defaults.setEnabled(false);
+
+        // Load config with partial override (JSON only has "enabled": true)
+        HandlerConfig result = loader.configs().loadWithDefaults("handler-config",
+                                                                  HandlerConfig.class,
+                                                                  defaults);
+
+        // Verify result has merged values
+        assertEquals(30000, result.getTimeout());
+        assertEquals(2, result.getRetries());
+        assertTrue(result.isEnabled());  // Overridden from JSON
+
+        // CRITICAL: Verify original defaults object is unchanged
+        assertEquals(30000, defaults.getTimeout());
+        assertEquals(2, defaults.getRetries());
+        assertFalse(defaults.isEnabled());  // NOT changed by the override
+
+        // Verify they are different objects. HandlerConfig does not override
+        // equals(), so assertNotEquals compares references here. Comparing
+        // System.identityHashCode values is not a valid check, because two
+        // distinct objects may share the same identity hash code.
+        assertNotEquals(defaults, result,
+                       "Result should be a different object than defaults");
+    }
+
+    @Test
+    public void testLoadWithDefaultsReusableDefaults() throws Exception {
+        // Verify defaults can be safely reused for multiple loads
+        Path configPath = Paths.get(
+                
getClass().getResource("/configs/test-partial-config.json").toURI());
+        TikaLoader loader = TikaLoader.load(configPath);
+
+        HandlerConfig defaults = new HandlerConfig();
+        defaults.setTimeout(30000);
+        defaults.setRetries(2);
+        defaults.setEnabled(false);
+
+        // Load multiple times with same defaults
+        HandlerConfig config1 = 
loader.configs().loadWithDefaults("handler-config",
+                                                                   
HandlerConfig.class,
+                                                                   defaults);
+        HandlerConfig config2 = 
loader.configs().loadWithDefaults("handler-config-full",
+                                                                   
HandlerConfig.class,
+                                                                   defaults);
+
+        // Verify results are different
+        assertTrue(config1.isEnabled());   // From partial config
+        assertFalse(config2.isEnabled());  // From full config
+
+        // Verify defaults still unchanged and can be used again
+        assertEquals(30000, defaults.getTimeout());
+        assertEquals(2, defaults.getRetries());
+        assertFalse(defaults.isEnabled());
+
+        // Use defaults one more time
+        HandlerConfig config3 = 
loader.configs().loadWithDefaults("non-existent",
+                                                                   
HandlerConfig.class,
+                                                                   defaults);
+        assertEquals(defaults, config3);  // Should return original when key 
missing
+    }
+
+    @Test
+    public void testLoadWithDefaultsComplexObjectImmutability() throws 
Exception {
+        // Test with a second config type to ensure the defaults object is copied, not mutated
+        Path configPath = Paths.get(
+                
getClass().getResource("/configs/test-partial-config.json").toURI());
+        TikaLoader loader = TikaLoader.load(configPath);
+
+        TikaTaskTimeout defaults = new TikaTaskTimeout();
+        defaults.setMillis(60000);
+
+        // Note: tika-task-timeout in JSON has millis: 30000
+        TikaTaskTimeout result = 
loader.configs().loadWithDefaults("tika-task-timeout",
+                                                                    
TikaTaskTimeout.class,
+                                                                    defaults);
+
+        // Result should have JSON value
+        assertEquals(30000, result.getMillis());
+
+        // Original should be unchanged
+        assertEquals(60000, defaults.getMillis());
+    }
+
+    @Test
+    public void testLoadWithDefaultsMissingKeyDoesNotClone() throws Exception {
+        // When key is missing, should return the original object (no 
unnecessary cloning)
+        HandlerConfig defaults = new HandlerConfig();
+        defaults.setTimeout(30000);
+        defaults.setRetries(2);
+        defaults.setEnabled(false);
+
+        HandlerConfig result = 
configLoader.loadWithDefaults("non-existent-key",
+                                                              
HandlerConfig.class,
+                                                              defaults);
+
+        // Should return the exact same object when key is missing
+        assertEquals(defaults, result);
+        assertEquals(System.identityHashCode(defaults),
+                    System.identityHashCode(result),
+                    "Should return same object when key missing (no 
unnecessary clone)");
+    }
+
+    @Test
+    public void testLoadWithDefaultsThreadSafety() throws Exception {
+        // Demonstrate that defaults can be safely shared across threads
+        Path configPath = Paths.get(
+                
getClass().getResource("/configs/test-partial-config.json").toURI());
+        TikaLoader loader = TikaLoader.load(configPath);
+
+        // Shared defaults object
+        HandlerConfig sharedDefaults = new HandlerConfig();
+        sharedDefaults.setTimeout(30000);
+        sharedDefaults.setRetries(2);
+        sharedDefaults.setEnabled(false);
+
+        // Simulate concurrent usage (not a real concurrency test, just 
demonstrates safety)
+        HandlerConfig result1 = 
loader.configs().loadWithDefaults("handler-config",
+                                                                   
HandlerConfig.class,
+                                                                   
sharedDefaults);
+        HandlerConfig result2 = 
loader.configs().loadWithDefaults("handler-config-full",
+                                                                   
HandlerConfig.class,
+                                                                   
sharedDefaults);
+
+        // Both results should be valid
+        assertNotNull(result1);
+        assertNotNull(result2);
+
+        // Shared defaults should still be unchanged
+        assertEquals(30000, sharedDefaults.getTimeout());
+        assertEquals(2, sharedDefaults.getRetries());
+        assertFalse(sharedDefaults.isEnabled());
+    }
+}
diff --git 
a/tika-serialization/src/test/resources/configs/test-config-loader.json 
b/tika-serialization/src/test/resources/configs/test-config-loader.json
new file mode 100644
index 000000000..8f6e89a8c
--- /dev/null
+++ b/tika-serialization/src/test/resources/configs/test-config-loader.json
@@ -0,0 +1,32 @@
+{
+  "handler-config": {
+    "timeout": 5000,
+    "retries": 3,
+    "enabled": true
+  },
+
+  "simple-handler": 
"org.apache.tika.config.loader.ConfigLoaderTest$SimpleHandlerImpl",
+
+  "configured-handler": {
+    "@class": 
"org.apache.tika.config.loader.ConfigLoaderTest$ConfiguredHandlerImpl",
+    "maxSize": 100000,
+    "prefix": "test-"
+  },
+
+  "tika-task-timeout": {
+    "millis": 30000
+  },
+
+  "parsers": [
+    {"pdf-parser": {}}
+  ],
+
+  "my-feature-settings": {
+    "featureName": "test-feature",
+    "priority": 10
+  },
+
+  "abstract-handler": {
+    "someProperty": "value"
+  }
+}
diff --git 
a/tika-serialization/src/test/resources/configs/test-interface-no-type.json 
b/tika-serialization/src/test/resources/configs/test-interface-no-type.json
new file mode 100644
index 000000000..15a3d35b2
--- /dev/null
+++ b/tika-serialization/src/test/resources/configs/test-interface-no-type.json
@@ -0,0 +1,6 @@
+{
+  "handler-no-type": {
+    "maxSize": 50000,
+    "prefix": "no-type-"
+  }
+}
diff --git 
a/tika-serialization/src/test/resources/configs/test-invalid-class.json 
b/tika-serialization/src/test/resources/configs/test-invalid-class.json
new file mode 100644
index 000000000..f0bf4bf4e
--- /dev/null
+++ b/tika-serialization/src/test/resources/configs/test-invalid-class.json
@@ -0,0 +1,3 @@
+{
+  "handler": "com.example.NonExistentClass"
+}
diff --git 
a/tika-serialization/src/test/resources/configs/test-partial-config.json 
b/tika-serialization/src/test/resources/configs/test-partial-config.json
new file mode 100644
index 000000000..fb010c3e8
--- /dev/null
+++ b/tika-serialization/src/test/resources/configs/test-partial-config.json
@@ -0,0 +1,15 @@
+{
+  "handler-config": {
+    "enabled": true
+  },
+
+  "handler-config-full": {
+    "timeout": 10000,
+    "retries": 5,
+    "enabled": false
+  },
+
+  "tika-task-timeout": {
+    "millis": 30000
+  }
+}
diff --git 
a/tika-serialization/src/test/resources/configs/test-unexpected-field.json 
b/tika-serialization/src/test/resources/configs/test-unexpected-field.json
new file mode 100644
index 000000000..ada7f9bdf
--- /dev/null
+++ b/tika-serialization/src/test/resources/configs/test-unexpected-field.json
@@ -0,0 +1,8 @@
+{
+  "handler-config": {
+    "timeout": 5000,
+    "retries": 3,
+    "enabled": true,
+    "unexpectedField": "this should cause an error"
+  }
+}
diff --git a/tika-serialization/src/test/resources/configs/test-wrong-type.json 
b/tika-serialization/src/test/resources/configs/test-wrong-type.json
new file mode 100644
index 000000000..b25e9f644
--- /dev/null
+++ b/tika-serialization/src/test/resources/configs/test-wrong-type.json
@@ -0,0 +1,3 @@
+{
+  "handler": "java.lang.String"
+}

Reply via email to