This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new cd2654772b TIKA-4641 (#2562)
cd2654772b is described below
commit cd2654772b463ef425d265acc286ca29d8bff6bf
Author: Tim Allison <[email protected]>
AuthorDate: Fri Jan 30 21:40:05 2026 -0500
TIKA-4641 (#2562)
---
.../ROOT/pages/advanced/setting-limits.adoc | 40 ++++---
docs/modules/ROOT/pages/advanced/spooling.adoc | 11 +-
.../ROOT/pages/configuration/digesters.adoc | 62 +++++------
.../tika/annotation/TikaComponentProcessor.java | 118 +++++++++++++++------
.../java/org/apache/tika/config/TikaComponent.java | 25 +++++
.../org/apache/tika/config/EmbeddedLimits.java | 2 +-
.../java/org/apache/tika/config/OutputLimits.java | 2 +-
.../java/org/apache/tika/config/TimeoutLimits.java | 2 +-
.../java/org/apache/tika/digest/DigestHelper.java | 12 +--
.../org/apache/tika/digest/DigesterFactory.java | 18 ++--
.../org/apache/tika/parser/AutoDetectParser.java | 2 +-
.../tika/sax/BasicContentHandlerFactory.java | 2 +-
.../java/org/apache/tika/sax/SAXOutputConfig.java | 3 +
.../apache/tika/pipes/grpc/TikaGrpcServerImpl.java | 2 +-
.../digestutils/BouncyCastleDigesterFactory.java | 4 +-
.../parser/digestutils/CommonsDigesterFactory.java | 4 +-
.../tika/parser/AutoDetectParserConfigTest.java | 5 +-
.../tika/parser/digest/DigestConfigTest.java | 2 +-
.../digest/SkipContainerDocumentDigestTest.java | 2 +-
.../src/test/resources/configs/tika-4533.json | 6 +-
.../configs/tika-config-bc-digests-base32.json | 6 +-
.../configs/tika-config-bc-digests-basic.json | 6 +-
.../configs/tika-config-bc-digests-multiple.json | 6 +-
.../configs/tika-config-commons-digests-basic.json | 6 +-
.../configs/tika-config-digests-pdf-only.json | 6 +-
.../tika-config-digests-skip-container.json | 6 +-
.../resources/configs/tika-config-digests.json | 14 ++-
.../resources/configs/tika-config-md5-digest.json | 6 +-
.../resources/configs/tika-config-no-names.json | 6 +-
...a-config-upcasing-custom-handler-decorator.json | 12 +--
.../resources/configs/tika-config-with-names.json | 6 +-
.../configs/tika-config-write-filter.json | 28 +++--
.../tika/pipes/core/server/ParseHandler.java | 2 +-
.../apache/tika/pipes/core/server/PipesServer.java | 2 +-
.../src/test/resources/configs/tika-4533.json | 6 +-
.../test/resources/configs/tika-config-basic.json | 8 +-
.../resources/configs/tika-config-passback.json | 8 +-
.../resources/configs/tika-config-truncate.json | 14 +--
.../resources/configs/tika-config-uppercasing.json | 8 +-
.../configs/tika-config-write-limiter.json | 16 ++-
.../apache/tika/config/loader/ComponentInfo.java | 15 ++-
.../tika/config/loader/ComponentRegistry.java | 41 +++++--
.../apache/tika/config/loader/ConfigLoader.java | 33 +++---
.../apache/tika/config/loader/TikaJsonConfig.java | 12 +--
.../org/apache/tika/config/loader/TikaLoader.java | 106 +++++++++++-------
.../config/loader/TikaObjectMapperFactory.java | 2 +-
.../tika/serialization/ParseContextUtils.java | 4 +-
.../serdes/ParseContextDeserializer.java | 64 ++++++++++-
.../serdes/ParseContextSerializer.java | 14 +++
.../java/org/apache/tika/config/AllLimitsTest.java | 25 +++--
.../org/apache/tika/config/EmbeddedLimitsTest.java | 3 +-
.../org/apache/tika/config/OutputLimitsTest.java | 3 +-
.../org/apache/tika/config/TimeoutLimitsTest.java | 3 +-
.../tika/config/loader/ConfigLoaderTest.java | 100 +++++++++--------
.../apache/tika/config/loader/TikaLoaderTest.java | 47 ++++++++
.../writefilter/StandardMetadataLimiterTest.java | 10 +-
.../TestParseContextSerialization.java | 66 +++++++++++-
.../test/resources/configs/TIKA-3695-exclude.json | 8 +-
.../test/resources/configs/TIKA-3695-fields.json | 20 ++--
.../src/test/resources/configs/TIKA-3695.json | 14 ++-
.../test/resources/configs/all-limits-test.json | 14 ++-
.../resources/configs/embedded-limits-test.json | 2 +-
.../test/resources/configs/output-limits-test.json | 2 +-
.../test/resources/configs/test-config-loader.json | 2 +-
.../resources/configs/test-interface-no-type.json | 2 +-
.../test/resources/configs/test-invalid-class.json | 2 +-
.../resources/configs/test-partial-config.json | 2 +-
.../resources/configs/test-unexpected-field.json | 2 +-
.../test/resources/configs/test-wrong-type.json | 2 +-
.../resources/configs/timeout-limits-test.json | 2 +-
.../server/core/resource/MetadataResource.java | 2 +-
.../tika/server/core/resource/TikaResource.java | 8 +-
.../org/apache/tika/server/core/CXFTestBase.java | 12 +--
.../resources/configs/cxf-test-base-template.json | 8 +-
.../resources/configs/cxf-test-base-template.json | 8 +-
.../configs/tika-config-for-server-tests.json | 8 +-
.../tika-config-langdetect-opennlp-filter.json | 8 +-
.../tika-config-langdetect-optimaize-filter.json | 8 +-
78 files changed, 717 insertions(+), 463 deletions(-)
diff --git a/docs/modules/ROOT/pages/advanced/setting-limits.adoc
b/docs/modules/ROOT/pages/advanced/setting-limits.adoc
index 4dd02a4dd0..8064a4d88e 100644
--- a/docs/modules/ROOT/pages/advanced/setting-limits.adoc
+++ b/docs/modules/ROOT/pages/advanced/setting-limits.adoc
@@ -23,7 +23,7 @@ Tika provides several mechanisms for limiting resource usage
during parsing.
== Overview
-Tika 4.x provides a unified configuration system for all limits through the `other-configs`
+Tika 4.x provides a unified configuration system for all limits through the `parse-context`
section of the JSON configuration file. All limits are loaded into the `ParseContext` and
flow through the parsing pipeline.
@@ -36,7 +36,7 @@ This is the same configuration tested in `AllLimitsTest.java`:
----
{
"parsers": ["default-parser"],
- "other-configs": {
+ "parse-context": {
"embedded-limits": {
"maxDepth": 10,
"throwOnMaxDepth": false,
@@ -54,13 +54,11 @@ This is the same configuration tested in
`AllLimitsTest.java`:
"timeout-limits": {
"taskTimeoutMillis": 60000
},
- "metadata-write-limiter-factory": {
- "standard-metadata-limiter-factory": {
- "maxTotalBytes": 1048576,
- "maxFieldSize": 102400,
- "maxKeySize": 1024,
- "maxValuesPerField": 100
- }
+ "standard-metadata-limiter-factory": {
+ "maxTotalBytes": 1048576,
+ "maxFieldSize": 102400,
+ "maxKeySize": 1024,
+ "maxValuesPerField": 100
}
}
}
@@ -138,7 +136,7 @@ container.zip (depth 0)
[source,json]
----
{
- "other-configs": {
+ "parse-context": {
"embedded-limits": {
"maxDepth": 5,
"throwOnMaxDepth": true,
@@ -215,7 +213,7 @@ and protection against zip bombs.
[source,json]
----
{
- "other-configs": {
+ "parse-context": {
"output-limits": {
"writeLimit": 50000,
"throwOnWriteLimit": true,
@@ -263,7 +261,7 @@ The `TimeoutLimits` class controls time-based limits for
parsing operations.
[source,json]
----
{
- "other-configs": {
+ "parse-context": {
"timeout-limits": {
"taskTimeoutMillis": 120000
}
@@ -358,16 +356,14 @@ Use this to extract only the metadata you need.
----
{
"parsers": ["default-parser"],
- "other-configs": {
- "metadata-write-limiter-factory": {
- "standard-metadata-limiter-factory": {
- "maxTotalBytes": 1048576,
- "maxFieldSize": 102400,
- "maxKeySize": 1024,
- "maxValuesPerField": 100,
- "includeFields": ["dc:title", "dc:creator", "dc:subject"],
- "excludeFields": ["pdf:unmappedUnicodeCharsPerPage"]
- }
+ "parse-context": {
+ "standard-metadata-limiter-factory": {
+ "maxTotalBytes": 1048576,
+ "maxFieldSize": 102400,
+ "maxKeySize": 1024,
+ "maxValuesPerField": 100,
+ "includeFields": ["dc:title", "dc:creator", "dc:subject"],
+ "excludeFields": ["pdf:unmappedUnicodeCharsPerPage"]
}
}
}
diff --git a/docs/modules/ROOT/pages/advanced/spooling.adoc
b/docs/modules/ROOT/pages/advanced/spooling.adoc
index 81d3bb18e4..29b30bf297 100644
--- a/docs/modules/ROOT/pages/advanced/spooling.adoc
+++ b/docs/modules/ROOT/pages/advanced/spooling.adoc
@@ -159,12 +159,12 @@ The default spool types are:
=== JSON Configuration
SpoolingStrategy can be configured via JSON in your `tika-config.json` file.
-Place the configuration in the `other-configs` section:
+Place the configuration in the `parse-context` section:
[source,json]
----
{
- "other-configs": {
+ "parse-context": {
"spooling-strategy": {
"spoolTypes": [
"application/zip",
@@ -181,11 +181,8 @@ Load the configuration using `TikaLoader`:
[source,java]
----
TikaLoader loader = TikaLoader.load(Path.of("tika-config.json"));
-SpoolingStrategy strategy = loader.configs().load(SpoolingStrategy.class);
-
-// Add to parse context
-ParseContext context = new ParseContext();
-context.set(SpoolingStrategy.class, strategy);
+ParseContext context = loader.loadParseContext();
+// SpoolingStrategy is automatically loaded into the ParseContext
----
=== Best Practices
diff --git a/docs/modules/ROOT/pages/configuration/digesters.adoc
b/docs/modules/ROOT/pages/configuration/digesters.adoc
index f09deb8446..b3e66f1dad 100644
--- a/docs/modules/ROOT/pages/configuration/digesters.adoc
+++ b/docs/modules/ROOT/pages/configuration/digesters.adoc
@@ -33,7 +33,7 @@ Tika provides two digester implementations:
== JSON Configuration
-Configure digesters in the `other-configs.digester-factory` section of your tika-config.json.
+Configure digesters in the `parse-context` section of your tika-config.json.
=== Basic Example with CommonsDigester
@@ -43,15 +43,13 @@ This example configures multiple digest algorithms:
[source,json]
----
{
- "other-configs": {
- "digester-factory": {
- "commons-digester-factory": {
- "digests": [
- { "algorithm": "MD5" },
- { "algorithm": "SHA256" },
- { "algorithm": "SHA512" }
- ]
- }
+ "parse-context": {
+ "commons-digester-factory": {
+ "digests": [
+ { "algorithm": "MD5" },
+ { "algorithm": "SHA256" },
+ { "algorithm": "SHA512" }
+ ]
}
}
}
@@ -65,15 +63,13 @@ For SHA3 algorithms, use the BouncyCastle digester:
[source,json]
----
{
- "other-configs": {
- "digester-factory": {
- "bouncy-castle-digester-factory": {
- "digests": [
- { "algorithm": "MD5" },
- { "algorithm": "SHA256" },
- { "algorithm": "SHA3_512" }
- ]
- }
+ "parse-context": {
+ "bouncy-castle-digester-factory": {
+ "digests": [
+ { "algorithm": "MD5" },
+ { "algorithm": "SHA256" },
+ { "algorithm": "SHA3_512" }
+ ]
}
}
}
@@ -87,14 +83,12 @@ By default, digest values are encoded as lowercase
hexadecimal. You can specify
[source,json]
----
{
- "other-configs": {
- "digester-factory": {
- "commons-digester-factory": {
- "digests": [
- { "algorithm": "SHA256", "encoding": "BASE32" },
- { "algorithm": "MD5" }
- ]
- }
+ "parse-context": {
+ "commons-digester-factory": {
+ "digests": [
+ { "algorithm": "SHA256", "encoding": "BASE32" },
+ { "algorithm": "MD5" }
+ ]
}
}
}
@@ -112,14 +106,12 @@ to `true`:
[source,json]
----
{
- "other-configs": {
- "digester-factory": {
- "commons-digester-factory": {
- "digests": [
- { "algorithm": "MD5" }
- ],
- "skipContainerDocumentDigest": true
- }
+ "parse-context": {
+ "commons-digester-factory": {
+ "digests": [
+ { "algorithm": "MD5" }
+ ],
+ "skipContainerDocumentDigest": true
}
}
}
diff --git
a/tika-annotation-processor/src/main/java/org/apache/tika/annotation/TikaComponentProcessor.java
b/tika-annotation-processor/src/main/java/org/apache/tika/annotation/TikaComponentProcessor.java
index cd757664e8..bcf862c4c3 100644
---
a/tika-annotation-processor/src/main/java/org/apache/tika/annotation/TikaComponentProcessor.java
+++
b/tika-annotation-processor/src/main/java/org/apache/tika/annotation/TikaComponentProcessor.java
@@ -64,6 +64,10 @@ public class TikaComponentProcessor extends
AbstractProcessor {
/**
* Known Tika service interfaces for SPI generation.
* Only classes implementing these interfaces will have SPI files
generated.
+ * <p>
+ * Note: DigesterFactory and ContentHandlerFactory are NOT in this map
because
+ * they are parse-context components, not top-level service interfaces.
+ * Their implementations go to parse-context.idx instead.
*/
private static final Map<String, String> SERVICE_INTERFACES = new
LinkedHashMap<>();
@@ -76,11 +80,19 @@ public class TikaComponentProcessor extends
AbstractProcessor {
SERVICE_INTERFACES.put("org.apache.tika.language.translate.Translator",
"translators");
SERVICE_INTERFACES.put("org.apache.tika.renderer.Renderer",
"renderers");
SERVICE_INTERFACES.put("org.apache.tika.metadata.filter.MetadataFilter",
"metadata-filters");
- SERVICE_INTERFACES.put("org.apache.tika.digest.DigesterFactory",
"digester-factories");
- SERVICE_INTERFACES.put("org.apache.tika.sax.ContentHandlerFactory",
- "content-handler-factories");
}
+ /**
+ * Interfaces whose implementations should go to parse-context.idx.
+ * These are factory interfaces used via ParseContext, not loaded via SPI.
+ */
+ private static final Set<String> PARSE_CONTEXT_INTERFACES = Set.of(
+ "org.apache.tika.digest.DigesterFactory",
+ "org.apache.tika.sax.ContentHandlerFactory",
+ "org.apache.tika.metadata.writefilter.MetadataWriteLimiterFactory",
+ "org.apache.tika.extractor.EmbeddedDocumentExtractorFactory"
+ );
+
private Messager messager;
private Filer filer;
@@ -134,45 +146,60 @@ public class TikaComponentProcessor extends
AbstractProcessor {
// Get contextKey if specified (need to use mirror API for Class types)
String contextKey = getContextKeyFromAnnotation(element);
+ // Get defaultFor if specified (need to use mirror API for Class types)
+ String defaultFor = getDefaultForFromAnnotation(element);
+
messager.printMessage(Diagnostic.Kind.NOTE,
"Processing @TikaComponent: " + className + " -> " +
componentName +
- " (SPI: " + includeSpi + ", contextKey: " + contextKey + ")");
+ " (SPI: " + includeSpi + ", contextKey: " + contextKey +
+ ", defaultFor: " + defaultFor + ")");
- // Find all implemented service interfaces
+ // Find all implemented service interfaces (both SPI and parse-context)
List<String> serviceInterfaces = findServiceInterfaces(element);
+ List<String> parseContextInterfaces =
findParseContextInterfaces(element);
+
+ // Combine all interfaces for context key detection
+ List<String> allInterfaces = new ArrayList<>(serviceInterfaces);
+ allInterfaces.addAll(parseContextInterfaces);
- // Build the index entry value (className or className:key=X)
+ // Build the index entry value (className or className:key=X[:default])
// Auto-detect contextKey from service interface if not explicitly
specified
String indexValue = className;
if (contextKey != null) {
// Explicit contextKey specified
indexValue = className + ":key=" + contextKey;
- } else if (serviceInterfaces.size() == 1) {
- // Auto-detect contextKey from single service interface
- indexValue = className + ":key=" + serviceInterfaces.get(0);
+ } else if (allInterfaces.size() == 1) {
+ // Auto-detect contextKey from single interface
+ indexValue = className + ":key=" + allInterfaces.get(0);
messager.printMessage(Diagnostic.Kind.NOTE,
- "Auto-detected contextKey=" + serviceInterfaces.get(0) + "
for " + className);
- } else if (serviceInterfaces.size() > 1) {
+ "Auto-detected contextKey=" + allInterfaces.get(0) + " for
" + className);
+ } else if (allInterfaces.size() > 1) {
// Multiple interfaces - warn that contextKey should be specified
messager.printMessage(Diagnostic.Kind.WARNING,
- "Class " + className + " implements multiple service
interfaces: " +
- serviceInterfaces + ". Consider specifying
@TikaComponent(contextKey=...) " +
+ "Class " + className + " implements multiple interfaces: "
+
+ allInterfaces + ". Consider specifying
@TikaComponent(contextKey=...) " +
"to select which one to use as ParseContext key.",
element);
}
- if (serviceInterfaces.isEmpty()) {
- // No known service interface - put in other-configs.idx
+ // Add :default marker if defaultFor is specified
+ if (defaultFor != null) {
+ indexValue = indexValue + ":default";
+ }
+
+ // Check if this is a parse-context component (implements a
parse-context interface
+ // or doesn't implement any known service interface)
+ if (!parseContextInterfaces.isEmpty() || serviceInterfaces.isEmpty()) {
+ // Put in parse-context.idx
messager.printMessage(Diagnostic.Kind.NOTE,
- "Class " + className + " does not implement known service
interface, " +
- "adding to other-configs.idx", element);
+ "Class " + className + " is a parse-context component, " +
+ "adding to parse-context.idx", element);
- Map<String, String> index =
indexFiles.computeIfAbsent("other-configs",
+ Map<String, String> index =
indexFiles.computeIfAbsent("parse-context",
k -> new LinkedHashMap<>());
addToIndex(index, componentName, indexValue, className, element);
- return;
}
- // Process each service interface
+ // Process SPI service interfaces (these also get their own idx files)
for (String serviceInterface : serviceInterfaces) {
// Add to SPI services only if spi = true
if (includeSpi) {
@@ -216,17 +243,33 @@ public class TikaComponentProcessor extends
AbstractProcessor {
* Returns null if contextKey is void.class (the default).
*/
private String getContextKeyFromAnnotation(TypeElement element) {
+ return getClassAttributeFromAnnotation(element, "contextKey");
+ }
+
+ /**
+ * Gets the defaultFor value from the annotation using the mirror API.
+ * Returns null if defaultFor is void.class (the default).
+ */
+ private String getDefaultForFromAnnotation(TypeElement element) {
+ return getClassAttributeFromAnnotation(element, "defaultFor");
+ }
+
+ /**
+ * Gets a Class-typed attribute value from the annotation using the mirror
API.
+ * Returns null if the attribute is void.class (the default).
+ */
+ private String getClassAttributeFromAnnotation(TypeElement element, String
attributeName) {
for (AnnotationMirror mirror : element.getAnnotationMirrors()) {
DeclaredType annotationType = mirror.getAnnotationType();
if
(annotationType.toString().equals(TikaComponent.class.getName())) {
for (Map.Entry<? extends ExecutableElement, ? extends
AnnotationValue> entry
: mirror.getElementValues().entrySet()) {
- if
(entry.getKey().getSimpleName().toString().equals("contextKey")) {
+ if
(entry.getKey().getSimpleName().toString().equals(attributeName)) {
// The value is a TypeMirror for Class types
Object value = entry.getValue().getValue();
if (value instanceof TypeMirror) {
String typeName = value.toString();
- // void.class is the default, meaning "auto-detect"
+ // void.class is the default, meaning "not
specified"
if (!"void".equals(typeName) &&
!"java.lang.Void".equals(typeName)) {
return typeName;
}
@@ -244,15 +287,30 @@ public class TikaComponentProcessor extends
AbstractProcessor {
private List<String> findServiceInterfaces(TypeElement element) {
List<String> result = new ArrayList<>();
Set<String> visited = new LinkedHashSet<>();
- findServiceInterfacesRecursive(element.asType(), result, visited);
+ findInterfacesRecursive(element.asType(), result, visited,
SERVICE_INTERFACES.keySet());
+ return result;
+ }
+
+ /**
+ * Finds all parse-context interfaces implemented by the given type
element.
+ */
+ private List<String> findParseContextInterfaces(TypeElement element) {
+ List<String> result = new ArrayList<>();
+ Set<String> visited = new LinkedHashSet<>();
+ findInterfacesRecursive(element.asType(), result, visited,
PARSE_CONTEXT_INTERFACES);
return result;
}
/**
- * Recursively searches for service interfaces in the type hierarchy.
+ * Recursively searches for interfaces in the type hierarchy.
+ *
+ * @param type the type to search from
+ * @param result list to add found interfaces to
+ * @param visited set of already visited types (to avoid infinite loops)
+ * @param targetInterfaces the set of interface names to look for
*/
- private void findServiceInterfacesRecursive(TypeMirror type, List<String>
result,
- Set<String> visited) {
+ private void findInterfacesRecursive(TypeMirror type, List<String> result,
+ Set<String> visited, Set<String>
targetInterfaces) {
if (type == null || !(type instanceof DeclaredType)) {
return;
}
@@ -266,8 +324,8 @@ public class TikaComponentProcessor extends
AbstractProcessor {
return;
}
- // Check if this is a service interface
- if (SERVICE_INTERFACES.containsKey(typeName)) {
+ // Check if this is a target interface
+ if (targetInterfaces.contains(typeName)) {
if (!result.contains(typeName)) {
result.add(typeName);
}
@@ -275,11 +333,11 @@ public class TikaComponentProcessor extends
AbstractProcessor {
// Check superclass
TypeMirror superclass = typeElement.getSuperclass();
- findServiceInterfacesRecursive(superclass, result, visited);
+ findInterfacesRecursive(superclass, result, visited, targetInterfaces);
// Check interfaces
for (TypeMirror interfaceType : typeElement.getInterfaces()) {
- findServiceInterfacesRecursive(interfaceType, result, visited);
+ findInterfacesRecursive(interfaceType, result, visited,
targetInterfaces);
}
}
diff --git
a/tika-annotation-processor/src/main/java/org/apache/tika/config/TikaComponent.java
b/tika-annotation-processor/src/main/java/org/apache/tika/config/TikaComponent.java
index 69e42570b3..cce8466e20 100644
---
a/tika-annotation-processor/src/main/java/org/apache/tika/config/TikaComponent.java
+++
b/tika-annotation-processor/src/main/java/org/apache/tika/config/TikaComponent.java
@@ -58,6 +58,11 @@ import java.lang.annotation.Target;
* public class MyFilter implements MetadataFilter, AnotherInterface {
* // explicit ParseContext key when class implements multiple known
interfaces
* }
+ *
+ * {@code @TikaComponent(defaultFor = ContentHandlerFactory.class)}
+ * public class BasicContentHandlerFactory implements ContentHandlerFactory {
+ * // marks this as the default implementation for ContentHandlerFactory
+ * }
* </pre>
*
* @since 3.1.0
@@ -106,4 +111,24 @@ public @interface TikaComponent {
* @return the class to use as ParseContext key, or void.class for
auto-detection
*/
Class<?> contextKey() default void.class;
+
+ /**
+ * Marks this component as the default implementation for the specified interface.
+ * <p>
+ * When set, this component will be used as the default when loading a ParseContext
+ * with defaults (via {@code loadParseContextWithDefaults()}) and no explicit
+ * configuration is provided for the interface.
+ * <p>
+ * The specified class should be an interface that this component implements.
+ * For example:
+ * <pre>
+ * {@code @TikaComponent(defaultFor = ContentHandlerFactory.class)}
+ * public class BasicContentHandlerFactory implements ContentHandlerFactory {
+ * // This will be instantiated by default when no ContentHandlerFactory is configured
+ * }
+ * </pre>
+ *
+ * @return the interface this component is the default for, or void.class if not a default
+ */
+ Class<?> defaultFor() default void.class;
}
diff --git a/tika-core/src/main/java/org/apache/tika/config/EmbeddedLimits.java
b/tika-core/src/main/java/org/apache/tika/config/EmbeddedLimits.java
index 104e0fe8a0..74cde0dca1 100644
--- a/tika-core/src/main/java/org/apache/tika/config/EmbeddedLimits.java
+++ b/tika-core/src/main/java/org/apache/tika/config/EmbeddedLimits.java
@@ -54,7 +54,7 @@ import org.apache.tika.parser.ParseContext;
* Example configuration:
* <pre>
* {
- * "other-configs": {
+ * "parse-context": {
* "embedded-limits": {
* "maxDepth": 10,
* "throwOnMaxDepth": false,
diff --git a/tika-core/src/main/java/org/apache/tika/config/OutputLimits.java
b/tika-core/src/main/java/org/apache/tika/config/OutputLimits.java
index d33bc87432..4069b019b1 100644
--- a/tika-core/src/main/java/org/apache/tika/config/OutputLimits.java
+++ b/tika-core/src/main/java/org/apache/tika/config/OutputLimits.java
@@ -46,7 +46,7 @@ import org.apache.tika.parser.ParseContext;
* Example configuration:
* <pre>
* {
- * "other-configs": {
+ * "parse-context": {
* "output-limits": {
* "writeLimit": 100000,
* "throwOnWriteLimit": false,
diff --git a/tika-core/src/main/java/org/apache/tika/config/TimeoutLimits.java
b/tika-core/src/main/java/org/apache/tika/config/TimeoutLimits.java
index b43b98360c..c0dffbc423 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TimeoutLimits.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TimeoutLimits.java
@@ -43,7 +43,7 @@ import org.apache.tika.parser.ParseContext;
* Example configuration:
* <pre>
* {
- * "other-configs": {
+ * "parse-context": {
* "timeout-limits": {
* "taskTimeoutMillis": 120000
* }
diff --git a/tika-core/src/main/java/org/apache/tika/digest/DigestHelper.java
b/tika-core/src/main/java/org/apache/tika/digest/DigestHelper.java
index a06d8393cc..bbcd6b1242 100644
--- a/tika-core/src/main/java/org/apache/tika/digest/DigestHelper.java
+++ b/tika-core/src/main/java/org/apache/tika/digest/DigestHelper.java
@@ -33,14 +33,12 @@ import org.apache.tika.parser.ParseContext;
* Utility class for computing digests on streams.
* <p>
* The DigesterFactory is retrieved from ParseContext. Configure it via
- * the "other-configs" section in tika-config.json:
+ * the "parse-context" section in tika-config.json:
* <pre>
- * "other-configs": {
- * "digester-factory": {
- * "commons-digester-factory": {
- * "digests": [{ "algorithm": "SHA256" }],
- * "skipContainerDocumentDigest": true
- * }
+ * "parse-context": {
+ * "commons-digester-factory": {
+ * "digests": [{ "algorithm": "SHA256" }],
+ * "skipContainerDocumentDigest": true
* }
* }
* </pre>
diff --git
a/tika-core/src/main/java/org/apache/tika/digest/DigesterFactory.java
b/tika-core/src/main/java/org/apache/tika/digest/DigesterFactory.java
index 0c35d33c01..0a2fc05600 100644
--- a/tika-core/src/main/java/org/apache/tika/digest/DigesterFactory.java
+++ b/tika-core/src/main/java/org/apache/tika/digest/DigesterFactory.java
@@ -21,22 +21,20 @@ package org.apache.tika.digest;
* Implementations should be annotated with {@code @TikaComponent} and
* provide bean properties for configuration (e.g., digests).
* <p>
- * Configure this factory in the "other-configs" section of tika-config.json.
+ * Configure this factory in the "parse-context" section of tika-config.json.
* The factory is loaded into the ParseContext and used by AutoDetectParser
* during parsing to compute digests.
* <p>
* Example JSON configuration:
* <pre>
* {
- * "other-configs": {
- * "digester-factory": {
- * "commons-digester-factory": {
- * "digests": [
- * { "algorithm": "MD5" },
- * { "algorithm": "SHA256", "encoding": "BASE32" }
- * ],
- * "skipContainerDocumentDigest": true
- * }
+ * "parse-context": {
+ * "commons-digester-factory": {
+ * "digests": [
+ * { "algorithm": "MD5" },
+ * { "algorithm": "SHA256", "encoding": "BASE32" }
+ * ],
+ * "skipContainerDocumentDigest": true
* }
* }
* }
diff --git
a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
index 752c0c2e35..5205010f55 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
@@ -146,7 +146,7 @@ public class AutoDetectParser extends CompositeParser {
public void parse(TikaInputStream tis, ContentHandler handler, Metadata
metadata,
ParseContext context) throws IOException, SAXException,
TikaException {
// Compute digests before type detection if configured
- // DigesterFactory is retrieved from ParseContext (configured via
other-configs)
+ // DigesterFactory is retrieved from ParseContext (configured via
parse-context)
DigestHelper.maybeDigest(tis, metadata, context);
// Automatically detect the MIME type of the document
diff --git
a/tika-core/src/main/java/org/apache/tika/sax/BasicContentHandlerFactory.java
b/tika-core/src/main/java/org/apache/tika/sax/BasicContentHandlerFactory.java
index 4d62e1fdac..16195b9de5 100644
---
a/tika-core/src/main/java/org/apache/tika/sax/BasicContentHandlerFactory.java
+++
b/tika-core/src/main/java/org/apache/tika/sax/BasicContentHandlerFactory.java
@@ -36,7 +36,7 @@ import org.apache.tika.parser.ParseContext;
* Implements {@link StreamingContentHandlerFactory} to support both in-memory
* content extraction and streaming output to an OutputStream.
*/
-@TikaComponent
+@TikaComponent(defaultFor = ContentHandlerFactory.class)
public class BasicContentHandlerFactory implements
StreamingContentHandlerFactory, WriteLimiter {
private HANDLER_TYPE type = HANDLER_TYPE.TEXT;
diff --git a/tika-core/src/main/java/org/apache/tika/sax/SAXOutputConfig.java
b/tika-core/src/main/java/org/apache/tika/sax/SAXOutputConfig.java
index b85e1b1fd8..e748b89c22 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/SAXOutputConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/SAXOutputConfig.java
@@ -18,6 +18,8 @@ package org.apache.tika.sax;
import java.io.Serializable;
+import org.apache.tika.config.TikaComponent;
+
/**
* Configuration for SAX output behavior.
* <p>
@@ -25,6 +27,7 @@ import java.io.Serializable;
* how content handlers and embedded document extractors generate output.
* </p>
*/
+@TikaComponent(spi = false)
public class SAXOutputConfig implements Serializable {
private static final long serialVersionUID = 1L;
diff --git
a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java
b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java
index a21c32a77e..2885ff0020 100644
--- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java
+++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java
@@ -105,7 +105,7 @@ class TikaGrpcServerImpl extends TikaGrpc.TikaImplBase {
TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(configPath);
- // Load PipesConfig directly from root level (not from "other-configs")
+ // Load PipesConfig directly from root level (not from "parse-context")
pipesConfig = tikaJsonConfig.deserialize("pipes", PipesConfig.class);
if (pipesConfig == null) {
pipesConfig = new PipesConfig();
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/BouncyCastleDigesterFactory.java
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/BouncyCastleDigesterFactory.java
index d62e38e843..b5f4bf4e87 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/BouncyCastleDigesterFactory.java
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/BouncyCastleDigesterFactory.java
@@ -32,10 +32,10 @@ import org.apache.tika.digest.DigesterFactory;
* BouncyCastle supports additional algorithms beyond the standard Java ones,
* such as SHA3-256, SHA3-384, SHA3-512.
* <p>
- * Example JSON configuration (in other-configs section):
+ * Example JSON configuration (in parse-context section):
* <pre>
* {
- * "other-configs": {
+ * "parse-context": {
* "digester-factory": {
* "bouncy-castle-digester-factory": {
* "digests": [
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/CommonsDigesterFactory.java
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/CommonsDigesterFactory.java
index 5c0c81a54d..9f8399a52b 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/CommonsDigesterFactory.java
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/CommonsDigesterFactory.java
@@ -29,10 +29,10 @@ import org.apache.tika.digest.DigesterFactory;
* <p>
* Default: MD5 with HEX encoding.
* <p>
- * Example JSON configuration (in other-configs section):
+ * Example JSON configuration (in parse-context section):
* <pre>
* {
- * "other-configs": {
+ * "parse-context": {
* "digester-factory": {
* "commons-digester-factory": {
* "digests": [
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/AutoDetectParserConfigTest.java
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/AutoDetectParserConfigTest.java
index 0049b03f1f..1e21fbef25 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/AutoDetectParserConfigTest.java
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/AutoDetectParserConfigTest.java
@@ -31,7 +31,6 @@ import org.apache.tika.TikaTest;
import org.apache.tika.config.loader.TikaLoader;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.metadata.writefilter.MetadataWriteLimiterFactory;
public class AutoDetectParserConfigTest extends TikaTest {
@@ -84,9 +83,7 @@ public class AutoDetectParserConfigTest extends TikaTest {
public void testWriteFilter() throws Exception {
TikaLoader loader =
TikaLoaderHelper.getLoader("tika-config-write-filter.json");
Parser p = loader.loadAutoDetectParser();
- MetadataWriteLimiterFactory factory =
loader.configs().load(MetadataWriteLimiterFactory.class);
- ParseContext parseContext = new ParseContext();
- parseContext.set(MetadataWriteLimiterFactory.class, factory);
+ ParseContext parseContext = loader.loadParseContext();
Metadata metadata = Metadata.newInstance(parseContext);
List<Metadata> metadataList =
getRecursiveMetadata("testPPT_EmbeddedPDF.pptx", p,
metadata, parseContext, true);
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/digest/DigestConfigTest.java
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/digest/DigestConfigTest.java
index e5fa61735d..267c677716 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/digest/DigestConfigTest.java
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/digest/DigestConfigTest.java
@@ -105,7 +105,7 @@ public class DigestConfigTest extends TikaTest {
@Test
public void testCommonsDigesterSkipContainer() throws Exception {
- // Tests skipContainerDocumentDigest on the factory (configured in
other-configs)
+ // Tests skipContainerDocumentDigest on the factory (configured in
parse-context)
TikaLoader loader =
TikaLoaderHelper.getLoader("tika-config-digests-skip-container.json");
Parser p = loader.loadAutoDetectParser();
ParseContext context = loader.loadParseContext();
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/digest/SkipContainerDocumentDigestTest.java
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/digest/SkipContainerDocumentDigestTest.java
index 52904b6589..8b25189d89 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/digest/SkipContainerDocumentDigestTest.java
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/digest/SkipContainerDocumentDigestTest.java
@@ -35,7 +35,7 @@ import
org.apache.tika.parser.digestutils.CommonsDigesterFactory;
/**
* Tests for SkipContainerDocumentDigest functionality with MockParser and
embedded documents.
- * DigesterFactory is now configured via ParseContext (via other-configs in
JSON).
+ * DigesterFactory is now configured via ParseContext (via parse-context in
JSON).
*/
public class SkipContainerDocumentDigestTest extends TikaTest {
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4533.json
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4533.json
index f87df79434..e5a9c850b3 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4533.json
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4533.json
@@ -2,13 +2,11 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "commons-digester-factory": {
+ "parse-context": {
+ "commons-digester-factory": {
"digests": [
{ "algorithm": "SHA256" }
]
}
}
- }
}
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-base32.json
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-base32.json
index 25ffe85de7..e1ca5547cf 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-base32.json
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-base32.json
@@ -2,13 +2,11 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "bouncy-castle-digester-factory": {
+ "parse-context": {
+ "bouncy-castle-digester-factory": {
"digests": [
{ "algorithm": "SHA1", "encoding": "BASE32" }
]
}
}
- }
}
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-basic.json
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-basic.json
index 9098c8607f..78e69166de 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-basic.json
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-basic.json
@@ -2,9 +2,8 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "bouncy-castle-digester-factory": {
+ "parse-context": {
+ "bouncy-castle-digester-factory": {
"digests": [
{ "algorithm": "MD2" },
{ "algorithm": "MD5" },
@@ -15,5 +14,4 @@
]
}
}
- }
}
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-multiple.json
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-multiple.json
index c8d4c29aa5..3ea4238c33 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-multiple.json
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-multiple.json
@@ -2,9 +2,8 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "bouncy-castle-digester-factory": {
+ "parse-context": {
+ "bouncy-castle-digester-factory": {
"digests": [
{ "algorithm": "MD5" },
{ "algorithm": "SHA256" },
@@ -14,5 +13,4 @@
]
}
}
- }
}
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-commons-digests-basic.json
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-commons-digests-basic.json
index b0ae0326e9..6bbdb3f70f 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-commons-digests-basic.json
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-commons-digests-basic.json
@@ -2,9 +2,8 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "commons-digester-factory": {
+ "parse-context": {
+ "commons-digester-factory": {
"digests": [
{ "algorithm": "MD2" },
{ "algorithm": "MD5" },
@@ -15,5 +14,4 @@
]
}
}
- }
}
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json
index b14d19509b..6753b80a2a 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json
@@ -11,14 +11,12 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "commons-digester-factory": {
+ "parse-context": {
+ "commons-digester-factory": {
"digests": [
{ "algorithm": "SHA256", "encoding": "BASE32" },
{ "algorithm": "MD5" }
]
}
}
- }
}
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-skip-container.json
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-skip-container.json
index 23186720bc..30115de6f8 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-skip-container.json
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-skip-container.json
@@ -2,9 +2,8 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "commons-digester-factory": {
+ "parse-context": {
+ "commons-digester-factory": {
"digests": [
{ "algorithm": "SHA256", "encoding": "BASE32" },
{ "algorithm": "MD5" }
@@ -12,5 +11,4 @@
"skipContainerDocumentDigest": true
}
}
- }
}
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.json
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.json
index 0671621fc0..95cf6dd5fa 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.json
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.json
@@ -2,14 +2,12 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "commons-digester-factory": {
- "digests": [
- { "algorithm": "SHA256", "encoding": "BASE32" },
- { "algorithm": "MD5" }
- ]
- }
+ "parse-context": {
+ "commons-digester-factory": {
+ "digests": [
+ { "algorithm": "SHA256", "encoding": "BASE32" },
+ { "algorithm": "MD5" }
+ ]
}
}
}
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-md5-digest.json
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-md5-digest.json
index 3aa9e04375..7d922943b5 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-md5-digest.json
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-md5-digest.json
@@ -1,11 +1,9 @@
{
- "other-configs": {
- "digester-factory": {
- "commons-digester-factory": {
+ "parse-context": {
+ "commons-digester-factory": {
"digests": [
{ "algorithm": "MD5" }
]
}
}
- }
}
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json
index 33fcd5ffd7..feaa6f4494 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json
@@ -1,9 +1,7 @@
{
- "other-configs": {
- "embedded-document-extractor-factory": {
- "standard-extractor-factory": {
+ "parse-context": {
+ "standard-extractor-factory": {
"writeFileNameToContent": false
}
}
- }
}
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json
index 8e9b5b6012..66f81f80a7 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json
@@ -3,14 +3,10 @@
"contentHandlerDecoratorFactory":
"upcasing-content-handler-decorator-factory",
"throwOnZeroBytes": true
},
- "other-configs": {
- "digester-factory": {
- "commons-digester-factory": {}
- },
- "embedded-document-extractor-factory": {
- "standard-extractor-factory": {
- "writeFileNameToContent": true
- }
+ "parse-context": {
+ "commons-digester-factory": {},
+ "standard-extractor-factory": {
+ "writeFileNameToContent": true
}
}
}
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json
index 28f542245b..721ee36e35 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json
@@ -1,9 +1,7 @@
{
- "other-configs": {
- "embedded-document-extractor-factory": {
- "standard-extractor-factory": {
+ "parse-context": {
+ "standard-extractor-factory": {
"writeFileNameToContent": true
}
}
- }
}
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-write-filter.json
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-write-filter.json
index 3179f5aceb..1d96edb631 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-write-filter.json
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-write-filter.json
@@ -2,23 +2,19 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "commons-digester-factory": {
- "digests": [
- { "algorithm": "SHA256", "encoding": "BASE32" },
- { "algorithm": "MD5" }
- ],
- "skipContainerDocumentDigest": true
- }
+ "parse-context": {
+ "commons-digester-factory": {
+ "digests": [
+ { "algorithm": "SHA256", "encoding": "BASE32" },
+ { "algorithm": "MD5" }
+ ],
+ "skipContainerDocumentDigest": true
},
- "metadata-write-limiter-factory": {
- "standard-metadata-limiter-factory": {
- "includeFields": [
- "X-TIKA:content",
- "dc:creator"
- ]
- }
+ "standard-metadata-limiter-factory": {
+ "includeFields": [
+ "X-TIKA:content",
+ "dc:creator"
+ ]
}
}
}
diff --git
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
index 6e86502d2b..0b91c7a458 100644
---
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
+++
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
@@ -122,7 +122,7 @@ class ParseHandler {
private void _preParse(FetchEmitTuple t, TikaInputStream tis, Metadata
metadata,
ParseContext parseContext) {
- // Get DigesterFactory from ParseContext (configured via other-configs)
+ // Get DigesterFactory from ParseContext (configured via parse-context)
DigesterFactory digesterFactory =
parseContext.get(DigesterFactory.class);
if (digesterFactory != null &&
!digesterFactory.isSkipContainerDocumentDigest()) {
try {
diff --git
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
index d5a6c72497..4fb69bd2c3 100644
---
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
+++
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
@@ -180,7 +180,7 @@ public class PipesServer implements AutoCloseable {
MetadataFilter metadataFilter = tikaLoader.loadMetadataFilters();
ContentHandlerFactory contentHandlerFactory =
tikaLoader.loadContentHandlerFactory();
- MetadataWriteLimiterFactory metadataWriteLimiterFactory =
tikaLoader.configs().load(MetadataWriteLimiterFactory.class);
+ MetadataWriteLimiterFactory metadataWriteLimiterFactory =
tikaLoader.loadParseContext().get(MetadataWriteLimiterFactory.class);
PipesServer pipesServer = new PipesServer(pipesClientId,
tikaLoader, pipesConfig, socket, dis, dos, metadataFilter,
contentHandlerFactory, metadataWriteLimiterFactory);
pipesServer.initializeResources();
LOG.debug("pipesClientId={}: PipesServer loaded and ready",
pipesClientId);
diff --git
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-4533.json
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-4533.json
index b741ae8921..0f9e359070 100644
---
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-4533.json
+++
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-4533.json
@@ -2,18 +2,16 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
+ "parse-context": {
"output-limits": {
"zipBombRatio": 100,
"maxXmlDepth": 100,
"maxPackageEntryDepth": 100
},
- "digester-factory": {
- "commons-digester-factory": {
+ "commons-digester-factory": {
"digests": [
{ "algorithm": "SHA256" }
]
}
}
- }
}
diff --git
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json
index fd6bfa852c..0a8d40b1fa 100644
---
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json
+++
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json
@@ -46,10 +46,8 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "mock-digester-factory": {}
- }
- },
+ "parse-context": {
+ "mock-digester-factory": {}
+ },
"plugin-roots": "PLUGINS_PATHS"
}
diff --git
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-passback.json
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-passback.json
index c55fd2a026..3cbbab6950 100644
---
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-passback.json
+++
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-passback.json
@@ -45,10 +45,8 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "mock-digester-factory": {}
- }
- },
+ "parse-context": {
+ "mock-digester-factory": {}
+ },
"plugin-roots": "PLUGINS_PATHS"
}
diff --git
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json
index d8acd13939..88b4cc1978 100644
---
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json
+++
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json
@@ -46,15 +46,11 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "mock-digester-factory": {}
- },
- "embedded-document-extractor-factory": {
- "runpack-extractor-factory": {
- "writeFileNameToContent": false,
- "maxEmbeddedBytesForExtraction": 10
- }
+ "parse-context": {
+ "mock-digester-factory": {},
+ "runpack-extractor-factory": {
+ "writeFileNameToContent": false,
+ "maxEmbeddedBytesForExtraction": 10
}
},
"plugin-roots": "PLUGINS_PATHS"
diff --git
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-uppercasing.json
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-uppercasing.json
index a7549f9385..2dd4c0c31b 100644
---
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-uppercasing.json
+++
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-uppercasing.json
@@ -42,10 +42,8 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "mock-digester-factory": {}
- }
- },
+ "parse-context": {
+ "mock-digester-factory": {}
+ },
"plugin-roots": "PLUGINS_PATHS"
}
diff --git
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-write-limiter.json
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-write-limiter.json
index 0e2a8e85ab..dbfbcd4f31 100644
---
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-write-limiter.json
+++
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-write-limiter.json
@@ -46,15 +46,13 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "metadata-write-limiter-factory": {
- "standard-metadata-limiter-factory": {
- "includeFields": ["dc:creator", "Content-Type", "X-TIKA:content"],
- "maxKeySize": 100,
- "maxFieldSize": 1000,
- "maxTotalBytes": 10000,
- "maxValuesPerField": 5
- }
+ "parse-context": {
+ "standard-metadata-limiter-factory": {
+ "includeFields": ["dc:creator", "Content-Type", "X-TIKA:content"],
+ "maxKeySize": 100,
+ "maxFieldSize": 1000,
+ "maxTotalBytes": 10000,
+ "maxValuesPerField": 5
}
},
"plugin-roots": "PLUGINS_PATHS"
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInfo.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInfo.java
index fa96517aa5..5dbad73518 100644
---
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInfo.java
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInfo.java
@@ -24,16 +24,25 @@ package org.apache.tika.config.loader;
* (reads its own config from ParseContext's
jsonConfigs)
* @param contextKey the class to use as the key when adding to ParseContext,
* or null to auto-detect based on known interfaces
+ * @param isDefault whether this component is the default implementation for
its contextKey
*/
public record ComponentInfo(
Class<?> componentClass,
boolean selfConfiguring,
- Class<?> contextKey
+ Class<?> contextKey,
+ boolean isDefault
) {
/**
- * Creates a ComponentInfo with no explicit context key (auto-detect).
+ * Creates a ComponentInfo with no explicit context key (auto-detect) and
not default.
*/
public ComponentInfo(Class<?> componentClass, boolean selfConfiguring) {
- this(componentClass, selfConfiguring, null);
+ this(componentClass, selfConfiguring, null, false);
+ }
+
+ /**
+ * Creates a ComponentInfo with explicit context key but not default.
+ */
+ public ComponentInfo(Class<?> componentClass, boolean selfConfiguring,
Class<?> contextKey) {
+ this(componentClass, selfConfiguring, contextKey, false);
}
}
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentRegistry.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentRegistry.java
index cbd9b932b8..576dc2e88e 100644
---
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentRegistry.java
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentRegistry.java
@@ -122,6 +122,21 @@ public class ComponentRegistry {
return Collections.unmodifiableMap(components);
}
+ /**
+ * Returns all components marked as defaults.
+ *
+ * @return unmodifiable map of component names to component info for
default implementations
+ */
+ public Map<String, ComponentInfo> getDefaultComponents() {
+ Map<String, ComponentInfo> defaults = new LinkedHashMap<>();
+ for (Map.Entry<String, ComponentInfo> entry : components.entrySet()) {
+ if (entry.getValue().isDefault()) {
+ defaults.put(entry.getKey(), entry.getValue());
+ }
+ }
+ return Collections.unmodifiableMap(defaults);
+ }
+
/**
* Checks if a component with the given name is registered.
*
@@ -201,20 +216,28 @@ public class ComponentRegistry {
": name or class is empty");
}
- // Parse value: className or className:key=contextKeyClass
+ // Parse value: className[:key=contextKeyClass][:default]
String className = value;
String contextKeyClassName = null;
+ boolean isDefault = false;
+ // Parse suffixes (e.g., :key=SomeClass:default)
int colonIndex = value.indexOf(':');
if (colonIndex != -1) {
className = value.substring(0, colonIndex);
- String suffix = value.substring(colonIndex + 1);
- if (suffix.startsWith("key=")) {
- contextKeyClassName = suffix.substring(4);
- } else {
- throw new TikaConfigException(
- "Invalid index file format at " + url + " line
" + lineNumber +
- ": unknown suffix '" + suffix + "', expected
'key=...'");
+ String suffixes = value.substring(colonIndex + 1);
+
+ // Parse each colon-separated suffix
+ for (String suffix : suffixes.split(":")) {
+ if (suffix.startsWith("key=")) {
+ contextKeyClassName = suffix.substring(4);
+ } else if (suffix.equals("default")) {
+ isDefault = true;
+ } else if (!suffix.isEmpty()) {
+ throw new TikaConfigException(
+ "Invalid index file format at " + url + "
line " + lineNumber +
+ ": unknown suffix '" + suffix + "',
expected 'key=...' or 'default'");
+ }
}
}
@@ -235,7 +258,7 @@ public class ComponentRegistry {
}
}
- result.put(name, new ComponentInfo(clazz, selfConfiguring,
contextKey));
+ result.put(name, new ComponentInfo(clazz, selfConfiguring,
contextKey, isDefault));
} catch (ClassNotFoundException e) {
throw new TikaConfigException(
"Component class not found: " + className + "
(from " + url + ")", e);
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ConfigLoader.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ConfigLoader.java
index e262bd6412..1b494b398a 100644
---
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ConfigLoader.java
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ConfigLoader.java
@@ -26,11 +26,11 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.tika.exception.TikaConfigException;
/**
- * Loader for custom configuration objects from the "other-configs" section.
+ * Loader for configuration objects from the "parse-context" section.
* <p>
- * This class handles custom POJOs and test configurations that are not part of
- * Tika's official configuration schema. All configurations loaded via
ConfigLoader
- * must be placed under the "other-configs" top-level node in the JSON.
+ * This class handles ParseContext components and configuration POJOs that are
loaded
+ * into a ParseContext for use during parsing. All configurations loaded via
ConfigLoader
+ * must be placed under the "parse-context" top-level node in the JSON.
* <p>
* For official Tika components and configurations (parsers, detectors, async,
server, etc.),
* use the specific methods on {@link TikaLoader} or load directly from {@link
TikaJsonConfig}.
@@ -55,14 +55,17 @@ import org.apache.tika.exception.TikaConfigException;
* "pipes": {...},
* "server": {...},
*
- * // Custom configs MUST be in "other-configs" (loaded via configs())
- * "other-configs": {
- * "my-config": {
- * "timeout": 5000,
- * "retries": 3
+ * // ParseContext configs in "parse-context" (loaded via configs())
+ * "parse-context": {
+ * "embedded-limits": {
+ * "maxDepth": 10,
+ * "maxCount": 1000
* },
- * "my-custom-config": {
- * "enabled": true
+ * "output-limits": {
+ * "writeLimit": 100000
+ * },
+ * "commons-digester-factory": {
+ * "digests": [{ "algorithm": "MD5" }, { "algorithm": "SHA256" }]
* }
* }
* }
@@ -277,16 +280,16 @@ public class ConfigLoader {
}
/**
- * Gets a node by key from the "other-configs".
+ * Gets a node by key from the "parse-context" section.
*
* @param key The JSON key to look for
* @return the node, or null if not found
*/
private JsonNode getNode(String key) {
- JsonNode otherConfigs = config.getRootNode().get("other-configs");
- if (otherConfigs != null && otherConfigs.isObject()) {
- return otherConfigs.get(key);
+ JsonNode parseContext = config.getRootNode().get("parse-context");
+ if (parseContext != null && parseContext.isObject()) {
+ return parseContext.get(key);
}
return null;
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
index df536e159f..344faa66c7 100644
---
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
@@ -343,12 +343,10 @@ public class TikaJsonConfig {
}
/**
- * Validates that all top-level configuration keys are known or custom
extensions.
+ * Validates that all top-level configuration keys are known.
* <p>
* This catches typos like "parser" instead of "parsers" or
"pipes-reporter"
* instead of "pipes-reporters".
- * <p>
- * The "other-configs" node is allowed for custom configurations.
*
* @throws TikaConfigException if unknown keys are found
*/
@@ -363,11 +361,6 @@ public class TikaJsonConfig {
while (fieldNames.hasNext()) {
String key = fieldNames.next();
- // Ignore custom configs node
- if (key.equals("other-configs")) {
- continue;
- }
-
// Must be a known key
if (!KNOWN_KEYS.contains(key)) {
unknownKeys.add(key);
@@ -377,8 +370,7 @@ public class TikaJsonConfig {
if (!unknownKeys.isEmpty()) {
throw new TikaConfigException(
"Unknown configuration key(s): " + unknownKeys + ". " +
- "Valid keys: " + KNOWN_KEYS + " " +
- "(or use 'other-configs' node for custom keys)");
+ "Valid keys: " + KNOWN_KEYS);
}
}
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
index 0277a82e85..36b0f69325 100644
---
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
@@ -31,23 +31,17 @@ import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
-import org.apache.tika.config.EmbeddedLimits;
import org.apache.tika.config.GlobalSettings;
-import org.apache.tika.config.OutputLimits;
-import org.apache.tika.config.TimeoutLimits;
import org.apache.tika.detect.CompositeDetector;
import org.apache.tika.detect.CompositeEncodingDetector;
import org.apache.tika.detect.Detector;
import org.apache.tika.detect.EncodingDetector;
-import org.apache.tika.digest.DigesterFactory;
import org.apache.tika.exception.TikaConfigException;
-import org.apache.tika.extractor.EmbeddedDocumentExtractorFactory;
import org.apache.tika.language.translate.DefaultTranslator;
import org.apache.tika.language.translate.Translator;
import org.apache.tika.metadata.filter.CompositeMetadataFilter;
import org.apache.tika.metadata.filter.MetadataFilter;
import org.apache.tika.metadata.filter.NoOpFilter;
-import org.apache.tika.metadata.writefilter.MetadataWriteLimiterFactory;
import org.apache.tika.mime.MediaTypeRegistry;
import org.apache.tika.mime.MimeTypes;
import org.apache.tika.parser.AutoDetectParser;
@@ -59,11 +53,12 @@ import org.apache.tika.renderer.CompositeRenderer;
import org.apache.tika.renderer.Renderer;
import org.apache.tika.sax.BasicContentHandlerFactory;
import org.apache.tika.sax.ContentHandlerFactory;
-import org.apache.tika.sax.SAXOutputConfig;
import org.apache.tika.serialization.ComponentConfig;
import org.apache.tika.serialization.ComponentNameResolver;
import org.apache.tika.serialization.JsonMetadata;
import org.apache.tika.serialization.JsonMetadataList;
+import org.apache.tika.serialization.ParseContextUtils;
+import org.apache.tika.serialization.serdes.ParseContextDeserializer;
/**
* Main entry point for loading Tika components from JSON configuration.
@@ -374,7 +369,7 @@ public class TikaLoader {
*/
public synchronized Parser loadAutoDetectParser() throws
TikaConfigException, IOException {
if (autoDetectParser == null) {
- // Load directly from root-level config (not via configs() which
only looks in "other-configs")
+ // Load directly from root-level config (not via configs() which
only looks in "parse-context")
AutoDetectParserConfig adpConfig = loadAutoDetectParserConfig();
if (adpConfig == null) {
adpConfig = new AutoDetectParserConfig();
@@ -385,13 +380,11 @@ public class TikaLoader {
}
/**
- * Loads and returns a ParseContext populated with components from the
"other-configs" section.
+ * Loads and returns a ParseContext populated with components from the
"parse-context" section.
* <p>
- * This method loads components that should be passed via ParseContext,
such as:
- * <ul>
- * <li>DigesterFactory (from "digester-factory")</li>
- * <li>MetadataWriteLimiterFactory (from
"metadata-write-limiter-factory")</li>
- * </ul>
+ * This method deserializes the parse-context JSON and resolves all
component references
+ * using the component registry. Components are looked up by their
friendly names
+ * (e.g., "embedded-limits", "pdf-parser-config") and deserialized to
their appropriate types.
* <p>
* Use this method when you need a pre-configured ParseContext for parsing
operations.
*
@@ -408,40 +401,79 @@ public class TikaLoader {
* @throws TikaConfigException if loading fails
*/
public ParseContext loadParseContext() throws TikaConfigException {
- ParseContext context = new ParseContext();
- loadOne(DigesterFactory.class, context);
- loadOne(MetadataWriteLimiterFactory.class, context);
- loadOne(EmbeddedDocumentExtractorFactory.class, context);
- loadOne(EmbeddedLimits.class, context);
- loadOne(OutputLimits.class, context);
- loadOne(TimeoutLimits.class, context);
- loadOne(SAXOutputConfig.class, context);
- return context;
- }
-
- private <T> void loadOne(Class<T> clazz, ParseContext context) throws
TikaConfigException {
- T instnce = configs().load(clazz);
- if (instnce != null) {
- context.set(clazz, instnce);
+ JsonNode parseContextNode = config.getRootNode().get("parse-context");
+ if (parseContextNode == null) {
+ return new ParseContext();
+ }
+ try {
+ ParseContext context =
+
ParseContextDeserializer.readParseContext(parseContextNode, objectMapper);
+ ParseContextUtils.resolveAll(context, classLoader);
+ return context;
+ } catch (IOException e) {
+ throw new TikaConfigException("Failed to load parse-context", e);
}
}
/**
- * Returns a ConfigLoader for loading simple configuration objects.
+ * Loads a configuration object from the "parse-context" section, merging
with defaults.
+ * <p>
+ * This method is useful when you have a base configuration (e.g., from
code defaults or
+ * a previous load) and want to overlay values from the JSON config.
Properties not
+ * specified in the JSON retain their default values.
* <p>
- * Use this for POJOs and simple config classes. For complex components
like
- * Parsers, Detectors, etc., use the specific load methods on TikaLoader.
+ * The original defaults object is NOT modified - a new instance is
returned.
*
- * <p>Usage:
+ * <p>Example usage for PDFParserConfig:
* <pre>
- * MyConfig config = loader.configs().load("my-config", MyConfig.class);
- * // Or use kebab-case auto-conversion:
- * MyConfig config = loader.configs().load(MyConfig.class);
+ * // Load base config from tika-config.json at init time
+ * TikaLoader loader = TikaLoader.load(configPath);
+ * PDFParserConfig baseConfig = loader.loadConfig(PDFParserConfig.class,
new PDFParserConfig());
+ *
+ * // At runtime, create per-request overrides
+ * PDFParserConfig requestConfig = new PDFParserConfig();
+ * requestConfig.setOcrStrategy(PDFParserConfig.OCR_STRATEGY.NO_OCR);
+ *
+ * // Merging base config values with request overrides is a separate step:
+ * // for runtime merging, use JsonMergeUtils directly or loadConfig on a runtime loader.
* </pre>
*
+ * @param clazz the class to deserialize into
+ * @param defaults the default values to use for properties not in the
JSON config
+ * @param <T> the configuration type
+ * @return a new instance with defaults merged with JSON config, or the
original defaults if not configured
+ * @throws TikaConfigException if loading fails
+ */
+ public <T> T loadConfig(Class<T> clazz, T defaults) throws
TikaConfigException {
+ return configs().loadWithDefaults(clazz, defaults);
+ }
+
+ /**
+ * Loads a configuration object from the "parse-context" section by
explicit key, merging with defaults.
+ * <p>
+ * This method is useful when the JSON key doesn't match the class name's
kebab-case conversion,
+ * or when you want to load from a specific key.
+ *
+ * @param key the JSON key in the "parse-context" section
+ * @param clazz the class to deserialize into
+ * @param defaults the default values to use for properties not in the
JSON config
+ * @param <T> the configuration type
+ * @return a new instance with defaults merged with JSON config, or the
original defaults if not configured
+ * @throws TikaConfigException if loading fails
+ */
+ public <T> T loadConfig(String key, Class<T> clazz, T defaults) throws
TikaConfigException {
+ return configs().loadWithDefaults(key, clazz, defaults);
+ }
+
+ /**
+ * Returns a ConfigLoader for loading simple configuration objects.
+ * <p>
+ * This is internal - external code should use {@link #loadParseContext()}
or
+ * {@link #loadConfig(Class, Object)} instead.
+ *
* @return the ConfigLoader instance
*/
- public synchronized ConfigLoader configs() {
+ private synchronized ConfigLoader configs() {
if (configLoader == null) {
configLoader = new ConfigLoader(config, objectMapper);
}
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaObjectMapperFactory.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaObjectMapperFactory.java
index e65eee07b9..e832dc8d4b 100644
---
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaObjectMapperFactory.java
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaObjectMapperFactory.java
@@ -50,7 +50,7 @@ public class TikaObjectMapperFactory {
"translators",
"digester-factories",
"content-handler-factories",
- "other-configs"
+ "parse-context"
};
private static ObjectMapper MAPPER = null;
diff --git
a/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextUtils.java
b/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextUtils.java
index 10fe4ac927..30826befa5 100644
---
a/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextUtils.java
+++
b/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextUtils.java
@@ -101,7 +101,7 @@ public class ParseContextUtils {
}
// First, process known array configs (e.g., "metadata-filters")
- // These don't depend on the other-configs registry
+ // These don't depend on the parse-context registry
for (String friendlyName : new ArrayList<>(jsonConfigs.keySet())) {
if (ARRAY_CONFIGS.containsKey(friendlyName)) {
JsonConfig jsonConfig = jsonConfigs.get(friendlyName);
@@ -112,7 +112,7 @@ public class ParseContextUtils {
}
// Then, try to resolve single component configs using
ComponentNameResolver
- // This searches all registered component registries, not just
"other-configs"
+ // This searches all registered component registries, not just
"parse-context"
for (Map.Entry<String, JsonConfig> entry : jsonConfigs.entrySet()) {
String friendlyName = entry.getKey();
JsonConfig jsonConfig = entry.getValue();
diff --git
a/tika-serialization/src/main/java/org/apache/tika/serialization/serdes/ParseContextDeserializer.java
b/tika-serialization/src/main/java/org/apache/tika/serialization/serdes/ParseContextDeserializer.java
index 2e2b4dec7d..3cc05f3272 100644
---
a/tika-serialization/src/main/java/org/apache/tika/serialization/serdes/ParseContextDeserializer.java
+++
b/tika-serialization/src/main/java/org/apache/tika/serialization/serdes/ParseContextDeserializer.java
@@ -20,7 +20,9 @@ import static
org.apache.tika.serialization.serdes.ParseContextSerializer.PARSE_
import static
org.apache.tika.serialization.serdes.ParseContextSerializer.TYPED;
import java.io.IOException;
+import java.util.HashMap;
import java.util.Iterator;
+import java.util.Map;
import java.util.Optional;
import com.fasterxml.jackson.core.JsonParser;
@@ -75,11 +77,15 @@ public class ParseContextDeserializer extends
JsonDeserializer<ParseContext> {
* <p>
* The "typed" section is deserialized directly to typed objects in the
context map.
* All other fields are stored as JSON config strings for lazy resolution.
+ * <p>
+ * Duplicate detection is performed within a single document: if multiple
entries
+ * resolve to the same context key (e.g., both "bouncy-castle-digester" and
+ * "commons-digester" resolve to DigesterFactory), an IOException is
thrown.
*
* @param jsonNode the JSON node containing the ParseContext data
* @param mapper the ObjectMapper for deserializing typed objects
* @return the deserialized ParseContext
- * @throws IOException if deserialization fails
+ * @throws IOException if deserialization fails or duplicate context keys
are detected
*/
public static ParseContext readParseContext(JsonNode jsonNode,
ObjectMapper mapper)
throws IOException {
@@ -95,6 +101,10 @@ public class ParseContextDeserializer extends
JsonDeserializer<ParseContext> {
return parseContext;
}
+ // Track context keys to detect duplicates within this document
+ // Maps contextKey -> friendlyName for error messages
+ Map<Class<?>, String> seenContextKeys = new HashMap<>();
+
Iterator<String> fieldNames = contextNode.fieldNames();
while (fieldNames.hasNext()) {
String name = fieldNames.next();
@@ -102,8 +112,11 @@ public class ParseContextDeserializer extends
JsonDeserializer<ParseContext> {
if (TYPED.equals(name)) {
// Deserialize typed objects directly to context map
- deserializeTypedObjects(value, parseContext, mapper);
+ deserializeTypedObjects(value, parseContext, mapper,
seenContextKeys);
} else {
+ // Check for duplicate context key before storing
+ checkForDuplicateContextKey(name, seenContextKeys);
+
// Store as JSON config for lazy resolution
// Use plain JSON mapper since the main mapper may be binary
(Smile)
String json = JSON_MAPPER.writeValueAsString(value);
@@ -114,12 +127,49 @@ public class ParseContextDeserializer extends
JsonDeserializer<ParseContext> {
return parseContext;
}
+ /**
+ * Checks if a JSON config entry would create a duplicate context key.
+ * <p>
+ * Looks up the friendly name in the component registry to determine its
context key,
+ * then checks if that key has already been seen in this document.
+ *
+ * @param friendlyName the friendly name of the config entry
+ * @param seenContextKeys map of already-seen context keys to their
friendly names
+ * @throws IOException if a duplicate context key is detected
+ */
+ private static void checkForDuplicateContextKey(String friendlyName,
+ Map<Class<?>, String>
seenContextKeys)
+ throws IOException {
+ Optional<ComponentInfo> infoOpt =
ComponentNameResolver.getComponentInfo(friendlyName);
+ if (infoOpt.isEmpty()) {
+ // Not a registered component, so its context key is unknown; skip the duplicate check
+ return;
+ }
+
+ ComponentInfo info = infoOpt.get();
+ Class<?> contextKey = info.contextKey() != null ? info.contextKey() :
info.componentClass();
+
+ String existingName = seenContextKeys.get(contextKey);
+ if (existingName != null) {
+ throw new IOException("Duplicate parse-context entries resolve to
the same key " +
+ contextKey.getName() + ": '" + existingName + "' and '" +
friendlyName + "'");
+ }
+ seenContextKeys.put(contextKey, friendlyName);
+ }
+
/**
* Deserializes the "typed" section into typed objects in the context map.
+ *
+ * @param typedNode the JSON node containing typed objects
+ * @param parseContext the ParseContext to add objects to
+ * @param mapper the ObjectMapper for deserializing
+ * @param seenContextKeys map tracking context keys to their friendly
names (for duplicate detection)
+ * @throws IOException if deserialization fails or duplicate context keys
are detected
*/
@SuppressWarnings("unchecked")
private static void deserializeTypedObjects(JsonNode typedNode,
ParseContext parseContext,
- ObjectMapper mapper) throws
IOException {
+ ObjectMapper mapper,
+ Map<Class<?>, String>
seenContextKeys) throws IOException {
if (!typedNode.isObject()) {
return;
}
@@ -158,6 +208,14 @@ public class ParseContextDeserializer extends
JsonDeserializer<ParseContext> {
// Use contextKey if available, otherwise use the config class
itself
Class<?> parseContextKey = (contextKeyClass != null) ?
contextKeyClass : configClass;
+ // Check for duplicate context key
+ String existingName = seenContextKeys.get(parseContextKey);
+ if (existingName != null) {
+ throw new IOException("Duplicate parse-context entries resolve
to the same key " +
+ parseContextKey.getName() + ": '" + existingName + "'
and '" + componentName + "'");
+ }
+ seenContextKeys.put(parseContextKey, componentName);
+
// Deserialize and add to context
try {
Object config = mapper.treeToValue(configNode, configClass);
diff --git
a/tika-serialization/src/main/java/org/apache/tika/serialization/serdes/ParseContextSerializer.java
b/tika-serialization/src/main/java/org/apache/tika/serialization/serdes/ParseContextSerializer.java
index cf73ad8129..643d76f5c2 100644
---
a/tika-serialization/src/main/java/org/apache/tika/serialization/serdes/ParseContextSerializer.java
+++
b/tika-serialization/src/main/java/org/apache/tika/serialization/serdes/ParseContextSerializer.java
@@ -17,7 +17,9 @@
package org.apache.tika.serialization.serdes;
import java.io.IOException;
+import java.util.HashSet;
import java.util.Map;
+import java.util.Set;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonSerializer;
@@ -62,6 +64,10 @@ public class ParseContextSerializer extends
JsonSerializer<ParseContext> {
SerializerProvider serializers) throws IOException {
gen.writeStartObject();
+ // Track which friendly names have been serialized under "typed"
+ // so we can skip them when serializing jsonConfigs (avoid duplicates)
+ Set<String> serializedNames = new HashSet<>();
+
// First, serialize typed objects from the context map under "typed"
key
Map<String, Object> contextMap = parseContext.getContextMap();
boolean hasTypedObjects = false;
@@ -94,6 +100,9 @@ public class ParseContextSerializer extends
JsonSerializer<ParseContext> {
// Use writeTree instead of writeRawValue for binary format
support (e.g., Smile)
// and stricter validation (fails early if value can't be
serialized)
gen.writeTree(PLAIN_MAPPER.valueToTree(value));
+
+ // Track this name so we skip it in jsonConfigs
+ serializedNames.add(keyName);
}
if (hasTypedObjects) {
@@ -101,8 +110,13 @@ public class ParseContextSerializer extends
JsonSerializer<ParseContext> {
}
// Then, serialize JSON configs at the top level
+ // Skip entries that were already serialized under "typed" (they've
been resolved)
Map<String, JsonConfig> jsonConfigs = parseContext.getJsonConfigs();
for (Map.Entry<String, JsonConfig> entry : jsonConfigs.entrySet()) {
+ if (serializedNames.contains(entry.getKey())) {
+ // Already serialized under "typed", skip to avoid duplicate
+ continue;
+ }
gen.writeFieldName(entry.getKey());
// Parse the JSON string into a tree for binary format support
gen.writeTree(PLAIN_MAPPER.readTree(entry.getValue().json()));
diff --git
a/tika-serialization/src/test/java/org/apache/tika/config/AllLimitsTest.java
b/tika-serialization/src/test/java/org/apache/tika/config/AllLimitsTest.java
index 6ab03f828a..b509d03bab 100644
--- a/tika-serialization/src/test/java/org/apache/tika/config/AllLimitsTest.java
+++ b/tika-serialization/src/test/java/org/apache/tika/config/AllLimitsTest.java
@@ -31,13 +31,13 @@ import org.apache.tika.parser.ParseContext;
* Tests loading all limit configurations from a single tika-config.json file.
* <p>
* This test demonstrates how to configure all limits in one place using
- * the "other-configs" section of the JSON configuration.
+ * the "parse-context" section of the JSON configuration.
* <p>
* Configuration file: configs/all-limits-test.json
* <pre>
* {
* "parsers": ["default-parser"],
- * "other-configs": {
+ * "parse-context": {
* "embedded-limits": {
* "maxDepth": 10,
* "throwOnMaxDepth": false,
@@ -55,13 +55,11 @@ import org.apache.tika.parser.ParseContext;
* "timeout-limits": {
* "taskTimeoutMillis": 60000
* },
- * "metadata-write-limiter-factory": {
- * "standard-metadata-limiter-factory": {
- * "maxTotalBytes": 1048576,
- * "maxFieldSize": 102400,
- * "maxKeySize": 1024,
- * "maxValuesPerField": 100
- * }
+ * "standard-metadata-limiter-factory": {
+ * "maxTotalBytes": 1048576,
+ * "maxFieldSize": 102400,
+ * "maxKeySize": 1024,
+ * "maxValuesPerField": 100
* }
* }
* }
@@ -107,17 +105,18 @@ public class AllLimitsTest extends TikaTest {
@Test
public void testLoadIndividualLimits() throws Exception {
TikaLoader loader = TikaLoader.load(getConfigPath(getClass(),
"all-limits-test.json"));
+ ParseContext context = loader.loadParseContext();
- // Load individual limit configs directly
- EmbeddedLimits embeddedLimits =
loader.configs().load(EmbeddedLimits.class);
+ // Load individual limit configs from ParseContext
+ EmbeddedLimits embeddedLimits = context.get(EmbeddedLimits.class);
assertNotNull(embeddedLimits);
assertEquals(10, embeddedLimits.getMaxDepth());
- OutputLimits outputLimits = loader.configs().load(OutputLimits.class);
+ OutputLimits outputLimits = context.get(OutputLimits.class);
assertNotNull(outputLimits);
assertEquals(100000, outputLimits.getWriteLimit());
- TimeoutLimits timeoutLimits =
loader.configs().load(TimeoutLimits.class);
+ TimeoutLimits timeoutLimits = context.get(TimeoutLimits.class);
assertNotNull(timeoutLimits);
assertEquals(60000, timeoutLimits.getTaskTimeoutMillis());
}
diff --git
a/tika-serialization/src/test/java/org/apache/tika/config/EmbeddedLimitsTest.java
b/tika-serialization/src/test/java/org/apache/tika/config/EmbeddedLimitsTest.java
index f362271fb3..4bb4873701 100644
---
a/tika-serialization/src/test/java/org/apache/tika/config/EmbeddedLimitsTest.java
+++
b/tika-serialization/src/test/java/org/apache/tika/config/EmbeddedLimitsTest.java
@@ -32,7 +32,8 @@ public class EmbeddedLimitsTest extends TikaTest {
@Test
public void testLoadFromConfig() throws Exception {
TikaLoader loader = TikaLoader.load(getConfigPath(getClass(),
"embedded-limits-test.json"));
- EmbeddedLimits limits = loader.configs().load(EmbeddedLimits.class);
+ ParseContext context = loader.loadParseContext();
+ EmbeddedLimits limits = context.get(EmbeddedLimits.class);
assertNotNull(limits);
assertEquals(5, limits.getMaxDepth());
diff --git
a/tika-serialization/src/test/java/org/apache/tika/config/OutputLimitsTest.java
b/tika-serialization/src/test/java/org/apache/tika/config/OutputLimitsTest.java
index 6e543a085d..41e23cd570 100644
---
a/tika-serialization/src/test/java/org/apache/tika/config/OutputLimitsTest.java
+++
b/tika-serialization/src/test/java/org/apache/tika/config/OutputLimitsTest.java
@@ -32,7 +32,8 @@ public class OutputLimitsTest extends TikaTest {
@Test
public void testLoadFromConfig() throws Exception {
TikaLoader loader = TikaLoader.load(getConfigPath(getClass(),
"output-limits-test.json"));
- OutputLimits limits = loader.configs().load(OutputLimits.class);
+ ParseContext context = loader.loadParseContext();
+ OutputLimits limits = context.get(OutputLimits.class);
assertNotNull(limits);
assertEquals(50000, limits.getWriteLimit());
diff --git
a/tika-serialization/src/test/java/org/apache/tika/config/TimeoutLimitsTest.java
b/tika-serialization/src/test/java/org/apache/tika/config/TimeoutLimitsTest.java
index 24c15af7ab..4217349134 100644
---
a/tika-serialization/src/test/java/org/apache/tika/config/TimeoutLimitsTest.java
+++
b/tika-serialization/src/test/java/org/apache/tika/config/TimeoutLimitsTest.java
@@ -32,7 +32,8 @@ public class TimeoutLimitsTest extends TikaTest {
@Test
public void testLoadFromConfig() throws Exception {
TikaLoader loader = TikaLoader.load(getConfigPath(getClass(),
"timeout-limits-test.json"));
- TimeoutLimits limits = loader.configs().load(TimeoutLimits.class);
+ ParseContext context = loader.loadParseContext();
+ TimeoutLimits limits = context.get(TimeoutLimits.class);
assertNotNull(limits);
assertEquals(120000, limits.getTaskTimeoutMillis());
diff --git
a/tika-serialization/src/test/java/org/apache/tika/config/loader/ConfigLoaderTest.java
b/tika-serialization/src/test/java/org/apache/tika/config/loader/ConfigLoaderTest.java
index 12695472c8..5efa4136ac 100644
---
a/tika-serialization/src/test/java/org/apache/tika/config/loader/ConfigLoaderTest.java
+++
b/tika-serialization/src/test/java/org/apache/tika/config/loader/ConfigLoaderTest.java
@@ -27,6 +27,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
import java.nio.file.Path;
import java.nio.file.Paths;
+import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@@ -39,15 +40,16 @@ import org.apache.tika.mime.MediaType;
*/
public class ConfigLoaderTest {
- private TikaLoader tikaLoader;
+ private TikaJsonConfig tikaJsonConfig;
private ConfigLoader configLoader;
@BeforeEach
public void setUp() throws Exception {
Path configPath = Paths.get(
getClass().getResource("/configs/test-config-loader.json").toURI());
- tikaLoader = TikaLoader.load(configPath);
- configLoader = tikaLoader.configs();
+ tikaJsonConfig = TikaJsonConfig.load(configPath);
+ ObjectMapper objectMapper = TikaObjectMapperFactory.getMapper();
+ configLoader = new ConfigLoader(tikaJsonConfig, objectMapper);
}
// ==================== Test POJOs ====================
@@ -286,10 +288,11 @@ public class ConfigLoaderTest {
// because Jackson can't instantiate interfaces directly
Path configPath = Paths.get(
getClass().getResource("/configs/test-interface-no-type.json").toURI());
- TikaLoader loader = TikaLoader.load(configPath);
+ TikaJsonConfig config = TikaJsonConfig.load(configPath);
+ ConfigLoader loader = new ConfigLoader(config,
TikaObjectMapperFactory.getMapper());
TikaConfigException ex = assertThrows(TikaConfigException.class, () ->
- loader.configs().load("handler-no-type", TestHandler.class));
+ loader.load("handler-no-type", TestHandler.class));
assertTrue(ex.getMessage().contains("Failed to deserialize"));
}
@@ -337,10 +340,11 @@ public class ConfigLoaderTest {
public void testLoadInvalidClassName() throws Exception {
Path configPath = Paths.get(
getClass().getResource("/configs/test-invalid-class.json").toURI());
- TikaLoader loader = TikaLoader.load(configPath);
+ TikaJsonConfig config = TikaJsonConfig.load(configPath);
+ ConfigLoader loader = new ConfigLoader(config,
TikaObjectMapperFactory.getMapper());
TikaConfigException ex = assertThrows(TikaConfigException.class, () ->
- loader.configs().load("handler", TestHandler.class));
+ loader.load("handler", TestHandler.class));
assertTrue(ex.getMessage().contains("Class not found"));
}
@@ -350,10 +354,11 @@ public class ConfigLoaderTest {
// String class name that doesn't implement the interface
Path configPath = Paths.get(
getClass().getResource("/configs/test-wrong-type.json").toURI());
- TikaLoader loader = TikaLoader.load(configPath);
+ TikaJsonConfig config = TikaJsonConfig.load(configPath);
+ ConfigLoader loader = new ConfigLoader(config,
TikaObjectMapperFactory.getMapper());
TikaConfigException ex = assertThrows(TikaConfigException.class, () ->
- loader.configs().load("handler", TestHandler.class));
+ loader.load("handler", TestHandler.class));
assertTrue(ex.getMessage().contains("not assignable"));
}
@@ -363,10 +368,11 @@ public class ConfigLoaderTest {
// Verify that unexpected/unrecognized fields cause an exception
Path configPath = Paths.get(
getClass().getResource("/configs/test-unexpected-field.json").toURI());
- TikaLoader loader = TikaLoader.load(configPath);
+ TikaJsonConfig config = TikaJsonConfig.load(configPath);
+ ConfigLoader loader = new ConfigLoader(config,
TikaObjectMapperFactory.getMapper());
TikaConfigException ex = assertThrows(TikaConfigException.class, () ->
- loader.configs().load("retry-config", RetryConfig.class));
+ loader.load("retry-config", RetryConfig.class));
// Should contain information about the unrecognized field
assertTrue(ex.getMessage().contains("retry-config") ||
@@ -407,7 +413,8 @@ public class ConfigLoaderTest {
// Load config that merges defaults with partial JSON
Path configPath = Paths.get(
getClass().getResource("/configs/test-partial-config.json").toURI());
- TikaLoader loader = TikaLoader.load(configPath);
+ TikaJsonConfig config = TikaJsonConfig.load(configPath);
+ ConfigLoader loader = new ConfigLoader(config,
TikaObjectMapperFactory.getMapper());
// Set up defaults
RetryConfig defaults = new RetryConfig();
@@ -416,14 +423,14 @@ public class ConfigLoaderTest {
defaults.setEnabled(false);
// JSON only has: { "enabled": true }
- RetryConfig config = loader.configs().loadWithDefaults("retry-config",
+ RetryConfig result = loader.loadWithDefaults("retry-config",
RetryConfig.class,
defaults);
- assertNotNull(config);
- assertEquals(30000, config.getTimeout()); // ✅ From defaults
- assertEquals(2, config.getRetries()); // ✅ From defaults
- assertTrue(config.isEnabled()); // ✅ From JSON (overridden)
+ assertNotNull(result);
+ assertEquals(30000, result.getTimeout()); // ✅ From defaults
+ assertEquals(2, result.getRetries()); // ✅ From defaults
+ assertTrue(result.isEnabled()); // ✅ From JSON (overridden)
}
@Test
@@ -431,7 +438,8 @@ public class ConfigLoaderTest {
// Test that JSON can override all defaults
Path configPath = Paths.get(
getClass().getResource("/configs/test-partial-config.json").toURI());
- TikaLoader loader = TikaLoader.load(configPath);
+ TikaJsonConfig config = TikaJsonConfig.load(configPath);
+ ConfigLoader loader = new ConfigLoader(config,
TikaObjectMapperFactory.getMapper());
RetryConfig defaults = new RetryConfig();
defaults.setTimeout(30000);
@@ -439,14 +447,14 @@ public class ConfigLoaderTest {
defaults.setEnabled(false);
// JSON has: { "timeout": 10000, "retries": 5, "enabled": false }
- RetryConfig config =
loader.configs().loadWithDefaults("retry-config-full",
+ RetryConfig result = loader.loadWithDefaults("retry-config-full",
RetryConfig.class,
defaults);
- assertNotNull(config);
- assertEquals(10000, config.getTimeout()); // All overridden
- assertEquals(5, config.getRetries());
- assertFalse(config.isEnabled());
+ assertNotNull(result);
+ assertEquals(10000, result.getTimeout()); // All overridden
+ assertEquals(5, result.getRetries());
+ assertFalse(result.isEnabled());
}
@Test
@@ -472,7 +480,8 @@ public class ConfigLoaderTest {
// Test the class-name version
Path configPath = Paths.get(
getClass().getResource("/configs/test-partial-config.json").toURI());
- TikaLoader loader = TikaLoader.load(configPath);
+ TikaJsonConfig config = TikaJsonConfig.load(configPath);
+ ConfigLoader loader = new ConfigLoader(config,
TikaObjectMapperFactory.getMapper());
RetryConfig defaults = new RetryConfig();
defaults.setTimeout(30000);
@@ -480,12 +489,12 @@ public class ConfigLoaderTest {
defaults.setEnabled(false);
// Uses kebab-case: RetryConfig -> "retry-config"
- RetryConfig config =
loader.configs().loadWithDefaults(RetryConfig.class, defaults);
+ RetryConfig result = loader.loadWithDefaults(RetryConfig.class,
defaults);
- assertNotNull(config);
- assertEquals(30000, config.getTimeout());
- assertEquals(2, config.getRetries());
- assertTrue(config.isEnabled()); // Overridden from JSON
+ assertNotNull(result);
+ assertEquals(30000, result.getTimeout());
+ assertEquals(2, result.getRetries());
+ assertTrue(result.isEnabled()); // Overridden from JSON
}
@Test
@@ -493,7 +502,8 @@ public class ConfigLoaderTest {
// Demonstrate difference between load() and loadWithDefaults()
Path configPath = Paths.get(
getClass().getResource("/configs/test-partial-config.json").toURI());
- TikaLoader loader = TikaLoader.load(configPath);
+ TikaJsonConfig config = TikaJsonConfig.load(configPath);
+ ConfigLoader loader = new ConfigLoader(config,
TikaObjectMapperFactory.getMapper());
RetryConfig defaults = new RetryConfig();
defaults.setTimeout(30000);
@@ -501,13 +511,13 @@ public class ConfigLoaderTest {
defaults.setEnabled(false);
// Using load() - creates new object, loses defaults
- RetryConfig config1 = loader.configs().load("retry-config",
RetryConfig.class);
+ RetryConfig config1 = loader.load("retry-config", RetryConfig.class);
assertEquals(0, config1.getTimeout()); // ❌ Lost default!
assertEquals(0, config1.getRetries()); // ❌ Lost default!
assertTrue(config1.isEnabled()); // ✅ From JSON
// Using loadWithDefaults() - merges into defaults
- RetryConfig config2 = loader.configs().loadWithDefaults("retry-config",
+ RetryConfig config2 = loader.loadWithDefaults("retry-config",
RetryConfig.class,
defaults);
assertEquals(30000, config2.getTimeout()); // ✅ Kept default!
@@ -522,7 +532,8 @@ public class ConfigLoaderTest {
// Verify that the original defaults object is NOT modified
Path configPath = Paths.get(
getClass().getResource("/configs/test-partial-config.json").toURI());
- TikaLoader loader = TikaLoader.load(configPath);
+ TikaJsonConfig config = TikaJsonConfig.load(configPath);
+ ConfigLoader loader = new ConfigLoader(config,
TikaObjectMapperFactory.getMapper());
RetryConfig defaults = new RetryConfig();
defaults.setTimeout(30000);
@@ -530,7 +541,7 @@ public class ConfigLoaderTest {
defaults.setEnabled(false);
// Load config with partial override (JSON only has "enabled": true)
- RetryConfig result = loader.configs().loadWithDefaults("retry-config",
+ RetryConfig result = loader.loadWithDefaults("retry-config",
RetryConfig.class,
defaults);
@@ -555,7 +566,8 @@ public class ConfigLoaderTest {
// Verify defaults can be safely reused for multiple loads
Path configPath = Paths.get(
getClass().getResource("/configs/test-partial-config.json").toURI());
- TikaLoader loader = TikaLoader.load(configPath);
+ TikaJsonConfig config = TikaJsonConfig.load(configPath);
+ ConfigLoader loader = new ConfigLoader(config,
TikaObjectMapperFactory.getMapper());
RetryConfig defaults = new RetryConfig();
defaults.setTimeout(30000);
@@ -563,10 +575,10 @@ public class ConfigLoaderTest {
defaults.setEnabled(false);
// Load multiple times with same defaults
- RetryConfig config1 = loader.configs().loadWithDefaults("retry-config",
+ RetryConfig config1 = loader.loadWithDefaults("retry-config",
RetryConfig.class,
defaults);
- RetryConfig config2 =
loader.configs().loadWithDefaults("retry-config-full",
+ RetryConfig config2 = loader.loadWithDefaults("retry-config-full",
RetryConfig.class,
defaults);
@@ -580,7 +592,7 @@ public class ConfigLoaderTest {
assertFalse(defaults.isEnabled());
// Use defaults one more time
- RetryConfig config3 = loader.configs().loadWithDefaults("non-existent",
+ RetryConfig config3 = loader.loadWithDefaults("non-existent",
RetryConfig.class,
defaults);
assertEquals(defaults, config3); // Should return original when key
missing
@@ -591,13 +603,14 @@ public class ConfigLoaderTest {
// Test with nested/complex objects to ensure deep copy works
Path configPath = Paths.get(
getClass().getResource("/configs/test-partial-config.json").toURI());
- TikaLoader loader = TikaLoader.load(configPath);
+ TikaJsonConfig config = TikaJsonConfig.load(configPath);
+ ConfigLoader loader = new ConfigLoader(config,
TikaObjectMapperFactory.getMapper());
TikaTaskTimeout defaults = new TikaTaskTimeout();
defaults.setMillis(60000);
// Note: tika-task-timeout in JSON has millis: 30000
- TikaTaskTimeout result =
loader.configs().loadWithDefaults("tika-task-timeout",
+ TikaTaskTimeout result = loader.loadWithDefaults("tika-task-timeout",
TikaTaskTimeout.class,
defaults);
@@ -632,7 +645,8 @@ public class ConfigLoaderTest {
// Demonstrate that defaults can be safely shared across threads
Path configPath = Paths.get(
getClass().getResource("/configs/test-partial-config.json").toURI());
- TikaLoader loader = TikaLoader.load(configPath);
+ TikaJsonConfig config = TikaJsonConfig.load(configPath);
+ ConfigLoader loader = new ConfigLoader(config,
TikaObjectMapperFactory.getMapper());
// Shared defaults object
RetryConfig sharedDefaults = new RetryConfig();
@@ -641,10 +655,10 @@ public class ConfigLoaderTest {
sharedDefaults.setEnabled(false);
// Simulate concurrent usage (not a real concurrency test, just
demonstrates safety)
- RetryConfig result1 = loader.configs().loadWithDefaults("retry-config",
+ RetryConfig result1 = loader.loadWithDefaults("retry-config",
RetryConfig.class,
sharedDefaults);
- RetryConfig result2 =
loader.configs().loadWithDefaults("retry-config-full",
+ RetryConfig result2 = loader.loadWithDefaults("retry-config-full",
RetryConfig.class,
sharedDefaults);
diff --git
a/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
b/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
index 5b8e44c788..403464d9c4 100644
---
a/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
+++
b/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
@@ -30,6 +30,7 @@ import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.xml.sax.helpers.DefaultHandler;
+import org.apache.tika.config.EmbeddedLimits;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
@@ -322,6 +323,52 @@ public class TikaLoaderTest {
"Should NOT support application/test+optin (opt-in only, not
in SPI)");
}
+ @Test
+ public void testLoadConfigWithDefaults() throws Exception {
+ // Test the loadConfig method that merges JSON config with defaults
+ URL configUrl =
getClass().getResource("/configs/embedded-limits-test.json");
+ Path configPath = Path.of(configUrl.toURI());
+
+ TikaLoader loader = TikaLoader.load(configPath);
+
+ // Create defaults - some values will be overridden by JSON, others
kept
+ EmbeddedLimits defaults = new EmbeddedLimits();
+ // Default values from EmbeddedLimits: maxDepth=UNLIMITED,
maxCount=UNLIMITED, throwOnMax*=false
+
+ // Load with defaults - JSON has: maxDepth=5, throwOnMaxDepth=true,
maxCount=100, throwOnMaxCount=false
+ EmbeddedLimits config = loader.loadConfig(EmbeddedLimits.class,
defaults);
+
+ assertNotNull(config, "Config should not be null");
+ assertEquals(5, config.getMaxDepth(), "maxDepth should be from JSON");
+ assertTrue(config.isThrowOnMaxDepth(), "throwOnMaxDepth should be from
JSON");
+ assertEquals(100, config.getMaxCount(), "maxCount should be from
JSON");
+ assertFalse(config.isThrowOnMaxCount(), "throwOnMaxCount should be
from JSON");
+
+ // Verify original defaults object was NOT modified
+ assertEquals(EmbeddedLimits.UNLIMITED, defaults.getMaxDepth(),
"Original defaults should be unchanged");
+ }
+
+ @Test
+ public void testLoadConfigMissingKeyReturnsDefaults() throws Exception {
+ // Test that loadConfig returns defaults when key is not in config
+ URL configUrl =
getClass().getResource("/configs/test-loader-config.json");
+ Path configPath = Path.of(configUrl.toURI());
+
+ TikaLoader loader = TikaLoader.load(configPath);
+
+ // Create defaults
+ EmbeddedLimits defaults = new EmbeddedLimits(10, true, 500, false);
+
+ // Load with defaults - this config doesn't have embedded-limits
+ EmbeddedLimits config = loader.loadConfig(EmbeddedLimits.class,
defaults);
+
+ // Should return the defaults since key is missing
+ assertEquals(10, config.getMaxDepth(), "Should return defaults when
key missing");
+ assertTrue(config.isThrowOnMaxDepth(), "Should return defaults when
key missing");
+ assertEquals(500, config.getMaxCount(), "Should return defaults when
key missing");
+ assertFalse(config.isThrowOnMaxCount(), "Should return defaults when
key missing");
+ }
+
// TODO: TIKA-SERIALIZATION-FOLLOWUP - Jackson may need configuration to
fail on unknown properties
@Disabled("TIKA-SERIALIZATION-FOLLOWUP")
@Test
diff --git
a/tika-serialization/src/test/java/org/apache/tika/metadata/writefilter/StandardMetadataLimiterTest.java
b/tika-serialization/src/test/java/org/apache/tika/metadata/writefilter/StandardMetadataLimiterTest.java
index c23e2b1045..5edfc80ea2 100644
---
a/tika-serialization/src/test/java/org/apache/tika/metadata/writefilter/StandardMetadataLimiterTest.java
+++
b/tika-serialization/src/test/java/org/apache/tika/metadata/writefilter/StandardMetadataLimiterTest.java
@@ -51,7 +51,8 @@ public class StandardMetadataLimiterTest extends TikaTest {
public void testMetadataFactoryConfig() throws Exception {
TikaLoader loader = TikaLoader.load(getConfigPath(getClass(),
"TIKA-3695.json"));
AutoDetectParser parser = (AutoDetectParser)
loader.loadAutoDetectParser();
- MetadataWriteLimiterFactory factory =
loader.configs().load(MetadataWriteLimiterFactory.class);
+ ParseContext context = loader.loadParseContext();
+ MetadataWriteLimiterFactory factory =
context.get(MetadataWriteLimiterFactory.class);
assertEquals(330, ((StandardMetadataLimiterFactory)
factory).getMaxTotalBytes());
assertFalse(((StandardMetadataLimiterFactory)
factory).getIncludeFields().isEmpty(),
"includeFields should not be empty");
@@ -84,7 +85,8 @@ public class StandardMetadataLimiterTest extends TikaTest {
public void testMetadataFactoryFieldsConfig() throws Exception {
TikaLoader loader = TikaLoader.load(getConfigPath(getClass(),
"TIKA-3695-fields.json"));
AutoDetectParser parser = (AutoDetectParser)
loader.loadAutoDetectParser();
- MetadataWriteLimiterFactory factory =
loader.configs().load(MetadataWriteLimiterFactory.class);
+ ParseContext context = loader.loadParseContext();
+ MetadataWriteLimiterFactory factory =
context.get(MetadataWriteLimiterFactory.class);
assertEquals(421, ((StandardMetadataLimiterFactory)
factory).getMaxTotalBytes());
assertEquals(999, ((StandardMetadataLimiterFactory)
factory).getMaxKeySize());
assertEquals(10001, ((StandardMetadataLimiterFactory)
factory).getMaxFieldSize());
@@ -285,9 +287,7 @@ public class StandardMetadataLimiterTest extends TikaTest {
public void testExclude() throws Exception {
TikaLoader loader = TikaLoader.load(getConfigPath(getClass(),
"TIKA-3695-exclude.json"));
Parser parser = loader.loadAutoDetectParser();
- MetadataWriteLimiterFactory factory =
loader.configs().load(MetadataWriteLimiterFactory.class);
- ParseContext parseContext = new ParseContext();
- parseContext.set(MetadataWriteLimiterFactory.class, factory);
+ ParseContext parseContext = loader.loadParseContext();
String mock = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" +
"<mock>";
mock += "<metadata action=\"add\"
name=\"dc:creator\">01234567890123456789</metadata>";
diff --git
a/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java
b/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java
index 1df1567073..f22ae013ce 100644
---
a/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java
+++
b/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java
@@ -19,6 +19,7 @@ package org.apache.tika.serialization;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.StringWriter;
@@ -349,7 +350,7 @@ public class TestParseContextSerialization {
* Test that BasicContentHandlerFactory can be configured via JSON,
serialized,
* deserialized, and resolved via ParseContextUtils.resolveAll().
* This verifies the fix for TIKA-4582 where ContentHandlerFactory was not
being
- * resolved because it wasn't in the "other-configs" registry.
+ * resolved because it wasn't in the "parse-context" registry.
*/
@Test
public void testContentHandlerFactoryRoundTrip() throws Exception {
@@ -429,4 +430,67 @@ public class TestParseContextSerialization {
assertEquals(10000, basicFactory.getWriteLimit());
assertFalse(basicFactory.isThrowOnWriteLimitReached());
}
+
+ /**
+ * Test that duplicate context keys within a single JSON document are
detected and rejected.
+ * Both BasicContentHandlerFactory and UppercasingContentHandlerFactory
resolve to
+ * ContentHandlerFactory.class as their context key, so configuring both
should fail.
+ */
+ @Test
+ public void testDuplicateContextKeyDetection() throws Exception {
+ // Both of these resolve to ContentHandlerFactory.class as the context
key
+ String json = """
+ {
+ "basic-content-handler-factory": {
+ "type": "XML",
+ "writeLimit": 50000
+ },
+ "uppercasing-content-handler-factory": {}
+ }
+ """;
+
+ ObjectMapper mapper = createMapper();
+
+ // Should throw an exception due to duplicate context key
+ Exception ex = assertThrows(Exception.class, () ->
+ mapper.readValue(json, ParseContext.class));
+
+ // Verify the error message mentions the duplicate
+ assertTrue(ex.getMessage().contains("Duplicate") ||
+ (ex.getCause() != null &&
ex.getCause().getMessage().contains("Duplicate")),
+ "Exception should mention duplicate context key: " +
ex.getMessage());
+ assertTrue(ex.getMessage().contains("ContentHandlerFactory") ||
+ (ex.getCause() != null &&
ex.getCause().getMessage().contains("ContentHandlerFactory")),
+ "Exception should mention the conflicting key: " +
ex.getMessage());
+ }
+
+ /**
+ * Test that a single component per context key is allowed (no false
positives).
+ */
+ @Test
+ public void testNoDuplicateWhenDifferentContextKeys() throws Exception {
+ // These have different context keys, so both should be allowed
+ String json = """
+ {
+ "basic-content-handler-factory": {
+ "type": "TEXT",
+ "writeLimit": 10000
+ },
+ "skip-embedded-document-selector": {}
+ }
+ """;
+
+ ObjectMapper mapper = createMapper();
+ ParseContext deserialized = mapper.readValue(json, ParseContext.class);
+
+ // Both should be present as JSON configs
+
assertTrue(deserialized.hasJsonConfig("basic-content-handler-factory"));
+
assertTrue(deserialized.hasJsonConfig("skip-embedded-document-selector"));
+
+ // Resolve and verify both work
+ ParseContextUtils.resolveAll(deserialized,
Thread.currentThread().getContextClassLoader());
+
+ assertNotNull(deserialized.get(ContentHandlerFactory.class));
+ assertNotNull(deserialized.get(DocumentSelector.class));
+ }
}
diff --git
a/tika-serialization/src/test/resources/configs/TIKA-3695-exclude.json
b/tika-serialization/src/test/resources/configs/TIKA-3695-exclude.json
index 8dfe51784b..10d5ed9a37 100644
--- a/tika-serialization/src/test/resources/configs/TIKA-3695-exclude.json
+++ b/tika-serialization/src/test/resources/configs/TIKA-3695-exclude.json
@@ -2,11 +2,9 @@
"parsers": [
"default-parser"
],
- "other-configs": {
- "metadata-write-limiter-factory": {
- "standard-metadata-limiter-factory": {
- "excludeFields": ["subject"]
- }
+ "parse-context": {
+ "standard-metadata-limiter-factory": {
+ "excludeFields": ["subject"]
}
}
}
diff --git
a/tika-serialization/src/test/resources/configs/TIKA-3695-fields.json
b/tika-serialization/src/test/resources/configs/TIKA-3695-fields.json
index 7e408b8aba..4216706846 100644
--- a/tika-serialization/src/test/resources/configs/TIKA-3695-fields.json
+++ b/tika-serialization/src/test/resources/configs/TIKA-3695-fields.json
@@ -2,17 +2,15 @@
"parsers": [
"default-parser"
],
- "other-configs": {
- "metadata-write-limiter-factory": {
- "standard-metadata-limiter-factory": {
- "maxKeySize": 999,
- "maxFieldSize": 10001,
- // maxTotalBytes accounts for ~244 bytes of ALWAYS_SET_FIELDS overhead
- // (Content-Type, X-TIKA:Parsed-By, etc.) plus room for dc:title and 3
dc:creator values
- "maxTotalBytes": 421,
- "maxValuesPerField": 100,
- "includeFields": ["dc:creator", "dc:title"]
- }
+ "parse-context": {
+ "standard-metadata-limiter-factory": {
+ "maxKeySize": 999,
+ "maxFieldSize": 10001,
+ // maxTotalBytes accounts for ~244 bytes of ALWAYS_SET_FIELDS overhead
+ // (Content-Type, X-TIKA:Parsed-By, etc.) plus room for dc:title and 3
dc:creator values
+ "maxTotalBytes": 421,
+ "maxValuesPerField": 100,
+ "includeFields": ["dc:creator", "dc:title"]
}
}
}
diff --git a/tika-serialization/src/test/resources/configs/TIKA-3695.json
b/tika-serialization/src/test/resources/configs/TIKA-3695.json
index ef95f8003b..a4b785fbec 100644
--- a/tika-serialization/src/test/resources/configs/TIKA-3695.json
+++ b/tika-serialization/src/test/resources/configs/TIKA-3695.json
@@ -2,14 +2,12 @@
"parsers": [
"default-parser"
],
- "other-configs": {
- "metadata-write-limiter-factory": {
- "standard-metadata-limiter-factory": {
- // maxTotalBytes accounts for ~244 bytes of ALWAYS_SET_FIELDS overhead
- // (Content-Type, X-TIKA:Parsed-By, etc.) plus room for 2 dc:creator
values
- "maxTotalBytes": 330,
- "includeFields": ["dc:creator"]
- }
+ "parse-context": {
+ "standard-metadata-limiter-factory": {
+ // maxTotalBytes accounts for ~244 bytes of ALWAYS_SET_FIELDS overhead
+ // (Content-Type, X-TIKA:Parsed-By, etc.) plus room for 2 dc:creator
values
+ "maxTotalBytes": 330,
+ "includeFields": ["dc:creator"]
}
}
}
diff --git a/tika-serialization/src/test/resources/configs/all-limits-test.json
b/tika-serialization/src/test/resources/configs/all-limits-test.json
index 40faa856ec..daaf6be58f 100644
--- a/tika-serialization/src/test/resources/configs/all-limits-test.json
+++ b/tika-serialization/src/test/resources/configs/all-limits-test.json
@@ -2,7 +2,7 @@
"parsers": [
"default-parser"
],
- "other-configs": {
+ "parse-context": {
"embedded-limits": {
"maxDepth": 10,
"throwOnMaxDepth": false,
@@ -20,13 +20,11 @@
"timeout-limits": {
"taskTimeoutMillis": 60000
},
- "metadata-write-limiter-factory": {
- "standard-metadata-limiter-factory": {
- "maxTotalBytes": 1048576,
- "maxFieldSize": 102400,
- "maxKeySize": 1024,
- "maxValuesPerField": 100
- }
+ "standard-metadata-limiter-factory": {
+ "maxTotalBytes": 1048576,
+ "maxFieldSize": 102400,
+ "maxKeySize": 1024,
+ "maxValuesPerField": 100
}
}
}
diff --git
a/tika-serialization/src/test/resources/configs/embedded-limits-test.json
b/tika-serialization/src/test/resources/configs/embedded-limits-test.json
index 14ce20d2be..a17c722cdb 100644
--- a/tika-serialization/src/test/resources/configs/embedded-limits-test.json
+++ b/tika-serialization/src/test/resources/configs/embedded-limits-test.json
@@ -1,5 +1,5 @@
{
- "other-configs": {
+ "parse-context": {
"embedded-limits": {
"maxDepth": 5,
"throwOnMaxDepth": true,
diff --git
a/tika-serialization/src/test/resources/configs/output-limits-test.json
b/tika-serialization/src/test/resources/configs/output-limits-test.json
index ccd9fa0840..fdf95dbf6c 100644
--- a/tika-serialization/src/test/resources/configs/output-limits-test.json
+++ b/tika-serialization/src/test/resources/configs/output-limits-test.json
@@ -1,5 +1,5 @@
{
- "other-configs": {
+ "parse-context": {
"output-limits": {
"writeLimit": 50000,
"throwOnWriteLimit": true,
diff --git
a/tika-serialization/src/test/resources/configs/test-config-loader.json
b/tika-serialization/src/test/resources/configs/test-config-loader.json
index c5c24254eb..2dc8eb30a8 100644
--- a/tika-serialization/src/test/resources/configs/test-config-loader.json
+++ b/tika-serialization/src/test/resources/configs/test-config-loader.json
@@ -3,7 +3,7 @@
{"pdf-parser": {}}
],
- "other-configs": {
+ "parse-context": {
"retry-config": {
"timeout": 5000,
"retries": 3,
diff --git
a/tika-serialization/src/test/resources/configs/test-interface-no-type.json
b/tika-serialization/src/test/resources/configs/test-interface-no-type.json
index da2e606bb5..af20c1dd25 100644
--- a/tika-serialization/src/test/resources/configs/test-interface-no-type.json
+++ b/tika-serialization/src/test/resources/configs/test-interface-no-type.json
@@ -1,5 +1,5 @@
{
- "other-configs": {
+ "parse-context": {
"handler-no-type": {
"maxSize": 50000,
"prefix": "no-type-"
diff --git
a/tika-serialization/src/test/resources/configs/test-invalid-class.json
b/tika-serialization/src/test/resources/configs/test-invalid-class.json
index c927b6e1cc..3c290ddcdc 100644
--- a/tika-serialization/src/test/resources/configs/test-invalid-class.json
+++ b/tika-serialization/src/test/resources/configs/test-invalid-class.json
@@ -1,5 +1,5 @@
{
- "other-configs": {
+ "parse-context": {
"handler": "com.example.NonExistentClass"
}
}
diff --git
a/tika-serialization/src/test/resources/configs/test-partial-config.json
b/tika-serialization/src/test/resources/configs/test-partial-config.json
index 5c5eab6992..50b8144867 100644
--- a/tika-serialization/src/test/resources/configs/test-partial-config.json
+++ b/tika-serialization/src/test/resources/configs/test-partial-config.json
@@ -1,5 +1,5 @@
{
- "other-configs": {
+ "parse-context": {
"retry-config": {
"enabled": true
},
diff --git
a/tika-serialization/src/test/resources/configs/test-unexpected-field.json
b/tika-serialization/src/test/resources/configs/test-unexpected-field.json
index 5946b399ea..bddae5db93 100644
--- a/tika-serialization/src/test/resources/configs/test-unexpected-field.json
+++ b/tika-serialization/src/test/resources/configs/test-unexpected-field.json
@@ -1,5 +1,5 @@
{
- "other-configs": {
+ "parse-context": {
"retry-config": {
"timeout": 5000,
"retries": 3,
diff --git a/tika-serialization/src/test/resources/configs/test-wrong-type.json
b/tika-serialization/src/test/resources/configs/test-wrong-type.json
index ece5fe3aeb..34cc7674a9 100644
--- a/tika-serialization/src/test/resources/configs/test-wrong-type.json
+++ b/tika-serialization/src/test/resources/configs/test-wrong-type.json
@@ -1,5 +1,5 @@
{
- "other-configs": {
+ "parse-context": {
"handler": "java.lang.String"
}
}
diff --git
a/tika-serialization/src/test/resources/configs/timeout-limits-test.json
b/tika-serialization/src/test/resources/configs/timeout-limits-test.json
index edd158b5ba..ac9090e8f2 100644
--- a/tika-serialization/src/test/resources/configs/timeout-limits-test.json
+++ b/tika-serialization/src/test/resources/configs/timeout-limits-test.json
@@ -1,5 +1,5 @@
{
- "other-configs": {
+ "parse-context": {
"timeout-limits": {
"taskTimeoutMillis": 120000
}
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/MetadataResource.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/MetadataResource.java
index 3a516d7751..6a7e5a5b2d 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/MetadataResource.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/MetadataResource.java
@@ -172,7 +172,7 @@ public class MetadataResource {
protected Metadata parseMetadata(TikaInputStream tis, Metadata metadata,
MultivaluedMap<String, String> httpHeaders, UriInfo info)
throws IOException, TikaConfigException {
- // Load default context from config (includes DigesterFactory from
other-configs)
+ // Load default context from config (includes DigesterFactory from
parse-context)
final ParseContext context = TikaResource.createParseContext();
Parser parser = TikaResource.createParser();
fillMetadata(parser, metadata, httpHeaders);
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
index e3d86f3f3f..57c2d76d92 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
@@ -98,11 +98,7 @@ public class TikaResource {
TIKA_LOADER = tikaLoader;
SERVER_STATUS = serverStatus;
PIPES_PARSING_HELPER = pipesParsingHelper;
- try {
- DEFAULT_METADATA_WRITE_LIMITER_FACTORY =
tikaLoader.configs().load(MetadataWriteLimiterFactory.class);
- } catch (TikaConfigException e) {
- LOG.debug("No MetadataWriteLimiterFactory specified in the
config", e);
- }
+ // MetadataWriteLimiterFactory is now loaded dynamically via
loadParseContext()
}
/**
@@ -116,7 +112,7 @@ public class TikaResource {
/**
* Creates a new ParseContext with defaults loaded from tika-config.
- * This loads components from "other-configs" such as DigesterFactory and
MetadataWriteLimiterFactory.
+ * This loads components from "parse-context" such as DigesterFactory and
MetadataWriteLimiterFactory.
*
* @return a new ParseContext with defaults applied
*/
diff --git
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
index 7b025fe5ad..cf76ec4b7e 100644
---
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
+++
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
@@ -80,13 +80,11 @@ public abstract class CXFTestBase {
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "commons-digester-factory": {
- "digests": [
- { "algorithm": "MD5" }
- ]
- }
+ "parse-context": {
+ "commons-digester-factory": {
+ "digests": [
+ { "algorithm": "MD5" }
+ ]
}
}
}
diff --git
a/tika-server/tika-server-core/src/test/resources/configs/cxf-test-base-template.json
b/tika-server/tika-server-core/src/test/resources/configs/cxf-test-base-template.json
index 14e1c6c511..c360dd70c7 100644
---
a/tika-server/tika-server-core/src/test/resources/configs/cxf-test-base-template.json
+++
b/tika-server/tika-server-core/src/test/resources/configs/cxf-test-base-template.json
@@ -47,15 +47,13 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "commons-digester-factory": {
+ "parse-context": {
+ "commons-digester-factory": {
"digests": [
{ "algorithm": "MD5" },
{ "algorithm": "SHA1", "encoding": "BASE32" }
]
}
- }
- },
+ },
"plugin-roots": "PLUGINS_PATHS"
}
diff --git
a/tika-server/tika-server-standard/src/test/resources/configs/cxf-test-base-template.json
b/tika-server/tika-server-standard/src/test/resources/configs/cxf-test-base-template.json
index 6c6f2c5df9..40bb34e201 100644
---
a/tika-server/tika-server-standard/src/test/resources/configs/cxf-test-base-template.json
+++
b/tika-server/tika-server-standard/src/test/resources/configs/cxf-test-base-template.json
@@ -49,15 +49,13 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "commons-digester-factory": {
+ "parse-context": {
+ "commons-digester-factory": {
"digests": [
{ "algorithm": "MD5" },
{ "algorithm": "SHA1", "encoding": "BASE32" }
]
}
- }
- },
+ },
"plugin-roots": "PLUGINS_PATHS"
}
diff --git
a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-for-server-tests.json
b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-for-server-tests.json
index dfbcbb8ee4..174b340a78 100644
---
a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-for-server-tests.json
+++
b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-for-server-tests.json
@@ -12,16 +12,14 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "commons-digester-factory": {
+ "parse-context": {
+ "commons-digester-factory": {
"digests": [
{ "algorithm": "MD5" },
{ "algorithm": "SHA1", "encoding": "BASE32" }
]
}
- }
- },
+ },
"fetchers": {
"file-system-fetcher": {
"file-system-fetcher": {
diff --git
a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-opennlp-filter.json
b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-opennlp-filter.json
index 6efc957936..9a6a9779a3 100644
---
a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-opennlp-filter.json
+++
b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-opennlp-filter.json
@@ -17,16 +17,14 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "commons-digester-factory": {
+ "parse-context": {
+ "commons-digester-factory": {
"digests": [
{ "algorithm": "MD5" },
{ "algorithm": "SHA1", "encoding": "BASE32" }
]
}
- }
- },
+ },
"fetchers": {
"file-system-fetcher": {
"file-system-fetcher": {
diff --git
a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-optimaize-filter.json
b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-optimaize-filter.json
index 05a1cdf374..ef02357640 100644
---
a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-optimaize-filter.json
+++
b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-optimaize-filter.json
@@ -17,16 +17,14 @@
"auto-detect-parser": {
"throwOnZeroBytes": false
},
- "other-configs": {
- "digester-factory": {
- "commons-digester-factory": {
+ "parse-context": {
+ "commons-digester-factory": {
"digests": [
{ "algorithm": "MD5" },
{ "algorithm": "SHA1", "encoding": "BASE32" }
]
}
- }
- },
+ },
"fetchers": {
"file-system-fetcher": {
"file-system-fetcher": {