This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4545-rm-TikaConfigs in repository https://gitbox.apache.org/repos/asf/tika.git
commit 749e55829eee70c651a1b194b5fc3efb50d3c74a Author: tallison <[email protected]> AuthorDate: Thu Dec 4 09:36:04 2025 -0500 TIKA-4545 - rm TikaConfigs and streamline PipesConfig/AsyncConfig --- .../test/resources/configs/config-template.json | 2 +- .../src/test/resources/kafka/plugins-template.json | 2 +- .../pipes/opensearch/tests/OpenSearchTest.java | 6 +- .../resources/opensearch/plugins-template.json | 2 +- .../opensearch/tika-config-opensearch.json | 2 +- .../src/test/resources/s3/plugins-template.json | 2 +- .../src/test/resources/solr/plugins-template.json | 2 +- .../src/test/resources/tika-config-solr-urls.json | 2 +- .../java/org/apache/tika/TikaLoaderHelper.java | 3 +- .../src/test/resources/configs/tika-4533.json | 2 +- .../configs/tika-config-digests-pdf-only.json | 2 +- .../tika-config-digests-skip-container.json | 2 +- .../resources/configs/tika-config-digests.json | 2 +- ...a-config-doubling-custom-handler-decorator.json | 2 +- .../resources/configs/tika-config-no-names.json | 5 +- ...a-config-upcasing-custom-handler-decorator.json | 2 +- .../resources/configs/tika-config-with-names.json | 5 +- .../org/apache/tika/async/cli/PluginsWriter.java | 14 +- .../org/apache/tika/async/cli/TikaAsyncCLI.java | 12 +- .../tika/async/cli/TikaConfigAsyncWriterTest.java | 10 +- .../test/resources/configs/config-template.json | 2 +- .../org/apache/tika/pipes/core/PipesConfig.java | 105 ++++++++++++++ .../org/apache/tika/pipes/core/PipesServer.java | 13 +- .../apache/tika/pipes/core/async/AsyncConfig.java | 104 -------------- .../apache/tika/pipes/core/async/AsyncEmitter.java | 5 +- .../tika/pipes/core/async/AsyncProcessor.java | 19 +-- .../tika/pipes/core/emitter/EmitterManager.java | 7 +- .../tika/pipes/core/fetcher/FetcherManager.java | 7 +- .../core/pipesiterator/PipesIteratorManager.java | 7 +- .../tika/pipes/core/reporter/ReporterManager.java | 7 +- tika-pipes/tika-pipes-integration-tests/pom.xml | 2 +- .../apache/tika/pipes/core/PassbackFilterTest.java | 6 
+- .../apache/tika/pipes/core/PipesClientTest.java | 6 +- .../apache/tika/pipes/core/PipesServerTest.java | 20 +-- .../apache/tika/pipes/core/PluginManagerTest.java | 8 +- .../test/resources/configs/tika-config-basic.json | 4 +- .../resources/configs/tika-config-passback.json | 4 +- .../resources/configs/tika-config-truncate.json | 4 +- .../tika/pipes/fetcher/http/HttpFetcherTest.java | 6 +- .../java/org/apache/tika/plugins/TikaConfigs.java | 160 --------------------- .../org/apache/tika/plugins/TikaPluginManager.java | 38 ++--- .../org/apache/tika/plugins/TikaConfigsTest.java | 147 ------------------- .../apache/tika/config/loader/ConfigLoader.java | 79 +++++----- .../apache/tika/config/loader/TikaJsonConfig.java | 84 ++++++++++- .../org/apache/tika/config/loader/TikaLoader.java | 21 +-- .../test/resources/configs/test-config-loader.json | 48 ++++--- .../resources/configs/test-interface-no-type.json | 8 +- .../test/resources/configs/test-invalid-class.json | 4 +- .../resources/configs/test-partial-config.json | 22 +-- .../resources/configs/test-unexpected-field.json | 12 +- .../test/resources/configs/test-wrong-type.json | 4 +- .../apache/tika/server/client/TikaClientCLI.java | 8 +- .../apache/tika/server/core/TikaServerConfig.java | 2 +- .../apache/tika/server/core/TikaServerProcess.java | 8 +- .../tika/server/core/resource/AsyncResource.java | 8 +- .../tika/server/core/resource/PipesResource.java | 6 +- .../org/apache/tika/server/core/CXFTestBase.java | 2 +- .../org/apache/tika/server/core/TikaPipesTest.java | 8 +- .../tika/server/core/TikaResourceFetcherTest.java | 9 +- .../resources/configs/cxf-test-base-template.json | 4 +- .../apache/tika/server/standard/FetcherTest.java | 8 +- .../apache/tika/server/standard/TikaPipesTest.java | 8 +- .../resources/configs/cxf-test-base-template.json | 4 +- .../configs/tika-config-for-server-tests.json | 2 +- .../tika-config-langdetect-opennlp-filter.json | 2 +- .../tika-config-langdetect-optimaize-filter.json | 2 +- 66 
files changed, 447 insertions(+), 678 deletions(-) diff --git a/tika-app/src/test/resources/configs/config-template.json b/tika-app/src/test/resources/configs/config-template.json index 7a256e301..4039a1628 100644 --- a/tika-app/src/test/resources/configs/config-template.json +++ b/tika-app/src/test/resources/configs/config-template.json @@ -37,7 +37,7 @@ } } }, - "async": { + "pipes": { "emitWithinMillis": 10000, "emitMaxEstimatedBytes": 100000, "queueSize": 10000, diff --git a/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/kafka/plugins-template.json b/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/kafka/plugins-template.json index be46240aa..79a47981c 100644 --- a/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/kafka/plugins-template.json +++ b/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/kafka/plugins-template.json @@ -93,7 +93,7 @@ } } }, - "async": { + "pipes": { "maxForEmitBatchBytes": 10000, "emitMaxEstimatedBytes": 100000, "emitWithinMillis": 10, diff --git a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/OpenSearchTest.java b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/OpenSearchTest.java index 421ef15b3..80c6b6933 100644 --- a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/OpenSearchTest.java +++ b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/OpenSearchTest.java @@ -48,6 +48,7 @@ import org.testcontainers.utility.DockerImageName; import org.apache.tika.cli.TikaCLI; import org.apache.tika.client.HttpClientFactory; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.metadata.Metadata; 
import org.apache.tika.parser.ParseContext; @@ -57,7 +58,6 @@ import org.apache.tika.pipes.core.emitter.EmitterManager; import org.apache.tika.pipes.emitter.opensearch.HttpClientConfig; import org.apache.tika.pipes.emitter.opensearch.JsonResponse; import org.apache.tika.pipes.emitter.opensearch.OpenSearchEmitterConfig; -import org.apache.tika.plugins.TikaConfigs; import org.apache.tika.plugins.TikaPluginManager; @Testcontainers(disabledWithoutDocker = true) @@ -364,9 +364,9 @@ public class OpenSearchTest { OpenSearchEmitterConfig.UpdateStrategy.UPSERT, HandlerConfig.PARSE_MODE.RMETA, endpoint, testDocDirectory); - TikaConfigs tikaConfigs = TikaConfigs.load(pluginsConfigFile); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(pluginsConfigFile); Emitter emitter = EmitterManager - .load(TikaPluginManager.load(tikaConfigs), tikaConfigs).getEmitter(); + .load(TikaPluginManager.load(tikaJsonConfig), tikaJsonConfig).getEmitter(); Metadata metadata = new Metadata(); metadata.set("mime", "mimeA"); metadata.set("title", "titleA"); diff --git a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/plugins-template.json b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/plugins-template.json index 3073ceee3..c301f3226 100644 --- a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/plugins-template.json +++ b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/plugins-template.json @@ -59,7 +59,7 @@ } } }, - "async": { + "pipes": { "maxForEmitBatchBytes": 10000, "emitMaxEstimatedBytes": 100000, "emitWithinMillis": 60000, diff --git a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/tika-config-opensearch.json b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/tika-config-opensearch.json index d46bfe2fe..d304e3c9b 100644 --- 
a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/tika-config-opensearch.json +++ b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/tika-config-opensearch.json @@ -100,7 +100,7 @@ } } }, - "async": { + "pipes": { "maxForEmitBatchBytes": 10000, "emitMaxEstimatedBytes": 100000, "emitWithinMillis": 60000, diff --git a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/plugins-template.json b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/plugins-template.json index 01d01a2bd..52f9c7ab1 100644 --- a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/plugins-template.json +++ b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/plugins-template.json @@ -55,7 +55,7 @@ } } }, - "async": { + "pipes": { "maxForEmitBatchBytes": 10000, "emitMaxEstimatedBytes": 100000, "emitWithinMillis": 10, diff --git a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/solr/plugins-template.json b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/solr/plugins-template.json index 37595e57b..1588f4f7a 100644 --- a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/solr/plugins-template.json +++ b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/solr/plugins-template.json @@ -90,7 +90,7 @@ } } }, - "async": { + "pipes": { "maxForEmitBatchBytes": 10000, "emitMaxEstimatedBytes": 100000, "emitWithinMillis": 10, diff --git a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.json b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.json index b8bfe5f5d..baee81c2b 100644 --- a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.json +++ 
b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.json @@ -40,7 +40,7 @@ } } ], - "async": { + "pipes": { "maxForEmitBatchBytes": 10000, "emitMaxEstimatedBytes": 100000, "emitWithinMillis": 10, diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/TikaLoaderHelper.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/TikaLoaderHelper.java index fd4d41e2e..d8e16eeed 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/TikaLoaderHelper.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/TikaLoaderHelper.java @@ -16,6 +16,7 @@ */ package org.apache.tika; +import java.io.IOException; import java.net.URISyntaxException; import java.nio.file.Paths; @@ -29,7 +30,7 @@ public class TikaLoaderHelper { return TikaLoader.load(Paths.get(TikaLoaderHelper.class .getResource("/configs/" + config) .toURI())); - } catch (URISyntaxException | TikaConfigException e) { + } catch (URISyntaxException | TikaConfigException | IOException e) { throw new RuntimeException(e); } } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4533.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4533.json index 6fed19e4d..bab3af07e 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4533.json +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4533.json @@ -1,5 +1,5 @@ { - "auto-detect-parser-config": { + "auto-detect-parser": { "maximumCompressionRatio": 100, "maximumDepth": 100, "maximumPackageEntryDepth": 100, diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json index 8c8192eaa..9f31bfbc9 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json @@ -8,7 +8,7 @@ } } ], - "auto-detect-parser-config": { + "auto-detect-parser": { "spoolToDisk": 1000000, "outputThreshold": 1000000, "digesterFactory": { diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-skip-container.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-skip-container.json index b06f96841..5fa5e7897 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-skip-container.json +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-skip-container.json @@ -1,5 +1,5 @@ { - "auto-detect-parser-config": { + "auto-detect-parser": { "spoolToDisk": 1000000, "outputThreshold": 1000000, "digesterFactory": { diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.json index b98e959d6..bf12e17d7 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.json +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.json @@ -1,5 +1,5 @@ { - 
"auto-detect-parser-config": { + "auto-detect-parser": { "spoolToDisk": 1000000, "outputThreshold": 1000000, "digesterFactory": { diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-doubling-custom-handler-decorator.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-doubling-custom-handler-decorator.json index a3945c03d..dcd44b7f3 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-doubling-custom-handler-decorator.json +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-doubling-custom-handler-decorator.json @@ -1,5 +1,5 @@ { - "auto-detect-parser-config": { + "auto-detect-parser": { "spoolToDisk": 1000, "outputThreshold": 1000, "contentHandlerDecoratorFactory": { diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json index 2f0ac2a2f..5938163df 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json @@ -3,9 +3,8 @@ "spoolToDisk": 123450, "outputThreshold": 678900, "embeddedDocumentExtractorFactory": { - "runpack-extractor-factory": { - "writeFileNameToContent": false - } + "@class": "org.apache.tika.extractor.RUnpackExtractorFactory", + "writeFileNameToContent": false } } } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json index 2d088ccdc..d573606b9 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json @@ -1,5 +1,5 @@ { - "auto-detect-parser-config": { + "auto-detect-parser": { "spoolToDisk": 1000, "outputThreshold": 1000, "maximumCompressionRatio": 0.8, diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json index 0659adb85..ea1519ec0 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json @@ -3,9 +3,8 @@ "spoolToDisk": 123450, "outputThreshold": 678900, "embeddedDocumentExtractorFactory": { - "runpack-extractor-factory": { - "writeFileNameToContent": true - } + "@class": "org.apache.tika.extractor.RUnpackExtractorFactory", + "writeFileNameToContent": true } } } diff --git a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java index f3236a05b..5c863c491 100644 --- a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java +++ b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java @@ -28,7 +28,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; import 
org.apache.tika.config.loader.PolymorphicObjectMapperFactory; -import org.apache.tika.pipes.core.async.AsyncConfig; +import org.apache.tika.pipes.core.PipesConfig; import org.apache.tika.utils.StringUtils; public class PluginsWriter { @@ -61,20 +61,20 @@ public class PluginsWriter { pluginString = plugins.toAbsolutePath().toString(); } json = json.replace("PLUGIN_ROOTS", pluginString).replace("\\", "/"); - AsyncConfig asyncConfig = new AsyncConfig(); + PipesConfig pipesConfig = new PipesConfig(); - asyncConfig.setNumClients(simpleAsyncConfig.getNumClients() == null ? 2 : simpleAsyncConfig.getNumClients()); - asyncConfig.setTikaConfig(output.toAbsolutePath().toString()); + pipesConfig.setNumClients(simpleAsyncConfig.getNumClients() == null ? 2 : simpleAsyncConfig.getNumClients()); + pipesConfig.setTikaConfig(output.toAbsolutePath().toString()); if (simpleAsyncConfig.getXmx() != null) { - asyncConfig.setForkedJvmArgs(new ArrayList<>(List.of(simpleAsyncConfig.getXmx()))); + pipesConfig.setForkedJvmArgs(new ArrayList<>(List.of(simpleAsyncConfig.getXmx()))); } if (simpleAsyncConfig.getTimeoutMs() != null) { - asyncConfig.setTimeoutMillis(simpleAsyncConfig.getTimeoutMs()); + pipesConfig.setTimeoutMillis(simpleAsyncConfig.getTimeoutMs()); } ObjectMapper objectMapper = PolymorphicObjectMapperFactory.getMapper(); ObjectNode root = (ObjectNode) objectMapper.readTree(json.getBytes(StandardCharsets.UTF_8)); - root.set("async", objectMapper.valueToTree(asyncConfig)); + root.set("pipes", objectMapper.valueToTree(pipesConfig)); Files.writeString(output, root.toString()); } catch (Exception e) { diff --git a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java index 656e67aed..3a9c5cec3 100644 --- a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java +++ 
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java @@ -33,6 +33,7 @@ import org.apache.commons.cli.ParseException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.parser.ParseContext; import org.apache.tika.pipes.api.FetchEmitTuple; @@ -44,7 +45,6 @@ import org.apache.tika.pipes.core.async.AsyncProcessor; import org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.pipes.core.pipesiterator.PipesIteratorManager; import org.apache.tika.plugins.ExtensionConfig; -import org.apache.tika.plugins.TikaConfigs; import org.apache.tika.plugins.TikaPluginManager; import org.apache.tika.sax.BasicContentHandlerFactory; import org.apache.tika.utils.StringUtils; @@ -84,8 +84,8 @@ public class TikaAsyncCLI { if (args.length == 1) { if (args[0].endsWith(".json")) { LOG.warn("processing args"); - TikaConfigs tikaConfigs = TikaConfigs.load(Paths.get(args[0])); - Optional<PipesIterator> pipesIteratorOpt = PipesIteratorManager.load(TikaPluginManager.load(tikaConfigs), tikaConfigs); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(Paths.get(args[0])); + Optional<PipesIterator> pipesIteratorOpt = PipesIteratorManager.load(TikaPluginManager.load(tikaJsonConfig), tikaJsonConfig); if (pipesIteratorOpt.isEmpty()) { throw new IllegalArgumentException("Must specify a pipes iterator if supplying a .json file"); } @@ -121,10 +121,10 @@ public class TikaAsyncCLI { private static PipesIterator buildPipesIterator(Path pluginsConfig, SimpleAsyncConfig simpleAsyncConfig) throws TikaConfigException, IOException { - TikaConfigs tikaConfigs = TikaConfigs.load(pluginsConfig); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(pluginsConfig); String inputDirString = simpleAsyncConfig.getInputDir(); if (StringUtils.isBlank(inputDirString)) { - Optional<PipesIterator> pipesIteratorOpt = 
PipesIteratorManager.load(TikaPluginManager.load(tikaConfigs), tikaConfigs); + Optional<PipesIterator> pipesIteratorOpt = PipesIteratorManager.load(TikaPluginManager.load(tikaJsonConfig), tikaJsonConfig); if (pipesIteratorOpt.isEmpty()) { throw new TikaConfigException("something went wrong loading: pipesIterator from the tika configs"); } @@ -134,7 +134,7 @@ public class TikaAsyncCLI { if (Files.isRegularFile(p)) { return new SingleFilePipesIterator(p.getFileName().toString()); } - Optional<PipesIterator> pipesIteratorOpt = PipesIteratorManager.load(TikaPluginManager.load(tikaConfigs), tikaConfigs); + Optional<PipesIterator> pipesIteratorOpt = PipesIteratorManager.load(TikaPluginManager.load(tikaJsonConfig), tikaJsonConfig); if (pipesIteratorOpt.isEmpty()) { throw new TikaConfigException("something went wrong loading: pipesIterator from the tika configs"); } diff --git a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/TikaConfigAsyncWriterTest.java b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/TikaConfigAsyncWriterTest.java index 5e009f1f8..225bf031e 100644 --- a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/TikaConfigAsyncWriterTest.java +++ b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/TikaConfigAsyncWriterTest.java @@ -25,8 +25,8 @@ import java.nio.file.Paths; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import org.apache.tika.pipes.core.async.AsyncConfig; -import org.apache.tika.plugins.TikaConfigs; +import org.apache.tika.config.loader.TikaJsonConfig; +import org.apache.tika.pipes.core.PipesConfig; import org.apache.tika.sax.BasicContentHandlerFactory; public class TikaConfigAsyncWriterTest { @@ -44,9 +44,9 @@ public class TikaConfigAsyncWriterTest { Path tmp = Files.createTempFile(dir, "plugins-",".json"); pluginsWriter.write(tmp); - TikaConfigs configs = TikaConfigs.load(tmp); - AsyncConfig asyncConfig = AsyncConfig.load(configs); - 
assertEquals("-Xmx1g", asyncConfig.getForkedJvmArgs().get(0)); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(tmp); + PipesConfig pipesConfig = PipesConfig.load(tikaJsonConfig); + assertEquals("-Xmx1g", pipesConfig.getForkedJvmArgs().get(0)); } } diff --git a/tika-pipes/tika-async-cli/src/test/resources/configs/config-template.json b/tika-pipes/tika-async-cli/src/test/resources/configs/config-template.json index 9e0ff05ef..082939278 100644 --- a/tika-pipes/tika-async-cli/src/test/resources/configs/config-template.json +++ b/tika-pipes/tika-async-cli/src/test/resources/configs/config-template.json @@ -21,7 +21,7 @@ } } }, - "async": { + "pipes": { "emitWithinMillis": 10000, "emitMaxEstimatedBytes": 100000, "queueSize": 10000, diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesConfig.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesConfig.java index 4f2093530..2acfc486a 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesConfig.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesConfig.java @@ -16,12 +16,16 @@ */ package org.apache.tika.pipes.core; +import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import com.fasterxml.jackson.annotation.JsonIgnore; +import org.apache.tika.config.loader.TikaJsonConfig; +import org.apache.tika.exception.TikaConfigException; + public class PipesConfig { /** @@ -60,12 +64,48 @@ public class PipesConfig { public static final int DEFAULT_STALE_FETCHER_DELAY_SECONDS = 60; private int staleFetcherDelaySeconds = DEFAULT_STALE_FETCHER_DELAY_SECONDS; + // Async-specific fields (used by AsyncProcessor, ignored by PipesServer) + public static final long DEFAULT_EMIT_WITHIN_MILLIS = 10000; + public static final long DEFAULT_EMIT_MAX_ESTIMATED_BYTES = 100000; + public static final int DEFAULT_QUEUE_SIZE = 10000; + public static final int 
DEFAULT_NUM_EMITTERS = 1; + + private long emitWithinMillis = DEFAULT_EMIT_WITHIN_MILLIS; + private long emitMaxEstimatedBytes = DEFAULT_EMIT_MAX_ESTIMATED_BYTES; + private int queueSize = DEFAULT_QUEUE_SIZE; + private int numEmitters = DEFAULT_NUM_EMITTERS; + private boolean emitIntermediateResults = false; + private ArrayList<String> forkedJvmArgs = new ArrayList<>(); private String javaPath = "java"; private String tikaConfig; + /** + * Loads PipesConfig from the "pipes" section of the JSON configuration. + * <p> + * This configuration is used by both PipesServer (forking process) and + * AsyncProcessor (async processing). Some fields are specific to each: + * <ul> + * <li>PipesServer uses: numClients, timeoutMillis, maxForEmitBatchBytes, etc.</li> + * <li>AsyncProcessor uses: emitWithinMillis, queueSize, numEmitters, etc.</li> + * </ul> + * Unused fields in each context are simply ignored. + * + * @param tikaJsonConfig the JSON configuration to load from + * @return the loaded PipesConfig, or a new default instance if not found in config + * @throws IOException if deserialization fails + * @throws TikaConfigException if configuration is invalid + */ + public static PipesConfig load(TikaJsonConfig tikaJsonConfig) throws IOException, TikaConfigException { + PipesConfig config = tikaJsonConfig.deserialize("pipes", PipesConfig.class); + if (config == null) { + return new PipesConfig(); + } + return config; + } + public long getTimeoutMillis() { return timeoutMillis; } @@ -201,4 +241,69 @@ public class PipesConfig { public void setMaxWaitForClientMillis(long maxWaitForClientMillis) { this.maxWaitForClientMillis = maxWaitForClientMillis; } + + // Async-specific getters/setters (used by AsyncProcessor, ignored by PipesServer) + + public long getEmitWithinMillis() { + return emitWithinMillis; + } + + /** + * If nothing has been emitted in this amount of time + * and the {@link #getEmitMaxEstimatedBytes()} has not been reached yet, + * emit what's in the emit queue. 
+ * + * @param emitWithinMillis time in milliseconds + */ + public void setEmitWithinMillis(long emitWithinMillis) { + this.emitWithinMillis = emitWithinMillis; + } + + /** + * When the emit queue hits this estimated size (sum of + * estimated extract sizes), emit the batch. + * + * @return the maximum estimated bytes before emitting + */ + public long getEmitMaxEstimatedBytes() { + return emitMaxEstimatedBytes; + } + + public void setEmitMaxEstimatedBytes(long emitMaxEstimatedBytes) { + this.emitMaxEstimatedBytes = emitMaxEstimatedBytes; + } + + /** + * FetchEmitTuple queue size + * + * @return the queue size + */ + public int getQueueSize() { + return queueSize; + } + + public void setQueueSize(int queueSize) { + this.queueSize = queueSize; + } + + /** + * Number of emitters + * + * @return the number of emitters + */ + public int getNumEmitters() { + return numEmitters; + } + + public void setNumEmitters(int numEmitters) { + this.numEmitters = numEmitters; + } + + public boolean isEmitIntermediateResults() { + return emitIntermediateResults; + } + + public void setEmitIntermediateResults(boolean emitIntermediateResults) { + this.emitIntermediateResults = emitIntermediateResults; + } } diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesServer.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesServer.java index 2d6379ee3..bdc31cc43 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesServer.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesServer.java @@ -43,6 +43,7 @@ import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; import org.apache.tika.config.TikaTaskTimeout; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.config.loader.TikaLoader; import org.apache.tika.detect.Detector; import org.apache.tika.exception.EncryptedDocumentException; @@ -79,7 +80,6 @@ import 
org.apache.tika.pipes.core.extractor.BasicEmbeddedDocumentBytesHandler; import org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.pipes.core.extractor.EmittingEmbeddedDocumentBytesHandler; import org.apache.tika.pipes.core.fetcher.FetcherManager; -import org.apache.tika.plugins.TikaConfigs; import org.apache.tika.plugins.TikaPluginManager; import org.apache.tika.sax.BasicContentHandlerFactory; import org.apache.tika.sax.ContentHandlerFactory; @@ -162,7 +162,7 @@ public class PipesServer implements Runnable { public PipesServer(Path tikaConfigPath, InputStream in, PrintStream out, long maxForEmitBatchBytes, long serverParseTimeoutMillis, - long serverWaitTimeoutMillis) throws TikaConfigException { + long serverWaitTimeoutMillis) throws TikaConfigException, IOException { this.tikaLoader = TikaLoader.load(tikaConfigPath); this.defaultMetadataFilter = tikaLoader.loadMetadataFilters(); this.input = new DataInputStream(in); @@ -834,16 +834,15 @@ public class PipesServer implements Runnable { protected void initializeResources() throws TikaException, IOException, SAXException { - TikaPluginManager tikaPluginManager = TikaPluginManager.load(tikaLoader.getConfig()); + TikaJsonConfig tikaJsonConfig = tikaLoader.getConfig(); + TikaPluginManager tikaPluginManager = TikaPluginManager.load(tikaJsonConfig); - //TODO -- fix this -- get rid of TikaConfigs - TikaConfigs tikaConfigs = TikaConfigs.load(tikaLoader.getConfig()); //TODO allowed named configurations in tika config - this.fetcherManager = FetcherManager.load(tikaPluginManager, tikaConfigs); + this.fetcherManager = FetcherManager.load(tikaPluginManager, tikaJsonConfig); //skip initialization of the emitters if emitting //from the pipesserver is turned off. 
if (maxForEmitBatchBytes > -1) { - this.emitterManager = EmitterManager.load(tikaPluginManager, tikaConfigs); + this.emitterManager = EmitterManager.load(tikaPluginManager, tikaJsonConfig); } else { LOG.debug("'maxForEmitBatchBytes' < 0. Not initializing emitters in PipesServer"); this.emitterManager = null; diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncConfig.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncConfig.java deleted file mode 100644 index a48a2d0db..000000000 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncConfig.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.tika.pipes.core.async; - -import java.io.IOException; - -import org.apache.tika.exception.TikaConfigException; -import org.apache.tika.pipes.core.PipesConfig; -import org.apache.tika.plugins.TikaConfigs; - -public class AsyncConfig extends PipesConfig { - - private long emitWithinMillis = 10000; - private long emitMaxEstimatedBytes = 100000; - - private int queueSize = 10000; - private int numEmitters = 1; - - private boolean emitIntermediateResults = false; - - public static AsyncConfig load(TikaConfigs tikaConfigs) throws IOException, TikaConfigException { - AsyncConfig a = tikaConfigs.deserialize(AsyncConfig.class, "async"); - if (a == null) { - return new AsyncConfig(); - } - return a; - } - - public long getEmitWithinMillis() { - return emitWithinMillis; - } - - /** - * If nothing has been emitted in this amount of time - * and the {@link #getEmitMaxEstimatedBytes()} has not been reached yet, - * emit what's in the emit queue. - * - * @param emitWithinMillis - */ - public void setEmitWithinMillis(long emitWithinMillis) { - this.emitWithinMillis = emitWithinMillis; - } - - /** - * When the emit queue hits this estimated size (sum of - * estimated extract sizes), emit the batch. 
- * @return - */ - public long getEmitMaxEstimatedBytes() { - return emitMaxEstimatedBytes; - } - - public void setEmitMaxEstimatedBytes(long emitMaxEstimatedBytes) { - this.emitMaxEstimatedBytes = emitMaxEstimatedBytes; - } - - - public void setNumEmitters(int numEmitters) { - this.numEmitters = numEmitters; - } - - /** - * FetchEmitTuple queue size - * @return - */ - public int getQueueSize() { - return queueSize; - } - - public void setQueueSize(int queueSize) { - this.queueSize = queueSize; - } - - /** - * Number of emitters - * - * @return - */ - public int getNumEmitters() { - return numEmitters; - } - - public void setEmitIntermediateResults(boolean emitIntermediateResults) { - this.emitIntermediateResults = emitIntermediateResults; - } - - public boolean isEmitIntermediateResults() { - return emitIntermediateResults; - } -} diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncEmitter.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncEmitter.java index ccffd03de..ea084aa65 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncEmitter.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncEmitter.java @@ -32,6 +32,7 @@ import org.slf4j.LoggerFactory; import org.apache.tika.pipes.api.emitter.EmitData; import org.apache.tika.pipes.api.emitter.Emitter; +import org.apache.tika.pipes.core.PipesConfig; import org.apache.tika.pipes.core.emitter.EmitterManager; import org.apache.tika.utils.ExceptionUtils; @@ -46,13 +47,13 @@ public class AsyncEmitter implements Callable<Integer> { private static final Logger LOG = LoggerFactory.getLogger(AsyncEmitter.class); - private final AsyncConfig asyncConfig; + private final PipesConfig asyncConfig; private final EmitterManager emitterManager; private final ArrayBlockingQueue<EmitDataPair> emitDataQueue; Instant lastEmitted = Instant.now(); - public AsyncEmitter(AsyncConfig 
asyncConfig, ArrayBlockingQueue<EmitDataPair> emitData, + public AsyncEmitter(PipesConfig asyncConfig, ArrayBlockingQueue<EmitDataPair> emitData, EmitterManager emitterManager) { this.asyncConfig = asyncConfig; this.emitDataQueue = emitData; diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncProcessor.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncProcessor.java index 3b7ac66c8..a83387a42 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncProcessor.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncProcessor.java @@ -33,6 +33,7 @@ import java.util.concurrent.atomic.AtomicLong; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.config.loader.TikaLoader; import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.filter.MetadataFilter; @@ -43,11 +44,11 @@ import org.apache.tika.pipes.api.pipesiterator.TotalCountResult; import org.apache.tika.pipes.api.pipesiterator.TotalCounter; import org.apache.tika.pipes.api.reporter.PipesReporter; import org.apache.tika.pipes.core.PipesClient; +import org.apache.tika.pipes.core.PipesConfig; import org.apache.tika.pipes.core.PipesException; import org.apache.tika.pipes.core.PipesResults; import org.apache.tika.pipes.core.emitter.EmitterManager; import org.apache.tika.pipes.core.reporter.ReporterManager; -import org.apache.tika.plugins.TikaConfigs; import org.apache.tika.plugins.TikaPluginManager; /** @@ -66,7 +67,7 @@ public class AsyncProcessor implements Closeable { private final ArrayBlockingQueue<EmitDataPair> emitDatumTuples; private final ExecutorCompletionService<Integer> executorCompletionService; private final ExecutorService executorService; - private final AsyncConfig asyncConfig; + private final PipesConfig asyncConfig; private final PipesReporter 
pipesReporter; private final AtomicLong totalProcessed = new AtomicLong(0); private static long MAX_OFFER_WAIT_MS = 120000; @@ -80,11 +81,11 @@ public class AsyncProcessor implements Closeable { } public AsyncProcessor(Path tikaConfigPath, PipesIterator pipesIterator) throws TikaException, IOException { - TikaConfigs tikaConfigs = TikaConfigs.load(tikaConfigPath); - TikaPluginManager tikaPluginManager = TikaPluginManager.load(tikaConfigs); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(tikaConfigPath); + TikaPluginManager tikaPluginManager = TikaPluginManager.load(tikaJsonConfig); MetadataFilter metadataFilter = TikaLoader.load(tikaConfigPath).loadMetadataFilters(); - this.asyncConfig = AsyncConfig.load(tikaConfigs); - this.pipesReporter = ReporterManager.load(tikaPluginManager, tikaConfigs); + this.asyncConfig = PipesConfig.load(tikaJsonConfig); + this.pipesReporter = ReporterManager.load(tikaPluginManager, tikaJsonConfig); LOG.debug("loaded reporter {}", pipesReporter.getClass()); this.fetchEmitTuples = new ArrayBlockingQueue<>(asyncConfig.getQueueSize()); this.emitDatumTuples = new ArrayBlockingQueue<>(100); @@ -121,7 +122,7 @@ public class AsyncProcessor implements Closeable { new FetchEmitWorker(asyncConfig, fetchEmitTuples, emitDatumTuples)); } - EmitterManager emitterManager = EmitterManager.load(tikaPluginManager, tikaConfigs); + EmitterManager emitterManager = EmitterManager.load(tikaPluginManager, tikaJsonConfig); for (int i = 0; i < asyncConfig.getNumEmitters(); i++) { executorCompletionService.submit( new AsyncEmitter(asyncConfig, emitDatumTuples, emitterManager)); @@ -270,11 +271,11 @@ public class AsyncProcessor implements Closeable { private class FetchEmitWorker implements Callable<Integer> { - private final AsyncConfig asyncConfig; + private final PipesConfig asyncConfig; private final ArrayBlockingQueue<FetchEmitTuple> fetchEmitTuples; private final ArrayBlockingQueue<EmitDataPair> emitDataTupleQueue; - private FetchEmitWorker(AsyncConfig 
asyncConfig, + private FetchEmitWorker(PipesConfig asyncConfig, ArrayBlockingQueue<FetchEmitTuple> fetchEmitTuples, ArrayBlockingQueue<EmitDataPair> emitDataTupleQueue) { this.asyncConfig = asyncConfig; diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/emitter/EmitterManager.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/emitter/EmitterManager.java index a99cde81c..ee5ac55a4 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/emitter/EmitterManager.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/emitter/EmitterManager.java @@ -26,11 +26,11 @@ import org.pf4j.PluginManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.pipes.api.emitter.Emitter; import org.apache.tika.pipes.api.emitter.EmitterFactory; import org.apache.tika.plugins.PluginComponentLoader; -import org.apache.tika.plugins.TikaConfigs; /** * Utility class that will apply the appropriate emitter @@ -45,9 +45,8 @@ public class EmitterManager { private final Map<String, Emitter> emitterMap = new ConcurrentHashMap<>(); - public static EmitterManager load(PluginManager pluginManager, TikaConfigs tikaConfigs) throws IOException, TikaConfigException { - JsonNode fetchersNode = tikaConfigs.getTikaJsonConfig() - .getRootNode().get(CONFIG_KEY); + public static EmitterManager load(PluginManager pluginManager, TikaJsonConfig tikaJsonConfig) throws IOException, TikaConfigException { + JsonNode fetchersNode = tikaJsonConfig.getRootNode().get(CONFIG_KEY); Map<String, Emitter> fetchers = PluginComponentLoader.loadInstances(pluginManager, EmitterFactory.class, fetchersNode); return new EmitterManager(fetchers); diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java index a0c926d40..e30a833fa 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java @@ -26,12 +26,12 @@ import org.pf4j.PluginManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.exception.TikaException; import org.apache.tika.pipes.api.fetcher.Fetcher; import org.apache.tika.pipes.api.fetcher.FetcherFactory; import org.apache.tika.plugins.PluginComponentLoader; -import org.apache.tika.plugins.TikaConfigs; /** * Utility class to hold multiple fetchers. @@ -44,9 +44,8 @@ public class FetcherManager { private static final Logger LOG = LoggerFactory.getLogger(FetcherManager.class); - public static FetcherManager load(PluginManager pluginManager, TikaConfigs tikaConfigs) throws TikaConfigException, IOException { - JsonNode fetchersNode = tikaConfigs.getTikaJsonConfig() - .getRootNode().get(CONFIG_KEY); + public static FetcherManager load(PluginManager pluginManager, TikaJsonConfig tikaJsonConfig) throws TikaConfigException, IOException { + JsonNode fetchersNode = tikaJsonConfig.getRootNode().get(CONFIG_KEY); Map<String, Fetcher> fetchers = PluginComponentLoader.loadInstances(pluginManager, FetcherFactory.class, fetchersNode); return new FetcherManager(fetchers); diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/pipesiterator/PipesIteratorManager.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/pipesiterator/PipesIteratorManager.java index b80f24baf..f374dec44 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/pipesiterator/PipesIteratorManager.java +++ 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/pipesiterator/PipesIteratorManager.java @@ -22,11 +22,11 @@ import java.util.Optional; import com.fasterxml.jackson.databind.JsonNode; import org.pf4j.PluginManager; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.pipes.api.pipesiterator.PipesIterator; import org.apache.tika.pipes.api.pipesiterator.PipesIteratorFactory; import org.apache.tika.plugins.PluginComponentLoader; -import org.apache.tika.plugins.TikaConfigs; /** * Utility class to hold a single pipes iterator @@ -37,10 +37,9 @@ public class PipesIteratorManager { public static final String CONFIG_KEY = "pipes-iterator"; - public static Optional<PipesIterator> load(PluginManager pluginManager, TikaConfigs tikaConfigs) throws IOException, TikaConfigException { + public static Optional<PipesIterator> load(PluginManager pluginManager, TikaJsonConfig tikaJsonConfig) throws IOException, TikaConfigException { - JsonNode node = tikaConfigs.getTikaJsonConfig() - .getRootNode().get(CONFIG_KEY); + JsonNode node = tikaJsonConfig.getRootNode().get(CONFIG_KEY); return PluginComponentLoader.loadSingleton(pluginManager, PipesIteratorFactory.class, node); } diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/reporter/ReporterManager.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/reporter/ReporterManager.java index a539103c5..56a8dcc74 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/reporter/ReporterManager.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/reporter/ReporterManager.java @@ -22,11 +22,11 @@ import java.util.List; import com.fasterxml.jackson.databind.JsonNode; import org.pf4j.PluginManager; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.exception.TikaConfigException; import 
org.apache.tika.pipes.api.reporter.PipesReporter; import org.apache.tika.pipes.api.reporter.PipesReporterFactory; import org.apache.tika.plugins.PluginComponentLoader; -import org.apache.tika.plugins.TikaConfigs; /** * Utility class to hold multiple fetchers. @@ -37,10 +37,9 @@ public class ReporterManager { public static final String CONFIG_KEY = "pipes-reporters"; - public static PipesReporter load(PluginManager pluginManager, TikaConfigs tikaConfigs) throws IOException, TikaConfigException { + public static PipesReporter load(PluginManager pluginManager, TikaJsonConfig tikaJsonConfig) throws IOException, TikaConfigException { - JsonNode node = tikaConfigs.getTikaJsonConfig() - .getRootNode().get(CONFIG_KEY); + JsonNode node = tikaJsonConfig.getRootNode().get(CONFIG_KEY); List<PipesReporter> reporters = PluginComponentLoader.loadUnnamedInstances(pluginManager, PipesReporterFactory.class, node); if (reporters.isEmpty()) { diff --git a/tika-pipes/tika-pipes-integration-tests/pom.xml b/tika-pipes/tika-pipes-integration-tests/pom.xml index 0dfe1b6a9..48c495641 100644 --- a/tika-pipes/tika-pipes-integration-tests/pom.xml +++ b/tika-pipes/tika-pipes-integration-tests/pom.xml @@ -28,7 +28,7 @@ <artifactId>tika-pipes-integration-tests</artifactId> - <name>Apache Tika pipes core tests</name> + <name>Apache Tika pipes core integration tests</name> <url>https://tika.apache.org/</url> <dependencies> diff --git a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PassbackFilterTest.java b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PassbackFilterTest.java index 0aed6b79c..0bf8fa545 100644 --- a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PassbackFilterTest.java +++ b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PassbackFilterTest.java @@ -30,7 +30,7 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import 
org.junit.jupiter.api.io.TempDir; -import org.apache.tika.config.loader.TikaLoader; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; @@ -39,7 +39,6 @@ import org.apache.tika.pipes.api.FetchEmitTuple; import org.apache.tika.pipes.api.PipesResult; import org.apache.tika.pipes.api.emitter.EmitKey; import org.apache.tika.pipes.api.fetcher.FetchKey; -import org.apache.tika.pipes.core.async.AsyncConfig; import org.apache.tika.serialization.JsonMetadataList; import org.apache.tika.utils.StringUtils; @@ -53,7 +52,8 @@ public class PassbackFilterTest { public void init(Path tmpDir) throws Exception { Path pipesConfigPath = PluginsTestHelper.getFileSystemFetcherConfig("tika-config-passback.json", tmpDir); - PipesConfig pipesConfig = TikaLoader.load(pipesConfigPath).configs().load("async", AsyncConfig.class); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(pipesConfigPath); + PipesConfig pipesConfig = PipesConfig.load(tikaJsonConfig); PluginsTestHelper.copyTestFilesToTmpInput(tmpDir, testPdfFile); pipesClient = new PipesClient(pipesConfig); diff --git a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PipesClientTest.java b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PipesClientTest.java index de14368d1..5fae57e51 100644 --- a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PipesClientTest.java +++ b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PipesClientTest.java @@ -26,7 +26,7 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import org.apache.tika.config.TikaTaskTimeout; -import org.apache.tika.config.loader.TikaLoader; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.metadata.Metadata; import 
org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.metadata.filter.AttachmentCountingListFilter; @@ -38,7 +38,6 @@ import org.apache.tika.pipes.api.FetchEmitTuple; import org.apache.tika.pipes.api.PipesResult; import org.apache.tika.pipes.api.emitter.EmitKey; import org.apache.tika.pipes.api.fetcher.FetchKey; -import org.apache.tika.pipes.core.async.AsyncConfig; public class PipesClientTest { String fetcherName = "fsf"; @@ -49,7 +48,8 @@ public class PipesClientTest { Path pipesConfigPath = PluginsTestHelper.getFileSystemFetcherConfig(tmp, tmp.resolve("input"), tmp.resolve("output")); PluginsTestHelper.copyTestFilesToTmpInput(tmp, testFileName); - PipesConfig pipesConfig = TikaLoader.load(pipesConfigPath).configs().load("async", AsyncConfig.class); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(pipesConfigPath); + PipesConfig pipesConfig = PipesConfig.load(tikaJsonConfig); return new PipesClient(pipesConfig); } diff --git a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PipesServerTest.java b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PipesServerTest.java index 3c2d45fbb..eca8ed017 100644 --- a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PipesServerTest.java +++ b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PipesServerTest.java @@ -29,6 +29,7 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import org.apache.tika.TikaTest; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.pipes.api.FetchEmitTuple; @@ -40,7 +41,6 @@ import org.apache.tika.pipes.api.pipesiterator.PipesIteratorBaseConfig; import org.apache.tika.pipes.core.extractor.BasicEmbeddedDocumentBytesHandler; import org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig; import 
org.apache.tika.pipes.core.fetcher.FetcherManager; -import org.apache.tika.plugins.TikaConfigs; import org.apache.tika.plugins.TikaPluginManager; public class PipesServerTest extends TikaTest { @@ -69,9 +69,9 @@ public class PipesServerTest extends TikaTest { FetchEmitTuple fetchEmitTuple = new FetchEmitTuple("id", new FetchKey("fsf", testDoc), new EmitKey("", "")); - TikaConfigs tikaConfigs = TikaConfigs.load(tikaConfig); - TikaPluginManager pluginManager = TikaPluginManager.load(tikaConfigs); - Fetcher fetcher = FetcherManager.load(pluginManager, tikaConfigs).getFetcher(); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(tikaConfig); + TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig); + Fetcher fetcher = FetcherManager.load(pluginManager, tikaJsonConfig).getFetcher(); PipesServer.MetadataListAndEmbeddedBytes parseData = pipesServer.parseFromTuple(fetchEmitTuple, fetcher); assertEquals("5f3b924303e960ce35d7f705e91d3018dd110a9c3cef0546a91fe013d6dad6fd", @@ -102,9 +102,9 @@ public class PipesServerTest extends TikaTest { FetchEmitTuple fetchEmitTuple = new FetchEmitTuple("id", new FetchKey("fs", testDoc), new EmitKey("", ""), new Metadata(), parseContext); - TikaConfigs tikaConfigs = TikaConfigs.load(tikaConfig); - TikaPluginManager pluginManager = TikaPluginManager.load(tikaConfigs); - Fetcher fetcher = FetcherManager.load(pluginManager, tikaConfigs).getFetcher(); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(tikaConfig); + TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig); + Fetcher fetcher = FetcherManager.load(pluginManager, tikaJsonConfig).getFetcher(); PipesServer.MetadataListAndEmbeddedBytes parseData = pipesServer.parseFromTuple(fetchEmitTuple, fetcher); assertEquals(2, parseData.metadataList.size()); @@ -150,9 +150,9 @@ public class PipesServerTest extends TikaTest { new FetchKey("fs", testDoc), new EmitKey("", ""), new Metadata(), parseContext); - TikaConfigs tikaConfigs = 
TikaConfigs.load(pipesConfig); - TikaPluginManager pluginManager = TikaPluginManager.load(tikaConfigs); - Fetcher fetcher = FetcherManager.load(pluginManager, tikaConfigs).getFetcher(); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(pipesConfig); + TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig); + Fetcher fetcher = FetcherManager.load(pluginManager, tikaJsonConfig).getFetcher(); PipesServer.MetadataListAndEmbeddedBytes parseData = pipesServer.parseFromTuple(fetchEmitTuple, fetcher); assertEquals(2, parseData.metadataList.size()); diff --git a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PluginManagerTest.java b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PluginManagerTest.java index 24625064c..738c3c476 100644 --- a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PluginManagerTest.java +++ b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PluginManagerTest.java @@ -23,9 +23,9 @@ import java.nio.file.Path; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.pipes.api.fetcher.Fetcher; import org.apache.tika.pipes.core.fetcher.FetcherManager; -import org.apache.tika.plugins.TikaConfigs; import org.apache.tika.plugins.TikaPluginManager; public class PluginManagerTest { @@ -33,9 +33,9 @@ public class PluginManagerTest { @Test public void testBasic(@TempDir Path tmpDir) throws Exception { Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir); - TikaConfigs tikaConfigs = TikaConfigs.load(config); - TikaPluginManager tikaPluginManager = TikaPluginManager.load(tikaConfigs); - FetcherManager fetcherManager = FetcherManager.load(tikaPluginManager, tikaConfigs); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config); + TikaPluginManager tikaPluginManager = 
TikaPluginManager.load(tikaJsonConfig); + FetcherManager fetcherManager = FetcherManager.load(tikaPluginManager, tikaJsonConfig); assertEquals(1, fetcherManager.getSupported().size()); Fetcher f = fetcherManager.getFetcher(); assertEquals("fsf", f.getExtensionConfig().id()); diff --git a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json index 026045898..57e9dc813 100644 --- a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json +++ b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json @@ -38,7 +38,7 @@ } } }, - "async": { + "pipes": { "tikaConfig": "TIKA_CONFIG", "numClients": 4, "timeoutMillis": 5000, @@ -46,7 +46,7 @@ "forkedJvmArgs": ["-Xmx512m"], "maxForEmitBatchBytes": 1000000 }, - "auto-detect-parser-config": { + "auto-detect-parser": { "spoolToDisk": 1000000, "outputThreshold": 1000000, "digesterFactory": { diff --git a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-passback.json b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-passback.json index 406fe1783..c92cf24d7 100644 --- a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-passback.json +++ b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-passback.json @@ -38,7 +38,7 @@ } } }, - "async": { + "pipes": { "tikaConfig": "TIKA_CONFIG", "numClients": 4, "timeoutMillis": 5000, @@ -46,7 +46,7 @@ "forkedJvmArgs": ["-Xmx512m"], "maxForEmitBatchBytes": 0 }, - "auto-detect-parser-config": { + "auto-detect-parser": { "spoolToDisk": 1000000, "outputThreshold": 1000000, "digesterFactory": { diff --git a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json 
index 8e9212387..811e8da1b 100644 --- a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json +++ b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json @@ -38,7 +38,7 @@ } } }, - "async": { + "pipes": { "tikaConfig": "TIKA_CONFIG", "numClients": 4, "timeoutMillis": 5000, @@ -46,7 +46,7 @@ "forkedJvmArgs": ["-Xmx512m"], "maxForEmitBatchBytes": 1000000 }, - "auto-detect-parser-config": { + "auto-detect-parser": { "spoolToDisk": 1000000, "outputThreshold": 1000000, "digesterFactory": { diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java index e501eed1a..267dae1d0 100644 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java @@ -61,6 +61,7 @@ import org.mockito.Mockito; import org.apache.tika.TikaTest; import org.apache.tika.client.HttpClientFactory; import org.apache.tika.config.ConfigContainer; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.exception.TikaException; import org.apache.tika.io.TemporaryResources; import org.apache.tika.metadata.Metadata; @@ -72,7 +73,6 @@ import org.apache.tika.pipes.fetcher.http.config.HttpFetcherConfig; import org.apache.tika.pipes.fetcher.http.config.HttpHeaders; import org.apache.tika.pipes.fetcher.http.jwt.JwtGenerator; import org.apache.tika.plugins.ExtensionConfig; -import org.apache.tika.plugins.TikaConfigs; import org.apache.tika.plugins.TikaPluginManager; class HttpFetcherTest extends TikaTest { @@ -281,8 +281,8 @@ class HttpFetcherTest extends TikaTest { FetcherManager getFetcherManager(String path) throws Exception { Path configPath = 
Paths.get(HttpFetcherTest.class.getResource("/configs/" + path).toURI()); - TikaConfigs tikaConfigs = TikaConfigs.load(configPath); - return FetcherManager.load(TikaPluginManager.load(tikaConfigs), tikaConfigs); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(configPath); + return FetcherManager.load(TikaPluginManager.load(tikaJsonConfig), tikaJsonConfig); } private void mockClientResponse(final HttpResponse response) throws Exception { diff --git a/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaConfigs.java b/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaConfigs.java deleted file mode 100644 index d2a780b64..000000000 --- a/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaConfigs.java +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.plugins; - -import java.io.IOException; -import java.nio.file.Path; -import java.util.Iterator; -import java.util.Set; - -import com.fasterxml.jackson.databind.JsonNode; - -import org.apache.tika.config.loader.TikaJsonConfig; -import org.apache.tika.exception.TikaConfigException; - -/** - * Loads and validates Tika pipes/plugin configuration from JSON. 
- * <p> - * This class validates pipes-specific configuration keys and delegates to - * {@link TikaJsonConfig} for parsing. Core Tika keys (parsers, detectors, etc.) - * are ignored by this validator - they are handled by TikaLoader. - */ -public class TikaConfigs { - - /** - * Pipes-specific configuration keys. - */ - private static final Set<String> PIPES_KEYS = Set.of( - "fetchers", - "emitters", - "pipes-iterator", - "pipes-reporters", - "async", - "plugin-roots" - ); - - /** - * Core Tika configuration keys (handled by TikaLoader, not validated here). - */ - private static final Set<String> CORE_TIKA_KEYS = Set.of( - "parsers", - "detectors", - "encoding-detectors", - "encodingDetectors", - "metadata-filters", - "metadataFilters", - "renderers", - "translators", - "auto-detect-parser-config", - "autoDetectParserConfig", - "server" - ); - - private final TikaJsonConfig tikaJsonConfig; - - /** - * Loads pipes configuration from a pre-parsed TikaJsonConfig. - * This is the preferred method when sharing configuration across - * core Tika and pipes components. - * - * @param tikaJsonConfig the pre-parsed JSON configuration - * @return the pipes configuration - * @throws TikaConfigException if validation fails - */ - public static TikaConfigs load(TikaJsonConfig tikaJsonConfig) throws TikaConfigException { - TikaConfigs configs = new TikaConfigs(tikaJsonConfig); - configs.validatePipesKeys(); - return configs; - } - - /** - * Loads pipes configuration from a file. - * For backwards compatibility - prefer {@link #load(TikaJsonConfig)} when possible. 
- * - * @param path the path to the JSON configuration file - * @return the pipes configuration - * @throws IOException if reading fails - * @throws TikaConfigException if validation fails - */ - public static TikaConfigs load(Path path) throws IOException, TikaConfigException { - TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(path); - return load(tikaJsonConfig); - } - - private TikaConfigs(TikaJsonConfig tikaJsonConfig) { - this.tikaJsonConfig = tikaJsonConfig; - } - - /** - * Gets the underlying TikaJsonConfig. - * - * @return the TikaJsonConfig - */ - public TikaJsonConfig getTikaJsonConfig() { - return tikaJsonConfig; - } - - /** - * Deserializes a configuration value for the given key. - * - * @param clazz the target class - * @param key the configuration key - * @param <T> the type to deserialize to - * @return the deserialized value - * @throws IOException if deserialization fails - */ - public <T> T deserialize(Class<T> clazz, String key) throws IOException { - return tikaJsonConfig.deserialize(key, clazz); - } - - /** - * Validates that pipes-specific keys are correct. - * This catches typos like "pipes-reporter" instead of "pipes-reporters". - * <p> - * Core Tika keys (parsers, detectors, etc.) are ignored - they are - * validated by TikaLoader. - * <p> - * Keys prefixed with "x-" are allowed for custom extensions. - * - * @throws TikaConfigException if unknown pipes keys are found - */ - private void validatePipesKeys() throws TikaConfigException { - JsonNode root = tikaJsonConfig.getRootNode(); - Iterator<String> fieldNames = root.fieldNames(); - while (fieldNames.hasNext()) { - String key = fieldNames.next(); - - // Ignore core Tika keys - TikaLoader validates those - if (CORE_TIKA_KEYS.contains(key)) { - continue; - } - - // Ignore custom extension keys - if (key.startsWith("x-")) { - continue; - } - - // Must be a known pipes key - if (!PIPES_KEYS.contains(key)) { - throw new TikaConfigException("Unknown pipes config key: '" + key + - "'. 
Valid pipes keys: " + PIPES_KEYS + - " (or use 'x-' prefix for custom keys). " + - "Core Tika keys like 'parsers', 'detectors' should be configured separately."); - } - } - } -} diff --git a/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPluginManager.java b/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPluginManager.java index ad560558b..e030b8a67 100644 --- a/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPluginManager.java +++ b/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPluginManager.java @@ -65,13 +65,22 @@ public class TikaPluginManager extends DefaultPluginManager { */ public static TikaPluginManager load(TikaJsonConfig tikaJsonConfig) throws TikaConfigException, IOException { - TikaConfigs tikaConfigs = TikaConfigs.load(tikaJsonConfig); - return load(tikaConfigs); + + JsonNode root = tikaJsonConfig.getRootNode(); + JsonNode pluginRoots = root.get("plugin-roots"); + if (pluginRoots == null) { + throw new TikaConfigException("plugin-roots must be specified"); + } + List<Path> roots = OBJECT_MAPPER.convertValue(pluginRoots, + new TypeReference<List<Path>>() {}); + if (roots.isEmpty()) { + throw new TikaConfigException("plugin-roots must not be empty"); + } + return new TikaPluginManager(roots); } /** * Loads plugin manager from a configuration file. - * For backwards compatibility - prefer {@link #load(TikaJsonConfig)} when possible. * * @param configPath the path to the JSON configuration file * @return the plugin manager @@ -83,29 +92,6 @@ public class TikaPluginManager extends DefaultPluginManager { return load(tikaJsonConfig); } - /** - * Loads plugin manager from a TikaConfigs instance. 
- * - * @param tikaConfigs the pipes configuration - * @return the plugin manager - * @throws TikaConfigException if configuration is invalid - * @throws IOException if plugin initialization fails - */ - public static TikaPluginManager load(TikaConfigs tikaConfigs) - throws TikaConfigException, IOException { - JsonNode root = tikaConfigs.getTikaJsonConfig().getRootNode(); - JsonNode pluginRoots = root.get("plugin-roots"); - if (pluginRoots == null) { - throw new TikaConfigException("plugin-roots must be specified"); - } - List<Path> roots = OBJECT_MAPPER.convertValue(pluginRoots, - new TypeReference<List<Path>>() {}); - if (roots.isEmpty()) { - throw new TikaConfigException("plugin-roots must not be empty"); - } - return new TikaPluginManager(roots); - } - public TikaPluginManager(List<Path> pluginRoots) throws IOException { super(pluginRoots); init(); diff --git a/tika-plugins-core/src/test/java/org/apache/tika/plugins/TikaConfigsTest.java b/tika-plugins-core/src/test/java/org/apache/tika/plugins/TikaConfigsTest.java deleted file mode 100644 index 207765d26..000000000 --- a/tika-plugins-core/src/test/java/org/apache/tika/plugins/TikaConfigsTest.java +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.plugins; - -import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.ByteArrayInputStream; -import java.nio.charset.StandardCharsets; - -import org.junit.jupiter.api.Test; - -import org.apache.tika.config.loader.TikaJsonConfig; -import org.apache.tika.exception.TikaConfigException; - -public class TikaConfigsTest { - - @Test - public void testValidKnownKeysPass() { - String json = """ - { - "fetchers": {}, - "emitters": {}, - "pipes-iterator": {}, - "pipes-reporters": {}, - "async": {}, - "plugin-roots": "target/plugins" - } - """; - - assertDoesNotThrow(() -> loadFromString(json)); - } - - @Test - public void testUnknownKeyThrows() { - String json = """ - { - "fetchers": {}, - "pipes-reporter": {} - } - """; - - TikaConfigException ex = assertThrows(TikaConfigException.class, - () -> loadFromString(json)); - - assertTrue(ex.getMessage().contains("pipes-reporter")); - assertTrue(ex.getMessage().contains("Unknown pipes config key")); - } - - @Test - public void testTypoInKeyThrows() { - String json = """ - { - "fethcers": {} - } - """; - - TikaConfigException ex = assertThrows(TikaConfigException.class, - () -> loadFromString(json)); - - assertTrue(ex.getMessage().contains("fethcers")); - } - - @Test - public void testExtensionKeyWithXPrefixAllowed() { - String json = """ - { - "fetchers": {}, - "x-custom-extension": { - "setting": "value" - }, - "x-another-custom": {} - } - """; - - assertDoesNotThrow(() -> loadFromString(json)); - } - - @Test - public void testEmptyConfigPasses() { - String json = "{}"; - - assertDoesNotThrow(() -> loadFromString(json)); - } - - @Test - public void testSingleValidKeyPasses() { - String 
json = """ - { - "plugin-roots": ["path1", "path2"] - } - """; - - assertDoesNotThrow(() -> loadFromString(json)); - } - - @Test - public void testErrorMessageIncludesValidKeys() { - String json = """ - { - "bad-key": {} - } - """; - - TikaConfigException ex = assertThrows(TikaConfigException.class, - () -> loadFromString(json)); - - assertTrue(ex.getMessage().contains("fetchers")); - assertTrue(ex.getMessage().contains("emitters")); - assertTrue(ex.getMessage().contains("x-")); - } - - @Test - public void testGetRootReturnsJsonNode() throws Exception { - String json = """ - { - "fetchers": { - "file-system-fetcher": {} - } - } - """; - - TikaConfigs configs = loadFromString(json); - assertNotNull(configs.getTikaJsonConfig().getRootNode()); - assertNotNull(configs.getTikaJsonConfig().getRootNode().get("fetchers")); - } - - private TikaConfigs loadFromString(String json) throws Exception { - return TikaConfigs.load(TikaJsonConfig.load(new ByteArrayInputStream(json.getBytes(StandardCharsets.UTF_8)))); - } -} diff --git a/tika-serialization/src/main/java/org/apache/tika/config/loader/ConfigLoader.java b/tika-serialization/src/main/java/org/apache/tika/config/loader/ConfigLoader.java index 1f7c16b64..c9045c9d7 100644 --- a/tika-serialization/src/main/java/org/apache/tika/config/loader/ConfigLoader.java +++ b/tika-serialization/src/main/java/org/apache/tika/config/loader/ConfigLoader.java @@ -25,11 +25,14 @@ import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.tika.exception.TikaConfigException; /** - * Loader for simple configuration objects from JSON. + * Loader for custom configuration objects from the "other-configs" section. * <p> - * This class handles straightforward POJOs that can be deserialized directly from JSON. - * For complex components like Parsers, Detectors, etc., use the specific methods on - * {@link TikaLoader} instead (e.g., {@code loadParsers()}, {@code loadDetectors()}). 
+ * This class handles custom POJOs and test configurations that are not part of + * Tika's official configuration schema. All configurations loaded via ConfigLoader + * must be placed under the "other-configs" top-level node in the JSON. + * <p> + * For official Tika components and configurations (parsers, detectors, pipes, server, etc.), + * use the specific methods on {@link TikaLoader} or load directly from {@link TikaJsonConfig}. * * <p>Usage: * <pre> @@ -40,40 +43,26 @@ import org.apache.tika.exception.TikaConfigException; * * // Load by class name (auto-converts to kebab-case) * HandlerConfig config = loader.configs().load(HandlerConfig.class); - * - * // Load server configuration (tika-server-core module) - * TikaServerConfig serverConfig = loader.configs().load("server", TikaServerConfig.class); - * - * // Load async configuration (tika-pipes-core module) - * AsyncConfig asyncConfig = loader.configs().load("async", AsyncConfig.class); - * - * // Load pipes configuration (tika-pipes-core module) - * PipesConfig pipesConfig = loader.configs().load("pipes", PipesConfig.class); * </pre> * - * <p>JSON configuration examples: + * <p>JSON configuration example: * <pre> * { - * "server": { - * "port": 9998, - * "host": "localhost", - * "taskTimeoutMillis": 300000, - * "enableUnsecureFeatures": false, - * "endpoints": ["all"] - * }, - * "async": { - * "emitWithinMillis": 10000, - * "emitMaxEstimatedBytes": 100000, - * "queueSize": 10000, - * "numEmitters": 1, - * "numClients": 4, - * "timeoutMillis": 60000 - * }, - * "pipes": { - * "numClients": 4, - * "timeoutMillis": 60000, - * "maxFilesProcessedPerProcess": 10000, - * "forkedJvmArgs": ["-Xmx2g", "-XX:+UseG1GC"] + * // Official Tika configs at root level (NOT loaded via configs()) + * "parsers": [...], + * "detectors": [...], + * "pipes": {...}, + * "server": {...}, + * + * // Custom configs MUST be in "other-configs" (loaded via configs()) + * "other-configs": { + * "handler-config": { + * "timeout": 5000, + *
"retries": 3 + * }, + * "my-custom-config": { + * "enabled": true + * } * } * } * </pre> @@ -152,7 +141,7 @@ public class ConfigLoader { validateKey(key); validateClass(clazz); - JsonNode node = config.getRootNode().get(key); + JsonNode node = getNode(key); if (node == null || node.isNull()) { return null; } @@ -246,7 +235,7 @@ public class ConfigLoader { validateKey(key); validateClass(clazz); - JsonNode node = config.getRootNode().get(key); + JsonNode node = getNode(key); if (node == null || node.isNull()) { return defaultValue; } @@ -286,10 +275,26 @@ public class ConfigLoader { * @return true if the key exists and is not null */ public boolean hasKey(String key) { - JsonNode node = config.getRootNode().get(key); + JsonNode node = getNode(key); return node != null && !node.isNull(); } + /** + * Gets a node by key from the "other-configs". + * + * @param key The JSON key to look for + * @return the node, or null if not found + */ + private JsonNode getNode(String key) { + + JsonNode otherConfigs = config.getRootNode().get("other-configs"); + if (otherConfigs != null && otherConfigs.isObject()) { + return otherConfigs.get(key); + } + + return null; + } + /** * Derives a kebab-case key from a class name. 
* <p> diff --git a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java index c8b59206d..1a7cff5d4 100644 --- a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java +++ b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java @@ -22,9 +22,11 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.Collections; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; @@ -76,13 +78,17 @@ import org.apache.tika.exception.TikaConfigException; * { "zip-container-detector": { "maxDepth": 10 } } * ], * - * // Pipes components (validated by TikaConfigs) + * // Pipes components (validated by validateKeys()) * "plugin-roots": ["/path/to/plugins"], * "fetchers": [...], * "emitters": [...], * - * // Custom extensions (prefix with x-) - * "x-my-custom-config": { ... } + * // Custom configurations (for testing or extensions) + * "other-configs": { + * "test-config": { ... }, + * "my-custom-config": { ... }, + * "anything": { ... } + * } * } * </pre> * @@ -92,6 +98,34 @@ import org.apache.tika.exception.TikaConfigException; */ public class TikaJsonConfig { + /** + * Known top-level configuration keys across core Tika and pipes/plugins. + * Names are kebab-case, except the global "maxJsonStringFieldLength".
+ */ + private static final Set<String> KNOWN_KEYS = Set.of( + // Globals + "maxJsonStringFieldLength", + "service-loader", + "xml-reader-utils", + // Core Tika component keys + "parsers", + "detectors", + "encoding-detectors", + "metadata-filters", + "renderers", + "translator", + "auto-detect-parser", + "server", + + // Pipes/plugin keys + "fetchers", + "emitters", + "pipes-iterator", + "pipes-reporters", + "pipes", + "plugin-roots" + ); + private static final ObjectMapper OBJECT_MAPPER = PolymorphicObjectMapperFactory.getMapper(); @@ -130,7 +164,9 @@ public class TikaJsonConfig { public static TikaJsonConfig load(InputStream inputStream) throws TikaConfigException { try { JsonNode rootNode = OBJECT_MAPPER.readTree(inputStream); - return new TikaJsonConfig(rootNode); + TikaJsonConfig tikaJsonConfig = new TikaJsonConfig(rootNode); + tikaJsonConfig.validateKeys(); + return tikaJsonConfig; } catch (IOException e) { throw new TikaConfigException("Failed to parse JSON configuration", e); } @@ -302,6 +338,46 @@ public class TikaJsonConfig { return rootNode.has(key) && !rootNode.get(key).isNull(); } + /** + * Validates that all top-level configuration keys are known or custom extensions. + * <p> + * This catches typos like "parser" instead of "parsers" or "pipes-reporter" + * instead of "pipes-reporters". + * <p> + * The "other-configs" node is allowed for custom configurations. 
+ * + * @throws TikaConfigException if unknown keys are found + */ + private void validateKeys() throws TikaConfigException { + if (rootNode == null || !rootNode.isObject()) { + return; + } + + Iterator<String> fieldNames = rootNode.fieldNames(); + List<String> unknownKeys = new ArrayList<>(); + + while (fieldNames.hasNext()) { + String key = fieldNames.next(); + + // Ignore custom configs node + if (key.equals("other-configs")) { + continue; + } + + // Must be a known key + if (!KNOWN_KEYS.contains(key)) { + unknownKeys.add(key); + } + } + + if (!unknownKeys.isEmpty()) { + throw new TikaConfigException( + "Unknown configuration key(s): " + unknownKeys + ". " + + "Valid keys: " + KNOWN_KEYS + " " + + "(or use 'other-configs' node for custom keys)"); + } + } + @Override public String toString() { return "TikaJsonConfig{" + "rootNode=" + rootNode + '}'; diff --git a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java index 67b57cc69..e3a4c63a6 100644 --- a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java +++ b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java @@ -103,7 +103,7 @@ public class TikaLoader { * * @throws TikaConfigException if loading global settings fails */ - private void init() throws TikaConfigException { + private void init() throws TikaConfigException, IOException { loadGlobalSettings(); } @@ -115,7 +115,7 @@ public class TikaLoader { * @return the Tika loader * @throws TikaConfigException if loading or parsing fails */ - public static TikaLoader load(Path configPath) throws TikaConfigException { + public static TikaLoader load(Path configPath) throws TikaConfigException, IOException { return load(configPath, Thread.currentThread().getContextClassLoader()); } @@ -129,7 +129,7 @@ public class TikaLoader { * @throws TikaConfigException if loading or parsing fails */ public static 
TikaLoader load(Path configPath, ClassLoader classLoader) - throws TikaConfigException { + throws TikaConfigException, IOException { TikaJsonConfig config = TikaJsonConfig.load(configPath); TikaLoader loader = new TikaLoader(config, classLoader); loader.init(); @@ -162,7 +162,7 @@ public class TikaLoader { TikaLoader loader = new TikaLoader(config, classLoader); try { loader.init(); - } catch (TikaConfigException e) { + } catch (IOException | TikaConfigException e) { // Default config should never throw, but wrap in RuntimeException if it does throw new RuntimeException("Failed to initialize default TikaLoader", e); } @@ -314,7 +314,8 @@ public class TikaLoader { */ public synchronized Parser loadAutoDetectParser() throws TikaConfigException, IOException { if (autoDetectParser == null) { - AutoDetectParserConfig adpConfig = configs().load(AutoDetectParserConfig.class); + // Load directly from root-level config (not via configs() which only looks in "other-configs") + AutoDetectParserConfig adpConfig = config.deserialize("auto-detect-parser", AutoDetectParserConfig.class); if (adpConfig == null) { adpConfig = new AutoDetectParserConfig(); } @@ -410,7 +411,7 @@ public class TikaLoader { * @return the global settings, or an empty object if no settings are configured * @throws TikaConfigException if loading fails */ - public synchronized GlobalSettings loadGlobalSettings() throws TikaConfigException { + public synchronized GlobalSettings loadGlobalSettings() throws IOException, TikaConfigException { if (globalSettings == null) { globalSettings = new GlobalSettings(); @@ -420,16 +421,16 @@ public class TikaLoader { config.getRootNode().get("maxJsonStringFieldLength").asInt()); } - // Load service-loader config + // Load service-loader config (official Tika config at root level) GlobalSettings.ServiceLoaderConfig serviceLoaderConfig = - configs().load("service-loader", GlobalSettings.ServiceLoaderConfig.class); + config.deserialize("service-loader", 
GlobalSettings.ServiceLoaderConfig.class); if (serviceLoaderConfig != null) { globalSettings.setServiceLoader(serviceLoaderConfig); } - // Load xml-reader-utils config + // Load xml-reader-utils config (official Tika config at root level) GlobalSettings.XmlReaderUtilsConfig xmlReaderUtilsConfig = - configs().load("xml-reader-utils", GlobalSettings.XmlReaderUtilsConfig.class); + config.deserialize("xml-reader-utils", GlobalSettings.XmlReaderUtilsConfig.class); if (xmlReaderUtilsConfig != null) { globalSettings.setXmlReaderUtils(xmlReaderUtilsConfig); } diff --git a/tika-serialization/src/test/resources/configs/test-config-loader.json b/tika-serialization/src/test/resources/configs/test-config-loader.json index 8f6e89a8c..cb6264c91 100644 --- a/tika-serialization/src/test/resources/configs/test-config-loader.json +++ b/tika-serialization/src/test/resources/configs/test-config-loader.json @@ -1,32 +1,34 @@ { - "handler-config": { - "timeout": 5000, - "retries": 3, - "enabled": true - }, + "parsers": [ + {"pdf-parser": {}} + ], - "simple-handler": "org.apache.tika.config.loader.ConfigLoaderTest$SimpleHandlerImpl", + "other-configs": { + "handler-config": { + "timeout": 5000, + "retries": 3, + "enabled": true + }, - "configured-handler": { - "@class": "org.apache.tika.config.loader.ConfigLoaderTest$ConfiguredHandlerImpl", - "maxSize": 100000, - "prefix": "test-" - }, + "simple-handler": "org.apache.tika.config.loader.ConfigLoaderTest$SimpleHandlerImpl", - "tika-task-timeout": { - "millis": 30000 - }, + "configured-handler": { + "@class": "org.apache.tika.config.loader.ConfigLoaderTest$ConfiguredHandlerImpl", + "maxSize": 100000, + "prefix": "test-" + }, - "parsers": [ - {"pdf-parser": {}} - ], + "tika-task-timeout": { + "millis": 30000 + }, - "my-feature-settings": { - "featureName": "test-feature", - "priority": 10 - }, + "my-feature-settings": { + "featureName": "test-feature", + "priority": 10 + }, - "abstract-handler": { - "someProperty": "value" + 
"abstract-handler": { + "someProperty": "value" + } } } diff --git a/tika-serialization/src/test/resources/configs/test-interface-no-type.json b/tika-serialization/src/test/resources/configs/test-interface-no-type.json index 15a3d35b2..da2e606bb 100644 --- a/tika-serialization/src/test/resources/configs/test-interface-no-type.json +++ b/tika-serialization/src/test/resources/configs/test-interface-no-type.json @@ -1,6 +1,8 @@ { - "handler-no-type": { - "maxSize": 50000, - "prefix": "no-type-" + "other-configs": { + "handler-no-type": { + "maxSize": 50000, + "prefix": "no-type-" + } } } diff --git a/tika-serialization/src/test/resources/configs/test-invalid-class.json b/tika-serialization/src/test/resources/configs/test-invalid-class.json index f0bf4bf4e..c927b6e1c 100644 --- a/tika-serialization/src/test/resources/configs/test-invalid-class.json +++ b/tika-serialization/src/test/resources/configs/test-invalid-class.json @@ -1,3 +1,5 @@ { - "handler": "com.example.NonExistentClass" + "other-configs": { + "handler": "com.example.NonExistentClass" + } } diff --git a/tika-serialization/src/test/resources/configs/test-partial-config.json b/tika-serialization/src/test/resources/configs/test-partial-config.json index fb010c3e8..866f2594b 100644 --- a/tika-serialization/src/test/resources/configs/test-partial-config.json +++ b/tika-serialization/src/test/resources/configs/test-partial-config.json @@ -1,15 +1,17 @@ { - "handler-config": { - "enabled": true - }, + "other-configs": { + "handler-config": { + "enabled": true + }, - "handler-config-full": { - "timeout": 10000, - "retries": 5, - "enabled": false - }, + "handler-config-full": { + "timeout": 10000, + "retries": 5, + "enabled": false + }, - "tika-task-timeout": { - "millis": 30000 + "tika-task-timeout": { + "millis": 30000 + } } } diff --git a/tika-serialization/src/test/resources/configs/test-unexpected-field.json b/tika-serialization/src/test/resources/configs/test-unexpected-field.json index ada7f9bdf..d250d5fa1 
100644 --- a/tika-serialization/src/test/resources/configs/test-unexpected-field.json +++ b/tika-serialization/src/test/resources/configs/test-unexpected-field.json @@ -1,8 +1,10 @@ { - "handler-config": { - "timeout": 5000, - "retries": 3, - "enabled": true, - "unexpectedField": "this should cause an error" + "other-configs": { + "handler-config": { + "timeout": 5000, + "retries": 3, + "enabled": true, + "unexpectedField": "this should cause an error" + } } } diff --git a/tika-serialization/src/test/resources/configs/test-wrong-type.json b/tika-serialization/src/test/resources/configs/test-wrong-type.json index b25e9f644..ece5fe3ae 100644 --- a/tika-serialization/src/test/resources/configs/test-wrong-type.json +++ b/tika-serialization/src/test/resources/configs/test-wrong-type.json @@ -1,3 +1,5 @@ { - "handler": "java.lang.String" + "other-configs": { + "handler": "java.lang.String" + } } diff --git a/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientCLI.java b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientCLI.java index af63caec9..0da685418 100644 --- a/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientCLI.java +++ b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientCLI.java @@ -34,12 +34,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.SAXException; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.exception.TikaException; import org.apache.tika.pipes.api.FetchEmitTuple; import org.apache.tika.pipes.api.pipesiterator.PipesIterator; import org.apache.tika.pipes.core.pipesiterator.CallablePipesIterator; import org.apache.tika.pipes.core.pipesiterator.PipesIteratorManager; -import org.apache.tika.plugins.TikaConfigs; import org.apache.tika.plugins.TikaPluginManager; public class TikaClientCLI { @@ -61,9 +61,9 @@ public class TikaClientCLI { 
ExecutorCompletionService<Long> completionService = new ExecutorCompletionService<>(executorService); - TikaConfigs tikaConfigs = TikaConfigs.load(pluginsConfigPath); - TikaPluginManager pluginManager = TikaPluginManager.load(tikaConfigs); - final PipesIterator pipesIterator = PipesIteratorManager.load(pluginManager, tikaConfigs) + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(pluginsConfigPath); + TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig); + final PipesIterator pipesIterator = PipesIteratorManager.load(pluginManager, tikaJsonConfig) .orElseThrow(() -> new TikaException("No pipes iterator configured")); final ArrayBlockingQueue<FetchEmitTuple> queue = new ArrayBlockingQueue<>(QUEUE_SIZE); diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerConfig.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerConfig.java index 2bb62d595..29cd8f4c4 100644 --- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerConfig.java +++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerConfig.java @@ -150,7 +150,7 @@ private long forkedProcessShutdownMillis = DEFAULT_FORKED_PROCESS_SHUTDOWN_MILLI } static TikaServerConfig load(Path tikaConfigPath, CommandLine commandLine, Set<String> settings) throws IOException, TikaException { - TikaServerConfig tikaServerConfig = TikaLoader.load(tikaConfigPath).configs().load("server", TikaServerConfig.class); + TikaServerConfig tikaServerConfig = TikaLoader.load(tikaConfigPath).getConfig().deserialize("server", TikaServerConfig.class); if (tikaServerConfig == null) { throw new TikaConfigException("Couldn't find 'server' element"); } diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java index 21eae3369..09de5b18a 100644 --- 
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java +++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java @@ -55,11 +55,11 @@ import org.xml.sax.SAXException; import org.apache.tika.Tika; import org.apache.tika.config.ServiceLoader; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.config.loader.TikaLoader; import org.apache.tika.exception.TikaException; import org.apache.tika.pipes.core.emitter.EmitterManager; import org.apache.tika.pipes.core.fetcher.FetcherManager; -import org.apache.tika.plugins.TikaConfigs; import org.apache.tika.plugins.TikaPluginManager; import org.apache.tika.server.core.resource.AsyncResource; import org.apache.tika.server.core.resource.DetectorResource; @@ -176,9 +176,9 @@ public class TikaServerProcess { FetcherManager fetcherManager = null; InputStreamFactory inputStreamFactory = null; if (tikaServerConfig.isEnableUnsecureFeatures()) { - TikaConfigs tikaConfigs = TikaConfigs.load(tikaServerConfig.getConfigPath()); - TikaPluginManager pluginManager = TikaPluginManager.load(tikaConfigs); - fetcherManager = FetcherManager.load(pluginManager, tikaConfigs); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(tikaServerConfig.getConfigPath()); + TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig); + fetcherManager = FetcherManager.load(pluginManager, tikaJsonConfig); inputStreamFactory = new FetcherStreamFactory(fetcherManager); } else { inputStreamFactory = new DefaultInputStreamFactory(); diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncResource.java index 96a1fcaa8..952e3b376 100644 --- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncResource.java +++ 
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncResource.java @@ -37,6 +37,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.SAXException; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; @@ -49,7 +50,6 @@ import org.apache.tika.pipes.core.emitter.EmitDataImpl; import org.apache.tika.pipes.core.emitter.EmitterManager; import org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.pipes.core.serialization.JsonFetchEmitTupleList; -import org.apache.tika.plugins.TikaConfigs; import org.apache.tika.plugins.TikaPluginManager; @Path("/async") @@ -63,9 +63,9 @@ public class AsyncResource { public AsyncResource(java.nio.file.Path tikaConfigPath) throws TikaException, IOException, SAXException { this.asyncProcessor = new AsyncProcessor(tikaConfigPath); - TikaConfigs tikaConfigs = TikaConfigs.load(tikaConfigPath); - TikaPluginManager pluginManager = TikaPluginManager.load(tikaConfigs); - this.emitterManager = EmitterManager.load(pluginManager, tikaConfigs); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(tikaConfigPath); + TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig); + this.emitterManager = EmitterManager.load(pluginManager, tikaJsonConfig); } public ArrayBlockingQueue<FetchEmitTuple> getFetchEmitQueue(int queueSize) { diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesResource.java index 139189fee..69426d17f 100644 --- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesResource.java +++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesResource.java @@ -33,7 +33,7 @@ import 
jakarta.ws.rs.core.UriInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.tika.config.loader.TikaLoader; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; @@ -42,7 +42,6 @@ import org.apache.tika.pipes.api.PipesResult; import org.apache.tika.pipes.core.PipesConfig; import org.apache.tika.pipes.core.PipesException; import org.apache.tika.pipes.core.PipesParser; -import org.apache.tika.pipes.core.async.AsyncConfig; import org.apache.tika.pipes.core.serialization.JsonFetchEmitTuple; @Path("/pipes") @@ -54,7 +53,8 @@ public class PipesResource { private final PipesParser pipesParser; public PipesResource(java.nio.file.Path tikaConfig) throws TikaConfigException, IOException { - PipesConfig pipesConfig = TikaLoader.load(tikaConfig).configs().load("async", AsyncConfig.class); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(tikaConfig); + PipesConfig pipesConfig = PipesConfig.load(tikaJsonConfig); //this has to be zero. 
everything must be emitted through the PipesServer long maxEmit = pipesConfig.getMaxForEmitBatchBytes(); if (maxEmit != 0) { diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java index b70b465e4..0ecace00c 100644 --- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java +++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java @@ -69,7 +69,7 @@ public abstract class CXFTestBase { public final static String BASIC_CONFIG = """ { - "auto-detect-parser-config": { + "auto-detect-parser": { "spoolToDisk": 1000000, "outputThreshold": 1000000, "digesterFactory": { diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java index 41423d739..74eec7574 100644 --- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java +++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java @@ -48,6 +48,7 @@ import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; @@ -58,7 +59,6 @@ import org.apache.tika.pipes.api.emitter.EmitKey; import org.apache.tika.pipes.api.fetcher.FetchKey; import org.apache.tika.pipes.core.fetcher.FetcherManager; import org.apache.tika.pipes.core.serialization.JsonFetchEmitTuple; -import org.apache.tika.plugins.TikaConfigs; import org.apache.tika.plugins.TikaPluginManager; import org.apache.tika.sax.BasicContentHandlerFactory; import org.apache.tika.serialization.JsonMetadataList; @@ -106,9 +106,9 @@ 
public class TikaPipesTest extends CXFTestBase { CXFTestBase.createPluginsConfig(TIKA_CONFIG_PATH, inputDir, TMP_OUTPUT_DIR, null, 10000L); - TikaConfigs tikaConfigs = TikaConfigs.load(TIKA_CONFIG_PATH); - TikaPluginManager pluginManager = TikaPluginManager.load(tikaConfigs); - FETCHER_MANAGER = FetcherManager.load(pluginManager, tikaConfigs); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(TIKA_CONFIG_PATH); + TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig); + FETCHER_MANAGER = FetcherManager.load(pluginManager, tikaJsonConfig); } @AfterAll diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceFetcherTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceFetcherTest.java index 84f2b6198..f76d14bb2 100644 --- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceFetcherTest.java +++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceFetcherTest.java @@ -35,10 +35,10 @@ import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.io.TikaInputStream; import org.apache.tika.pipes.core.fetcher.FetcherManager; -import org.apache.tika.plugins.TikaConfigs; import org.apache.tika.plugins.TikaPluginManager; import org.apache.tika.server.core.resource.TikaResource; import org.apache.tika.server.core.writer.JSONMessageBodyWriter; @@ -93,10 +93,9 @@ public class TikaResourceFetcherTest extends CXFTestBase { @Override protected InputStreamFactory getInputStreamFactory(InputStream is) { try (TikaInputStream tis = TikaInputStream.get(is)) { - TikaConfigs tikaConfigs = TikaConfigs.load(tis.getPath()); - System.out.println(tikaConfigs.getTikaJsonConfig().getRootNode().toPrettyString()); - 
TikaPluginManager pluginManager = TikaPluginManager.load(tikaConfigs); - FetcherManager fetcherManager = FetcherManager.load(pluginManager, tikaConfigs); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(tis.getPath()); + TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig); + FetcherManager fetcherManager = FetcherManager.load(pluginManager, tikaJsonConfig); return new FetcherStreamFactory(fetcherManager); } catch (IOException | TikaConfigException e) { throw new RuntimeException(e); diff --git a/tika-server/tika-server-core/src/test/resources/configs/cxf-test-base-template.json b/tika-server/tika-server-core/src/test/resources/configs/cxf-test-base-template.json index 740d5d26b..0182e2bd0 100644 --- a/tika-server/tika-server-core/src/test/resources/configs/cxf-test-base-template.json +++ b/tika-server/tika-server-core/src/test/resources/configs/cxf-test-base-template.json @@ -33,7 +33,7 @@ "pipes" ] }, - "async": { + "pipes": { "tikaConfig": "TIKA_CONFIG", "numClients": 2, "timeoutMillis": TIMEOUT_MILLIS, @@ -43,7 +43,7 @@ ], "maxForEmitBatchBytes": 1000000 }, - "auto-detect-parser-config": { + "auto-detect-parser": { "spoolToDisk": 1000000, "outputThreshold": 1000000, "digesterFactory": { diff --git a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/FetcherTest.java b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/FetcherTest.java index 9c8d349e0..d9a5ce8c0 100644 --- a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/FetcherTest.java +++ b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/FetcherTest.java @@ -33,11 +33,11 @@ import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import 
org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.pipes.core.fetcher.FetcherManager; -import org.apache.tika.plugins.TikaConfigs; import org.apache.tika.plugins.TikaPluginManager; import org.apache.tika.serialization.JsonMetadataList; import org.apache.tika.server.core.CXFTestBase; @@ -76,9 +76,9 @@ public class FetcherTest extends CXFTestBase { @Override protected InputStreamFactory getInputStreamFactory(InputStream tikaConfigInputStream) { try (TikaInputStream tis = TikaInputStream.get(tikaConfigInputStream)) { - TikaConfigs tikaConfigs = TikaConfigs.load(tis.getPath()); - TikaPluginManager pluginManager = TikaPluginManager.load(tikaConfigs); - FetcherManager fetcherManager = FetcherManager.load(pluginManager, tikaConfigs); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(tis.getPath()); + TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig); + FetcherManager fetcherManager = FetcherManager.load(pluginManager, tikaJsonConfig); return new FetcherStreamFactory(fetcherManager); } catch (Exception e) { throw new RuntimeException(e); diff --git a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java index 6cd36d6ca..41db6ba73 100644 --- a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java +++ b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java @@ -50,6 +50,7 @@ import org.junit.jupiter.api.io.TempDir; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.tika.config.loader.TikaJsonConfig; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; @@ -62,7 +63,6 @@ import org.apache.tika.pipes.api.fetcher.FetchKey; import 
org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.pipes.core.fetcher.FetcherManager; import org.apache.tika.pipes.core.serialization.JsonFetchEmitTuple; -import org.apache.tika.plugins.TikaConfigs; import org.apache.tika.plugins.TikaPluginManager; import org.apache.tika.sax.BasicContentHandlerFactory; import org.apache.tika.serialization.JsonMetadataList; @@ -111,9 +111,9 @@ public class TikaPipesTest extends CXFTestBase { TIKA_CONFIG_PATH = Files.createTempFile(TMP_WORKING_DIR, "tika-pipes-config-", ".json"); CXFTestBase.createPluginsConfig(TIKA_CONFIG_PATH, inputDir, OUTPUT_JSON_DIR, OUTPUT_BYTES_DIR, 10000L); - TikaConfigs tikaConfigs = TikaConfigs.load(TIKA_CONFIG_PATH); - TikaPluginManager pluginManager = TikaPluginManager.load(tikaConfigs); - FETCHER_MANAGER = FetcherManager.load(pluginManager, tikaConfigs); + TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(TIKA_CONFIG_PATH); + TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig); + FETCHER_MANAGER = FetcherManager.load(pluginManager, tikaJsonConfig); } diff --git a/tika-server/tika-server-standard/src/test/resources/configs/cxf-test-base-template.json b/tika-server/tika-server-standard/src/test/resources/configs/cxf-test-base-template.json index 740d5d26b..0182e2bd0 100644 --- a/tika-server/tika-server-standard/src/test/resources/configs/cxf-test-base-template.json +++ b/tika-server/tika-server-standard/src/test/resources/configs/cxf-test-base-template.json @@ -33,7 +33,7 @@ "pipes" ] }, - "async": { + "pipes": { "tikaConfig": "TIKA_CONFIG", "numClients": 2, "timeoutMillis": TIMEOUT_MILLIS, @@ -43,7 +43,7 @@ ], "maxForEmitBatchBytes": 1000000 }, - "auto-detect-parser-config": { + "auto-detect-parser": { "spoolToDisk": 1000000, "outputThreshold": 1000000, "digesterFactory": { diff --git a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-for-server-tests.json 
b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-for-server-tests.json index e14b2dab7..0e2f181c3 100644 --- a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-for-server-tests.json +++ b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-for-server-tests.json @@ -9,7 +9,7 @@ } } ], - "auto-detect-parser-config": { + "auto-detect-parser": { "spoolToDisk": 1000000, "outputThreshold": 1000000, "digesterFactory": { diff --git a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-opennlp-filter.json b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-opennlp-filter.json index 93c8ae2fa..f760b8d87 100644 --- a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-opennlp-filter.json +++ b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-opennlp-filter.json @@ -14,7 +14,7 @@ "open-nlp-metadata-filter": {} } ], - "auto-detect-parser-config": { + "auto-detect-parser": { "spoolToDisk": 1000000, "outputThreshold": 1000000, "digesterFactory": { diff --git a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-optimaize-filter.json b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-optimaize-filter.json index 38e69c5c3..f4ffda22a 100644 --- a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-optimaize-filter.json +++ b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-optimaize-filter.json @@ -14,7 +14,7 @@ "optimaize-metadata-filter": {} } ], - "auto-detect-parser-config": { + "auto-detect-parser": { "spoolToDisk": 1000000, "outputThreshold": 1000000, "digesterFactory": {
