This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 54054ff6e TIKA-4545-loaders (#2409)
54054ff6e is described below
commit 54054ff6e4ce3cd75bbbf6fbefd8c8a2159b76a8
Author: Tim Allison <[email protected]>
AuthorDate: Fri Nov 28 11:00:38 2025 -0500
TIKA-4545-loaders (#2409)
* TIKA-4545 -- integrate TikaJsonConfig across core and pipes
---
.../tika/pipes/emitter/fs/FileSystemEmitter.java | 42 ++--
.../emitter/fs/FileSystemEmitterRuntimeConfig.java | 74 +++++++
.../tika/pipes/fetcher/fs/FileSystemFetcher.java | 21 +-
.../fetcher/fs/FileSystemFetcherRuntimeConfig.java | 54 +++++
.../fs/FileSystemEmitterRuntimeConfigTest.java | 241 +++++++++++++++++++++
.../fs/FileSystemFetcherRuntimeConfigTest.java | 184 ++++++++++++++++
tika-plugins-core/pom.xml | 5 +
.../java/org/apache/tika/plugins/TikaConfigs.java | 134 +++++++++---
.../org/apache/tika/plugins/TikaPluginManager.java | 56 ++++-
.../org/apache/tika/plugins/TikaConfigsTest.java | 5 +-
.../apache/tika/config/loader/TikaJsonConfig.java | 66 +++++-
11 files changed, 821 insertions(+), 61 deletions(-)
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
index b9ac4edf2..c790f98d0 100644
--- a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
@@ -79,12 +79,16 @@ public class FileSystemEmitter extends
AbstractStreamEmitter {
@Override
public void emit(String emitKey, List<Metadata> metadataList, ParseContext
parseContext) throws IOException {
- LOG.warn("about to emit: {}", emitKey);
if (metadataList == null || metadataList.isEmpty()) {
throw new IOException("metadata list must not be null or of size
0");
}
- FileSystemEmitterConfig config = getConfig(parseContext);
+ FileSystemEmitterConfig config = null;
+ try {
+ config = getConfig(parseContext);
+ } catch (TikaConfigException e) {
+ throw new IOException(e);
+ }
Path output;
@@ -112,9 +116,13 @@ public class FileSystemEmitter extends
AbstractStreamEmitter {
@Override
public void emit(String emitKey, InputStream inputStream, Metadata
userMetadata, ParseContext parseContext) throws IOException {
- LOG.warn("about to stream emit: {}", emitKey);
- FileSystemEmitterConfig config = getConfig(parseContext);
+ FileSystemEmitterConfig config = null;
+ try {
+ config = getConfig(parseContext);
+ } catch (TikaConfigException e) {
+ throw new IOException(e);
+ }
Path output;
if (config.basePath() != null) {
@@ -128,43 +136,45 @@ public class FileSystemEmitter extends
AbstractStreamEmitter {
}
if (!Files.isDirectory(output.getParent())) {
- LOG.warn("creating parent directory: {}", output);
Files.createDirectories(output.getParent());
}
- LOG.warn("on exists: {}", config.onExists());
if (config.onExists() == FileSystemEmitterConfig.ON_EXISTS.REPLACE) {
- LOG.warn("copying {}", output);
Files.copy(inputStream, output,
StandardCopyOption.REPLACE_EXISTING);
} else if (config.onExists() ==
FileSystemEmitterConfig.ON_EXISTS.EXCEPTION) {
- LOG.warn("copying 2 {}", output);
Files.copy(inputStream, output);
} else if (config.onExists() ==
FileSystemEmitterConfig.ON_EXISTS.SKIP) {
if (!Files.isRegularFile(output)) {
try {
- LOG.warn("copying 3 {}", output);
-
Files.copy(inputStream, output);
} catch (FileAlreadyExistsException e) {
//swallow
- LOG.warn("file exists");
}
}
}
}
- private FileSystemEmitterConfig getConfig(ParseContext parseContext)
throws IOException {
+ private FileSystemEmitterConfig getConfig(ParseContext parseContext)
throws TikaConfigException, IOException {
FileSystemEmitterConfig config = fileSystemEmitterConfig;
ConfigContainer configContainer =
parseContext.get(ConfigContainer.class);
if (configContainer != null) {
Optional<String> configJson =
configContainer.get(getExtensionConfig().id());
if (configJson.isPresent()) {
- try {
- config = FileSystemEmitterConfig.load(configJson.get());
- } catch (TikaConfigException e) {
- throw new IOException("Failed to load config", e);
+ // Check if basePath is present in runtime config - this is
not allowed for security
+ if (configJson
+ .get()
+ .contains("\"basePath\"")) {
+ throw new TikaConfigException("Cannot change 'basePath' at
runtime for security reasons. " + "basePath can only be set during
initialization.");
}
+
+ // Load runtime config (excludes basePath for security)
+ FileSystemEmitterRuntimeConfig runtimeConfig =
FileSystemEmitterRuntimeConfig.load(configJson.get());
+
+ // Merge runtime config into default config while preserving
basePath
+ config = new
FileSystemEmitterConfig(fileSystemEmitterConfig.basePath(),
runtimeConfig.getFileExtension(), runtimeConfig.getOnExists(),
+ runtimeConfig.isPrettyPrint());
checkConfig(config);
}
+
}
return config;
}
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitterRuntimeConfig.java b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitterRuntimeConfig.java
new file mode 100644
index 000000000..111f23688
--- /dev/null
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitterRuntimeConfig.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.emitter.fs;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import org.apache.tika.exception.TikaConfigException;
+
+/**
+ * Runtime configuration for FileSystemEmitter.
+ * Only includes fields that are safe to update at runtime.
+ * basePath is intentionally excluded for security reasons.
+ */
+public class FileSystemEmitterRuntimeConfig {
+
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ public static FileSystemEmitterRuntimeConfig load(final String json)
+ throws TikaConfigException {
+ try {
+ return OBJECT_MAPPER.readValue(json,
+ FileSystemEmitterRuntimeConfig.class);
+ } catch (JsonProcessingException e) {
+ throw new TikaConfigException(
+ "Failed to parse FileSystemEmitterRuntimeConfig from
JSON", e);
+ }
+ }
+
+ private String fileExtension;
+ private FileSystemEmitterConfig.ON_EXISTS onExists;
+ private boolean prettyPrint;
+
+ public String getFileExtension() {
+ return fileExtension;
+ }
+
+ public FileSystemEmitterRuntimeConfig setFileExtension(String
fileExtension) {
+ this.fileExtension = fileExtension;
+ return this;
+ }
+
+ public FileSystemEmitterConfig.ON_EXISTS getOnExists() {
+ return onExists;
+ }
+
+ public FileSystemEmitterRuntimeConfig
setOnExists(FileSystemEmitterConfig.ON_EXISTS onExists) {
+ this.onExists = onExists;
+ return this;
+ }
+
+ public boolean isPrettyPrint() {
+ return prettyPrint;
+ }
+
+ public FileSystemEmitterRuntimeConfig setPrettyPrint(boolean prettyPrint) {
+ this.prettyPrint = prettyPrint;
+ return this;
+ }
+}
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
index fb1ee0abd..7dcb53dab 100644
--- a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
@@ -80,8 +80,25 @@ public class FileSystemFetcher extends AbstractTikaExtension
implements Fetcher
if (configContainer != null) {
Optional<String> configJson =
configContainer.get(getExtensionConfig().id());
if (configJson.isPresent()) {
- config = FileSystemFetcherConfig.load(configJson.get());
- checkConfig(config);
+ try {
+ // Check if basePath is present in runtime config - this
is not allowed for security
+ if (configJson.get().contains("\"basePath\"")) {
+ throw new TikaConfigException(
+ "Cannot change 'basePath' at runtime for
security reasons. " +
+ "basePath can only be set during
initialization.");
+ }
+
+ // Load runtime config (excludes basePath for security)
+ FileSystemFetcherRuntimeConfig runtimeConfig =
+
FileSystemFetcherRuntimeConfig.load(configJson.get());
+
+ // Merge runtime config into default config while
preserving basePath
+ config = new FileSystemFetcherConfig()
+
.setBasePath(defaultFileSystemFetcherConfig.getBasePath())
+
.setExtractFileSystemMetadata(runtimeConfig.isExtractFileSystemMetadata());
+ } catch (TikaConfigException e) {
+ throw new IOException("Failed to load runtime config", e);
+ }
}
}
Path p = null;
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherRuntimeConfig.java b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherRuntimeConfig.java
new file mode 100644
index 000000000..ffadf9822
--- /dev/null
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherRuntimeConfig.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetcher.fs;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import org.apache.tika.exception.TikaConfigException;
+
+/**
+ * Runtime configuration for FileSystemFetcher.
+ * Only includes fields that are safe to update at runtime.
+ * basePath is intentionally excluded for security reasons.
+ */
+public class FileSystemFetcherRuntimeConfig {
+
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ public static FileSystemFetcherRuntimeConfig load(final String json)
+ throws TikaConfigException {
+ try {
+ return OBJECT_MAPPER.readValue(json,
+ FileSystemFetcherRuntimeConfig.class);
+ } catch (JsonProcessingException e) {
+ throw new TikaConfigException(
+ "Failed to parse FileSystemFetcherRuntimeConfig from
JSON", e);
+ }
+ }
+
+ private boolean extractFileSystemMetadata;
+
+ public boolean isExtractFileSystemMetadata() {
+ return extractFileSystemMetadata;
+ }
+
+ public FileSystemFetcherRuntimeConfig setExtractFileSystemMetadata(boolean
extractFileSystemMetadata) {
+ this.extractFileSystemMetadata = extractFileSystemMetadata;
+ return this;
+ }
+}
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitterRuntimeConfigTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitterRuntimeConfigTest.java
new file mode 100644
index 000000000..b3f58d54c
--- /dev/null
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitterRuntimeConfigTest.java
@@ -0,0 +1,241 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.emitter.fs;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import org.apache.tika.config.ConfigContainer;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.plugins.ExtensionConfig;
+
+/**
+ * Tests runtime configuration of FileSystemEmitter via ConfigContainer and
ParseContext.
+ */
+public class FileSystemEmitterRuntimeConfigTest {
+
+ @Test
+ public void testRuntimeConfigCannotOverrideBasePath(@TempDir Path tempDir)
throws Exception {
+ // Create two output directories
+ Path dir1 = tempDir.resolve("output1");
+ Path dir2 = tempDir.resolve("output2");
+ Files.createDirectories(dir1);
+ Files.createDirectories(dir2);
+
+ // Create emitter with dir1 as default basePath
+ String defaultConfig = String.format(Locale.ROOT,
+ "{\"basePath\":\"%s\", \"onExists\":\"REPLACE\"}",
+ dir1.toString().replace("\\", "\\\\"));
+ ExtensionConfig pluginConfig = new ExtensionConfig("test-emitter",
"test", defaultConfig);
+ FileSystemEmitter emitter = FileSystemEmitter.build(pluginConfig);
+
+ // Emit with default config
+ List<Metadata> metadataList1 = new ArrayList<>();
+ Metadata m1 = new Metadata();
+ m1.set(TikaCoreProperties.TIKA_CONTENT, "content1");
+ metadataList1.add(m1);
+
+ ParseContext context1 = new ParseContext();
+ emitter.emit("test1.json", metadataList1, context1);
+
+ Path output1 = dir1.resolve("test1.json");
+ assertTrue(Files.exists(output1), "File should be created in dir1");
+
+ // Try to override basePath at runtime to point to dir2
+ // This should throw an exception for security reasons
+ String runtimeConfig = String.format(Locale.ROOT,
+ "{\"basePath\":\"%s\", \"onExists\":\"REPLACE\"}",
+ dir2.toString().replace("\\", "\\\\"));
+ ConfigContainer configContainer = new ConfigContainer();
+ configContainer.set("test-emitter", runtimeConfig);
+
+ ParseContext context2 = new ParseContext();
+ context2.set(ConfigContainer.class, configContainer);
+
+ // Emit with runtime config - should throw exception
+ List<Metadata> metadataList2 = new ArrayList<>();
+ Metadata m2 = new Metadata();
+ m2.set(TikaCoreProperties.TIKA_CONTENT, "content2");
+ metadataList2.add(m2);
+
+ IOException exception = assertThrows(IOException.class, () -> {
+ emitter.emit("test2.json", metadataList2, context2);
+ });
+ assertTrue(exception.getCause() != null &&
+ exception.getCause().getMessage().contains("Cannot change
'basePath' at runtime"),
+ "Should throw exception when attempting to change basePath at
runtime");
+ }
+
+ @Test
+ public void testRuntimeConfigFileExtension(@TempDir Path tempDir) throws
Exception {
+ // Create emitter with no file extension
+ String defaultConfig = String.format(Locale.ROOT,
+ "{\"basePath\":\"%s\", \"onExists\":\"REPLACE\"}",
+ tempDir.toString().replace("\\", "\\\\"));
+ ExtensionConfig pluginConfig = new ExtensionConfig("test-emitter",
"test", defaultConfig);
+ FileSystemEmitter emitter = FileSystemEmitter.build(pluginConfig);
+
+ // Emit with default config - no extension added
+ List<Metadata> metadataList1 = new ArrayList<>();
+ Metadata m1 = new Metadata();
+ m1.set(TikaCoreProperties.TIKA_CONTENT, "content1");
+ metadataList1.add(m1);
+
+ ParseContext context1 = new ParseContext();
+ emitter.emit("test1", metadataList1, context1);
+
+ assertTrue(Files.exists(tempDir.resolve("test1")),
+ "File without extension should exist");
+
+ // Override at runtime to add .json extension
+ // Note: basePath is NOT included for security reasons
+ String runtimeConfig = "{\"fileExtension\":\"json\",
\"onExists\":\"REPLACE\"}";
+ ConfigContainer configContainer = new ConfigContainer();
+ configContainer.set("test-emitter", runtimeConfig);
+
+ ParseContext context2 = new ParseContext();
+ context2.set(ConfigContainer.class, configContainer);
+
+ // Emit with runtime config
+ List<Metadata> metadataList2 = new ArrayList<>();
+ Metadata m2 = new Metadata();
+ m2.set(TikaCoreProperties.TIKA_CONTENT, "content2");
+ metadataList2.add(m2);
+
+ emitter.emit("test2", metadataList2, context2);
+
+ assertTrue(Files.exists(tempDir.resolve("test2.json")),
+ "File with .json extension should exist");
+ }
+
+ @Test
+ public void testRuntimeConfigOnExists(@TempDir Path tempDir) throws
Exception {
+ // Create emitter with REPLACE as default
+ String defaultConfig = String.format(Locale.ROOT,
+ "{\"basePath\":\"%s\", \"onExists\":\"REPLACE\"}",
+ tempDir.toString().replace("\\", "\\\\"));
+ ExtensionConfig pluginConfig = new ExtensionConfig("test-emitter",
"test", defaultConfig);
+ FileSystemEmitter emitter = FileSystemEmitter.build(pluginConfig);
+
+ // Create a test file using stream emit
+ Path testFile = tempDir.resolve("test.txt");
+ Files.writeString(testFile, "original content");
+
+ // Emit with default config (REPLACE) - should succeed
+ InputStream inputStream1 = new ByteArrayInputStream("replaced
content".getBytes(StandardCharsets.UTF_8));
+ Metadata metadata1 = new Metadata();
+ ParseContext context1 = new ParseContext();
+
+ emitter.emit("test.txt", inputStream1, metadata1, context1);
+ assertEquals("replaced content", Files.readString(testFile),
+ "Content should be replaced");
+
+ // Override at runtime to use SKIP
+ // Note: basePath is NOT included for security reasons
+ String runtimeConfig = "{\"onExists\":\"SKIP\"}";
+ ConfigContainer configContainer = new ConfigContainer();
+ configContainer.set("test-emitter", runtimeConfig);
+
+ ParseContext context2 = new ParseContext();
+ context2.set(ConfigContainer.class, configContainer);
+
+ // Emit with runtime config (SKIP) - should not replace existing file
+ InputStream inputStream2 = new ByteArrayInputStream("new
content".getBytes(StandardCharsets.UTF_8));
+ Metadata metadata2 = new Metadata();
+
+ emitter.emit("test.txt", inputStream2, metadata2, context2);
+ assertEquals("replaced content", Files.readString(testFile),
+ "Content should not change with SKIP");
+ }
+
+ @Test
+ public void testConfigContainerNotPresent(@TempDir Path tempDir) throws
Exception {
+ // Create emitter with default config
+ String defaultConfig = String.format(Locale.ROOT,
+ "{\"basePath\":\"%s\", \"onExists\":\"REPLACE\"}",
+ tempDir.toString().replace("\\", "\\\\"));
+ ExtensionConfig pluginConfig = new ExtensionConfig("test-emitter",
"test", defaultConfig);
+ FileSystemEmitter emitter = FileSystemEmitter.build(pluginConfig);
+
+ // Emit with ParseContext that has no ConfigContainer - should use
default config
+ List<Metadata> metadataList = new ArrayList<>();
+ Metadata m = new Metadata();
+ m.set(TikaCoreProperties.TIKA_CONTENT, "test content");
+ metadataList.add(m);
+
+ ParseContext context = new ParseContext();
+ // Don't set ConfigContainer in context
+
+ emitter.emit("test.json", metadataList, context);
+
+ Path output = tempDir.resolve("test.json");
+ assertTrue(Files.exists(output), "File should be created with default
config");
+ }
+
+ @Test
+ public void testConfigContainerWithDifferentId(@TempDir Path tempDir)
throws Exception {
+ // Create emitter with default config
+ String defaultConfig = String.format(Locale.ROOT,
+ "{\"basePath\":\"%s\", \"onExists\":\"REPLACE\"}",
+ tempDir.toString().replace("\\", "\\\\"));
+ ExtensionConfig pluginConfig = new ExtensionConfig("test-emitter",
"test", defaultConfig);
+ FileSystemEmitter emitter = FileSystemEmitter.build(pluginConfig);
+
+ // Create ConfigContainer with config for a different emitter ID
+ Path otherDir = tempDir.resolve("other");
+ Files.createDirectories(otherDir);
+
+ ConfigContainer configContainer = new ConfigContainer();
+ String runtimeConfig = String.format(Locale.ROOT,
+ "{\"basePath\":\"%s\", \"onExists\":\"REPLACE\"}",
+ otherDir.toString().replace("\\", "\\\\"));
+ configContainer.set("different-emitter", runtimeConfig);
+
+ ParseContext context = new ParseContext();
+ context.set(ConfigContainer.class, configContainer);
+
+ // Emit - should use default config since runtime config is for
different ID
+ List<Metadata> metadataList = new ArrayList<>();
+ Metadata m = new Metadata();
+ m.set(TikaCoreProperties.TIKA_CONTENT, "test content");
+ metadataList.add(m);
+
+ emitter.emit("test.json", metadataList, context);
+
+ assertTrue(Files.exists(tempDir.resolve("test.json")),
+ "File should be created in default basePath");
+ assertFalse(Files.exists(otherDir.resolve("test.json")),
+ "File should not be created in other directory");
+ }
+}
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherRuntimeConfigTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherRuntimeConfigTest.java
new file mode 100644
index 000000000..c1be6c535
--- /dev/null
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherRuntimeConfigTest.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetcher.fs;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Locale;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import org.apache.tika.config.ConfigContainer;
+import org.apache.tika.metadata.FileSystem;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.plugins.ExtensionConfig;
+
+/**
+ * Tests runtime configuration of FileSystemFetcher via ConfigContainer and
ParseContext.
+ */
+public class FileSystemFetcherRuntimeConfigTest {
+
+ @Test
+ public void testRuntimeConfigViaParseContext(@TempDir Path tempDir) throws
Exception {
+ // Create a test file
+ Path testFile = tempDir.resolve("test.txt");
+ Files.writeString(testFile, "test content");
+
+ // Create fetcher with default config (no extractFileSystemMetadata)
+ String defaultConfig = String.format(Locale.ROOT,
"{\"basePath\":\"%s\"}",
+ tempDir.toString().replace("\\", "\\\\"));
+ ExtensionConfig pluginConfig = new ExtensionConfig("test-fetcher",
"test", defaultConfig);
+ FileSystemFetcher fetcher = new FileSystemFetcher(pluginConfig);
+
+ // Fetch without runtime config - should not extract file system
metadata
+ Metadata metadata1 = new Metadata();
+ ParseContext context1 = new ParseContext();
+ try (InputStream is = fetcher.fetch("test.txt", metadata1, context1)) {
+ assertNotNull(is);
+ }
+ assertNull(metadata1.get(FileSystem.CREATED),
+ "Without extractFileSystemMetadata, should not have CREATED
metadata");
+
+ // Now create runtime config with extractFileSystemMetadata=true
+ // Note: basePath is NOT included for security reasons
+ String runtimeConfig = "{\"extractFileSystemMetadata\":true}";
+
+ ConfigContainer configContainer = new ConfigContainer();
+ configContainer.set("test-fetcher", runtimeConfig);
+
+ ParseContext context2 = new ParseContext();
+ context2.set(ConfigContainer.class, configContainer);
+
+ // Fetch with runtime config - should extract file system metadata
+ Metadata metadata2 = new Metadata();
+ try (InputStream is = fetcher.fetch("test.txt", metadata2, context2)) {
+ assertNotNull(is);
+ }
+ assertNotNull(metadata2.get(FileSystem.CREATED),
+ "With extractFileSystemMetadata=true, should have CREATED
metadata");
+ assertNotNull(metadata2.get(FileSystem.MODIFIED),
+ "With extractFileSystemMetadata=true, should have MODIFIED
metadata");
+ }
+
+ @Test
+ public void testRuntimeConfigCannotOverrideBasePath(@TempDir Path tempDir)
throws Exception {
+ // Create two directories with different files
+ Path dir1 = tempDir.resolve("dir1");
+ Path dir2 = tempDir.resolve("dir2");
+ Files.createDirectories(dir1);
+ Files.createDirectories(dir2);
+
+ Path file1 = dir1.resolve("test.txt");
+ Files.writeString(file1, "content from dir1");
+
+ // Create fetcher with dir1 as default basePath
+ String defaultConfig = String.format(Locale.ROOT,
"{\"basePath\":\"%s\"}",
+ dir1.toString().replace("\\", "\\\\"));
+ ExtensionConfig pluginConfig = new ExtensionConfig("test-fetcher",
"test", defaultConfig);
+ FileSystemFetcher fetcher = new FileSystemFetcher(pluginConfig);
+
+ // Fetch from default basePath (dir1)
+ Metadata metadata1 = new Metadata();
+ ParseContext context1 = new ParseContext();
+ try (InputStream is = fetcher.fetch("test.txt", metadata1, context1)) {
+ String content = new String(is.readAllBytes(),
StandardCharsets.UTF_8);
+ assertEquals("content from dir1", content);
+ }
+
+ // Try to override basePath at runtime to point to dir2
+ // This should throw an exception for security reasons
+ String runtimeConfig = String.format(Locale.ROOT,
"{\"basePath\":\"%s\"}",
+ dir2.toString().replace("\\", "\\\\"));
+ ConfigContainer configContainer = new ConfigContainer();
+ configContainer.set("test-fetcher", runtimeConfig);
+
+ ParseContext context2 = new ParseContext();
+ context2.set(ConfigContainer.class, configContainer);
+
+ // Fetch with runtime config - should throw exception
+ Metadata metadata2 = new Metadata();
+ IOException exception = assertThrows(IOException.class, () -> {
+ fetcher.fetch("test.txt", metadata2, context2);
+ });
+ assertTrue(exception.getCause() != null &&
+ exception.getCause().getMessage().contains("Cannot change
'basePath' at runtime"),
+ "Should throw exception when attempting to change basePath at
runtime");
+ }
+
+ @Test
+ public void testConfigContainerNotPresent(@TempDir Path tempDir) throws
Exception {
+ // Create a test file
+ Path testFile = tempDir.resolve("test.txt");
+ Files.writeString(testFile, "test content");
+
+ // Create fetcher with default config
+ String defaultConfig = String.format(Locale.ROOT,
"{\"basePath\":\"%s\"}",
+ tempDir.toString().replace("\\", "\\\\"));
+ ExtensionConfig pluginConfig = new ExtensionConfig("test-fetcher",
"test", defaultConfig);
+ FileSystemFetcher fetcher = new FileSystemFetcher(pluginConfig);
+
+ // Fetch with ParseContext that has no ConfigContainer - should use
default config
+ Metadata metadata = new Metadata();
+ ParseContext context = new ParseContext();
+ // Don't set ConfigContainer in context
+
+ try (InputStream is = fetcher.fetch("test.txt", metadata, context)) {
+ assertNotNull(is);
+ String content = new String(is.readAllBytes(),
StandardCharsets.UTF_8);
+ assertEquals("test content", content);
+ }
+ }
+
+ @Test
+ public void testConfigContainerWithDifferentId(@TempDir Path tempDir)
throws Exception {
+ // Create a test file
+ Path testFile = tempDir.resolve("test.txt");
+ Files.writeString(testFile, "test content");
+
+ // Create fetcher with default config
+ String defaultConfig = String.format(Locale.ROOT,
"{\"basePath\":\"%s\"}",
+ tempDir.toString().replace("\\", "\\\\"));
+ ExtensionConfig pluginConfig = new ExtensionConfig("test-fetcher",
"test", defaultConfig);
+ FileSystemFetcher fetcher = new FileSystemFetcher(pluginConfig);
+
+ // Create ConfigContainer with config for a different fetcher ID
+ ConfigContainer configContainer = new ConfigContainer();
+ configContainer.set("different-fetcher",
"{\"basePath\":\"/some/other/path\"}");
+
+ ParseContext context = new ParseContext();
+ context.set(ConfigContainer.class, configContainer);
+
+ // Fetch - should use default config since runtime config is for
different ID
+ Metadata metadata = new Metadata();
+ try (InputStream is = fetcher.fetch("test.txt", metadata, context)) {
+ assertNotNull(is);
+ String content = new String(is.readAllBytes(),
StandardCharsets.UTF_8);
+ assertEquals("test content", content);
+ }
+ }
+}
diff --git a/tika-plugins-core/pom.xml b/tika-plugins-core/pom.xml
index c6fc4368a..9051943e4 100644
--- a/tika-plugins-core/pom.xml
+++ b/tika-plugins-core/pom.xml
@@ -36,6 +36,11 @@
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-serialization</artifactId>
+ <version>${project.version}</version>
+ </dependency>
<dependency>
<groupId>org.pf4j</groupId>
<artifactId>pf4j</artifactId>
diff --git a/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaConfigs.java b/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaConfigs.java
index cbc1231a9..0b7f80df6 100644
--- a/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaConfigs.java
+++ b/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaConfigs.java
@@ -16,13 +16,7 @@
*/
package org.apache.tika.plugins;
-import java.io.BufferedReader;
import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.Set;
@@ -31,14 +25,22 @@ import
com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.tika.config.loader.TikaJsonConfig;
import org.apache.tika.exception.TikaConfigException;
/**
- * Loads and validates Tika plugin configuration from JSON.
+ * Loads and validates Tika pipes/plugin configuration from JSON.
+ * <p>
+ * This class validates pipes-specific configuration keys and delegates to
+ * {@link TikaJsonConfig} for parsing. Core Tika keys (parsers, detectors,
etc.)
+ * are ignored by this validator - they are handled by TikaLoader.
*/
public class TikaConfigs {
- private static final Set<String> KNOWN_ROOT_KEYS = Set.of(
+ /**
+ * Pipes-specific configuration keys.
+ */
+ private static final Set<String> PIPES_KEYS = Set.of(
"fetchers",
"emitters",
"pipes-iterator",
@@ -47,50 +49,126 @@ public class TikaConfigs {
"plugin-roots"
);
+ /**
+ * Core Tika configuration keys (handled by TikaLoader, not validated
here).
+ */
+ private static final Set<String> CORE_TIKA_KEYS = Set.of(
+ "parsers",
+ "detectors",
+ "encoding-detectors",
+ "encodingDetectors",
+ "metadata-filters",
+ "metadataFilters",
+ "renderers",
+ "translators",
+ "auto-detect-parser-config",
+ "autoDetectParserConfig"
+ );
+
static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
.configure(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY,
true);
- public static TikaConfigs load(InputStream is) throws IOException,
TikaConfigException {
- try (Reader reader = new BufferedReader(new InputStreamReader(is,
StandardCharsets.UTF_8))) {
- TikaConfigs configs = new
TikaConfigs(OBJECT_MAPPER.readTree(reader));
- configs.validateNoUnknownKeys();
- return configs;
- }
+ private final TikaJsonConfig tikaJsonConfig;
+
+ /**
+ * Loads pipes configuration from a pre-parsed TikaJsonConfig.
+ * This is the preferred method when sharing configuration across
+ * core Tika and pipes components.
+ *
+ * @param tikaJsonConfig the pre-parsed JSON configuration
+ * @return the pipes configuration
+ * @throws TikaConfigException if validation fails
+ */
+ public static TikaConfigs load(TikaJsonConfig tikaJsonConfig) throws
TikaConfigException {
+ TikaConfigs configs = new TikaConfigs(tikaJsonConfig);
+ configs.validatePipesKeys();
+ return configs;
}
+
+ /**
+ * Loads pipes configuration from a file.
+ * For backwards compatibility - prefer {@link #load(TikaJsonConfig)} when
possible.
+ *
+ * @param path the path to the JSON configuration file
+ * @return the pipes configuration
+ * @throws IOException if reading fails
+ * @throws TikaConfigException if validation fails
+ */
public static TikaConfigs load(Path path) throws IOException,
TikaConfigException {
- try (InputStream is = Files.newInputStream(path)) {
- return load(is);
- }
+ TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(path);
+ return load(tikaJsonConfig);
}
- private final JsonNode root;
- private TikaConfigs(JsonNode root) {
- this.root = root;
+ private TikaConfigs(TikaJsonConfig tikaJsonConfig) {
+ this.tikaJsonConfig = tikaJsonConfig;
}
+ /**
+ * Gets the underlying TikaJsonConfig.
+ *
+ * @return the TikaJsonConfig
+ */
+ public TikaJsonConfig getTikaJsonConfig() {
+ return tikaJsonConfig;
+ }
+
+ /**
+ * Gets the root JSON node.
+ *
+ * @deprecated use {@link #getTikaJsonConfig()} instead.
+ * @return the root JSON node
+ */
+ @Deprecated
public JsonNode getRoot() {
- return root;
+ return tikaJsonConfig.getRootNode();
}
+ /**
+ * Deserializes the configuration value stored under the given root-level key.
+ *
+ * @param clazz the target class
+ * @param key the configuration key
+ * @param <T> the type to deserialize to
+ * @return the deserialized value, or {@code null} if the key is absent or JSON null
+ * @throws IOException if deserialization fails
+ */
public <T> T deserialize(Class<T> clazz, String key) throws IOException {
- return OBJECT_MAPPER.treeToValue(root.get(key), clazz);
+ return tikaJsonConfig.deserialize(key, clazz);
}
/**
- * Validates that the config contains no unknown root-level keys.
+ * Validates that every root-level key is a core Tika key, an "x-" key, or a known pipes key.
* This catches typos like "pipes-reporter" instead of "pipes-reporters".
* <p>
+ * Core Tika keys (parsers, detectors, etc.) are ignored - they are
+ * validated by TikaLoader.
+ * <p>
* Keys prefixed with "x-" are allowed for custom extensions.
*
- * @throws TikaConfigException if unknown keys are found
+ * @throws TikaConfigException if a key is not a core Tika, "x-" or known pipes key
*/
- private void validateNoUnknownKeys() throws TikaConfigException {
+ private void validatePipesKeys() throws TikaConfigException {
+ JsonNode root = tikaJsonConfig.getRootNode();
Iterator<String> fieldNames = root.fieldNames();
while (fieldNames.hasNext()) {
String key = fieldNames.next();
- if (!KNOWN_ROOT_KEYS.contains(key) && !key.startsWith("x-")) {
- throw new TikaConfigException("Unknown config key: '" + key +
- "'. Valid keys: " + KNOWN_ROOT_KEYS + " (or use 'x-'
prefix for custom keys)");
+
+ // Ignore core Tika keys - TikaLoader validates those
+ if (CORE_TIKA_KEYS.contains(key)) {
+ continue;
+ }
+
+ // Ignore custom extension keys
+ if (key.startsWith("x-")) {
+ continue;
+ }
+
+ // Must be a known pipes key
+ if (!PIPES_KEYS.contains(key)) {
+ throw new TikaConfigException("Unknown pipes config key: '" +
key +
+ "'. Valid pipes keys: " + PIPES_KEYS +
+ " (or use 'x-' prefix for custom keys). " +
+ "Core Tika keys like 'parsers', 'detectors' should be
configured separately.");
}
}
}
diff --git
a/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPluginManager.java
b/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPluginManager.java
index ac52d0da6..df23e078c 100644
---
a/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPluginManager.java
+++
b/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPluginManager.java
@@ -18,7 +18,6 @@ package org.apache.tika.plugins;
import java.io.File;
import java.io.IOException;
-import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
@@ -31,31 +30,66 @@ import org.pf4j.ExtensionFinder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.tika.config.loader.TikaJsonConfig;
import org.apache.tika.exception.TikaConfigException;
+/**
+ * PF4J-based plugin manager for Tika pipes components.
+ * <p>
+ * This manager loads plugins from configured plugin root directories and
+ * makes their extensions available for discovery.
+ */
public class TikaPluginManager extends DefaultPluginManager {
-
private static final Logger LOG =
LoggerFactory.getLogger(TikaPluginManager.class);
- public static TikaPluginManager load(Path p) throws TikaConfigException,
IOException {
- try (InputStream is = Files.newInputStream(p)) {
- return load(is);
- }
+ /**
+ * Loads plugin manager from a pre-parsed TikaJsonConfig.
+ * This is the preferred method when sharing configuration across
+ * core Tika and pipes components.
+ *
+ * @param tikaJsonConfig the pre-parsed JSON configuration
+ * @return the plugin manager
+ * @throws TikaConfigException if configuration is invalid
+ * @throws IOException if plugin initialization fails
+ */
+ public static TikaPluginManager load(TikaJsonConfig tikaJsonConfig)
+ throws TikaConfigException, IOException {
+ TikaConfigs tikaConfigs = TikaConfigs.load(tikaJsonConfig);
+ return load(tikaConfigs);
}
- public static TikaPluginManager load(InputStream is) throws
TikaConfigException, IOException {
- return load(TikaConfigs.load(is));
+ /**
+ * Loads plugin manager from a configuration file.
+ * For backwards compatibility - prefer {@link #load(TikaJsonConfig)} when
possible.
+ *
+ * @param configPath the path to the JSON configuration file
+ * @return the plugin manager
+ * @throws TikaConfigException if configuration is invalid
+ * @throws IOException if reading or plugin initialization fails
+ */
+ public static TikaPluginManager load(Path configPath) throws
TikaConfigException, IOException {
+ TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(configPath);
+ return load(tikaJsonConfig);
}
- public static TikaPluginManager load(TikaConfigs tikaConfigs) throws
TikaConfigException, IOException {
+ /**
+ * Loads plugin manager from a TikaConfigs instance.
+ *
+ * @param tikaConfigs the pipes configuration
+ * @return the plugin manager
+ * @throws TikaConfigException if configuration is invalid
+ * @throws IOException if plugin initialization fails
+ */
+ public static TikaPluginManager load(TikaConfigs tikaConfigs)
+ throws TikaConfigException, IOException {
JsonNode root = tikaConfigs.getRoot();
JsonNode pluginRoots = root.get("plugin-roots");
if (pluginRoots == null) {
throw new TikaConfigException("plugin-roots must be specified");
}
- List<Path> roots = TikaConfigs.OBJECT_MAPPER.convertValue(pluginRoots,
new TypeReference<List<Path>>() {
- });
+ List<Path> roots = TikaConfigs.OBJECT_MAPPER.convertValue(pluginRoots,
+ new TypeReference<List<Path>>() {});
if (roots.isEmpty()) {
throw new TikaConfigException("plugin-roots must not be empty");
}
diff --git
a/tika-plugins-core/src/test/java/org/apache/tika/plugins/TikaConfigsTest.java
b/tika-plugins-core/src/test/java/org/apache/tika/plugins/TikaConfigsTest.java
index 94727623f..3ecafc018 100644
---
a/tika-plugins-core/src/test/java/org/apache/tika/plugins/TikaConfigsTest.java
+++
b/tika-plugins-core/src/test/java/org/apache/tika/plugins/TikaConfigsTest.java
@@ -26,6 +26,7 @@ import java.nio.charset.StandardCharsets;
import org.junit.jupiter.api.Test;
+import org.apache.tika.config.loader.TikaJsonConfig;
import org.apache.tika.exception.TikaConfigException;
public class TikaConfigsTest {
@@ -59,7 +60,7 @@ public class TikaConfigsTest {
() -> loadFromString(json));
assertTrue(ex.getMessage().contains("pipes-reporter"));
- assertTrue(ex.getMessage().contains("Unknown config key"));
+ assertTrue(ex.getMessage().contains("Unknown pipes config key"));
}
@Test
@@ -141,6 +142,6 @@ public class TikaConfigsTest {
}
private TikaConfigs loadFromString(String json) throws Exception {
- return TikaConfigs.load(new
ByteArrayInputStream(json.getBytes(StandardCharsets.UTF_8)));
+ return TikaConfigs.load(TikaJsonConfig.load(new
ByteArrayInputStream(json.getBytes(StandardCharsets.UTF_8))));
}
}
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
index fe8464dd3..4ab35a0ae 100644
---
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
@@ -36,9 +36,37 @@ import org.apache.tika.exception.TikaConfigException;
* Parsed representation of a Tika JSON configuration file.
* Provides access to component configurations by type (parsers, detectors,
etc.).
*
- * <p>JSON structure:
+ * <p>This class serves as the single source of truth for JSON parsing across
+ * core Tika (parsers, detectors) and tika-pipes (fetchers, emitters)
components.
+ * It performs no validation - consumers validate only their own keys.
+ *
+ * <p><b>Unified Configuration Usage:</b>
+ * <pre>
+ * // Parse config once
+ * TikaJsonConfig jsonConfig = TikaJsonConfig.load(Paths.get("config.json"));
+ *
+ * // Load core Tika components (same classloader)
+ * TikaLoader tikaLoader = TikaLoader.load(jsonConfig);
+ * Parser parser = tikaLoader.loadParsers();
+ * Detector detector = tikaLoader.loadDetectors();
+ *
+ * // Load pipes/plugin components (different classloader)
+ * TikaPluginManager pluginManager = TikaPluginManager.load(jsonConfig);
+ * pluginManager.loadPlugins();
+ * pluginManager.startPlugins();
+ *
+ * // Extract config for plugins (crosses classloader boundary as string)
+ * JsonNode fetchersNode = jsonConfig.getRootNode().get("fetchers");
+ * if (fetchersNode != null) {
+ * String fetcherConfigJson = fetchersNode.toString();
+ * // Pass string to plugin - safe across classloader boundary
+ * }
+ * </pre>
+ *
+ * <p><b>JSON structure:</b>
* <pre>
* {
+ * // Core Tika components (validated by TikaLoader)
* "parsers": [
* { "pdf-parser": { "_decorate": {...}, "ocrStrategy": "AUTO", ... } },
* { "html-parser": { ... } },
@@ -48,7 +76,14 @@ import org.apache.tika.exception.TikaConfigException;
* { "mime-magic-detector": {} },
* { "zip-container-detector": { "maxDepth": 10 } }
* ],
- * ...
+ *
+ * // Pipes components (validated by TikaConfigs)
+ * "plugin-roots": ["/path/to/plugins"],
+ * "fetchers": [...],
+ * "emitters": [...],
+ *
+ * // Custom extensions (prefix with x-)
+ * "x-my-custom-config": { ... }
* }
* </pre>
*
@@ -247,6 +282,33 @@ public class TikaJsonConfig {
return result;
}
+ /**
+ * Deserializes the value stored under a root-level configuration key.
+ *
+ * @param key the configuration key
+ * @param clazz the target class
+ * @param <T> the type to deserialize to
+ * @return the deserialized value, or {@code null} if the key is absent or its value is JSON null
+ * @throws IOException if deserialization fails
+ */
+ public <T> T deserialize(String key, Class<T> clazz) throws IOException {
+ JsonNode node = rootNode.get(key);
+ if (node == null || node.isNull()) {
+ return null;
+ }
+ return OBJECT_MAPPER.treeToValue(node, clazz);
+ }
+
+ /**
+ * Checks whether a root-level configuration key is present with a non-null value.
+ *
+ * @param key the configuration key
+ * @return {@code true} if the key exists and its value is not JSON null
+ */
+ public boolean hasKey(String key) {
+ return rootNode.has(key) && !rootNode.get(key).isNull();
+ }
+
/**
* Gets the ObjectMapper used for JSON processing.
*