This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch TIKA-4558-lazy-fetcher-manager
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/TIKA-4558-lazy-fetcher-manager
by this push:
new 32a91fa3a Reapply "TIKA-4558 -- add lazy loading to FetcherManager"
32a91fa3a is described below
commit 32a91fa3a3bc89d7916f3519ccb3e1a090a32420
Author: tallison <[email protected]>
AuthorDate: Tue Dec 9 07:24:17 2025 -0500
Reapply "TIKA-4558 -- add lazy loading to FetcherManager"
This reverts commit 200f2244165dfb5e4ec995ae0e015d00c81e6dc6.
---
.../api/fetcher/FetcherNotFoundException.java | 33 ++
.../tika/pipes/core/fetcher/FetcherManager.java | 247 ++++++++++--
.../pipes/core/fetcher/FetcherManagerTest.java | 445 +++++++++++++++++++++
3 files changed, 698 insertions(+), 27 deletions(-)
diff --git
a/tika-pipes/tika-pipes-api/src/main/java/org/apache/tika/pipes/api/fetcher/FetcherNotFoundException.java
b/tika-pipes/tika-pipes-api/src/main/java/org/apache/tika/pipes/api/fetcher/FetcherNotFoundException.java
new file mode 100644
index 000000000..d05d4335c
--- /dev/null
+++
b/tika-pipes/tika-pipes-api/src/main/java/org/apache/tika/pipes/api/fetcher/FetcherNotFoundException.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.api.fetcher;
+
+import org.apache.tika.exception.TikaException;
+
+/**
+ * Exception thrown when a requested fetcher configuration does not exist.
+ */
+public class FetcherNotFoundException extends TikaException {
+
+ public FetcherNotFoundException(String msg) {
+ super(msg);
+ }
+
+ public FetcherNotFoundException(String msg, Throwable cause) {
+ super(msg, cause);
+ }
+}
diff --git
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java
index e30a833fa..2d7fb080f 100644
---
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java
+++
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java
@@ -17,79 +17,272 @@
package org.apache.tika.pipes.core.fetcher;
import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
+import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import org.pf4j.PluginManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.tika.config.loader.PolymorphicObjectMapperFactory;
import org.apache.tika.config.loader.TikaJsonConfig;
import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.pipes.api.fetcher.Fetcher;
import org.apache.tika.pipes.api.fetcher.FetcherFactory;
-import org.apache.tika.plugins.PluginComponentLoader;
+import org.apache.tika.pipes.api.fetcher.FetcherNotFoundException;
+import org.apache.tika.plugins.ExtensionConfig;
/**
* Utility class to hold multiple fetchers.
* <p>
- * This forbids multiple fetchers with the same pluginId
+ * This forbids multiple fetchers with the same fetcher id.
+ * Fetchers are instantiated lazily on first use.
*/
public class FetcherManager {
public static final String CONFIG_KEY = "fetchers";
private static final Logger LOG =
LoggerFactory.getLogger(FetcherManager.class);
-
+ /**
+ * Loads a FetcherManager without allowing runtime modifications.
+ * Use {@link #load(PluginManager, TikaJsonConfig, boolean)} to enable
runtime fetcher additions.
+ *
+ * @param pluginManager the plugin manager
+ * @param tikaJsonConfig the configuration
+ * @return a FetcherManager that does not allow runtime modifications
+ */
public static FetcherManager load(PluginManager pluginManager,
TikaJsonConfig tikaJsonConfig) throws TikaConfigException, IOException {
+ return load(pluginManager, tikaJsonConfig, false);
+ }
+
+ /**
+ * Loads a FetcherManager with optional support for runtime modifications.
+ *
+ * @param pluginManager the plugin manager
+ * @param tikaJsonConfig the configuration
+ * @param allowRuntimeModifications if true, allows calling {@link #saveFetcher(ExtensionConfig)} to add fetchers at runtime
+ * @return a FetcherManager
+ */
+ public static FetcherManager load(PluginManager pluginManager,
TikaJsonConfig tikaJsonConfig, boolean allowRuntimeModifications) throws
TikaConfigException, IOException {
JsonNode fetchersNode = tikaJsonConfig.getRootNode().get(CONFIG_KEY);
- Map<String, Fetcher> fetchers =
- PluginComponentLoader.loadInstances(pluginManager,
FetcherFactory.class, fetchersNode);
- return new FetcherManager(fetchers);
+
+ // Validate configuration and collect fetcher configs without
instantiating
+ Map<String, ExtensionConfig> configs =
validateAndCollectConfigs(pluginManager, fetchersNode);
+
+ return new FetcherManager(pluginManager, configs,
allowRuntimeModifications);
+ }
+
+ /**
+ * Validates the configuration by checking that factories exist for all
types,
+ * and collects the configuration data without instantiating fetchers.
+ */
+ private static Map<String, ExtensionConfig> validateAndCollectConfigs(
+ PluginManager pluginManager, JsonNode configNode) throws
TikaConfigException, IOException {
+
+ Map<String, FetcherFactory> factories = getFactories(pluginManager);
+ Map<String, ExtensionConfig> configs = new HashMap<>();
+
+ if (configNode != null && !configNode.isNull()) {
+ // Outer loop: iterate over type names
+ Iterator<Map.Entry<String, JsonNode>> typeFields =
configNode.fields();
+ while (typeFields.hasNext()) {
+ Map.Entry<String, JsonNode> typeEntry = typeFields.next();
+ String typeName = typeEntry.getKey();
+ JsonNode instancesNode = typeEntry.getValue();
+
+ // Validate that factory exists
+ FetcherFactory factory = factories.get(typeName);
+ if (factory == null) {
+ throw new TikaConfigException(
+ "Unknown fetcher type: " + typeName + ".
Available: " + factories.keySet());
+ }
+
+ // Inner loop: iterate over instances of this type
+ Iterator<Map.Entry<String, JsonNode>> instanceFields =
instancesNode.fields();
+ while (instanceFields.hasNext()) {
+ Map.Entry<String, JsonNode> instanceEntry =
instanceFields.next();
+ String instanceId = instanceEntry.getKey();
+ JsonNode config = instanceEntry.getValue();
+
+ if (configs.containsKey(instanceId)) {
+ throw new TikaConfigException("Duplicate fetcher id: "
+ instanceId);
+ }
+
+ configs.put(instanceId, new ExtensionConfig(instanceId,
typeName, toJsonString(config)));
+ }
+ }
+ }
+
+ return configs;
+ }
+
+ private static Map<String, FetcherFactory> getFactories(PluginManager
pluginManager) throws TikaConfigException {
+ if (pluginManager.getStartedPlugins().isEmpty()) {
+ pluginManager.loadPlugins();
+ pluginManager.startPlugins();
+ }
+
+ Map<String, FetcherFactory> factories = new HashMap<>();
+ for (FetcherFactory factory :
pluginManager.getExtensions(FetcherFactory.class)) {
+ String name = factory.getName();
+ ClassLoader cl = factory.getClass().getClassLoader();
+ boolean isFromPlugin = cl instanceof org.pf4j.PluginClassLoader;
+
+ FetcherFactory existing = factories.get(name);
+ if (existing != null) {
+ boolean existingIsFromPlugin =
existing.getClass().getClassLoader()
+ instanceof org.pf4j.PluginClassLoader;
+ if (isFromPlugin && !existingIsFromPlugin) {
+ // Replace classpath version with plugin version
+ factories.put(name, factory);
+ }
+ // Otherwise skip duplicate (keep existing)
+ continue;
+ }
+ factories.put(name, factory);
+ }
+ return factories;
+ }
+
+ private static String toJsonString(final JsonNode node) throws
TikaConfigException {
+ try {
+ return
PolymorphicObjectMapperFactory.getMapper().writeValueAsString(node);
+ } catch (JsonProcessingException e) {
+ throw new TikaConfigException("Failed to serialize config to JSON
string", e);
+ }
}
- private final Map<String, Fetcher> fetcherMap = new ConcurrentHashMap<>();
+ private final PluginManager pluginManager;
+ private final Map<String, ExtensionConfig> fetcherConfigs = new
ConcurrentHashMap<>();
+ private final Map<String, Fetcher> fetcherCache = new
ConcurrentHashMap<>();
+ private final boolean allowRuntimeModifications;
- private FetcherManager(Map<String, Fetcher> fetcherMap) throws
TikaConfigException {
- this.fetcherMap.putAll(fetcherMap);
+ private FetcherManager(PluginManager pluginManager, Map<String,
ExtensionConfig> fetcherConfigs, boolean allowRuntimeModifications) {
+ this.pluginManager = pluginManager;
+ this.fetcherConfigs.putAll(fetcherConfigs);
+ this.allowRuntimeModifications = allowRuntimeModifications;
}
public Fetcher getFetcher(String id) throws IOException, TikaException {
- Fetcher fetcher = fetcherMap.get(id);
- if (fetcher == null) {
- throw new IllegalArgumentException(
- "Can't find fetcher for id=" + id + ". I've loaded: " +
- fetcherMap.keySet());
+ // Check cache first (fast path, no synchronization)
+ Fetcher fetcher = fetcherCache.get(id);
+ if (fetcher != null) {
+ return fetcher;
+ }
+
+ // Check if config exists
+ ExtensionConfig config = fetcherConfigs.get(id);
+ if (config == null) {
+ throw new FetcherNotFoundException(
+ "Can't find fetcher for id=" + id + ". Available: " +
fetcherConfigs.keySet());
+ }
+
+ // Synchronized block to ensure only one thread builds the fetcher
+ synchronized (this) {
+ // Double-check in case another thread built it while we were
waiting
+ fetcher = fetcherCache.get(id);
+ if (fetcher != null) {
+ return fetcher;
+ }
+
+ // Build the fetcher
+ try {
+ fetcher = buildFetcher(config);
+ fetcherCache.put(id, fetcher);
+ LOG.debug("Lazily instantiated fetcher: {}", id);
+ return fetcher;
+ } catch (TikaConfigException e) {
+ throw new IOException("Failed to build fetcher: " + id, e);
+ }
}
- return fetcher;
+ }
+
+ /**
+ * Builds a fetcher instance from its configuration.
+ */
+ private Fetcher buildFetcher(ExtensionConfig config) throws
TikaConfigException, IOException {
+ Map<String, FetcherFactory> factories = getFactories(pluginManager);
+ FetcherFactory factory = factories.get(config.name());
+
+ if (factory == null) {
+ // This shouldn't happen since we validated in load(), but check
anyway
+ throw new TikaConfigException(
+ "Unknown fetcher type: " + config.name() + ". Available: "
+ factories.keySet());
+ }
+
+ return factory.buildExtension(config);
+ }
+
+ /**
+ * Dynamically adds a fetcher configuration at runtime.
+ * The fetcher will not be instantiated until it is first requested via
{@link #getFetcher(String)}.
+ * This allows for dynamic configuration without the overhead of immediate
instantiation.
+ * <p>
+ * This method is only available if the FetcherManager was loaded via
+ * {@link #load(PluginManager, TikaJsonConfig, boolean)} with allowRuntimeModifications=true.
+ * <p>
+ * Only authorized/authenticated users should be allowed to modify
fetchers. BE CAREFUL.
+ *
+ * @param config the extension configuration for the fetcher
+ * @throws TikaConfigException if the fetcher type is unknown, if a
fetcher with the same ID already exists,
+ * or if runtime modifications are not allowed
+ * @throws IOException if there is an error accessing the plugin manager
+ */
+ public synchronized void saveFetcher(ExtensionConfig config) throws
TikaConfigException, IOException {
+ if (!allowRuntimeModifications) {
+ throw new TikaConfigException(
+ "Runtime modifications are not allowed. FetcherManager
must be loaded with " +
+ "allowRuntimeModifications=true to use saveFetcher()");
+ }
+
+ if (config == null) {
+ throw new IllegalArgumentException("ExtensionConfig cannot be
null");
+ }
+
+ String fetcherId = config.id();
+ String typeName = config.name();
+
+ // Check for duplicate ID
+ if (fetcherConfigs.containsKey(fetcherId)) {
+ throw new TikaConfigException("Fetcher with id '" + fetcherId + "'
already exists");
+ }
+
+ // Validate that factory exists for this type
+ Map<String, FetcherFactory> factories = getFactories(pluginManager);
+ if (!factories.containsKey(typeName)) {
+ throw new TikaConfigException(
+ "Unknown fetcher type: " + typeName + ". Available: " +
factories.keySet());
+ }
+
+ // Store config without instantiating
+ fetcherConfigs.put(fetcherId, config);
+ LOG.debug("Saved fetcher config: id={}, type={}", fetcherId, typeName);
}
public Set<String> getSupported() {
- return fetcherMap.keySet();
+ return fetcherConfigs.keySet();
}
/**
* Convenience method that returns a fetcher if only one fetcher
* is specified in the tika-config file. If 0 or > 1 fetchers
* are specified, this throws an IllegalArgumentException.
- * @return
+ * @return the single configured fetcher
*/
- public Fetcher getFetcher() {
- if (fetcherMap.isEmpty()) {
- throw new IllegalArgumentException("fetchers size must == 1 for
the no arg call");
- }
- if (fetcherMap.size() > 1) {
+ public Fetcher getFetcher() throws IOException, TikaException {
+ if (fetcherConfigs.size() > 1) {
throw new IllegalArgumentException("need to specify 'fetcherId' if
> 1 fetchers are" +
" available");
}
- for (Fetcher fetcher : fetcherMap.values()) {
- return fetcher;
- }
- //this should be unreachable?!
- throw new IllegalArgumentException("fetchers size must == 0");
+ // Get the single fetcher id and use getFetcher(id) for lazy loading
+ String fetcherId = fetcherConfigs.keySet().iterator().next();
+ return getFetcher(fetcherId);
}
}
diff --git
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/fetcher/FetcherManagerTest.java
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/fetcher/FetcherManagerTest.java
new file mode 100644
index 000000000..84793245b
--- /dev/null
+++
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/fetcher/FetcherManagerTest.java
@@ -0,0 +1,445 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.core.fetcher;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import org.apache.tika.config.loader.TikaJsonConfig;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.pipes.api.fetcher.Fetcher;
+import org.apache.tika.pipes.api.fetcher.FetcherNotFoundException;
+import org.apache.tika.pipes.core.PluginsTestHelper;
+import org.apache.tika.plugins.ExtensionConfig;
+import org.apache.tika.plugins.TikaPluginManager;
+
+public class FetcherManagerTest {
+
+ @Test
+ public void testBasicLoad(@TempDir Path tmpDir) throws Exception {
+ Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
+ TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
+ TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
+
+ FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig);
+
+ assertNotNull(fetcherManager);
+ assertEquals(1, fetcherManager.getSupported().size());
+ assertTrue(fetcherManager.getSupported().contains("fsf"));
+ }
+
+ @Test
+ public void testLazyInstantiation(@TempDir Path tmpDir) throws Exception {
+ // Create config with multiple fetchers
+ String configJson = "{\n" +
+ " \"fetchers\": {\n" +
+ " \"file-system-fetcher\": {\n" +
+ " \"fsf1\": {\n" +
+ " \"basePath\": \"" +
tmpDir.resolve("path1").toString().replace("\\", "/") + "\"\n" +
+ " },\n" +
+ " \"fsf2\": {\n" +
+ " \"basePath\": \"" +
tmpDir.resolve("path2").toString().replace("\\", "/") + "\"\n" +
+ " }\n" +
+ " }\n" +
+ " },\n" +
+ " \"plugin-roots\": \"target/plugins\"\n" +
+ "}";
+
+ Path configPath = tmpDir.resolve("config.json");
+ Files.writeString(configPath, configJson, StandardCharsets.UTF_8);
+
+ TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(configPath);
+ TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
+
+ FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig);
+
+ // After load, both fetchers should be in supported list but not
instantiated yet
+ assertEquals(2, fetcherManager.getSupported().size());
+
+ // Request only fsf1 - only it should be instantiated
+ Fetcher fetcher1 = fetcherManager.getFetcher("fsf1");
+ assertNotNull(fetcher1);
+ assertEquals("fsf1", fetcher1.getExtensionConfig().id());
+
+ // fsf2 has not been requested yet - verify it exists in config
+ assertTrue(fetcherManager.getSupported().contains("fsf2"));
+
+ // Now request fsf2
+ Fetcher fetcher2 = fetcherManager.getFetcher("fsf2");
+ assertNotNull(fetcher2);
+ assertEquals("fsf2", fetcher2.getExtensionConfig().id());
+ }
+
+ @Test
+ public void testCaching(@TempDir Path tmpDir) throws Exception {
+ Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
+ TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
+ TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
+
+ FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig);
+
+ // Get the same fetcher multiple times
+ Fetcher fetcher1 = fetcherManager.getFetcher("fsf");
+ Fetcher fetcher2 = fetcherManager.getFetcher("fsf");
+ Fetcher fetcher3 = fetcherManager.getFetcher("fsf");
+
+ // Should be the exact same instance (reference equality)
+ assertSame(fetcher1, fetcher2);
+ assertSame(fetcher2, fetcher3);
+ }
+
+ @Test
+ public void testThreadSafety(@TempDir Path tmpDir) throws Exception {
+ Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
+ TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
+ TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
+
+ FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig);
+
+ int threadCount = 10;
+ ExecutorService executor = Executors.newFixedThreadPool(threadCount);
+ CountDownLatch startLatch = new CountDownLatch(1);
+ CountDownLatch doneLatch = new CountDownLatch(threadCount);
+ List<Future<Fetcher>> futures = new ArrayList<>();
+
+ // Start multiple threads that all request the same fetcher
simultaneously
+ for (int i = 0; i < threadCount; i++) {
+ futures.add(executor.submit(() -> {
+ try {
+ // Wait for all threads to be ready
+ startLatch.await();
+
+ // All threads try to get the fetcher at once
+ return fetcherManager.getFetcher("fsf");
+ } finally {
+ doneLatch.countDown();
+ }
+ }));
+ }
+
+ // Start all threads at once
+ startLatch.countDown();
+
+ // Wait for all threads to complete
+ assertTrue(doneLatch.await(10, TimeUnit.SECONDS));
+
+ // Collect all fetchers
+ List<Fetcher> fetchers = new ArrayList<>();
+ for (Future<Fetcher> future : futures) {
+ fetchers.add(future.get());
+ }
+
+ executor.shutdown();
+
+ // All threads should have gotten the same instance
+ Fetcher first = fetchers.get(0);
+ for (Fetcher fetcher : fetchers) {
+ assertSame(first, fetcher, "All threads should get the same
fetcher instance");
+ }
+ }
+
+ @Test
+ public void testUnknownFetcherId(@TempDir Path tmpDir) throws Exception {
+ Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
+ TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
+ TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
+
+ FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig);
+
+ FetcherNotFoundException exception =
assertThrows(FetcherNotFoundException.class, () -> {
+ fetcherManager.getFetcher("non-existent-fetcher");
+ });
+
+ assertTrue(exception.getMessage().contains("non-existent-fetcher"));
+ assertTrue(exception.getMessage().contains("Available:"));
+ }
+
+ @Test
+ public void testUnknownFetcherType(@TempDir Path tmpDir) throws Exception {
+ String configJson = "{\n" +
+ " \"fetchers\": {\n" +
+ " \"non-existent-fetcher-type\": {\n" +
+ " \"fetcher1\": {\n" +
+ " \"someProp\": \"value\"\n" +
+ " }\n" +
+ " }\n" +
+ " },\n" +
+ " \"plugin-roots\": \"target/plugins\"\n" +
+ "}";
+
+ Path configPath = tmpDir.resolve("config.json");
+ Files.writeString(configPath, configJson, StandardCharsets.UTF_8);
+
+ TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(configPath);
+ TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
+
+ // Should fail during load (early validation)
+ TikaConfigException exception =
assertThrows(TikaConfigException.class, () -> {
+ FetcherManager.load(pluginManager, tikaJsonConfig);
+ });
+
+ assertTrue(exception.getMessage().contains("Unknown fetcher type"));
+
assertTrue(exception.getMessage().contains("non-existent-fetcher-type"));
+ }
+
+ @Test
+ public void testDuplicateFetcherId(@TempDir Path tmpDir) throws Exception {
+ String configJson = "{\n" +
+ " \"fetchers\": {\n" +
+ " \"file-system-fetcher\": {\n" +
+ " \"fsf1\": {\n" +
+ " \"basePath\": \"" +
tmpDir.resolve("path1").toString().replace("\\", "/") + "\"\n" +
+ " },\n" +
+ " \"fsf1\": {\n" +
+ " \"basePath\": \"" +
tmpDir.resolve("path2").toString().replace("\\", "/") + "\"\n" +
+ " }\n" +
+ " }\n" +
+ " },\n" +
+ " \"plugin-roots\": \"target/plugins\"\n" +
+ "}";
+
+ Path configPath = tmpDir.resolve("config.json");
+ Files.writeString(configPath, configJson, StandardCharsets.UTF_8);
+
+ // PolymorphicObjectMapperFactory has FAIL_ON_READING_DUP_TREE_KEY
enabled
+ // so duplicate keys are caught during JSON parsing
+ TikaConfigException exception =
assertThrows(TikaConfigException.class, () -> {
+ TikaJsonConfig.load(configPath);
+ });
+
+ assertTrue(exception.getMessage().contains("Failed to parse JSON") ||
+ exception.getCause().getMessage().contains("Duplicate field"));
+ }
+
+ @Test
+ public void testGetSingleFetcher(@TempDir Path tmpDir) throws Exception {
+ Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
+ TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
+ TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
+
+ FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig);
+
+ // When only one fetcher exists, no-arg getFetcher() should work
+ Fetcher fetcher = fetcherManager.getFetcher();
+ assertNotNull(fetcher);
+ assertEquals("fsf", fetcher.getExtensionConfig().id());
+ }
+
+ @Test
+ public void testGetSingleFetcherWithMultipleConfigured(@TempDir Path
tmpDir) throws Exception {
+ String configJson = "{\n" +
+ " \"fetchers\": {\n" +
+ " \"file-system-fetcher\": {\n" +
+ " \"fsf1\": {\n" +
+ " \"basePath\": \"" +
tmpDir.resolve("path1").toString().replace("\\", "/") + "\"\n" +
+ " },\n" +
+ " \"fsf2\": {\n" +
+ " \"basePath\": \"" +
tmpDir.resolve("path2").toString().replace("\\", "/") + "\"\n" +
+ " }\n" +
+ " }\n" +
+ " },\n" +
+ " \"plugin-roots\": \"target/plugins\"\n" +
+ "}";
+
+ Path configPath = tmpDir.resolve("config.json");
+ Files.writeString(configPath, configJson, StandardCharsets.UTF_8);
+
+ TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(configPath);
+ TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
+
+ FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig);
+
+ // When multiple fetchers exist, no-arg getFetcher() should fail
+ IllegalArgumentException exception =
assertThrows(IllegalArgumentException.class, () -> {
+ fetcherManager.getFetcher();
+ });
+
+ assertTrue(exception.getMessage().contains("need to specify
'fetcherId'"));
+ }
+
+ @Test
+ public void testSaveFetcher(@TempDir Path tmpDir) throws Exception {
+ Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
+ TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
+ TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
+
+ // Load with runtime modifications enabled
+ FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig, true);
+
+ // Initially only fsf exists
+ assertEquals(1, fetcherManager.getSupported().size());
+
+ // Dynamically add a new fetcher configuration
+ String newConfigJson = "{\"basePath\": \"" +
tmpDir.resolve("path2").toString().replace("\\", "/") + "\"}";
+ ExtensionConfig newConfig = new ExtensionConfig("fsf2",
"file-system-fetcher", newConfigJson);
+
+ fetcherManager.saveFetcher(newConfig);
+
+ // Now both should be available
+ assertEquals(2, fetcherManager.getSupported().size());
+ assertTrue(fetcherManager.getSupported().contains("fsf"));
+ assertTrue(fetcherManager.getSupported().contains("fsf2"));
+
+ // Fetcher should be lazily instantiated when requested
+ Fetcher fetcher2 = fetcherManager.getFetcher("fsf2");
+ assertNotNull(fetcher2);
+ assertEquals("fsf2", fetcher2.getExtensionConfig().id());
+ }
+
+ @Test
+ public void testSaveFetcherDuplicate(@TempDir Path tmpDir) throws
Exception {
+ Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
+ TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
+ TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
+
+ FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig, true);
+
+ // Try to add a fetcher with the same ID as existing one
+ String newConfigJson = "{\"basePath\": \"" +
tmpDir.resolve("path2").toString().replace("\\", "/") + "\"}";
+ ExtensionConfig duplicateConfig = new ExtensionConfig("fsf",
"file-system-fetcher", newConfigJson);
+
+ TikaConfigException exception =
assertThrows(TikaConfigException.class, () -> {
+ fetcherManager.saveFetcher(duplicateConfig);
+ });
+
+ assertTrue(exception.getMessage().contains("already exists"));
+ assertTrue(exception.getMessage().contains("fsf"));
+ }
+
+ @Test
+ public void testSaveFetcherUnknownType(@TempDir Path tmpDir) throws
Exception {
+ Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
+ TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
+ TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
+
+ FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig, true);
+
+ // Try to add a fetcher with unknown type
+ ExtensionConfig unknownTypeConfig = new ExtensionConfig("fetcher2",
"unknown-fetcher-type", "{}");
+
+ TikaConfigException exception =
assertThrows(TikaConfigException.class, () -> {
+ fetcherManager.saveFetcher(unknownTypeConfig);
+ });
+
+ assertTrue(exception.getMessage().contains("Unknown fetcher type"));
+ assertTrue(exception.getMessage().contains("unknown-fetcher-type"));
+ }
+
+ @Test
+ public void testSaveFetcherNull(@TempDir Path tmpDir) throws Exception {
+ Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
+ TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
+ TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
+
+ FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig, true);
+
+ IllegalArgumentException exception =
assertThrows(IllegalArgumentException.class, () -> {
+ fetcherManager.saveFetcher(null);
+ });
+
+ assertTrue(exception.getMessage().contains("cannot be null"));
+ }
+
+ @Test
+ public void testSaveFetcherLazyInstantiation(@TempDir Path tmpDir) throws
Exception {
+ Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
+ TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
+ TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
+
+ FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig, true);
+
+ // Add multiple fetchers
+ for (int i = 2; i <= 5; i++) {
+ String configJson = "{\"basePath\": \"" + tmpDir.resolve("path" +
i).toString().replace("\\", "/") + "\"}";
+ ExtensionConfig config2 = new ExtensionConfig("fsf" + i,
"file-system-fetcher", configJson);
+ fetcherManager.saveFetcher(config2);
+ }
+
+ // All 5 should be in supported list
+ assertEquals(5, fetcherManager.getSupported().size());
+
+ // Request only fsf3 - only it should be instantiated
+ Fetcher fetcher3 = fetcherManager.getFetcher("fsf3");
+ assertNotNull(fetcher3);
+ assertEquals("fsf3", fetcher3.getExtensionConfig().id());
+
+ // Others are still available but not instantiated yet
+ assertTrue(fetcherManager.getSupported().contains("fsf2"));
+ assertTrue(fetcherManager.getSupported().contains("fsf4"));
+ assertTrue(fetcherManager.getSupported().contains("fsf5"));
+ }
+
+ @Test
+ public void testSaveFetcherNotAllowed(@TempDir Path tmpDir) throws
Exception {
+ Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
+ TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
+ TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
+
+ // Load with default (runtime modifications disabled)
+ FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig);
+
+ // Try to add a fetcher - should fail
+ String newConfigJson = "{\"basePath\": \"" +
tmpDir.resolve("path2").toString().replace("\\", "/") + "\"}";
+ ExtensionConfig newConfig = new ExtensionConfig("fsf2",
"file-system-fetcher", newConfigJson);
+
+ TikaConfigException exception =
assertThrows(TikaConfigException.class, () -> {
+ fetcherManager.saveFetcher(newConfig);
+ });
+
+ assertTrue(exception.getMessage().contains("Runtime modifications are
not allowed"));
+
assertTrue(exception.getMessage().contains("allowRuntimeModifications=true"));
+ }
+
+ @Test
+ public void testSaveFetcherNotAllowedExplicit(@TempDir Path tmpDir) throws
Exception {
+ Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
+ TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
+ TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
+
+ // Load with explicit false
+ FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig, false);
+
+ // Try to add a fetcher - should fail
+ String newConfigJson = "{\"basePath\": \"" +
tmpDir.resolve("path2").toString().replace("\\", "/") + "\"}";
+ ExtensionConfig newConfig = new ExtensionConfig("fsf2",
"file-system-fetcher", newConfigJson);
+
+ TikaConfigException exception =
assertThrows(TikaConfigException.class, () -> {
+ fetcherManager.saveFetcher(newConfig);
+ });
+
+ assertTrue(exception.getMessage().contains("Runtime modifications are
not allowed"));
+ }
+}