This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 200f22441 Revert "TIKA-4558 -- add lazy loading to FetcherManager"
200f22441 is described below
commit 200f2244165dfb5e4ec995ae0e015d00c81e6dc6
Author: tallison <[email protected]>
AuthorDate: Tue Dec 9 07:18:48 2025 -0500
Revert "TIKA-4558 -- add lazy loading to FetcherManager"
This reverts commit f25a56c5ac3693d15c2f941968773c9cbfc25906.
---
.../api/fetcher/FetcherNotFoundException.java | 33 --
.../tika/pipes/core/fetcher/FetcherManager.java | 247 ++----------
.../pipes/core/fetcher/FetcherManagerTest.java | 445 ---------------------
3 files changed, 27 insertions(+), 698 deletions(-)
diff --git a/tika-pipes/tika-pipes-api/src/main/java/org/apache/tika/pipes/api/fetcher/FetcherNotFoundException.java b/tika-pipes/tika-pipes-api/src/main/java/org/apache/tika/pipes/api/fetcher/FetcherNotFoundException.java
deleted file mode 100644
index d05d4335c..000000000
--- a/tika-pipes/tika-pipes-api/src/main/java/org/apache/tika/pipes/api/fetcher/FetcherNotFoundException.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.pipes.api.fetcher;
-
-import org.apache.tika.exception.TikaException;
-
-/**
- * Exception thrown when a requested fetcher configuration does not exist.
- */
-public class FetcherNotFoundException extends TikaException {
-
- public FetcherNotFoundException(String msg) {
- super(msg);
- }
-
- public FetcherNotFoundException(String msg, Throwable cause) {
- super(msg, cause);
- }
-}
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java
index 2d7fb080f..e30a833fa 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java
@@ -17,272 +17,79 @@
package org.apache.tika.pipes.core.fetcher;
import java.io.IOException;
-import java.util.HashMap;
-import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
-import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import org.pf4j.PluginManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.tika.config.loader.PolymorphicObjectMapperFactory;
import org.apache.tika.config.loader.TikaJsonConfig;
import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.pipes.api.fetcher.Fetcher;
import org.apache.tika.pipes.api.fetcher.FetcherFactory;
-import org.apache.tika.pipes.api.fetcher.FetcherNotFoundException;
-import org.apache.tika.plugins.ExtensionConfig;
+import org.apache.tika.plugins.PluginComponentLoader;
/**
* Utility class to hold multiple fetchers.
* <p>
- * This forbids multiple fetchers with the same pluginId.
- * Fetchers are instantiated lazily on first use.
+ * This forbids multiple fetchers with the same pluginId
*/
public class FetcherManager {
public static final String CONFIG_KEY = "fetchers";
private static final Logger LOG =
LoggerFactory.getLogger(FetcherManager.class);
- /**
- * Loads a FetcherManager without allowing runtime modifications.
- * Use {@link #load(PluginManager, TikaJsonConfig, boolean)} to enable
runtime fetcher additions.
- *
- * @param pluginManager the plugin manager
- * @param tikaJsonConfig the configuration
- * @return a FetcherManager that does not allow runtime modifications
- */
- public static FetcherManager load(PluginManager pluginManager,
TikaJsonConfig tikaJsonConfig) throws TikaConfigException, IOException {
- return load(pluginManager, tikaJsonConfig, false);
- }
- /**
- * Loads a FetcherManager with optional support for runtime modifications.
- *
- * @param pluginManager the plugin manager
- * @param tikaJsonConfig the configuration
- * @param allowRuntimeModifications if true, allows calling {@link
#saveFetcher(ExtensionConfig)} to add fetchers at runtime
- * @return a FetcherManager
- */
- public static FetcherManager load(PluginManager pluginManager,
TikaJsonConfig tikaJsonConfig, boolean allowRuntimeModifications) throws
TikaConfigException, IOException {
+ public static FetcherManager load(PluginManager pluginManager,
TikaJsonConfig tikaJsonConfig) throws TikaConfigException, IOException {
JsonNode fetchersNode = tikaJsonConfig.getRootNode().get(CONFIG_KEY);
-
- // Validate configuration and collect fetcher configs without
instantiating
- Map<String, ExtensionConfig> configs =
validateAndCollectConfigs(pluginManager, fetchersNode);
-
- return new FetcherManager(pluginManager, configs,
allowRuntimeModifications);
- }
-
- /**
- * Validates the configuration by checking that factories exist for all
types,
- * and collects the configuration data without instantiating fetchers.
- */
- private static Map<String, ExtensionConfig> validateAndCollectConfigs(
- PluginManager pluginManager, JsonNode configNode) throws
TikaConfigException, IOException {
-
- Map<String, FetcherFactory> factories = getFactories(pluginManager);
- Map<String, ExtensionConfig> configs = new HashMap<>();
-
- if (configNode != null && !configNode.isNull()) {
- // Outer loop: iterate over type names
- Iterator<Map.Entry<String, JsonNode>> typeFields =
configNode.fields();
- while (typeFields.hasNext()) {
- Map.Entry<String, JsonNode> typeEntry = typeFields.next();
- String typeName = typeEntry.getKey();
- JsonNode instancesNode = typeEntry.getValue();
-
- // Validate that factory exists
- FetcherFactory factory = factories.get(typeName);
- if (factory == null) {
- throw new TikaConfigException(
- "Unknown fetcher type: " + typeName + ".
Available: " + factories.keySet());
- }
-
- // Inner loop: iterate over instances of this type
- Iterator<Map.Entry<String, JsonNode>> instanceFields =
instancesNode.fields();
- while (instanceFields.hasNext()) {
- Map.Entry<String, JsonNode> instanceEntry =
instanceFields.next();
- String instanceId = instanceEntry.getKey();
- JsonNode config = instanceEntry.getValue();
-
- if (configs.containsKey(instanceId)) {
- throw new TikaConfigException("Duplicate fetcher id: "
+ instanceId);
- }
-
- configs.put(instanceId, new ExtensionConfig(instanceId,
typeName, toJsonString(config)));
- }
- }
- }
-
- return configs;
- }
-
- private static Map<String, FetcherFactory> getFactories(PluginManager
pluginManager) throws TikaConfigException {
- if (pluginManager.getStartedPlugins().isEmpty()) {
- pluginManager.loadPlugins();
- pluginManager.startPlugins();
- }
-
- Map<String, FetcherFactory> factories = new HashMap<>();
- for (FetcherFactory factory :
pluginManager.getExtensions(FetcherFactory.class)) {
- String name = factory.getName();
- ClassLoader cl = factory.getClass().getClassLoader();
- boolean isFromPlugin = cl instanceof org.pf4j.PluginClassLoader;
-
- FetcherFactory existing = factories.get(name);
- if (existing != null) {
- boolean existingIsFromPlugin =
existing.getClass().getClassLoader()
- instanceof org.pf4j.PluginClassLoader;
- if (isFromPlugin && !existingIsFromPlugin) {
- // Replace classpath version with plugin version
- factories.put(name, factory);
- }
- // Otherwise skip duplicate (keep existing)
- continue;
- }
- factories.put(name, factory);
- }
- return factories;
- }
-
- private static String toJsonString(final JsonNode node) throws
TikaConfigException {
- try {
- return
PolymorphicObjectMapperFactory.getMapper().writeValueAsString(node);
- } catch (JsonProcessingException e) {
- throw new TikaConfigException("Failed to serialize config to JSON
string", e);
- }
+ Map<String, Fetcher> fetchers =
+ PluginComponentLoader.loadInstances(pluginManager,
FetcherFactory.class, fetchersNode);
+ return new FetcherManager(fetchers);
}
- private final PluginManager pluginManager;
- private final Map<String, ExtensionConfig> fetcherConfigs = new
ConcurrentHashMap<>();
- private final Map<String, Fetcher> fetcherCache = new
ConcurrentHashMap<>();
- private final boolean allowRuntimeModifications;
+ private final Map<String, Fetcher> fetcherMap = new ConcurrentHashMap<>();
- private FetcherManager(PluginManager pluginManager, Map<String,
ExtensionConfig> fetcherConfigs, boolean allowRuntimeModifications) {
- this.pluginManager = pluginManager;
- this.fetcherConfigs.putAll(fetcherConfigs);
- this.allowRuntimeModifications = allowRuntimeModifications;
+ private FetcherManager(Map<String, Fetcher> fetcherMap) throws
TikaConfigException {
+ this.fetcherMap.putAll(fetcherMap);
}
public Fetcher getFetcher(String id) throws IOException, TikaException {
- // Check cache first (fast path, no synchronization)
- Fetcher fetcher = fetcherCache.get(id);
- if (fetcher != null) {
- return fetcher;
- }
-
- // Check if config exists
- ExtensionConfig config = fetcherConfigs.get(id);
- if (config == null) {
- throw new FetcherNotFoundException(
- "Can't find fetcher for id=" + id + ". Available: " +
fetcherConfigs.keySet());
- }
-
- // Synchronized block to ensure only one thread builds the fetcher
- synchronized (this) {
- // Double-check in case another thread built it while we were
waiting
- fetcher = fetcherCache.get(id);
- if (fetcher != null) {
- return fetcher;
- }
-
- // Build the fetcher
- try {
- fetcher = buildFetcher(config);
- fetcherCache.put(id, fetcher);
- LOG.debug("Lazily instantiated fetcher: {}", id);
- return fetcher;
- } catch (TikaConfigException e) {
- throw new IOException("Failed to build fetcher: " + id, e);
- }
+ Fetcher fetcher = fetcherMap.get(id);
+ if (fetcher == null) {
+ throw new IllegalArgumentException(
+ "Can't find fetcher for id=" + id + ". I've loaded: " +
+ fetcherMap.keySet());
}
- }
-
- /**
- * Builds a fetcher instance from its configuration.
- */
- private Fetcher buildFetcher(ExtensionConfig config) throws
TikaConfigException, IOException {
- Map<String, FetcherFactory> factories = getFactories(pluginManager);
- FetcherFactory factory = factories.get(config.name());
-
- if (factory == null) {
- // This shouldn't happen since we validated in load(), but check
anyway
- throw new TikaConfigException(
- "Unknown fetcher type: " + config.name() + ". Available: "
+ factories.keySet());
- }
-
- return factory.buildExtension(config);
- }
-
- /**
- * Dynamically adds a fetcher configuration at runtime.
- * The fetcher will not be instantiated until it is first requested via
{@link #getFetcher(String)}.
- * This allows for dynamic configuration without the overhead of immediate
instantiation.
- * <p>
- * This method is only available if the FetcherManager was loaded with
- * {@link #load(PluginManager, TikaJsonConfig, boolean)} with
allowRuntimeModifications=true
- * <p>
- * Only authorized/authenticated users should be allowed to modify
fetchers. BE CAREFUL.
- *
- * @param config the extension configuration for the fetcher
- * @throws TikaConfigException if the fetcher type is unknown, if a
fetcher with the same ID already exists,
- * or if runtime modifications are not allowed
- * @throws IOException if there is an error accessing the plugin manager
- */
- public synchronized void saveFetcher(ExtensionConfig config) throws
TikaConfigException, IOException {
- if (!allowRuntimeModifications) {
- throw new TikaConfigException(
- "Runtime modifications are not allowed. FetcherManager
must be loaded with " +
- "allowRuntimeModifications=true to use saveFetcher()");
- }
-
- if (config == null) {
- throw new IllegalArgumentException("ExtensionConfig cannot be
null");
- }
-
- String fetcherId = config.id();
- String typeName = config.name();
-
- // Check for duplicate ID
- if (fetcherConfigs.containsKey(fetcherId)) {
- throw new TikaConfigException("Fetcher with id '" + fetcherId + "'
already exists");
- }
-
- // Validate that factory exists for this type
- Map<String, FetcherFactory> factories = getFactories(pluginManager);
- if (!factories.containsKey(typeName)) {
- throw new TikaConfigException(
- "Unknown fetcher type: " + typeName + ". Available: " +
factories.keySet());
- }
-
- // Store config without instantiating
- fetcherConfigs.put(fetcherId, config);
- LOG.debug("Saved fetcher config: id={}, type={}", fetcherId, typeName);
+ return fetcher;
}
public Set<String> getSupported() {
- return fetcherConfigs.keySet();
+ return fetcherMap.keySet();
}
/**
* Convenience method that returns a fetcher if only one fetcher
* is specified in the tika-config file. If 0 or > 1 fetchers
* are specified, this throws an IllegalArgumentException.
- * @return the single configured fetcher
+ * @return
*/
- public Fetcher getFetcher() throws IOException, TikaException {
- if (fetcherConfigs.size() > 1) {
+ public Fetcher getFetcher() {
+ if (fetcherMap.isEmpty()) {
+ throw new IllegalArgumentException("fetchers size must == 1 for
the no arg call");
+ }
+ if (fetcherMap.size() > 1) {
throw new IllegalArgumentException("need to specify 'fetcherId' if
> 1 fetchers are" +
" available");
}
- // Get the single fetcher id and use getFetcher(id) for lazy loading
- String fetcherId = fetcherConfigs.keySet().iterator().next();
- return getFetcher(fetcherId);
+ for (Fetcher fetcher : fetcherMap.values()) {
+ return fetcher;
+ }
+ //this should be unreachable?!
+ throw new IllegalArgumentException("fetchers size must == 0");
}
}
diff --git a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/fetcher/FetcherManagerTest.java b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/fetcher/FetcherManagerTest.java
deleted file mode 100644
index 84793245b..000000000
--- a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/fetcher/FetcherManagerTest.java
+++ /dev/null
@@ -1,445 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.pipes.core.fetcher;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertSame;
-import static org.junit.jupiter.api.Assertions.assertThrows;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-
-import org.apache.tika.config.loader.TikaJsonConfig;
-import org.apache.tika.exception.TikaConfigException;
-import org.apache.tika.pipes.api.fetcher.Fetcher;
-import org.apache.tika.pipes.api.fetcher.FetcherNotFoundException;
-import org.apache.tika.pipes.core.PluginsTestHelper;
-import org.apache.tika.plugins.ExtensionConfig;
-import org.apache.tika.plugins.TikaPluginManager;
-
-public class FetcherManagerTest {
-
- @Test
- public void testBasicLoad(@TempDir Path tmpDir) throws Exception {
- Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
- TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
- TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
-
- FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig);
-
- assertNotNull(fetcherManager);
- assertEquals(1, fetcherManager.getSupported().size());
- assertTrue(fetcherManager.getSupported().contains("fsf"));
- }
-
- @Test
- public void testLazyInstantiation(@TempDir Path tmpDir) throws Exception {
- // Create config with multiple fetchers
- String configJson = "{\n" +
- " \"fetchers\": {\n" +
- " \"file-system-fetcher\": {\n" +
- " \"fsf1\": {\n" +
- " \"basePath\": \"" +
tmpDir.resolve("path1").toString().replace("\\", "/") + "\"\n" +
- " },\n" +
- " \"fsf2\": {\n" +
- " \"basePath\": \"" +
tmpDir.resolve("path2").toString().replace("\\", "/") + "\"\n" +
- " }\n" +
- " }\n" +
- " },\n" +
- " \"plugin-roots\": \"target/plugins\"\n" +
- "}";
-
- Path configPath = tmpDir.resolve("config.json");
- Files.writeString(configPath, configJson, StandardCharsets.UTF_8);
-
- TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(configPath);
- TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
-
- FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig);
-
- // After load, both fetchers should be in supported list but not
instantiated yet
- assertEquals(2, fetcherManager.getSupported().size());
-
- // Request only fsf1 - only it should be instantiated
- Fetcher fetcher1 = fetcherManager.getFetcher("fsf1");
- assertNotNull(fetcher1);
- assertEquals("fsf1", fetcher1.getExtensionConfig().id());
-
- // fsf2 has not been requested yet - verify it exists in config
- assertTrue(fetcherManager.getSupported().contains("fsf2"));
-
- // Now request fsf2
- Fetcher fetcher2 = fetcherManager.getFetcher("fsf2");
- assertNotNull(fetcher2);
- assertEquals("fsf2", fetcher2.getExtensionConfig().id());
- }
-
- @Test
- public void testCaching(@TempDir Path tmpDir) throws Exception {
- Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
- TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
- TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
-
- FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig);
-
- // Get the same fetcher multiple times
- Fetcher fetcher1 = fetcherManager.getFetcher("fsf");
- Fetcher fetcher2 = fetcherManager.getFetcher("fsf");
- Fetcher fetcher3 = fetcherManager.getFetcher("fsf");
-
- // Should be the exact same instance (reference equality)
- assertSame(fetcher1, fetcher2);
- assertSame(fetcher2, fetcher3);
- }
-
- @Test
- public void testThreadSafety(@TempDir Path tmpDir) throws Exception {
- Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
- TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
- TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
-
- FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig);
-
- int threadCount = 10;
- ExecutorService executor = Executors.newFixedThreadPool(threadCount);
- CountDownLatch startLatch = new CountDownLatch(1);
- CountDownLatch doneLatch = new CountDownLatch(threadCount);
- List<Future<Fetcher>> futures = new ArrayList<>();
-
- // Start multiple threads that all request the same fetcher
simultaneously
- for (int i = 0; i < threadCount; i++) {
- futures.add(executor.submit(() -> {
- try {
- // Wait for all threads to be ready
- startLatch.await();
-
- // All threads try to get the fetcher at once
- return fetcherManager.getFetcher("fsf");
- } finally {
- doneLatch.countDown();
- }
- }));
- }
-
- // Start all threads at once
- startLatch.countDown();
-
- // Wait for all threads to complete
- assertTrue(doneLatch.await(10, TimeUnit.SECONDS));
-
- // Collect all fetchers
- List<Fetcher> fetchers = new ArrayList<>();
- for (Future<Fetcher> future : futures) {
- fetchers.add(future.get());
- }
-
- executor.shutdown();
-
- // All threads should have gotten the same instance
- Fetcher first = fetchers.get(0);
- for (Fetcher fetcher : fetchers) {
- assertSame(first, fetcher, "All threads should get the same
fetcher instance");
- }
- }
-
- @Test
- public void testUnknownFetcherId(@TempDir Path tmpDir) throws Exception {
- Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
- TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
- TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
-
- FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig);
-
- FetcherNotFoundException exception =
assertThrows(FetcherNotFoundException.class, () -> {
- fetcherManager.getFetcher("non-existent-fetcher");
- });
-
- assertTrue(exception.getMessage().contains("non-existent-fetcher"));
- assertTrue(exception.getMessage().contains("Available:"));
- }
-
- @Test
- public void testUnknownFetcherType(@TempDir Path tmpDir) throws Exception {
- String configJson = "{\n" +
- " \"fetchers\": {\n" +
- " \"non-existent-fetcher-type\": {\n" +
- " \"fetcher1\": {\n" +
- " \"someProp\": \"value\"\n" +
- " }\n" +
- " }\n" +
- " },\n" +
- " \"plugin-roots\": \"target/plugins\"\n" +
- "}";
-
- Path configPath = tmpDir.resolve("config.json");
- Files.writeString(configPath, configJson, StandardCharsets.UTF_8);
-
- TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(configPath);
- TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
-
- // Should fail during load (early validation)
- TikaConfigException exception =
assertThrows(TikaConfigException.class, () -> {
- FetcherManager.load(pluginManager, tikaJsonConfig);
- });
-
- assertTrue(exception.getMessage().contains("Unknown fetcher type"));
-
assertTrue(exception.getMessage().contains("non-existent-fetcher-type"));
- }
-
- @Test
- public void testDuplicateFetcherId(@TempDir Path tmpDir) throws Exception {
- String configJson = "{\n" +
- " \"fetchers\": {\n" +
- " \"file-system-fetcher\": {\n" +
- " \"fsf1\": {\n" +
- " \"basePath\": \"" +
tmpDir.resolve("path1").toString().replace("\\", "/") + "\"\n" +
- " },\n" +
- " \"fsf1\": {\n" +
- " \"basePath\": \"" +
tmpDir.resolve("path2").toString().replace("\\", "/") + "\"\n" +
- " }\n" +
- " }\n" +
- " },\n" +
- " \"plugin-roots\": \"target/plugins\"\n" +
- "}";
-
- Path configPath = tmpDir.resolve("config.json");
- Files.writeString(configPath, configJson, StandardCharsets.UTF_8);
-
- // PolymorphicObjectMapperFactory has FAIL_ON_READING_DUP_TREE_KEY
enabled
- // so duplicate keys are caught during JSON parsing
- TikaConfigException exception =
assertThrows(TikaConfigException.class, () -> {
- TikaJsonConfig.load(configPath);
- });
-
- assertTrue(exception.getMessage().contains("Failed to parse JSON") ||
- exception.getCause().getMessage().contains("Duplicate field"));
- }
-
- @Test
- public void testGetSingleFetcher(@TempDir Path tmpDir) throws Exception {
- Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
- TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
- TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
-
- FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig);
-
- // When only one fetcher exists, no-arg getFetcher() should work
- Fetcher fetcher = fetcherManager.getFetcher();
- assertNotNull(fetcher);
- assertEquals("fsf", fetcher.getExtensionConfig().id());
- }
-
- @Test
- public void testGetSingleFetcherWithMultipleConfigured(@TempDir Path
tmpDir) throws Exception {
- String configJson = "{\n" +
- " \"fetchers\": {\n" +
- " \"file-system-fetcher\": {\n" +
- " \"fsf1\": {\n" +
- " \"basePath\": \"" +
tmpDir.resolve("path1").toString().replace("\\", "/") + "\"\n" +
- " },\n" +
- " \"fsf2\": {\n" +
- " \"basePath\": \"" +
tmpDir.resolve("path2").toString().replace("\\", "/") + "\"\n" +
- " }\n" +
- " }\n" +
- " },\n" +
- " \"plugin-roots\": \"target/plugins\"\n" +
- "}";
-
- Path configPath = tmpDir.resolve("config.json");
- Files.writeString(configPath, configJson, StandardCharsets.UTF_8);
-
- TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(configPath);
- TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
-
- FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig);
-
- // When multiple fetchers exist, no-arg getFetcher() should fail
- IllegalArgumentException exception =
assertThrows(IllegalArgumentException.class, () -> {
- fetcherManager.getFetcher();
- });
-
- assertTrue(exception.getMessage().contains("need to specify
'fetcherId'"));
- }
-
- @Test
- public void testSaveFetcher(@TempDir Path tmpDir) throws Exception {
- Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
- TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
- TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
-
- // Load with runtime modifications enabled
- FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig, true);
-
- // Initially only fsf exists
- assertEquals(1, fetcherManager.getSupported().size());
-
- // Dynamically add a new fetcher configuration
- String newConfigJson = "{\"basePath\": \"" +
tmpDir.resolve("path2").toString().replace("\\", "/") + "\"}";
- ExtensionConfig newConfig = new ExtensionConfig("fsf2",
"file-system-fetcher", newConfigJson);
-
- fetcherManager.saveFetcher(newConfig);
-
- // Now both should be available
- assertEquals(2, fetcherManager.getSupported().size());
- assertTrue(fetcherManager.getSupported().contains("fsf"));
- assertTrue(fetcherManager.getSupported().contains("fsf2"));
-
- // Fetcher should be lazily instantiated when requested
- Fetcher fetcher2 = fetcherManager.getFetcher("fsf2");
- assertNotNull(fetcher2);
- assertEquals("fsf2", fetcher2.getExtensionConfig().id());
- }
-
- @Test
- public void testSaveFetcherDuplicate(@TempDir Path tmpDir) throws
Exception {
- Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
- TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
- TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
-
- FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig, true);
-
- // Try to add a fetcher with the same ID as existing one
- String newConfigJson = "{\"basePath\": \"" +
tmpDir.resolve("path2").toString().replace("\\", "/") + "\"}";
- ExtensionConfig duplicateConfig = new ExtensionConfig("fsf",
"file-system-fetcher", newConfigJson);
-
- TikaConfigException exception =
assertThrows(TikaConfigException.class, () -> {
- fetcherManager.saveFetcher(duplicateConfig);
- });
-
- assertTrue(exception.getMessage().contains("already exists"));
- assertTrue(exception.getMessage().contains("fsf"));
- }
-
- @Test
- public void testSaveFetcherUnknownType(@TempDir Path tmpDir) throws
Exception {
- Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
- TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
- TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
-
- FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig, true);
-
- // Try to add a fetcher with unknown type
- ExtensionConfig unknownTypeConfig = new ExtensionConfig("fetcher2",
"unknown-fetcher-type", "{}");
-
- TikaConfigException exception =
assertThrows(TikaConfigException.class, () -> {
- fetcherManager.saveFetcher(unknownTypeConfig);
- });
-
- assertTrue(exception.getMessage().contains("Unknown fetcher type"));
- assertTrue(exception.getMessage().contains("unknown-fetcher-type"));
- }
-
- @Test
- public void testSaveFetcherNull(@TempDir Path tmpDir) throws Exception {
- Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
- TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
- TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
-
- FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig, true);
-
- IllegalArgumentException exception =
assertThrows(IllegalArgumentException.class, () -> {
- fetcherManager.saveFetcher(null);
- });
-
- assertTrue(exception.getMessage().contains("cannot be null"));
- }
-
- @Test
- public void testSaveFetcherLazyInstantiation(@TempDir Path tmpDir) throws
Exception {
- Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
- TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
- TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
-
- FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig, true);
-
- // Add multiple fetchers
- for (int i = 2; i <= 5; i++) {
- String configJson = "{\"basePath\": \"" + tmpDir.resolve("path" +
i).toString().replace("\\", "/") + "\"}";
- ExtensionConfig config2 = new ExtensionConfig("fsf" + i,
"file-system-fetcher", configJson);
- fetcherManager.saveFetcher(config2);
- }
-
- // All 5 should be in supported list
- assertEquals(5, fetcherManager.getSupported().size());
-
- // Request only fsf3 - only it should be instantiated
- Fetcher fetcher3 = fetcherManager.getFetcher("fsf3");
- assertNotNull(fetcher3);
- assertEquals("fsf3", fetcher3.getExtensionConfig().id());
-
- // Others are still available but not instantiated yet
- assertTrue(fetcherManager.getSupported().contains("fsf2"));
- assertTrue(fetcherManager.getSupported().contains("fsf4"));
- assertTrue(fetcherManager.getSupported().contains("fsf5"));
- }
-
- @Test
- public void testSaveFetcherNotAllowed(@TempDir Path tmpDir) throws
Exception {
- Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
- TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
- TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
-
- // Load with default (runtime modifications disabled)
- FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig);
-
- // Try to add a fetcher - should fail
- String newConfigJson = "{\"basePath\": \"" +
tmpDir.resolve("path2").toString().replace("\\", "/") + "\"}";
- ExtensionConfig newConfig = new ExtensionConfig("fsf2",
"file-system-fetcher", newConfigJson);
-
- TikaConfigException exception =
assertThrows(TikaConfigException.class, () -> {
- fetcherManager.saveFetcher(newConfig);
- });
-
- assertTrue(exception.getMessage().contains("Runtime modifications are
not allowed"));
-
assertTrue(exception.getMessage().contains("allowRuntimeModifications=true"));
- }
-
- @Test
- public void testSaveFetcherNotAllowedExplicit(@TempDir Path tmpDir) throws
Exception {
- Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
- TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
- TikaPluginManager pluginManager =
TikaPluginManager.load(tikaJsonConfig);
-
- // Load with explicit false
- FetcherManager fetcherManager = FetcherManager.load(pluginManager,
tikaJsonConfig, false);
-
- // Try to add a fetcher - should fail
- String newConfigJson = "{\"basePath\": \"" +
tmpDir.resolve("path2").toString().replace("\\", "/") + "\"}";
- ExtensionConfig newConfig = new ExtensionConfig("fsf2",
"file-system-fetcher", newConfigJson);
-
- TikaConfigException exception =
assertThrows(TikaConfigException.class, () -> {
- fetcherManager.saveFetcher(newConfig);
- });
-
- assertTrue(exception.getMessage().contains("Runtime modifications are
not allowed"));
- }
-}