This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new f25a56c5a TIKA-4558 -- add lazy loading to FetcherManager
f25a56c5a is described below

commit f25a56c5ac3693d15c2f941968773c9cbfc25906
Author: tallison <[email protected]>
AuthorDate: Tue Dec 9 07:14:31 2025 -0500

    TIKA-4558 -- add lazy loading to FetcherManager
---
 .../api/fetcher/FetcherNotFoundException.java      |  33 ++
 .../tika/pipes/core/fetcher/FetcherManager.java    | 247 ++++++++++--
 .../pipes/core/fetcher/FetcherManagerTest.java     | 445 +++++++++++++++++++++
 3 files changed, 698 insertions(+), 27 deletions(-)

diff --git 
a/tika-pipes/tika-pipes-api/src/main/java/org/apache/tika/pipes/api/fetcher/FetcherNotFoundException.java
 
b/tika-pipes/tika-pipes-api/src/main/java/org/apache/tika/pipes/api/fetcher/FetcherNotFoundException.java
new file mode 100644
index 000000000..d05d4335c
--- /dev/null
+++ 
b/tika-pipes/tika-pipes-api/src/main/java/org/apache/tika/pipes/api/fetcher/FetcherNotFoundException.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.api.fetcher;
+
+import org.apache.tika.exception.TikaException;
+
+/**
+ * Signals that no fetcher configuration is registered under the requested id.
+ */
+public class FetcherNotFoundException extends TikaException {
+
+    /**
+     * @param msg description of the lookup that failed
+     * @param cause the underlying failure
+     */
+    public FetcherNotFoundException(String msg, Throwable cause) {
+        super(msg, cause);
+    }
+
+    /**
+     * @param msg description of the lookup that failed
+     */
+    public FetcherNotFoundException(String msg) {
+        super(msg);
+    }
+}
diff --git 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java
 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java
index e30a833fa..2d7fb080f 100644
--- 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java
+++ 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java
@@ -17,79 +17,272 @@
 package org.apache.tika.pipes.core.fetcher;
 
 import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 
+import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.JsonNode;
 import org.pf4j.PluginManager;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.tika.config.loader.PolymorphicObjectMapperFactory;
 import org.apache.tika.config.loader.TikaJsonConfig;
 import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.pipes.api.fetcher.Fetcher;
 import org.apache.tika.pipes.api.fetcher.FetcherFactory;
-import org.apache.tika.plugins.PluginComponentLoader;
+import org.apache.tika.pipes.api.fetcher.FetcherNotFoundException;
+import org.apache.tika.plugins.ExtensionConfig;
 
 /**
  * Utility class to hold multiple fetchers.
  * <p>
- * This forbids multiple fetchers with the same pluginId
+ * This forbids multiple fetchers with the same fetcher id; several fetchers
+ * of the same type may be configured as long as their ids differ.
+ * Fetchers are instantiated lazily on first use.
  */
 public class FetcherManager {
 
     public static final String CONFIG_KEY = "fetchers";
     private static final Logger LOG = 
LoggerFactory.getLogger(FetcherManager.class);
 
-
+    /**
+     * Loads a FetcherManager without allowing runtime modifications.
+     * <p>
+     * Equivalent to calling {@link #load(PluginManager, TikaJsonConfig, boolean)}
+     * with {@code allowRuntimeModifications=false}; use that overload to enable
+     * runtime fetcher additions.
+     *
+     * @param pluginManager the plugin manager
+     * @param tikaJsonConfig the configuration
+     * @return a FetcherManager that does not allow runtime modifications
+     * @throws TikaConfigException if the three-argument overload rejects the configuration
+     * @throws IOException declared by the three-argument overload
+     */
     public static FetcherManager load(PluginManager pluginManager, 
TikaJsonConfig tikaJsonConfig) throws TikaConfigException, IOException {
+        return load(pluginManager, tikaJsonConfig, false);
+    }
+
+    /**
+     * Loads a FetcherManager with optional support for runtime modifications.
+     * <p>
+     * The {@code fetchers} section of the configuration is validated and its
+     * per-fetcher configs are collected, but no fetcher is instantiated here.
+     *
+     * @param pluginManager the plugin manager
+     * @param tikaJsonConfig the configuration
+     * @param allowRuntimeModifications if true, allows calling
+     *        {@link #saveFetcher(ExtensionConfig)} to add fetchers at runtime
+     * @return a FetcherManager
+     * @throws TikaConfigException if a configured fetcher type has no factory
+     *         or a fetcher id is configured more than once
+     * @throws IOException declared in the signature
+     */
+    public static FetcherManager load(PluginManager pluginManager, 
TikaJsonConfig tikaJsonConfig, boolean allowRuntimeModifications) throws 
TikaConfigException, IOException {
         JsonNode fetchersNode = tikaJsonConfig.getRootNode().get(CONFIG_KEY);
-        Map<String, Fetcher> fetchers =
-                PluginComponentLoader.loadInstances(pluginManager, 
FetcherFactory.class, fetchersNode);
-        return new FetcherManager(fetchers);
+
+        // Validate configuration and collect fetcher configs without 
instantiating
+        Map<String, ExtensionConfig> configs = 
validateAndCollectConfigs(pluginManager, fetchersNode);
+
+        return new FetcherManager(pluginManager, configs, 
allowRuntimeModifications);
+    }
+
+    /**
+     * Walks the {@code fetchers} configuration node and gathers one
+     * {@link ExtensionConfig} per configured fetcher id. No fetcher is
+     * instantiated here; the only per-type work is checking that a matching
+     * {@link FetcherFactory} is available.
+     *
+     * @param pluginManager the plugin manager used to look up the fetcher factories
+     * @param configNode the {@code fetchers} node; may be {@code null} or a JSON null
+     * @return the collected configs keyed by fetcher id; empty if nothing is configured
+     * @throws TikaConfigException if a type has no factory or a fetcher id is used twice
+     * @throws IOException declared in the signature; no statement here throws it directly
+     */
+    private static Map<String, ExtensionConfig> validateAndCollectConfigs(
+            PluginManager pluginManager, JsonNode configNode) throws TikaConfigException, IOException {
+
+        Map<String, FetcherFactory> knownFactories = getFactories(pluginManager);
+        Map<String, ExtensionConfig> collected = new HashMap<>();
+
+        if (configNode == null || configNode.isNull()) {
+            return collected;
+        }
+
+        // Outer level: one entry per fetcher type name
+        for (Iterator<Map.Entry<String, JsonNode>> types = configNode.fields(); types.hasNext(); ) {
+            Map.Entry<String, JsonNode> byType = types.next();
+            String typeName = byType.getKey();
+
+            // Every configured type must have a factory
+            if (knownFactories.get(typeName) == null) {
+                throw new TikaConfigException(
+                        "Unknown fetcher type: " + typeName + ". Available: " + knownFactories.keySet());
+            }
+
+            // Inner level: one entry per fetcher id of this type
+            for (Iterator<Map.Entry<String, JsonNode>> instances = byType.getValue().fields();
+                    instances.hasNext(); ) {
+                Map.Entry<String, JsonNode> byId = instances.next();
+                String instanceId = byId.getKey();
+
+                // Ids must be unique across all types
+                if (collected.containsKey(instanceId)) {
+                    throw new TikaConfigException("Duplicate fetcher id: " + instanceId);
+                }
+
+                collected.put(instanceId,
+                        new ExtensionConfig(instanceId, typeName, toJsonString(byId.getValue())));
+            }
+        }
+
+        return collected;
+    }
+
+    /**
+     * Collects the available {@link FetcherFactory} extensions keyed by factory name.
+     * <p>
+     * If the plugin manager reports no started plugins, plugins are loaded and
+     * started first. When two factories share a name, one whose class loader is an
+     * {@code org.pf4j.PluginClassLoader} replaces one whose class loader is not;
+     * in every other duplicate case the factory seen first is kept.
+     *
+     * @param pluginManager the plugin manager to query for extensions
+     * @return the factories keyed by their name
+     * @throws TikaConfigException declared in the signature; no statement here throws it directly
+     */
+    private static Map<String, FetcherFactory> getFactories(PluginManager pluginManager) throws TikaConfigException {
+        if (pluginManager.getStartedPlugins().isEmpty()) {
+            pluginManager.loadPlugins();
+            pluginManager.startPlugins();
+        }
+
+        Map<String, FetcherFactory> byName = new HashMap<>();
+        for (FetcherFactory candidate : pluginManager.getExtensions(FetcherFactory.class)) {
+            String name = candidate.getName();
+            FetcherFactory current = byName.get(name);
+
+            if (current == null) {
+                // First factory seen under this name
+                byName.put(name, candidate);
+                continue;
+            }
+
+            boolean candidateFromPlugin =
+                    candidate.getClass().getClassLoader() instanceof org.pf4j.PluginClassLoader;
+            boolean currentFromPlugin =
+                    current.getClass().getClassLoader() instanceof org.pf4j.PluginClassLoader;
+            if (candidateFromPlugin && !currentFromPlugin) {
+                // A plugin-provided factory wins over a classpath one
+                byName.put(name, candidate);
+            }
+            // Any other duplicate is ignored and the existing factory stays
+        }
+        return byName;
+    }
+
+    /**
+     * Serializes a JSON node to its string form using the mapper supplied by
+     * {@link PolymorphicObjectMapperFactory}.
+     *
+     * @param node the node to serialize
+     * @return the JSON text for {@code node}
+     * @throws TikaConfigException if the mapper reports a {@link JsonProcessingException}
+     */
+    private static String toJsonString(final JsonNode node) throws TikaConfigException {
+        final String json;
+        try {
+            json = PolymorphicObjectMapperFactory.getMapper().writeValueAsString(node);
+        } catch (JsonProcessingException serializationFailure) {
+            throw new TikaConfigException("Failed to serialize config to JSON string", serializationFailure);
+        }
+        return json;
     }
 
-    private final Map<String, Fetcher> fetcherMap = new ConcurrentHashMap<>();
+    private final PluginManager pluginManager;
+    private final Map<String, ExtensionConfig> fetcherConfigs = new 
ConcurrentHashMap<>();
+    private final Map<String, Fetcher> fetcherCache = new 
ConcurrentHashMap<>();
+    private final boolean allowRuntimeModifications;
 
-    private FetcherManager(Map<String, Fetcher> fetcherMap) throws 
TikaConfigException {
-        this.fetcherMap.putAll(fetcherMap);
+    /**
+     * Creates a manager that stores the given fetcher configs; no fetcher is built here.
+     *
+     * @param pluginManager the plugin manager kept for later factory lookups
+     * @param fetcherConfigs the fetcher configs, copied into this manager's own map
+     * @param allowRuntimeModifications whether {@code saveFetcher} may add configs later
+     */
+    private FetcherManager(PluginManager pluginManager, Map<String, ExtensionConfig> fetcherConfigs,
+                           boolean allowRuntimeModifications) {
+        this.allowRuntimeModifications = allowRuntimeModifications;
+        this.pluginManager = pluginManager;
+        this.fetcherConfigs.putAll(fetcherConfigs);
     }
 
 
     public Fetcher getFetcher(String id) throws IOException, TikaException {
-        Fetcher fetcher = fetcherMap.get(id);
-        if (fetcher == null) {
-            throw new IllegalArgumentException(
-                    "Can't find fetcher for id=" + id + ". I've loaded: " +
-                            fetcherMap.keySet());
+        // Fast path: return an already-built fetcher straight from the cache without taking the lock
+        Fetcher fetcher = fetcherCache.get(id);
+        if (fetcher != null) {
+            return fetcher;
+        }
+
+        // Not built yet: an id with no stored config is unknown and is reported along with the configured ids
+        ExtensionConfig config = fetcherConfigs.get(id);
+        if (config == null) {
+            throw new FetcherNotFoundException(
+                    "Can't find fetcher for id=" + id + ". Available: " + 
fetcherConfigs.keySet());
+        }
+
+        // Serialize construction on this manager so that concurrent callers do not each build a fetcher
+        synchronized (this) {
+            // Double-check in case another thread built it while we were 
waiting
+            fetcher = fetcherCache.get(id);
+            if (fetcher != null) {
+                return fetcher;
+            }
+
+            // Build once, cache under the id and return it; a TikaConfigException from the build is rethrown as an IOException
+            try {
+                fetcher = buildFetcher(config);
+                fetcherCache.put(id, fetcher);
+                LOG.debug("Lazily instantiated fetcher: {}", id);
+                return fetcher;
+            } catch (TikaConfigException e) {
+                throw new IOException("Failed to build fetcher: " + id, e);
+            }
         }
-        return fetcher;
+    }
+
+    /**
+     * Instantiates the fetcher described by {@code config} through the factory
+     * registered under the config's type name.
+     *
+     * @param config the stored configuration of the fetcher to build
+     * @return the fetcher produced by the factory
+     * @throws TikaConfigException if no factory is registered for the config's type name
+     * @throws IOException declared in the signature (presumably raised by the factory; confirm)
+     */
+    private Fetcher buildFetcher(ExtensionConfig config) throws TikaConfigException, IOException {
+        Map<String, FetcherFactory> available = getFactories(pluginManager);
+        String typeName = config.name();
+        FetcherFactory factory = available.get(typeName);
+
+        if (factory != null) {
+            return factory.buildExtension(config);
+        }
+
+        // The type was already validated when the config was accepted, so this is a defensive fallback
+        throw new TikaConfigException(
+                "Unknown fetcher type: " + typeName + ". Available: " + available.keySet());
+    }
+
+    /**
+     * Registers a new fetcher configuration while the manager is running.
+     * <p>
+     * Only the configuration is stored; the fetcher itself is built the first time
+     * it is requested through {@link #getFetcher(String)}.
+     * <p>
+     * This method is usable only when the manager was created through
+     * {@link #load(PluginManager, TikaJsonConfig, boolean)} with
+     * {@code allowRuntimeModifications=true}.
+     * <p>
+     * Only authorized/authenticated users should be allowed to modify fetchers. BE CAREFUL.
+     *
+     * @param config the extension configuration for the fetcher
+     * @throws TikaConfigException if runtime modifications are not allowed, if a fetcher
+     *         with the same id already exists, or if the fetcher type has no factory
+     * @throws IllegalArgumentException if {@code config} is {@code null}
+     * @throws IOException declared in the signature
+     */
+    public synchronized void saveFetcher(ExtensionConfig config) throws TikaConfigException, IOException {
+        if (!allowRuntimeModifications) {
+            throw new TikaConfigException(
+                    "Runtime modifications are not allowed. FetcherManager must be loaded with " +
+                    "allowRuntimeModifications=true to use saveFetcher()");
+        }
+        if (config == null) {
+            throw new IllegalArgumentException("ExtensionConfig cannot be null");
+        }
+
+        final String id = config.id();
+        final String type = config.name();
+
+        // An existing id is never overwritten
+        if (fetcherConfigs.containsKey(id)) {
+            throw new TikaConfigException("Fetcher with id '" + id + "' already exists");
+        }
+
+        // Reject types that no factory can build
+        Map<String, FetcherFactory> available = getFactories(pluginManager);
+        if (!available.containsKey(type)) {
+            throw new TikaConfigException(
+                    "Unknown fetcher type: " + type + ". Available: " + available.keySet());
+        }
+
+        // Keep only the config; instantiation is deferred to getFetcher(id)
+        fetcherConfigs.put(id, config);
+        LOG.debug("Saved fetcher config: id={}, type={}", id, type);
     }
 
     public Set<String> getSupported() {
-        return fetcherMap.keySet();
+        return fetcherConfigs.keySet();
     }
 
     /**
      * Convenience method that returns a fetcher if only one fetcher
      * is specified in the tika-config file.  If 0 or > 1 fetchers
      * are specified, this throws an IllegalArgumentException.
-     * @return
+     * <p>
+     * The fetcher is resolved through {@link #getFetcher(String)}, so it is
+     * instantiated lazily on the first call.
+     *
+     * @return the single configured fetcher
+     * @throws IllegalArgumentException if zero or more than one fetcher is configured
+     * @throws IOException if the fetcher cannot be built
+     * @throws TikaException if resolving the fetcher by id fails
      */
-    public Fetcher getFetcher() {
-        if (fetcherMap.isEmpty()) {
+    public Fetcher getFetcher() throws IOException, TikaException {
+        // keySet().iterator().next() on an empty map raises NoSuchElementException,
+        // so reject the empty case explicitly to honor the documented contract
+        if (fetcherConfigs.isEmpty()) {
             throw new IllegalArgumentException("fetchers size must == 1 for the no arg call");
         }
-        if (fetcherMap.size() > 1) {
+        if (fetcherConfigs.size() > 1) {
             throw new IllegalArgumentException("need to specify 'fetcherId' if > 1 fetchers are" +
                     " available");
         }
-        for (Fetcher fetcher : fetcherMap.values()) {
-            return fetcher;
-        }
-        //this should be unreachable?!
-        throw new IllegalArgumentException("fetchers size must == 0");
+        // Get the single fetcher id and use getFetcher(id) for lazy loading
+        String fetcherId = fetcherConfigs.keySet().iterator().next();
+        return getFetcher(fetcherId);
     }
 }
diff --git 
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/fetcher/FetcherManagerTest.java
 
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/fetcher/FetcherManagerTest.java
new file mode 100644
index 000000000..84793245b
--- /dev/null
+++ 
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/fetcher/FetcherManagerTest.java
@@ -0,0 +1,445 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.core.fetcher;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import org.apache.tika.config.loader.TikaJsonConfig;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.pipes.api.fetcher.Fetcher;
+import org.apache.tika.pipes.api.fetcher.FetcherNotFoundException;
+import org.apache.tika.pipes.core.PluginsTestHelper;
+import org.apache.tika.plugins.ExtensionConfig;
+import org.apache.tika.plugins.TikaPluginManager;
+
+public class FetcherManagerTest {
+
+    @Test
+    public void testBasicLoad(@TempDir Path tmpDir) throws Exception {
+        // Config supplied by the test helper; expected to declare exactly one fetcher, "fsf"
+        TikaJsonConfig tikaJsonConfig =
+                TikaJsonConfig.load(PluginsTestHelper.getFileSystemFetcherConfig(tmpDir));
+        TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig);
+        FetcherManager fetcherManager = FetcherManager.load(pluginManager, tikaJsonConfig);
+
+        assertNotNull(fetcherManager);
+        assertEquals(1, fetcherManager.getSupported().size());
+        assertTrue(fetcherManager.getSupported().contains("fsf"));
+    }
+
+    @Test
+    public void testLazyInstantiation(@TempDir Path tmpDir) throws Exception {
+        String basePath1 = tmpDir.resolve("path1").toString().replace("\\", "/");
+        String basePath2 = tmpDir.resolve("path2").toString().replace("\\", "/");
+
+        // Two fetchers of the same type, each with its own base path
+        String configJson = "{\n" +
+                "  \"fetchers\": {\n" +
+                "    \"file-system-fetcher\": {\n" +
+                "      \"fsf1\": {\n" +
+                "        \"basePath\": \"" + basePath1 + "\"\n" +
+                "      },\n" +
+                "      \"fsf2\": {\n" +
+                "        \"basePath\": \"" + basePath2 + "\"\n" +
+                "      }\n" +
+                "    }\n" +
+                "  },\n" +
+                "  \"plugin-roots\": \"target/plugins\"\n" +
+                "}";
+
+        Path configPath = tmpDir.resolve("config.json");
+        Files.writeString(configPath, configJson, StandardCharsets.UTF_8);
+
+        TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(configPath);
+        FetcherManager fetcherManager =
+                FetcherManager.load(TikaPluginManager.load(tikaJsonConfig), tikaJsonConfig);
+
+        // Both ids are listed right after load
+        assertEquals(2, fetcherManager.getSupported().size());
+
+        // Ask for fsf1 first
+        Fetcher first = fetcherManager.getFetcher("fsf1");
+        assertNotNull(first);
+        assertEquals("fsf1", first.getExtensionConfig().id());
+
+        // fsf2 has not been requested yet but must still be listed
+        assertTrue(fetcherManager.getSupported().contains("fsf2"));
+
+        // Then ask for fsf2
+        Fetcher second = fetcherManager.getFetcher("fsf2");
+        assertNotNull(second);
+        assertEquals("fsf2", second.getExtensionConfig().id());
+    }
+
+    @Test
+    public void testCaching(@TempDir Path tmpDir) throws Exception {
+        TikaJsonConfig tikaJsonConfig =
+                TikaJsonConfig.load(PluginsTestHelper.getFileSystemFetcherConfig(tmpDir));
+        TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig);
+        FetcherManager fetcherManager = FetcherManager.load(pluginManager, tikaJsonConfig);
+
+        // Three lookups of the same id
+        Fetcher first = fetcherManager.getFetcher("fsf");
+        Fetcher second = fetcherManager.getFetcher("fsf");
+        Fetcher third = fetcherManager.getFetcher("fsf");
+
+        // Every lookup must hand back the very same object
+        assertSame(first, second);
+        assertSame(second, third);
+    }
+
+    @Test
+    public void testThreadSafety(@TempDir Path tmpDir) throws Exception {
+        Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
+        TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
+        TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig);
+
+        FetcherManager fetcherManager = FetcherManager.load(pluginManager, tikaJsonConfig);
+
+        int threadCount = 10;
+        ExecutorService executor = Executors.newFixedThreadPool(threadCount);
+        try {
+            CountDownLatch startLatch = new CountDownLatch(1);
+            CountDownLatch doneLatch = new CountDownLatch(threadCount);
+            List<Future<Fetcher>> futures = new ArrayList<>();
+
+            // Start multiple threads that all request the same fetcher simultaneously
+            for (int i = 0; i < threadCount; i++) {
+                futures.add(executor.submit(() -> {
+                    try {
+                        // Wait for all threads to be ready
+                        startLatch.await();
+
+                        // All threads try to get the fetcher at once
+                        return fetcherManager.getFetcher("fsf");
+                    } finally {
+                        doneLatch.countDown();
+                    }
+                }));
+            }
+
+            // Start all threads at once
+            startLatch.countDown();
+
+            // Wait for all threads to complete
+            assertTrue(doneLatch.await(10, TimeUnit.SECONDS));
+
+            // Collect all fetchers; the bounded wait keeps a stuck task from hanging the test
+            List<Fetcher> fetchers = new ArrayList<>();
+            for (Future<Fetcher> future : futures) {
+                fetchers.add(future.get(10, TimeUnit.SECONDS));
+            }
+
+            // All threads should have gotten the same instance
+            Fetcher first = fetchers.get(0);
+            for (Fetcher fetcher : fetchers) {
+                assertSame(first, fetcher, "All threads should get the same fetcher instance");
+            }
+        } finally {
+            // Always release the pool's threads, even when an assertion or a task fails
+            executor.shutdownNow();
+        }
+    }
+
+    @Test
+    public void testUnknownFetcherId(@TempDir Path tmpDir) throws Exception {
+        TikaJsonConfig tikaJsonConfig =
+                TikaJsonConfig.load(PluginsTestHelper.getFileSystemFetcherConfig(tmpDir));
+        TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig);
+        FetcherManager fetcherManager = FetcherManager.load(pluginManager, tikaJsonConfig);
+
+        // Looking up an id that was never configured must fail with the dedicated exception
+        FetcherNotFoundException notFound = assertThrows(FetcherNotFoundException.class,
+                () -> fetcherManager.getFetcher("non-existent-fetcher"));
+
+        String message = notFound.getMessage();
+        assertTrue(message.contains("non-existent-fetcher"));
+        assertTrue(message.contains("Available:"));
+    }
+
+    @Test
+    public void testUnknownFetcherType(@TempDir Path tmpDir) throws Exception {
+        // A fetcher type that no factory provides
+        String configJson = "{\n" +
+                "  \"fetchers\": {\n" +
+                "    \"non-existent-fetcher-type\": {\n" +
+                "      \"fetcher1\": {\n" +
+                "        \"someProp\": \"value\"\n" +
+                "      }\n" +
+                "    }\n" +
+                "  },\n" +
+                "  \"plugin-roots\": \"target/plugins\"\n" +
+                "}";
+
+        Path configPath = tmpDir.resolve("config.json");
+        Files.writeString(configPath, configJson, StandardCharsets.UTF_8);
+
+        TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(configPath);
+        TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig);
+
+        // The type is validated eagerly, so load() itself must fail
+        TikaConfigException rejected = assertThrows(TikaConfigException.class,
+                () -> FetcherManager.load(pluginManager, tikaJsonConfig));
+
+        String message = rejected.getMessage();
+        assertTrue(message.contains("Unknown fetcher type"));
+        assertTrue(message.contains("non-existent-fetcher-type"));
+    }
+
+    @Test
+    public void testDuplicateFetcherId(@TempDir Path tmpDir) throws Exception {
+        String configJson = "{\n" +
+                "  \"fetchers\": {\n" +
+                "    \"file-system-fetcher\": {\n" +
+                "      \"fsf1\": {\n" +
+                "        \"basePath\": \"" + tmpDir.resolve("path1").toString().replace("\\", "/") + "\"\n" +
+                "      },\n" +
+                "      \"fsf1\": {\n" +
+                "        \"basePath\": \"" + tmpDir.resolve("path2").toString().replace("\\", "/") + "\"\n" +
+                "      }\n" +
+                "    }\n" +
+                "  },\n" +
+                "  \"plugin-roots\": \"target/plugins\"\n" +
+                "}";
+
+        Path configPath = tmpDir.resolve("config.json");
+        Files.writeString(configPath, configJson, StandardCharsets.UTF_8);
+
+        // PolymorphicObjectMapperFactory has FAIL_ON_READING_DUP_TREE_KEY enabled
+        // so duplicate keys are caught during JSON parsing
+        TikaConfigException exception = assertThrows(TikaConfigException.class, () -> {
+            TikaJsonConfig.load(configPath);
+        });
+
+        // Null-safe checks: a missing message or cause must surface as an assertion
+        // failure with context, not as a NullPointerException
+        String message = exception.getMessage();
+        Throwable cause = exception.getCause();
+        boolean parseFailure = message != null && message.contains("Failed to parse JSON");
+        boolean duplicateField = cause != null && cause.getMessage() != null &&
+                cause.getMessage().contains("Duplicate field");
+
+        assertTrue(parseFailure || duplicateField, "Unexpected exception: " + exception);
+    }
+
+    @Test
+    public void testGetSingleFetcher(@TempDir Path tmpDir) throws Exception {
+        TikaJsonConfig tikaJsonConfig =
+                TikaJsonConfig.load(PluginsTestHelper.getFileSystemFetcherConfig(tmpDir));
+        TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig);
+        FetcherManager fetcherManager = FetcherManager.load(pluginManager, tikaJsonConfig);
+
+        // With exactly one fetcher configured, the no-arg lookup resolves it
+        Fetcher onlyFetcher = fetcherManager.getFetcher();
+        assertNotNull(onlyFetcher);
+        assertEquals("fsf", onlyFetcher.getExtensionConfig().id());
+    }
+
+    @Test
+    public void testGetSingleFetcherWithMultipleConfigured(@TempDir Path tmpDir) throws Exception {
+        String basePath1 = tmpDir.resolve("path1").toString().replace("\\", "/");
+        String basePath2 = tmpDir.resolve("path2").toString().replace("\\", "/");
+
+        String configJson = "{\n" +
+                "  \"fetchers\": {\n" +
+                "    \"file-system-fetcher\": {\n" +
+                "      \"fsf1\": {\n" +
+                "        \"basePath\": \"" + basePath1 + "\"\n" +
+                "      },\n" +
+                "      \"fsf2\": {\n" +
+                "        \"basePath\": \"" + basePath2 + "\"\n" +
+                "      }\n" +
+                "    }\n" +
+                "  },\n" +
+                "  \"plugin-roots\": \"target/plugins\"\n" +
+                "}";
+
+        Path configPath = tmpDir.resolve("config.json");
+        Files.writeString(configPath, configJson, StandardCharsets.UTF_8);
+
+        TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(configPath);
+        FetcherManager fetcherManager =
+                FetcherManager.load(TikaPluginManager.load(tikaJsonConfig), tikaJsonConfig);
+
+        // With two fetchers configured, the no-arg lookup is ambiguous and must be rejected
+        IllegalArgumentException ambiguous = assertThrows(IllegalArgumentException.class,
+                () -> fetcherManager.getFetcher());
+
+        assertTrue(ambiguous.getMessage().contains("need to specify 'fetcherId'"));
+    }
+
+    @Test
+    public void testSaveFetcher(@TempDir Path tmpDir) throws Exception {
+        TikaJsonConfig tikaJsonConfig =
+                TikaJsonConfig.load(PluginsTestHelper.getFileSystemFetcherConfig(tmpDir));
+        TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig);
+
+        // Runtime modifications must be switched on for saveFetcher() to work
+        FetcherManager fetcherManager = FetcherManager.load(pluginManager, tikaJsonConfig, true);
+
+        // Only the preconfigured fetcher is present at first
+        assertEquals(1, fetcherManager.getSupported().size());
+
+        // Register a second fetcher on the fly
+        String basePath = tmpDir.resolve("path2").toString().replace("\\", "/");
+        String newConfigJson = "{\"basePath\": \"" + basePath + "\"}";
+        fetcherManager.saveFetcher(new ExtensionConfig("fsf2", "file-system-fetcher", newConfigJson));
+
+        // Both ids are now listed
+        assertEquals(2, fetcherManager.getSupported().size());
+        assertTrue(fetcherManager.getSupported().contains("fsf"));
+        assertTrue(fetcherManager.getSupported().contains("fsf2"));
+
+        // The new fetcher can be obtained by id
+        Fetcher added = fetcherManager.getFetcher("fsf2");
+        assertNotNull(added);
+        assertEquals("fsf2", added.getExtensionConfig().id());
+    }
+
+    @Test
+    public void testSaveFetcherDuplicate(@TempDir Path tmpDir) throws Exception {
+        TikaJsonConfig tikaJsonConfig =
+                TikaJsonConfig.load(PluginsTestHelper.getFileSystemFetcherConfig(tmpDir));
+        TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig);
+        FetcherManager fetcherManager = FetcherManager.load(pluginManager, tikaJsonConfig, true);
+
+        // Reuse the id of the fetcher that is already configured
+        String basePath = tmpDir.resolve("path2").toString().replace("\\", "/");
+        String newConfigJson = "{\"basePath\": \"" + basePath + "\"}";
+        ExtensionConfig duplicateConfig = new ExtensionConfig("fsf", "file-system-fetcher", newConfigJson);
+
+        TikaConfigException rejected = assertThrows(TikaConfigException.class,
+                () -> fetcherManager.saveFetcher(duplicateConfig));
+
+        String message = rejected.getMessage();
+        assertTrue(message.contains("already exists"));
+        assertTrue(message.contains("fsf"));
+    }
+
+    @Test
+    public void testSaveFetcherUnknownType(@TempDir Path tmpDir) throws Exception {
+        TikaJsonConfig tikaJsonConfig =
+                TikaJsonConfig.load(PluginsTestHelper.getFileSystemFetcherConfig(tmpDir));
+        TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig);
+        FetcherManager fetcherManager = FetcherManager.load(pluginManager, tikaJsonConfig, true);
+
+        // A config whose type has no matching factory
+        ExtensionConfig unknownTypeConfig = new ExtensionConfig("fetcher2", "unknown-fetcher-type", "{}");
+
+        TikaConfigException rejected = assertThrows(TikaConfigException.class,
+                () -> fetcherManager.saveFetcher(unknownTypeConfig));
+
+        String message = rejected.getMessage();
+        assertTrue(message.contains("Unknown fetcher type"));
+        assertTrue(message.contains("unknown-fetcher-type"));
+    }
+
+    /**
+     * Verifies that {@code saveFetcher(null)} is rejected.
+     * <p>
+     * Unlike the other failure cases in this class, the expected exception here is an
+     * {@link IllegalArgumentException}; its message must contain "cannot be null".
+     *
+     * @param tmpDir temporary directory supplied by JUnit; used to build the config file
+     * @throws Exception if loading the JSON config, plugin manager or fetcher manager fails
+     */
+    @Test
+    public void testSaveFetcherNull(@TempDir Path tmpDir) throws Exception {
+        Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
+        TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
+        TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig);
+
+        FetcherManager fetcherManager = FetcherManager.load(pluginManager, tikaJsonConfig, true);
+
+        IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> {
+            fetcherManager.saveFetcher(null);
+        });
+
+        assertTrue(exception.getMessage().contains("cannot be null"));
+    }
+
+    /**
+     * Saves several additional fetcher configurations and checks that one of them can be
+     * retrieved on demand while the rest remain listed as supported.
+     * <p>
+     * Four configs ("fsf2" through "fsf5") are added on top of what the helper-generated config
+     * provides, so {@code getSupported()} is expected to report five ids in total. Only "fsf3"
+     * is then requested through {@code getFetcher}.
+     * <p>
+     * NOTE(review): the assertions confirm that the other ids are still reported by
+     * {@code getSupported()}; they do not observe whether those fetchers have been instantiated,
+     * so lazy instantiation itself is not directly verified here.
+     *
+     * @param tmpDir temporary directory supplied by JUnit; used to build the config file and
+     *               the {@code basePath} of each added fetcher
+     * @throws Exception if loading the configuration fails or any save/get call throws
+     */
+    @Test
+    public void testSaveFetcherLazyInstantiation(@TempDir Path tmpDir) throws Exception {
+        Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
+        TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
+        TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig);
+
+        FetcherManager fetcherManager = FetcherManager.load(pluginManager, tikaJsonConfig, true);
+
+        // Add multiple fetchers
+        for (int i = 2; i <= 5; i++) {
+            // backslashes are normalised to '/' so a Windows path stays valid inside the JSON string
+            String configJson = "{\"basePath\": \"" + tmpDir.resolve("path" + i).toString().replace("\\", "/") + "\"}";
+            ExtensionConfig config2 = new ExtensionConfig("fsf" + i, "file-system-fetcher", configJson);
+            fetcherManager.saveFetcher(config2);
+        }
+
+        // All 5 should be in supported list
+        assertEquals(5, fetcherManager.getSupported().size());
+
+        // Request only fsf3
+        Fetcher fetcher3 = fetcherManager.getFetcher("fsf3");
+        assertNotNull(fetcher3);
+        assertEquals("fsf3", fetcher3.getExtensionConfig().id());
+
+        // Others were never requested but must still be reported as supported
+        assertTrue(fetcherManager.getSupported().contains("fsf2"));
+        assertTrue(fetcherManager.getSupported().contains("fsf4"));
+        assertTrue(fetcherManager.getSupported().contains("fsf5"));
+    }
+
+    /**
+     * Verifies that {@code saveFetcher} fails when the manager is created through the
+     * two-argument {@code load} overload, i.e. without opting in to runtime modifications.
+     * <p>
+     * The expected {@link TikaConfigException} message must contain
+     * "Runtime modifications are not allowed" and the hint "allowRuntimeModifications=true".
+     *
+     * @param tmpDir temporary directory supplied by JUnit; used to build the config file and
+     *               the {@code basePath} of the fetcher that is attempted
+     * @throws Exception if loading the JSON config, plugin manager or fetcher manager fails
+     */
+    @Test
+    public void testSaveFetcherNotAllowed(@TempDir Path tmpDir) throws Exception {
+        Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
+        TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
+        TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig);
+
+        // Load with default (runtime modifications disabled)
+        FetcherManager fetcherManager = FetcherManager.load(pluginManager, tikaJsonConfig);
+
+        // Try to add a fetcher - should fail
+        String newConfigJson = "{\"basePath\": \"" + tmpDir.resolve("path2").toString().replace("\\", "/") + "\"}";
+        ExtensionConfig newConfig = new ExtensionConfig("fsf2", "file-system-fetcher", newConfigJson);
+
+        TikaConfigException exception = assertThrows(TikaConfigException.class, () -> {
+            fetcherManager.saveFetcher(newConfig);
+        });
+
+        assertTrue(exception.getMessage().contains("Runtime modifications are not allowed"));
+        assertTrue(exception.getMessage().contains("allowRuntimeModifications=true"));
+    }
+
+    /**
+     * Verifies that {@code saveFetcher} fails when the manager is loaded with {@code false}
+     * passed explicitly as the third {@code load} argument.
+     * <p>
+     * The expected {@link TikaConfigException} message must contain
+     * "Runtime modifications are not allowed".
+     *
+     * @param tmpDir temporary directory supplied by JUnit; used to build the config file and
+     *               the {@code basePath} of the fetcher that is attempted
+     * @throws Exception if loading the JSON config, plugin manager or fetcher manager fails
+     */
+    @Test
+    public void testSaveFetcherNotAllowedExplicit(@TempDir Path tmpDir) throws Exception {
+        Path config = PluginsTestHelper.getFileSystemFetcherConfig(tmpDir);
+        TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(config);
+        TikaPluginManager pluginManager = TikaPluginManager.load(tikaJsonConfig);
+
+        // Load with explicit false
+        FetcherManager fetcherManager = FetcherManager.load(pluginManager, tikaJsonConfig, false);
+
+        // Try to add a fetcher - should fail
+        String newConfigJson = "{\"basePath\": \"" + tmpDir.resolve("path2").toString().replace("\\", "/") + "\"}";
+        ExtensionConfig newConfig = new ExtensionConfig("fsf2", "file-system-fetcher", newConfigJson);
+
+        TikaConfigException exception = assertThrows(TikaConfigException.class, () -> {
+            fetcherManager.saveFetcher(newConfig);
+        });
+
+        assertTrue(exception.getMessage().contains("Runtime modifications are not allowed"));
+    }
+}


Reply via email to