This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4519 in repository https://gitbox.apache.org/repos/asf/tika.git
commit d9875ca546091dee8595f6d9d1889bdb71cfbc32 Author: tallison <[email protected]> AuthorDate: Tue Nov 4 14:54:26 2025 -0500 TIKA-4519 -- checkpoint tika-pipes-tests work --- pom.xml | 1 + tika-pipes/tika-emitters/pom.xml | 6 + .../tika/pipes/emitter/fs/FileSystemEmitter.java | 2 +- ...emitter-plugin.properties => plugin.properties} | 2 +- tika-pipes/tika-fetchers/pom.xml | 6 + .../tika/pipes/fetcher/fs/FileSystemFetcher.java | 2 +- .../src/main/resources/plugin.properties} | 8 +- tika-pipes/tika-pipes-api/pom.xml | 7 +- tika-pipes/tika-pipes-core-tests/pom.xml | 33 +++++ .../apache/tika/pipes/core/PipesClientTest.java | 2 +- .../apache/tika/pipes/core/PluginsTestHelper.java | 15 ++- .../tika/pipes/core/TikaPipesConfigTest.java | 4 +- .../pipes/core/async/AsyncChaosMonkeyTest.java | 60 +++++---- .../apache/tika/pipes/core/async/MockFetcher.java | 51 -------- .../tika/pipes/core/emitter/MockEmitter.java | 90 -------------- .../tika/pipes/core/fetcher/MockFetcher.java | 68 ---------- .../test/resources/configs/fetchers-emitters.json | 12 +- .../apache/tika/config/pipes-iterator-config.xml | 3 +- .../tika/config/pipes-iterator-multiple-config.xml | 6 +- tika-pipes/tika-pipes-core/pom.xml | 5 + .../apache/tika/pipes/core/PipesPluginsConfig.java | 138 --------------------- .../org/apache/tika/pipes/core/PipesServer.java | 9 +- .../tika/pipes/core/emitter/EmitterManager.java | 42 +++++-- .../tika/pipes/core/fetcher/FetcherManager.java | 40 ++++-- .../tika/pipes/core/PipesPluginsConfigTest.java | 28 ----- .../tika-pipes-api => tika-plugins-core}/pom.xml | 30 ++--- .../apache/tika/plugins/AbstractTikaPlugin.java | 0 .../java/org/apache/tika/plugins/PluginConfig.java | 0 .../org/apache/tika/plugins/PluginConfigs.java | 24 ++-- .../java/org/apache/tika/plugins/TikaPlugin.java | 0 .../org/apache/tika/plugins/TikaPluginFactory.java | 12 ++ .../apache/tika/plugins/TikaPluginsManager.java | 133 ++++++++++++++++++++ .../tika/plugins/TikaPluginsManagerTest.java | 69 +++++++++++ .../src/test/resources/test1.json | 6 +- .../src/test/resources/test2.json | 9 +- .../src/test/resources/test3.json | 9 +- .../src/test/resources/testEmpty.json | 12 ++ .../src/test/resources/testEmpty2.json | 14 +++ .../src/test/resources/testNoPluginConfig.json | 15 +++ .../tika/serialization/PluginConfigLoader.java | 50 -------- .../serialization/PluginsConfigDeserializer.java | 43 ------- .../serialization/PluginsConfigSerializer.java | 37 ------ .../tika/serialization/PluginsConfigTest.java | 85 ------------- 43 files changed, 497 insertions(+), 691 deletions(-) diff --git a/pom.xml b/pom.xml index 2bae0da65..5a31c6d32 100644 --- a/pom.xml +++ b/pom.xml @@ -39,6 +39,7 @@ <module>tika-bom</module> <module>tika-core</module> <module>tika-serialization</module> + <module>tika-plugins-core</module> <module>tika-detectors</module> <module>tika-parsers</module> <module>tika-bundles</module> diff --git a/tika-pipes/tika-emitters/pom.xml b/tika-pipes/tika-emitters/pom.xml index 3f870b8cb..8f46e013b 100644 --- a/tika-pipes/tika-emitters/pom.xml +++ b/tika-pipes/tika-emitters/pom.xml @@ -55,6 +55,12 @@ <artifactId>tika-pipes-api</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.apache.tika</groupId> + <artifactId>tika-plugins-core</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> </dependencies> <build> diff --git a/tika-pipes/tika-emitters/tika-emitter-file-system/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java b/tika-pipes/tika-emitters/tika-emitter-file-system/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java index 66ea2d76b..8b37e6ad6 100644 --- a/tika-pipes/tika-emitters/tika-emitter-file-system/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java +++ b/tika-pipes/tika-emitters/tika-emitter-file-system/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java @@ -157,7 +157,7 @@ public class FileSystemEmitter extends AbstractStreamEmitter { FileSystemEmitterConfig config = fileSystemEmitterConfig; PluginConfigs pluginConfigs = parseContext.get(PluginConfigs.class); if (pluginConfigs != null) { - Optional<PluginConfig> pluginConfigOpt = pluginConfigs.get(getPluginConfig().id()); + Optional<PluginConfig> pluginConfigOpt = pluginConfigs.getById(getPluginConfig().id()); if (pluginConfigOpt.isPresent()) { config = FileSystemEmitterConfig.load(pluginConfigOpt.get().jsonConfig()); checkConfig(config); diff --git a/tika-pipes/tika-emitters/tika-emitter-file-system/src/main/resources/emitter-plugin.properties b/tika-pipes/tika-emitters/tika-emitter-file-system/src/main/resources/plugin.properties similarity index 94% copy from tika-pipes/tika-emitters/tika-emitter-file-system/src/main/resources/emitter-plugin.properties copy to tika-pipes/tika-emitters/tika-emitter-file-system/src/main/resources/plugin.properties index a85876524..90125b375 100644 --- a/tika-pipes/tika-emitters/tika-emitter-file-system/src/main/resources/emitter-plugin.properties +++ b/tika-pipes/tika-emitters/tika-emitter-file-system/src/main/resources/plugin.properties @@ -18,4 +18,4 @@ plugin.id=file-system-emitter plugin.class=org.apache.tika.pipes.emitter.fs.FileSystemEmitterPlugin plugin.version=4.0.0-SNAPSHOT plugin.provider=Local File System Emitter -plugin.description=Capable of emitting the local file system +plugin.description=Capable of emitting the local file system \ No newline at end of file diff --git a/tika-pipes/tika-fetchers/pom.xml b/tika-pipes/tika-fetchers/pom.xml index 402dbe06e..158da29e4 100644 --- a/tika-pipes/tika-fetchers/pom.xml +++ b/tika-pipes/tika-fetchers/pom.xml @@ -59,6 +59,12 @@ <artifactId>tika-pipes-api</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.apache.tika</groupId> + <artifactId>tika-plugins-core</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> </dependencies> <scm> <tag>3.0.0-rc1</tag> diff --git a/tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java index 880011f26..646d3ba50 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java @@ -92,7 +92,7 @@ public class FileSystemFetcher extends AbstractTikaPlugin implements Fetcher { FileSystemFetcherConfig config = defaultFileSystemFetcherConfig; PluginConfigs pluginConfigManager = parseContext.get(PluginConfigs.class); if (pluginConfigManager != null) { - Optional<PluginConfig> pluginConfigOpt = pluginConfigManager.get(getPluginConfig().id()); + Optional<PluginConfig> pluginConfigOpt = pluginConfigManager.getById(getPluginConfig().id()); if (pluginConfigOpt.isPresent()) { PluginConfig pluginConfig = pluginConfigOpt.get(); checkPluginId(pluginConfig.factoryPluginId()); diff --git a/tika-pipes/tika-emitters/tika-emitter-file-system/src/main/resources/emitter-plugin.properties b/tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/resources/plugin.properties similarity index 80% rename from tika-pipes/tika-emitters/tika-emitter-file-system/src/main/resources/emitter-plugin.properties rename to tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/resources/plugin.properties index a85876524..493c8d155 100644 --- a/tika-pipes/tika-emitters/tika-emitter-file-system/src/main/resources/emitter-plugin.properties +++ b/tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/resources/plugin.properties @@ -14,8 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -plugin.id=file-system-emitter -plugin.class=org.apache.tika.pipes.emitter.fs.FileSystemEmitterPlugin +plugin.id=file-system-fetcher +plugin.class=org.apache.tika.pipes.fetcher.fs.FileSystemFetcherPlugin plugin.version=4.0.0-SNAPSHOT -plugin.provider=Local File System Emitter -plugin.description=Capable of emitting the local file system +plugin.provider=Local File System Fetcher +plugin.description=Capable of emitting the local file system \ No newline at end of file diff --git a/tika-pipes/tika-pipes-api/pom.xml b/tika-pipes/tika-pipes-api/pom.xml index f852b8346..1cfbb1e50 100644 --- a/tika-pipes/tika-pipes-api/pom.xml +++ b/tika-pipes/tika-pipes-api/pom.xml @@ -37,13 +37,18 @@ <artifactId>pf4j</artifactId> <scope>provided</scope> </dependency> - <dependency> <groupId>${project.groupId}</groupId> <artifactId>tika-core</artifactId> <version>${project.version}</version> <scope>provided</scope> </dependency> + <dependency> + <groupId>${project.groupId}</groupId> + <artifactId>tika-plugins-core</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> </dependencies> <build> <plugins> diff --git a/tika-pipes/tika-pipes-core-tests/pom.xml b/tika-pipes/tika-pipes-core-tests/pom.xml index c6a8a91de..d15e0ea70 100644 --- a/tika-pipes/tika-pipes-core-tests/pom.xml +++ b/tika-pipes/tika-pipes-core-tests/pom.xml @@ -88,6 +88,39 @@ </excludes> </configuration> </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-dependency-plugin</artifactId> + <executions> + <execution> + <id>copy-plugins</id> + <phase>generate-test-resources</phase> + <goals> + <goal>copy</goal> + </goals> + <configuration> + <outputDirectory>${project.build.directory}/plugins</outputDirectory> + <artifactItems> + <artifactItem> + <groupId>org.apache.tika</groupId> + <artifactId>tika-fetcher-file-system</artifactId> + <version>${project.version}</version> + <type>jar</type> + <overWrite>true</overWrite> + </artifactItem> + <artifactItem> + <groupId>org.apache.tika</groupId> + <artifactId>tika-emitter-file-system</artifactId> + <version>${project.version}</version> + <type>jar</type> + <overWrite>true</overWrite> + </artifactItem> + </artifactItems> + </configuration> + </execution> + </executions> + </plugin> + </plugins> </build> </project> diff --git a/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/PipesClientTest.java b/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/PipesClientTest.java index 509011597..a9c2dd849 100644 --- a/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/PipesClientTest.java +++ b/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/PipesClientTest.java @@ -41,7 +41,7 @@ import org.apache.tika.pipes.core.emitter.EmitKey; import org.apache.tika.pipes.core.fetcher.FetchKey; public class PipesClientTest { - String fetcherName = "file-system-fetcher"; + String fetcherName = "fsf"; String testDoc = "testOverlappingText.pdf"; diff --git a/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/PluginsTestHelper.java b/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/PluginsTestHelper.java index d89b479e1..863fd1621 100644 --- a/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/PluginsTestHelper.java +++ b/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/PluginsTestHelper.java @@ -22,7 +22,13 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.tika.pipes.core.emitter.EmitterManager; + public class PluginsTestHelper { + private static final Logger LOG = LoggerFactory.getLogger(PluginsTestHelper.class); public static Path getFileSystemFetcherConfig(Path configBase) throws Exception { return getFileSystemFetcherConfig(configBase, configBase.resolve("input"), configBase.resolve("output")); @@ -43,7 +49,14 @@ public class PluginsTestHelper { .toAbsolutePath() .toString()); } - + Path pwd = Paths.get(""); + Path plugins = pwd.resolve("target/plugins"); + if (Files.isDirectory(plugins)) { + json = json.replace("PLUGINS_PATHS", plugins.toAbsolutePath().toString()); + LOG.info("found plugins path"); + } else { + LOG.warn("Couldn't find plugins from {}", pwd.toAbsolutePath()); + } Files.write(pipesConfig, json.getBytes(StandardCharsets.UTF_8)); return pipesConfig; } diff --git a/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/TikaPipesConfigTest.java b/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/TikaPipesConfigTest.java index 5a2ebf7d6..c2371de7e 100644 --- a/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/TikaPipesConfigTest.java +++ b/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/TikaPipesConfigTest.java @@ -87,7 +87,7 @@ public class TikaPipesConfigTest extends AbstractTikaConfigTest { public void testPipesIterator() throws Exception { PipesIterator it = PipesIterator.build(getConfigFilePath("pipes-iterator-config.xml")); - assertEquals("fs1", it.getFetcherId()); + assertEquals("fsf1", it.getFetcherId()); } @Test @@ -95,7 +95,7 @@ public class TikaPipesConfigTest extends AbstractTikaConfigTest { assertThrows(TikaConfigException.class, () -> { PipesIterator it = PipesIterator.build(getConfigFilePath("pipes-iterator-multiple-config.xml")); - assertEquals("fs1", it.getFetcherId()); + assertEquals("fsf1", it.getFetcherId()); }); } diff --git a/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/async/AsyncChaosMonkeyTest.java b/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/async/AsyncChaosMonkeyTest.java index 81ee817b3..bb476140b 100644 --- a/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/async/AsyncChaosMonkeyTest.java +++ b/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/async/AsyncChaosMonkeyTest.java @@ -18,10 +18,15 @@ package org.apache.tika.pipes.core.async; import static org.junit.jupiter.api.Assertions.assertEquals; +import java.io.BufferedReader; +import java.io.File; +import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.HashSet; +import java.util.List; import java.util.Random; import java.util.Set; @@ -35,13 +40,16 @@ import org.apache.tika.pipes.core.FetchEmitTuple; import org.apache.tika.pipes.core.PipesResult; import org.apache.tika.pipes.core.PluginsTestHelper; import org.apache.tika.pipes.core.emitter.EmitKey; -import org.apache.tika.pipes.core.emitter.MockEmitter; +import org.apache.tika.pipes.core.emitter.EmitterManager; import org.apache.tika.pipes.core.fetcher.FetchKey; import org.apache.tika.pipes.core.pipesiterator.PipesIterator; +import org.apache.tika.plugins.TikaPluginsManager; +import org.apache.tika.serialization.JsonMetadataList; public class AsyncChaosMonkeyTest { - String fetcherPluginId = "file-system-fetcher"; + String fetcherPluginId = "fsf"; + String emitterPluginId = "fse"; private final String OOM = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" + "<mock>" + "<throw class=\"java.lang.OutOfMemoryError\">oom message</throw>\n</mock>"; @@ -62,12 +70,8 @@ public class AsyncChaosMonkeyTest { private final int totalFiles = 100; - @TempDir private Path inputDir; - - @TempDir - private Path configDir; - + private Path outputDir; private Path pipesPluginsConfigPath; private int ok = 0; @@ -76,7 +80,13 @@ public class AsyncChaosMonkeyTest { private int crash = 0; - public Path setUp(boolean emitIntermediateResults) throws Exception { + public Path setUp(Path tmpDir, boolean emitIntermediateResults) throws Exception { + Path configDir = tmpDir.resolve("config"); + inputDir = tmpDir.resolve("input"); + outputDir = tmpDir.resolve("output"); + Files.createDirectories(configDir); + Files.createDirectories(inputDir); + Files.createDirectories(outputDir); ok = 0; oom = 0; timeouts = 0; @@ -116,9 +126,8 @@ public class AsyncChaosMonkeyTest { ok++; } } - MockEmitter.EMIT_DATA.clear(); MockReporter.RESULTS.clear(); - pipesPluginsConfigPath = PluginsTestHelper.getFileSystemFetcherConfig(configDir, inputDir, null); + pipesPluginsConfigPath = PluginsTestHelper.getFileSystemFetcherConfig(configDir, inputDir, outputDir); return tikaConfigPath; } @@ -135,13 +144,14 @@ public class AsyncChaosMonkeyTest { } */ + @Test - public void testBasic() throws Exception { - AsyncProcessor processor = new AsyncProcessor(setUp(false), pipesPluginsConfigPath); + public void testBasic(@TempDir Path tmpDir) throws Exception { + AsyncProcessor processor = new AsyncProcessor(setUp(tmpDir, false), pipesPluginsConfigPath); for (int i = 0; i < totalFiles; i++) { FetchEmitTuple t = new FetchEmitTuple("myId-" + i, new FetchKey(fetcherPluginId, i + ".xml"), - new EmitKey("mock-emitter", "emit-" + i), new Metadata()); + new EmitKey(emitterPluginId, "emit-" + i), new Metadata()); processor.offer(t, 1000); } for (int i = 0; i < 10; i++) { @@ -153,8 +163,8 @@ public class AsyncChaosMonkeyTest { } processor.close(); Set<String> emitKeys = new HashSet<>(); - for (EmitData d : MockEmitter.EMIT_DATA) { - emitKeys.add(d.getEmitKey()); + for (File f : outputDir.toFile().listFiles()) { + emitKeys.add(f.getName()); } assertEquals(ok, emitKeys.size()); assertEquals(100, MockReporter.RESULTS.size()); @@ -165,11 +175,11 @@ public class AsyncChaosMonkeyTest { } @Test - public void testEmitIntermediate() throws Exception { - AsyncProcessor processor = new AsyncProcessor(setUp(true), pipesPluginsConfigPath); + public void testEmitIntermediate(@TempDir Path tmpDir) throws Exception { + AsyncProcessor processor = new AsyncProcessor(setUp(tmpDir, true), pipesPluginsConfigPath); for (int i = 0; i < totalFiles; i++) { FetchEmitTuple t = new FetchEmitTuple("myId-" + i, new FetchKey(fetcherPluginId, i + ".xml"), - new EmitKey("mock-emitter", "emit-" + i), new Metadata()); + new EmitKey(emitterPluginId, "emit-" + i), new Metadata()); processor.offer(t, 1000); } for (int i = 0; i < 10; i++) { @@ -182,13 +192,17 @@ public class AsyncChaosMonkeyTest { processor.close(); Set<String> emitKeys = new HashSet<>(); int observedOOM = 0; - for (EmitData d : MockEmitter.EMIT_DATA) { - emitKeys.add(d.getEmitKey()); + for (File f : outputDir.toFile().listFiles()) { + emitKeys.add(f.getName()); + List<Metadata> metadataList; + try (BufferedReader reader = Files.newBufferedReader(f.toPath())) { + metadataList = JsonMetadataList.fromJson(reader); + } assertEquals(64, - d.getMetadataList().get(0).get("X-TIKA:digest:SHA-256").trim().length()); + metadataList.get(0).get("X-TIKA:digest:SHA-256").trim().length()); assertEquals("application/mock+xml", - d.getMetadataList().get(0).get(Metadata.CONTENT_TYPE)); - String val = d.getMetadataList().get(0).get(TikaCoreProperties.PIPES_RESULT); + metadataList.get(0).get(Metadata.CONTENT_TYPE)); + String val = metadataList.get(0).get(TikaCoreProperties.PIPES_RESULT); if ("OOM".equals(val)) { observedOOM++; } diff --git a/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/async/MockFetcher.java b/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/async/MockFetcher.java deleted file mode 100644 index 6d28a44d2..000000000 --- a/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/async/MockFetcher.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.pipes.core.async; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; - -import org.apache.tika.exception.TikaConfigException; -import org.apache.tika.exception.TikaException; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.pipes.api.fetcher.Fetcher; -import org.apache.tika.plugins.PluginConfig; - -public class MockFetcher implements Fetcher { - - private static final byte[] BYTES = ("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" + "<mock>" + - "<metadata action=\"add\" name=\"dc:creator\">Nikolai Lobachevsky</metadata>" + - "<write element=\"p\">main_content</write>" + "</mock>").getBytes(StandardCharsets.UTF_8); - - public MockFetcher() throws IOException { - super(); - } - - - @Override - public InputStream fetch(String fetchKey, Metadata metadata, ParseContext parseContext) throws TikaException, IOException { - return new ByteArrayInputStream(BYTES); - } - - @Override - public PluginConfig getPluginConfig() { - return null; - } -} diff --git a/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/emitter/MockEmitter.java b/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/emitter/MockEmitter.java deleted file mode 100644 index 290163743..000000000 --- a/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/emitter/MockEmitter.java +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.pipes.core.emitter; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ArrayBlockingQueue; - -import org.pf4j.Extension; - -import org.apache.tika.config.Initializable; -import org.apache.tika.config.InitializableProblemHandler; -import org.apache.tika.config.Param; -import org.apache.tika.exception.TikaConfigException; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.pipes.api.emitter.EmitData; -import org.apache.tika.pipes.api.emitter.Emitter; -import org.apache.tika.plugins.PluginConfig; - -public class MockEmitter implements Initializable, Emitter { - - public static ArrayBlockingQueue<EmitData> EMIT_DATA = new ArrayBlockingQueue<>(10000); - - public static List<EmitData> getData() { - return new ArrayList<>(EMIT_DATA); - } - - public MockEmitter() throws IOException { - } - - @Override - public PluginConfig getPluginConfig() { - return null; - } - - private static record MockEmitterConfig(boolean throwOnCheck) { - - } - - private MockEmitterConfig config = new MockEmitterConfig(true); - - @Override - public void initialize(Map<String, Param> params) throws TikaConfigException { - //no-op - } - - @Override - public void checkInitialization(InitializableProblemHandler problemHandler) - throws TikaConfigException { - - if (config.throwOnCheck()) { - throw new TikaConfigException("throw on check"); - } - - } - - - @Override - public void emit(String emitKey, List<Metadata> metadataList, ParseContext parseContext) - throws IOException, TikaEmitterException { - emit( - Collections.singletonList(new EmitDataImpl(emitKey, - metadataList, null, parseContext))); - } - @Override - public void emit(List<? extends EmitData> emitData) throws IOException, TikaEmitterException { - - for (EmitData d : emitData) { - EMIT_DATA.offer(d); - } - } -} diff --git a/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/fetcher/MockFetcher.java b/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/fetcher/MockFetcher.java deleted file mode 100644 index b9a94de05..000000000 --- a/tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/fetcher/MockFetcher.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.pipes.core.fetcher; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.util.Map; - -import org.apache.tika.config.Field; -import org.apache.tika.config.Param; -import org.apache.tika.exception.TikaConfigException; -import org.apache.tika.exception.TikaException; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.pipes.api.fetcher.Fetcher; -import org.apache.tika.plugins.PluginConfig; - -public class MockFetcher implements Fetcher { - - private Map<String, Param> params; - - @Field - private String byteString = null; - - @Field - private boolean throwOnCheck = false; - - public MockFetcher() throws IOException { - super(); - } - - - public void setThrowOnCheck(boolean throwOnCheck) { - this.throwOnCheck = throwOnCheck; - } - - public void setByteString(String byteString) { - this.byteString = byteString; - } - - - @Override - public InputStream fetch(String fetchKey, Metadata metadata, ParseContext parseContext) throws TikaException, IOException { - return byteString == null ? new ByteArrayInputStream(new byte[0]) : - new ByteArrayInputStream(byteString.getBytes(StandardCharsets.UTF_8)); - } - - @Override - public PluginConfig getPluginConfig() { - return null; - } -} diff --git a/tika-pipes/tika-pipes-core-tests/src/test/resources/configs/fetchers-emitters.json b/tika-pipes/tika-pipes-core-tests/src/test/resources/configs/fetchers-emitters.json index 5cd5e89ce..b8f08b354 100644 --- a/tika-pipes/tika-pipes-core-tests/src/test/resources/configs/fetchers-emitters.json +++ b/tika-pipes/tika-pipes-core-tests/src/test/resources/configs/fetchers-emitters.json @@ -2,8 +2,7 @@ "plugins": { "fetchers": { "fsf": { - "factoryPluginId": "file-system-fetcher", - "config": { + "file-system-fetcher" : { "basePath": "FETCHERS_BASE_PATH", "extractFileSystemMetadata": false } @@ -11,12 +10,13 @@ }, "emitters": { "fse": { - "factoryPluginId": "file-system-emitter", - "config": { + "file-system-emitter" : { "basePath": "EMITTERS_BASE_PATH", - "fileExtension": "json" + "fileExtension": "json", + "onExists": "EXCEPTION" } } } - } + }, + "pluginsPaths": "PLUGINS_PATHS" } \ No newline at end of file diff --git a/tika-pipes/tika-pipes-core-tests/src/test/resources/org/apache/tika/config/pipes-iterator-config.xml b/tika-pipes/tika-pipes-core-tests/src/test/resources/org/apache/tika/config/pipes-iterator-config.xml index 902d7517e..b613ca631 100644 --- a/tika-pipes/tika-pipes-core-tests/src/test/resources/org/apache/tika/config/pipes-iterator-config.xml +++ b/tika-pipes/tika-pipes-core-tests/src/test/resources/org/apache/tika/config/pipes-iterator-config.xml @@ -18,7 +18,8 @@ <properties> <pipesIterator class="org.apache.tika.pipes.pipesiterator.fs.FileSystemPipesIterator"> <params> - <fetcherName>fs1</fetcherName> + <fetcherId>fsf1</fetcherId> + <emitterId>fse1</emitterId> <basePath>/my/base/path1</basePath> </params> </pipesIterator> diff --git a/tika-pipes/tika-pipes-core-tests/src/test/resources/org/apache/tika/config/pipes-iterator-multiple-config.xml b/tika-pipes/tika-pipes-core-tests/src/test/resources/org/apache/tika/config/pipes-iterator-multiple-config.xml index eaab6138b..7e6631035 100644 --- a/tika-pipes/tika-pipes-core-tests/src/test/resources/org/apache/tika/config/pipes-iterator-multiple-config.xml +++ b/tika-pipes/tika-pipes-core-tests/src/test/resources/org/apache/tika/config/pipes-iterator-multiple-config.xml @@ -18,14 +18,16 @@ <properties> <pipesIterator class="org.apache.tika.pipes.pipesiterator.fs.FileSystemPipesIterator"> <params> - <fetcherName>fs1</fetcherName> <basePath>/my/base/path1</basePath> + <emitterId>fse1</emitterId> + <fetcherId>fsf1</fetcherId> </params> </pipesIterator> <pipesIterator class="org.apache.tika.pipes.pipesiterator.fs.FileSystemPipesIterator"> <params> - <fetcherName>fs2</fetcherName> <basePath>/my/base/path2</basePath> + <emitterId>fse2</emitterId> + <fetcherId>fsf2</fetcherId> </params> </pipesIterator> </properties> diff --git a/tika-pipes/tika-pipes-core/pom.xml b/tika-pipes/tika-pipes-core/pom.xml index fa0dea242..9d81e77a2 100644 --- a/tika-pipes/tika-pipes-core/pom.xml +++ b/tika-pipes/tika-pipes-core/pom.xml @@ -37,6 +37,11 @@ <artifactId>tika-pipes-api</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>${project.groupId}</groupId> + <artifactId>tika-plugins-core</artifactId> + <version>${project.version}</version> + </dependency> <dependency> <groupId>org.pf4j</groupId> <artifactId>pf4j</artifactId> diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesPluginsConfig.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesPluginsConfig.java deleted file mode 100644 index ec491cf19..000000000 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesPluginsConfig.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.pipes.core; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.nio.charset.StandardCharsets; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Iterator; -import java.util.Optional; -import java.util.Set; - -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; - -import org.apache.tika.plugins.PluginConfig; -import org.apache.tika.plugins.PluginConfigs; - - -public class PipesPluginsConfig { - - public static PipesPluginsConfig load(InputStream is) throws IOException { - JsonNode root = new ObjectMapper().readTree(new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))); - PluginConfigs fetchers = null; - PluginConfigs emitters = null; - PluginConfigs iterators = null; - PluginConfigs reporters = null; - - JsonNode plugins = root.get("plugins"); - if (plugins.has("fetchers")) { - fetchers = load(plugins.get("fetchers")); - } - if (plugins.has("emitters")) { - emitters = load(plugins.get("emitters")); - } - if (plugins.has("iterators")) { - iterators = load(plugins.get("iterators")); - } - if (plugins.has("reporters")) { - reporters = load(plugins.get("reporters")); - } - - Path pluginsDir = null; - if (plugins.has("pf4j.pluginsDir")) { - pluginsDir = Paths.get(plugins.get("pf4j.pluginsDir").asText()); - } - return new PipesPluginsConfig(fetchers, emitters, iterators, reporters, pluginsDir); - } - - private static PluginConfigs load(JsonNode pluginsNode) { - PluginConfigs manager = new PluginConfigs(); - Iterator<String> it = pluginsNode.fieldNames(); - manager = new PluginConfigs(); - while (it.hasNext()) { - String id = it.next(); - JsonNode configNode = pluginsNode.get(id); - String pluginId = configNode.get("factoryPluginId").asText(); - JsonNode config = configNode.get("config"); - manager.add(new PluginConfig(id, pluginId, config.toString())); - } - return manager; - } - - private final PluginConfigs fetchers; - private final PluginConfigs emitters; - private final PluginConfigs iterators; - private final PluginConfigs reporters; - - - private final Path pluginsDir; - - public PipesPluginsConfig(PluginConfigs fetchers, PluginConfigs emitters, - PluginConfigs iterators, PluginConfigs reporters, Path pluginsDir) { - this.fetchers = fetchers; - this.emitters = emitters; - this.iterators = iterators; - this.reporters = reporters; - this.pluginsDir = pluginsDir; - } - - - public PluginConfigs getFetcherConfig() { - return fetchers; - } - - public PluginConfigs getEmitterConfig() { - return emitters; - } - - public Optional<PluginConfig> getFetcherConfig(String id) { - if (fetchers == null) { - throw new IllegalArgumentException("fetchers element was not loaded"); - } - return fetchers.get(id); - } - - public Optional<PluginConfig> getEmitterConfig(String id) { - if (emitters == null) { - throw new IllegalArgumentException("emitters element was not loaded"); - } - return emitters.get(id); - } - - public Optional<PluginConfig> getIteratorConfig(String pluginId) { - if (iterators == null) { - throw new IllegalArgumentException("iterators element was not loaded"); - } - return iterators.get(pluginId); - } - - public Optional<PluginConfig> getReporterConfig(String pluginId) { - if (reporters == null) { - throw new IllegalArgumentException("reporters element was not loaded"); - } - return reporters.get(pluginId); - } - - public Optional<Path> getPluginsDir() { - return Optional.ofNullable(pluginsDir); - } -} diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesServer.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesServer.java index 747e39983..e448d1984 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesServer.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesServer.java @@ -76,6 +76,7 @@ import org.apache.tika.pipes.core.extractor.BasicEmbeddedDocumentBytesHandler; import org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.pipes.core.extractor.EmittingEmbeddedDocumentBytesHandler; import org.apache.tika.pipes.core.fetcher.FetcherManager; +import org.apache.tika.plugins.TikaPluginsManager; import org.apache.tika.sax.BasicContentHandlerFactory; import org.apache.tika.sax.ContentHandlerFactory; import org.apache.tika.sax.RecursiveParserWrapperHandler; @@ -844,15 +845,17 @@ public class PipesServer implements Runnable { } protected void initializeResources() throws TikaException, IOException, SAXException { - PipesPluginsConfig pipesPluginsConfig = PipesPluginsConfig.load(Files.newInputStream(pipesConfigPath)); + TikaPluginsManager tikaPluginsManager = TikaPluginsManager.load( + Files.newInputStream(pipesConfigPath), TikaPluginsManager.PLUGIN_TYPES.FETCHERS, + TikaPluginsManager.PLUGIN_TYPES.EMITTERS); //TODO allowed named configurations in tika config this.tikaConfig = new TikaConfig(tikaConfigPath); - this.fetcherManager = FetcherManager.load(pipesConfigPath); + this.fetcherManager = FetcherManager.load(tikaPluginsManager); //skip initialization of the emitters if emitting //from the pipesserver is turned off. if (maxForEmitBatchBytes > -1) { - this.emitterManager = EmitterManager.load(pipesConfigPath); + this.emitterManager = EmitterManager.load(tikaPluginsManager); } else { LOG.debug("'maxForEmitBatchBytes' < 0. Not initializing emitters in PipesServer"); this.emitterManager = null; diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/emitter/EmitterManager.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/emitter/EmitterManager.java index e2686bda0..3942c2960 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/emitter/EmitterManager.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/emitter/EmitterManager.java @@ -21,6 +21,7 @@ import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; @@ -28,14 +29,16 @@ import java.util.concurrent.ConcurrentHashMap; import org.pf4j.DefaultPluginManager; import org.pf4j.PluginManager; +import org.pf4j.PluginWrapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.pipes.api.emitter.Emitter; import org.apache.tika.pipes.api.emitter.EmitterFactory; -import org.apache.tika.pipes.core.PipesPluginsConfig; import org.apache.tika.plugins.PluginConfig; +import org.apache.tika.plugins.PluginConfigs; +import org.apache.tika.plugins.TikaPluginsManager; /** * Utility class that will apply the appropriate emitter @@ -47,7 +50,6 @@ public class EmitterManager { private static final Logger LOG = LoggerFactory.getLogger(EmitterManager.class); - private final Map<String, Emitter> emitterMap = new ConcurrentHashMap<>(); public static EmitterManager load(Path path) throws IOException, TikaConfigException { @@ -56,11 +58,22 @@ public class EmitterManager { } } - public static EmitterManager load(InputStream pipesPluginsConfigIs) throws IOException, TikaConfigException { - PipesPluginsConfig pluginsConfig = PipesPluginsConfig.load(pipesPluginsConfigIs); + public static EmitterManager load(InputStream is) throws IOException, TikaConfigException { + //this will throw a TikaConfigException if "emitters" is not loaded + TikaPluginsManager tikaPluginsManager = TikaPluginsManager.load(is, TikaPluginsManager.PLUGIN_TYPES.EMITTERS); + return load(tikaPluginsManager); + } + + public static EmitterManager load(TikaPluginsManager tikaPluginsManager) throws IOException, TikaConfigException { + Optional<PluginConfigs> emitterPluginConfigsOpt = tikaPluginsManager.get(TikaPluginsManager.PLUGIN_TYPES.EMITTERS); + if (emitterPluginConfigsOpt.isEmpty()) { + throw new TikaConfigException("Forgot to load 'fetchers'?"); + } + PluginConfigs emitterPluginConfigs = emitterPluginConfigsOpt.get(); + PluginManager pluginManager = null; - if (pluginsConfig.getPluginsDir().isPresent()) { - pluginManager = new DefaultPluginManager(pluginsConfig.getPluginsDir().get()); + if (! tikaPluginsManager.getPluginsPaths().isEmpty()) { + pluginManager = new DefaultPluginManager(tikaPluginsManager.getPluginsPaths()); } else { pluginManager = new DefaultPluginManager(); } @@ -68,10 +81,21 @@ public class EmitterManager { pluginManager.startPlugins(); Map<String, Emitter> emitterMap = new HashMap<>(); for (EmitterFactory emitterFactory : pluginManager.getExtensions(EmitterFactory.class)) { - for (String id : pluginsConfig.getEmitterConfig().ids()) { - Optional<PluginConfig> pluginConfigOpt = pluginsConfig.getFetcherConfig(id); + LOG.warn("Pf4j loaded plugin: " + emitterFactory.getClass()); + PluginWrapper pluginWrapper = pluginManager.whichPlugin(emitterFactory.getClass()); + if (pluginWrapper == null) { + LOG.warn("Couldn't find plugin wrapper for class={}", emitterFactory.getClass()); + continue; + } + String pluginId = pluginManager.whichPlugin(emitterFactory.getClass()).getPluginId(); + Set<String> ids = emitterPluginConfigs.getIdsByPluginId(pluginId); + if (ids.isEmpty()) { + LOG.warn("Couldn't find config for class={} pluginId={}. Skipping", emitterFactory.getClass(), pluginId); + } + for (String id : ids) { + Optional<PluginConfig> pluginConfigOpt = emitterPluginConfigs.getById(id); if (pluginConfigOpt.isEmpty()) { - LOG.warn("Couldn't find config for id={}", id); + LOG.warn("Couldn't find config for id={}. Skipping", id); } else { PluginConfig pluginConfig = pluginConfigOpt.get(); Emitter emitter = emitterFactory.buildPlugin(pluginConfig); diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java index fae86dbf0..731d4057b 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/FetcherManager.java @@ -26,19 +26,20 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import com.fasterxml.jackson.databind.JsonNode; import org.pf4j.DefaultPluginManager; import org.pf4j.PluginManager; +import org.pf4j.PluginWrapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.tika.config.Initializable; -import org.apache.tika.config.InitializableProblemHandler; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.exception.TikaException; import org.apache.tika.pipes.api.fetcher.Fetcher; import org.apache.tika.pipes.api.fetcher.FetcherFactory; -import org.apache.tika.pipes.core.PipesPluginsConfig; import org.apache.tika.plugins.PluginConfig; +import org.apache.tika.plugins.PluginConfigs; +import org.apache.tika.plugins.TikaPluginsManager; /** * Utility class to hold multiple fetchers. @@ -55,11 +56,21 @@ public class FetcherManager { } } - public static FetcherManager load(InputStream pipesPluginsConfigIs) throws IOException, TikaConfigException { - PipesPluginsConfig pluginsConfig = PipesPluginsConfig.load(pipesPluginsConfigIs); + public static FetcherManager load(InputStream is) throws IOException, TikaConfigException { + //this will throw a TikaConfigException if "fetchers" is not loaded + TikaPluginsManager tikaPluginsManager = TikaPluginsManager.load(is, TikaPluginsManager.PLUGIN_TYPES.FETCHERS); + return load(tikaPluginsManager); + } + + public static FetcherManager load(TikaPluginsManager tikaPluginsManager) throws IOException, TikaConfigException { + Optional<PluginConfigs> fetcherPluginConfigsOpt = tikaPluginsManager.get(TikaPluginsManager.PLUGIN_TYPES.FETCHERS); + if (fetcherPluginConfigsOpt.isEmpty()) { + throw new TikaConfigException("Forgot to load 'fetchers'?"); + } + PluginConfigs fetcherPluginConfigs = fetcherPluginConfigsOpt.get(); PluginManager pluginManager = null; - if (pluginsConfig.getPluginsDir().isPresent()) { - pluginManager = new DefaultPluginManager(pluginsConfig.getPluginsDir().get()); + if (! tikaPluginsManager.getPluginsPaths().isEmpty()) { + pluginManager = new DefaultPluginManager(tikaPluginsManager.getPluginsPaths()); } else { pluginManager = new DefaultPluginManager(); } @@ -67,8 +78,19 @@ public class FetcherManager { pluginManager.startPlugins(); Map<String, Fetcher> fetcherMap = new HashMap<>(); for (FetcherFactory fetcherFactory : pluginManager.getExtensions(FetcherFactory.class)) { - for (String id : pluginsConfig.getFetcherConfig().ids()) { - Optional<PluginConfig> pluginConfigOpt = pluginsConfig.getFetcherConfig(id); + LOG.warn("Pf4j loaded plugin: " + fetcherFactory.getClass()); + PluginWrapper pluginWrapper = pluginManager.whichPlugin(fetcherFactory.getClass()); + if (pluginWrapper == null) { + LOG.warn("Couldn't find plugin wrapper for class={}", fetcherFactory.getClass()); + continue; + } + String pluginId = pluginWrapper.getPluginId(); + Set<String> ids = fetcherPluginConfigs.getIdsByPluginId(pluginId); + if (ids.isEmpty()) { + LOG.warn("Couldn't find config for class={} pluginId={}. Skipping", fetcherFactory.getClass(), pluginId); + } + for (String id : ids) { + Optional<PluginConfig> pluginConfigOpt = fetcherPluginConfigs.getById(id); if (pluginConfigOpt.isEmpty()) { LOG.warn("Couldn't find config for id={}", id); } else { diff --git a/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/PipesPluginsConfigTest.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/PipesPluginsConfigTest.java deleted file mode 100644 index 32e494a0a..000000000 --- a/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/PipesPluginsConfigTest.java +++ /dev/null @@ -1,28 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * <p> - * http://www.apache.org/licenses/LICENSE-2.0 - * <p> - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.pipes.core; - -import org.junit.jupiter.api.Test; - -public class PipesPluginsConfigTest { - - @Test - public void testBasic() throws Exception { - PipesPluginsConfig pipesPluginsConfig = - PipesPluginsConfig.load(PipesPluginsConfigTest.class.getResourceAsStream("/configs/fetchers.json")); - } -} diff --git a/tika-pipes/tika-pipes-api/pom.xml b/tika-plugins-core/pom.xml similarity index 80% copy from tika-pipes/tika-pipes-api/pom.xml copy to tika-plugins-core/pom.xml index f852b8346..d8c7f7b0a 100644 --- a/tika-pipes/tika-pipes-api/pom.xml +++ b/tika-plugins-core/pom.xml @@ -18,32 +18,32 @@ under the License. --> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> <parent> <groupId>org.apache.tika</groupId> - <artifactId>tika-pipes</artifactId> + <artifactId>tika-parent</artifactId> <version>4.0.0-SNAPSHOT</version> - <relativePath>../pom.xml</relativePath> + <relativePath>../tika-parent/pom.xml</relativePath> </parent> - <modelVersion>4.0.0</modelVersion> - - <artifactId>tika-pipes-api</artifactId> - - <name>Apache Tika pipes api</name> - <url>https://tika.apache.org/</url> + <artifactId>tika-plugins-core</artifactId> + <name>Apache Tika plugins core</name> + <url>https://tika.apache.org</url> <dependencies> - <dependency> - <groupId>org.pf4j</groupId> - <artifactId>pf4j</artifactId> - <scope>provided</scope> - </dependency> - <dependency> <groupId>${project.groupId}</groupId> <artifactId>tika-core</artifactId> <version>${project.version}</version> <scope>provided</scope> </dependency> + <dependency> + <groupId>org.pf4j</groupId> + <artifactId>pf4j</artifactId> + </dependency> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> + </dependency> </dependencies> <build> <plugins> @@ -53,7 +53,7 @@ <configuration> <archive> <manifestEntries> - <Automatic-Module-Name>org.apache.tika.pipes.api</Automatic-Module-Name> + <Automatic-Module-Name>org.apache.tika.plugins</Automatic-Module-Name> </manifestEntries> </archive> </configuration> diff --git a/tika-core/src/main/java/org/apache/tika/plugins/AbstractTikaPlugin.java b/tika-plugins-core/src/main/java/org/apache/tika/plugins/AbstractTikaPlugin.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/plugins/AbstractTikaPlugin.java rename to tika-plugins-core/src/main/java/org/apache/tika/plugins/AbstractTikaPlugin.java diff --git a/tika-core/src/main/java/org/apache/tika/plugins/PluginConfig.java b/tika-plugins-core/src/main/java/org/apache/tika/plugins/PluginConfig.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/plugins/PluginConfig.java rename to tika-plugins-core/src/main/java/org/apache/tika/plugins/PluginConfig.java diff --git a/tika-core/src/main/java/org/apache/tika/plugins/PluginConfigs.java b/tika-plugins-core/src/main/java/org/apache/tika/plugins/PluginConfigs.java similarity index 61% rename from tika-core/src/main/java/org/apache/tika/plugins/PluginConfigs.java rename to tika-plugins-core/src/main/java/org/apache/tika/plugins/PluginConfigs.java index d73b2302c..9d93021bf 100644 --- a/tika-core/src/main/java/org/apache/tika/plugins/PluginConfigs.java +++ b/tika-plugins-core/src/main/java/org/apache/tika/plugins/PluginConfigs.java @@ -16,36 +16,46 @@ */ package org.apache.tika.plugins; +import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; public class PluginConfigs { - Map<String, PluginConfig> pluginConfigs = new HashMap<>(); + Map<String, PluginConfig> pluginConfigsById = new HashMap<>(); + Map<String, Set<String>> pluginIdsToIds = new HashMap<>(); public PluginConfigs() { } public PluginConfigs(Map<String, PluginConfig> map) { - pluginConfigs.putAll(map); + for (PluginConfig c : map.values()) { + add(c); + } } public void add(PluginConfig pluginConfig) { - if (pluginConfigs.containsKey(pluginConfig.id())) { + if (pluginConfigsById.containsKey(pluginConfig.id())) { throw new IllegalArgumentException("Can't overwrite existing plugin for id: " + pluginConfig.factoryPluginId()); } - pluginConfigs.put(pluginConfig.factoryPluginId(), pluginConfig); + pluginConfigsById.put(pluginConfig.id(), pluginConfig); + pluginIdsToIds.computeIfAbsent(pluginConfig.factoryPluginId(), k -> new HashSet<>()).add(pluginConfig.id()); } - public Optional<PluginConfig> get(String id) { - return Optional.ofNullable(pluginConfigs.get(id)); + public Optional<PluginConfig> getById(String id) { + return Optional.ofNullable(pluginConfigsById.get(id)); } public Set<String> ids() { - return pluginConfigs.keySet(); + return pluginConfigsById.keySet(); } + public Set<String> getIdsByPluginId(String pluginId) { + return pluginIdsToIds.getOrDefault(pluginId, Set.of()); + } } diff --git a/tika-core/src/main/java/org/apache/tika/plugins/TikaPlugin.java b/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPlugin.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/plugins/TikaPlugin.java rename to tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPlugin.java diff --git a/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPluginFactory.java b/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPluginFactory.java new file mode 100644 index 000000000..85fab8c71 --- /dev/null +++ b/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPluginFactory.java @@ -0,0 +1,12 @@ +package org.apache.tika.plugins; + +import java.io.IOException; + +import org.pf4j.ExtensionPoint; + +import org.apache.tika.exception.TikaConfigException; + +public interface TikaPluginFactory<T extends TikaPlugin> extends ExtensionPoint { + + T buildPlugin(PluginConfig pluginConfig) throws IOException, TikaConfigException; +} diff --git a/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPluginsManager.java b/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPluginsManager.java new file mode 100644 index 000000000..4694edd02 --- /dev/null +++ b/tika-plugins-core/src/main/java/org/apache/tika/plugins/TikaPluginsManager.java @@ -0,0 +1,133 @@ +package org.apache.tika.plugins; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Optional; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +import org.apache.tika.exception.TikaConfigException; + +public class TikaPluginsManager { + + public enum PLUGIN_TYPES { + FETCHERS, + EMITTERS + } + + public static JsonNode loadRoot(InputStream is) throws IOException, TikaConfigException { + return new ObjectMapper().readTree(new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))); + } + + public static TikaPluginsManager load(InputStream is, String ... items) throws IOException, TikaConfigException { + JsonNode root = loadRoot(is); + JsonNode plugins = root.get("plugins"); + if (plugins == null) { + throw new TikaConfigException("Couldn't find 'plugins' node"); + } + + Map<String, PluginConfigs> pluginConfigsMap = new HashMap<>(); + for (String item : items) { + pluginConfigsMap.put(item, loadPluginConfigs(item, plugins)); + } + List<String> pluginsPaths = new ArrayList<>(); + if (root.has("pluginsPaths")) { + JsonNode pluginsPathsNode = root.get("pluginsPaths"); + if (pluginsPathsNode.isArray()) { + Iterator<JsonNode> elements = pluginsPathsNode.elements(); + while (elements.hasNext()) { + JsonNode n = elements.next(); + pluginsPaths.add(n.asText()); + } + } else { + pluginsPaths.add(pluginsPathsNode.asText()); + } + } + return new TikaPluginsManager(pluginConfigsMap, pluginsPaths); + } + + public static TikaPluginsManager load(InputStream is, PLUGIN_TYPES ... types) throws IOException, TikaConfigException { + String[] args = new String[types.length]; + for (int i = 0; i < types.length; i++) { + args[i] = types[i].name().toLowerCase(Locale.ROOT); + } + return load(is, args); + } + + public static PluginConfigs loadPluginConfigs(PLUGIN_TYPES type, JsonNode plugins) throws TikaConfigException { + return loadPluginConfigs(type.name().toLowerCase(Locale.ROOT), plugins); + } + + public static PluginConfigs loadPluginConfigs(String item, JsonNode plugins) throws TikaConfigException { + JsonNode itemNode = plugins.get(item); + if (itemNode == null) { + throw new TikaConfigException("Couldn't find " + item + " under 'plugins'"); + } + PluginConfigs pluginConfigs = new PluginConfigs(); + int configs = 0; + for (Iterator<String> it = itemNode.fieldNames(); it.hasNext(); ) { + String id = it.next(); + JsonNode pluginNode = itemNode.get(id); + if (pluginNode == null) { + throw new TikaConfigException("Couldn't find node for item=" + item + " id=" + id); + } + int cnt = 0; + for (Iterator<String> pluginIds = pluginNode.fieldNames(); pluginIds.hasNext(); ){ + String pluginId = pluginIds.next(); + if (++cnt > 1) { + throw new TikaConfigException("Can only have one pluginId per id: id= " + id + " pluginId=" + pluginId); + } + JsonNode pluginConfigNode = pluginNode.get(pluginId); + PluginConfig pluginConfig = new PluginConfig(id, pluginId, pluginConfigNode.toString()); + pluginConfigs.add(pluginConfig); + } + if (cnt == 0) { + throw new TikaConfigException("need to have at least one plugin node for " + id); + } + configs++; + } + if (configs == 0) { + throw new TikaConfigException("Couldn't find any items for item=" + item); + } + return pluginConfigs; + } + + private final List<String> pluginsPaths; + private final Map<String, PluginConfigs> pluginConfigsMap; + + + public TikaPluginsManager(Map<String, PluginConfigs> pluginConfigsMap, List<String> pluginsPaths) { + this.pluginConfigsMap = pluginConfigsMap; + this.pluginsPaths = pluginsPaths; + } + + public Optional<PluginConfigs> get(PLUGIN_TYPES pluginType) { + return get(pluginType.name().toLowerCase(Locale.ROOT)); + } + + private Optional<PluginConfigs> get(String lowerCase) { + return Optional.ofNullable(pluginConfigsMap.get(lowerCase)); + } + + public List<Path> getPluginsPaths() { + List<Path> ret = new ArrayList<>(); + for (String p : pluginsPaths) { + ret.add(Paths.get(p)); + } + return ret; + } + + +} diff --git a/tika-plugins-core/src/test/java/org/apache/tika/plugins/TikaPluginsManagerTest.java b/tika-plugins-core/src/test/java/org/apache/tika/plugins/TikaPluginsManagerTest.java new file mode 100644 index 000000000..542b4aa0c --- /dev/null +++ b/tika-plugins-core/src/test/java/org/apache/tika/plugins/TikaPluginsManagerTest.java @@ -0,0 +1,69 @@ +package org.apache.tika.plugins; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.nio.file.Path; +import java.util.List; +import java.util.Optional; + +import org.junit.jupiter.api.Test; + +import org.apache.tika.exception.TikaConfigException; + +public class TikaPluginsManagerTest { + + @Test + public void testBasic() throws Exception { + TikaPluginsManager manager = TikaPluginsManager.load(TikaPluginsManagerTest.class.getResourceAsStream("/test1.json"), + TikaPluginsManager.PLUGIN_TYPES.FETCHERS); + Optional<PluginConfigs> pluginConfigsOpt = manager.get(TikaPluginsManager.PLUGIN_TYPES.FETCHERS); + assertTrue(pluginConfigsOpt.isPresent()); + PluginConfigs pluginConfigs = pluginConfigsOpt.get(); + Optional<PluginConfig> pluginConfigOpt = pluginConfigs.getById("fsf"); + assertTrue(pluginConfigOpt.isPresent()); + PluginConfig pluginConfig = pluginConfigOpt.get(); + assertEquals("file-system-fetcher", pluginConfig.factoryPluginId()); + } + + @Test + public void testPath() throws Exception { + TikaPluginsManager manager = TikaPluginsManager.load(TikaPluginsManagerTest.class.getResourceAsStream("/test2.json"), + TikaPluginsManager.PLUGIN_TYPES.FETCHERS); + List<Path> paths = manager.getPluginsPaths(); + assertEquals(1, paths.size()); + assertEquals("path1", paths.get(0).getFileName().toString()); + } + + @Test + public void testPaths() throws Exception { + TikaPluginsManager manager = TikaPluginsManager.load(TikaPluginsManagerTest.class.getResourceAsStream("/test3.json"), + TikaPluginsManager.PLUGIN_TYPES.FETCHERS); + List<Path> paths = manager.getPluginsPaths(); + assertEquals(3, paths.size()); + assertEquals("path1", paths.get(0).getFileName().toString()); + assertEquals("path2", paths.get(1).getFileName().toString()); + assertEquals("path3", paths.get(2).getFileName().toString()); + } + + @Test + public void testMissingItem() throws Exception { + assertThrows(TikaConfigException.class, () -> + TikaPluginsManager.load(TikaPluginsManagerTest.class.getResourceAsStream("/test1.json"), + "qwerty") + ); + assertThrows(TikaConfigException.class, () -> + TikaPluginsManager.load(TikaPluginsManagerTest.class.getResourceAsStream("/testEmpty.json"), + TikaPluginsManager.PLUGIN_TYPES.FETCHERS) + ); + assertThrows(TikaConfigException.class, () -> + TikaPluginsManager.load(TikaPluginsManagerTest.class.getResourceAsStream("/testEmpty2.json"), + TikaPluginsManager.PLUGIN_TYPES.FETCHERS) + ); + assertThrows(TikaConfigException.class, () -> + TikaPluginsManager.load(TikaPluginsManagerTest.class.getResourceAsStream("/testNoPluginConfig.json"), + TikaPluginsManager.PLUGIN_TYPES.FETCHERS) + ); + } +} diff --git a/tika-pipes/tika-pipes-core-tests/src/test/resources/configs/fetchers-emitters.json b/tika-plugins-core/src/test/resources/test1.json similarity index 68% copy from tika-pipes/tika-pipes-core-tests/src/test/resources/configs/fetchers-emitters.json copy to tika-plugins-core/src/test/resources/test1.json index 5cd5e89ce..3c162d3d2 100644 --- a/tika-pipes/tika-pipes-core-tests/src/test/resources/configs/fetchers-emitters.json +++ b/tika-plugins-core/src/test/resources/test1.json @@ -2,8 +2,7 @@ "plugins": { "fetchers": { "fsf": { - "factoryPluginId": "file-system-fetcher", - "config": { + "file-system-fetcher": { "basePath": "FETCHERS_BASE_PATH", "extractFileSystemMetadata": false } @@ -11,8 +10,7 @@ }, "emitters": { "fse": { - "factoryPluginId": "file-system-emitter", - "config": { + "file-system-emitter" : { "basePath": "EMITTERS_BASE_PATH", "fileExtension": "json" } diff --git a/tika-pipes/tika-pipes-core-tests/src/test/resources/configs/fetchers-emitters.json b/tika-plugins-core/src/test/resources/test2.json similarity index 67% copy from tika-pipes/tika-pipes-core-tests/src/test/resources/configs/fetchers-emitters.json copy to tika-plugins-core/src/test/resources/test2.json index 5cd5e89ce..935a76239 100644 --- a/tika-pipes/tika-pipes-core-tests/src/test/resources/configs/fetchers-emitters.json +++ b/tika-plugins-core/src/test/resources/test2.json @@ -2,8 +2,7 @@ "plugins": { "fetchers": { "fsf": { - "factoryPluginId": "file-system-fetcher", - "config": { + "file-system-fetcher": { "basePath": "FETCHERS_BASE_PATH", "extractFileSystemMetadata": false } @@ -11,12 +10,12 @@ }, "emitters": { "fse": { - "factoryPluginId": "file-system-emitter", - "config": { + "file-system-emitter" : { "basePath": "EMITTERS_BASE_PATH", "fileExtension": "json" } } } - } + }, + "pluginsPaths": "path1" } \ No newline at end of file diff --git a/tika-pipes/tika-pipes-core-tests/src/test/resources/configs/fetchers-emitters.json b/tika-plugins-core/src/test/resources/test3.json similarity index 67% copy from tika-pipes/tika-pipes-core-tests/src/test/resources/configs/fetchers-emitters.json copy to tika-plugins-core/src/test/resources/test3.json index 5cd5e89ce..29f6b2939 100644 --- a/tika-pipes/tika-pipes-core-tests/src/test/resources/configs/fetchers-emitters.json +++ b/tika-plugins-core/src/test/resources/test3.json @@ -2,8 +2,7 @@ "plugins": { "fetchers": { "fsf": { - "factoryPluginId": "file-system-fetcher", - "config": { + "file-system-fetcher": { "basePath": "FETCHERS_BASE_PATH", "extractFileSystemMetadata": false } @@ -11,12 +10,12 @@ }, "emitters": { "fse": { - "factoryPluginId": "file-system-emitter", - "config": { + "file-system-emitter" : { "basePath": "EMITTERS_BASE_PATH", "fileExtension": "json" } } } - } + }, + "pluginsPaths": ["path1", "path2", "path3"] } \ No newline at end of file diff --git a/tika-plugins-core/src/test/resources/testEmpty.json b/tika-plugins-core/src/test/resources/testEmpty.json new file mode 100644 index 000000000..61d31259c --- /dev/null +++ b/tika-plugins-core/src/test/resources/testEmpty.json @@ -0,0 +1,12 @@ +{ + "plugins": { + "emitters": { + "fse": { + "file-system-emitter" : { + "basePath": "EMITTERS_BASE_PATH", + "fileExtension": "json" + } + } + } + } +} \ No newline at end of file diff --git a/tika-plugins-core/src/test/resources/testEmpty2.json b/tika-plugins-core/src/test/resources/testEmpty2.json new file mode 100644 index 000000000..b1a5dd495 --- /dev/null +++ b/tika-plugins-core/src/test/resources/testEmpty2.json @@ -0,0 +1,14 @@ +{ + "plugins": { + "fetchers": { + }, + "emitters": { + "fse": { + "file-system-emitter" : { + "basePath": "EMITTERS_BASE_PATH", + "fileExtension": "json" + } + } + } + } +} \ No newline at end of file diff --git a/tika-plugins-core/src/test/resources/testNoPluginConfig.json b/tika-plugins-core/src/test/resources/testNoPluginConfig.json new file mode 100644 index 000000000..f6d9aea28 --- /dev/null +++ b/tika-plugins-core/src/test/resources/testNoPluginConfig.json @@ -0,0 +1,15 @@ +{ + "plugins": { + "fetchers": { + "fsf": {} + }, + "emitters": { + "fse": { + "file-system-emitter" : { + "basePath": "EMITTERS_BASE_PATH", + "fileExtension": "json" + } + } + } + } +} \ No newline at end of file diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/PluginConfigLoader.java b/tika-serialization/src/main/java/org/apache/tika/serialization/PluginConfigLoader.java deleted file mode 100644 index 495ef5115..000000000 --- a/tika-serialization/src/main/java/org/apache/tika/serialization/PluginConfigLoader.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.serialization; - -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.nio.charset.StandardCharsets; - -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.PropertyAccessor; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.module.SimpleModule; - -import org.apache.tika.plugins.PluginConfig; -import org.apache.tika.plugins.PluginConfigs; - -public class PluginConfigLoader { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - static { - SimpleModule module = new SimpleModule(); - module.addSerializer(PluginConfig.class, new PluginsConfigSerializer()); - OBJECT_MAPPER.registerModule(module); - OBJECT_MAPPER.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY); - } - - public static PluginConfigs load(InputStream is) throws IOException { - try (Reader reader = new InputStreamReader(is, StandardCharsets.UTF_8)) { - return OBJECT_MAPPER.readValue(reader, PluginConfigs.class); - } - } - -} diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/PluginsConfigDeserializer.java b/tika-serialization/src/main/java/org/apache/tika/serialization/PluginsConfigDeserializer.java deleted file mode 100644 index 3b37c81a8..000000000 --- a/tika-serialization/src/main/java/org/apache/tika/serialization/PluginsConfigDeserializer.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.serialization; - -import java.io.IOException; - -import com.fasterxml.jackson.core.JacksonException; -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.JsonDeserializer; -import com.fasterxml.jackson.databind.JsonNode; - -import org.apache.tika.plugins.PluginConfig; - -public class PluginsConfigDeserializer extends JsonDeserializer<PluginConfig> { - - @Override - public PluginConfig deserialize(JsonParser jsonParser, DeserializationContext deserializationContext) throws IOException, JacksonException { - JsonNode node = jsonParser.getCodec().readTree(jsonParser); - - String factoryPluginId = node.get("factoryPluginId").asText(); - - JsonNode jsonConfigNode = node.get("config"); - - String jsonConfigRaw = jsonConfigNode.toString(); - - return new PluginConfig("id", factoryPluginId, jsonConfigRaw); - } -} diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/PluginsConfigSerializer.java b/tika-serialization/src/main/java/org/apache/tika/serialization/PluginsConfigSerializer.java deleted file mode 100644 index dee7edbb4..000000000 --- a/tika-serialization/src/main/java/org/apache/tika/serialization/PluginsConfigSerializer.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.serialization; - -import java.io.IOException; - -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.databind.JsonSerializer; -import com.fasterxml.jackson.databind.SerializerProvider; - -import org.apache.tika.plugins.PluginConfig; - -public class PluginsConfigSerializer extends JsonSerializer<PluginConfig> { - - @Override - public void serialize(PluginConfig pluginsConfig, JsonGenerator jsonGenerator, SerializerProvider serializerProvider) throws IOException { - jsonGenerator.writeStartObject(); - jsonGenerator.writeStringField("pluginId", pluginsConfig.factoryPluginId()); - jsonGenerator.writeFieldName("jsonConfig"); - jsonGenerator.writeRawValue(pluginsConfig.jsonConfig()); - jsonGenerator.writeEndObject(); - } -} diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/PluginsConfigTest.java b/tika-serialization/src/test/java/org/apache/tika/serialization/PluginsConfigTest.java deleted file mode 100644 index 7e04f53f4..000000000 --- a/tika-serialization/src/test/java/org/apache/tika/serialization/PluginsConfigTest.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.serialization; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.util.HashMap; -import java.util.Map; - -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.PropertyAccessor; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.module.SimpleModule; -import org.junit.jupiter.api.Test; - -import org.apache.tika.plugins.PluginConfig; -import org.apache.tika.plugins.PluginConfigs; - -public class PluginsConfigTest { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - static { - SimpleModule module = new SimpleModule(); - module.addDeserializer(PluginConfig.class, new PluginsConfigDeserializer()); - module.addSerializer(PluginConfig.class, new PluginsConfigSerializer()); - OBJECT_MAPPER.registerModule(module); - OBJECT_MAPPER.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY); - } - - @Test - public void testBasic() throws Exception { - - PluginConfig p1 = new PluginConfig("pluginId", - """ - {"basePath":"/my/docs","includeSystemInfo":true} - """); - String json = OBJECT_MAPPER.writeValueAsString(p1); - - PluginConfig deserialized = OBJECT_MAPPER.readValue(json, PluginConfig.class); - assertEquals(p1.factoryPluginId(), deserialized.factoryPluginId()); - assertEquals(flatten(p1.jsonConfig()), flatten(deserialized.jsonConfig())); - } - - @Test - public void testMap() throws Exception { - PluginConfig p1 = new PluginConfig("pluginId1", - """ - {"basePath":"/my/docs1","includeSystemInfo":true} - """); - PluginConfig p2 = new PluginConfig("pluginId2", - """ - {"basePath":"/my/docs2","includeSystemInfo":false} - """); - Map<String, PluginConfig> map = new HashMap<>(); - map.put(p1.factoryPluginId(), p1); - map.put(p2.factoryPluginId(), p2); - PluginConfigs pluginConfigManager = new PluginConfigs(map); - - String json = OBJECT_MAPPER.writeValueAsString(pluginConfigManager); - - PluginConfigs deserialized = OBJECT_MAPPER.readValue(json, PluginConfigs.class); - assertEquals(pluginConfigManager.get(p1.factoryPluginId()).get().factoryPluginId(), deserialized.get(p1.factoryPluginId()).get().factoryPluginId()); - assertEquals(flatten(pluginConfigManager.get(p1.factoryPluginId()).get().jsonConfig()), - flatten(deserialized.get(p1.factoryPluginId()).get().jsonConfig())); - } - - private static String flatten(String s) { - return s.replaceAll("[\r\n]", ""); - } -}
