This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 36c7d4952 TIKA-4572 -- further tweaks (#2459)
36c7d4952 is described below
commit 36c7d49529ce9cbb56c26b9bb3fabc7521ef0b35
Author: Tim Allison <[email protected]>
AuthorDate: Tue Dec 16 12:42:53 2025 -0500
TIKA-4572 -- further tweaks (#2459)
---
tika-grpc/pom.xml | 46 ++++++++++++++
.../apache/tika/pipes/grpc/TikaGrpcServerImpl.java | 13 +++-
.../apache/tika/pipes/grpc/TikaGrpcServerTest.java | 74 ++++++++--------------
.../src/test/resources/tika-pipes-test-config.json | 5 +-
.../src/main/resources/plugin.properties | 2 +-
5 files changed, 86 insertions(+), 54 deletions(-)
diff --git a/tika-grpc/pom.xml b/tika-grpc/pom.xml
index fd4141737..5a5094a9f 100644
--- a/tika-grpc/pom.xml
+++ b/tika-grpc/pom.xml
@@ -228,6 +228,20 @@
<artifactId>tika-pipes-file-system</artifactId>
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-pipes-file-system</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ <type>zip</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-pipes-http</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ <type>zip</type>
+ </dependency>
<dependency>
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-jsonSchema</artifactId>
@@ -252,6 +266,38 @@
</dependencies>
<build>
<plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>copy-plugins</id>
+ <phase>process-test-resources</phase>
+ <goals>
+ <goal>copy</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>${project.build.directory}/plugins</outputDirectory>
+ <artifactItems>
+ <artifactItem>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-pipes-file-system</artifactId>
+ <version>${project.version}</version>
+ <type>zip</type>
+ <overWrite>true</overWrite>
+ </artifactItem>
+ <artifactItem>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-pipes-http</artifactId>
+ <version>${project.version}</version>
+ <type>zip</type>
+ <overWrite>true</overWrite>
+ </artifactItem>
+ </artifactItems>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java
index 4e60155d5..2aedbfb94 100644
--- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java
+++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java
@@ -102,6 +102,8 @@ class TikaGrpcServerImpl extends TikaGrpc.TikaImplBase {
try {
pluginManager = TikaPluginManager.load(tikaJsonConfig);
+ pluginManager.loadPlugins();
+ pluginManager.startPlugins();
} catch (TikaConfigException e) {
LOG.warn("Could not load plugin manager, using default: {}",
e.getMessage());
pluginManager = new org.pf4j.DefaultPluginManager();
@@ -231,15 +233,22 @@ class TikaGrpcServerImpl extends TikaGrpc.TikaImplBase {
private String findFactoryNameForClass(String className) throws
TikaConfigException {
var factories =
pluginManager.getExtensions(org.apache.tika.pipes.api.fetcher.FetcherFactory.class);
+ LOG.debug("Looking for factory that produces class: {}", className);
+ LOG.debug("Found {} factories", factories.size());
for (var factory : factories) {
+ LOG.debug("Checking factory: {} (name={})",
factory.getClass().getName(), factory.getName());
try {
- ExtensionConfig tempConfig = new ExtensionConfig("temp",
factory.getName(), "{}");
+ // Use a permissive config that should allow most factories to
create an instance
+ // FileSystemFetcher requires basePath or allowAbsolutePaths
+ String tempJson = "{\"allowAbsolutePaths\": true}";
+ ExtensionConfig tempConfig = new ExtensionConfig("temp",
factory.getName(), tempJson);
Fetcher fetcher = factory.buildExtension(tempConfig);
+ LOG.debug("Factory {} produced: {}", factory.getName(),
fetcher.getClass().getName());
if (fetcher.getClass().getName().equals(className)) {
return factory.getName();
}
} catch (Exception e) {
- LOG.debug("Could not build fetcher for factory: {}",
factory.getName(), e);
+ LOG.debug("Could not build fetcher for factory: {} - {}",
factory.getName(), e.getMessage());
}
}
throw new TikaConfigException("Could not find factory for class: " +
className);
diff --git a/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/TikaGrpcServerTest.java b/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/TikaGrpcServerTest.java
index 02c536d62..15c3ec39b 100644
--- a/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/TikaGrpcServerTest.java
+++ b/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/TikaGrpcServerTest.java
@@ -23,6 +23,8 @@ import static org.junit.jupiter.api.Assertions.fail;
import java.io.File;
import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Duration;
import java.time.LocalDateTime;
@@ -30,6 +32,7 @@ import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@@ -64,6 +67,7 @@ import org.apache.tika.GetFetcherRequest;
import org.apache.tika.SaveFetcherReply;
import org.apache.tika.SaveFetcherRequest;
import org.apache.tika.TikaGrpc;
+import org.apache.tika.config.JsonConfigHelper;
import org.apache.tika.pipes.api.PipesResult;
import org.apache.tika.pipes.fetcher.fs.FileSystemFetcher;
@@ -72,69 +76,41 @@ public class TikaGrpcServerTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static final Logger LOG =
LoggerFactory.getLogger(TikaGrpcServerTest.class);
public static final int NUM_TEST_DOCS = 2;
- static File tikaConfigTemplate = Paths
- .get("src", "test", "resources", "tika-pipes-test-config.json")
- .toFile();
- static File tikaConfig = new File("target", "tika-config-" +
UUID.randomUUID() + ".json");
+ static Path tikaConfig = Paths.get("target", "tika-config-" +
UUID.randomUUID() + ".json");
@BeforeAll
static void init() throws Exception {
- // Read the template config
- String configContent = FileUtils.readFileToString(tikaConfigTemplate,
StandardCharsets.UTF_8);
-
- // Parse it as JSON to inject the correct javaPath
- @SuppressWarnings("unchecked")
- Map<String, Object> configMap = OBJECT_MAPPER.readValue(configContent,
Map.class);
-
- // Get or create the pipes section
- @SuppressWarnings("unchecked")
- Map<String, Object> pipesSection = (Map<String, Object>)
configMap.get("pipes");
- if (pipesSection == null) {
- pipesSection = new java.util.HashMap<>();
- configMap.put("pipes", pipesSection);
- }
-
- // Set javaPath to the same Java running the test
+ // Build the javaPath from java.home
String javaHome = System.getProperty("java.home");
- String javaPath = javaHome + File.separator + "bin" + File.separator +
"java";
- pipesSection.put("javaPath", javaPath);
+ Path javaPath = Paths.get(javaHome, "bin", "java");
+
+ // Set up paths
+ Path targetPath = Paths.get("target").toAbsolutePath();
+ Path pluginsDir = targetPath.resolve("plugins");
LOG.info("Setting javaPath to: {}", javaPath);
LOG.info("java.home is: {}", javaHome);
- // Update basePath in fetchers to use current working directory
- @SuppressWarnings("unchecked")
- Map<String, Object> fetchersSection = (Map<String, Object>)
configMap.get("fetchers");
- if (fetchersSection != null) {
- String targetPath = new File("target").getAbsolutePath();
- for (Map.Entry<String, Object> fetcherEntry :
fetchersSection.entrySet()) {
- @SuppressWarnings("unchecked")
- Map<String, Object> fetcherConfig = (Map<String, Object>)
fetcherEntry.getValue();
- if (fetcherConfig.containsKey("file-system-fetcher")) {
- @SuppressWarnings("unchecked")
- Map<String, Object> fsConfig = (Map<String, Object>)
fetcherConfig.get("file-system-fetcher");
- fsConfig.put("basePath", targetPath);
- }
- }
- }
+ // Use JsonConfigHelper to load template and apply replacements
+ Map<String, Object> replacements = new HashMap<>();
+ replacements.put("JAVA_PATH", javaPath);
+ replacements.put("FETCHER_BASE_PATH", targetPath);
+ replacements.put("PLUGIN_ROOTS", pluginsDir);
- // Write the modified config
- String modifiedConfig =
OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(configMap);
- FileUtils.writeStringToFile(tikaConfig, modifiedConfig,
StandardCharsets.UTF_8);
+
JsonConfigHelper.writeConfigFromResource("/tika-pipes-test-config.json",
+ TikaGrpcServerTest.class, replacements, tikaConfig);
- LOG.info("Written config to: {}", tikaConfig.getAbsolutePath());
- LOG.info("Config content:\n{}", modifiedConfig);
+ LOG.debug("Written config to: {}", tikaConfig.toAbsolutePath());
}
@AfterAll
static void clean() {
- if (tikaConfig.exists()) {
- if (!tikaConfig.setWritable(true)) {
- LOG.warn("Failed to set {} writable", tikaConfig);
- }
+ try {
+ Files.deleteIfExists(tikaConfig);
+ } catch (Exception e) {
+ LOG.warn("Failed to delete {}", tikaConfig, e);
}
- FileUtils.deleteQuietly(tikaConfig);
}
static final int NUM_FETCHERS_TO_CREATE = 10;
@@ -146,7 +122,7 @@ public class TikaGrpcServerTest {
Server server = InProcessServerBuilder
.forName(serverName)
.directExecutor()
- .addService(new
TikaGrpcServerImpl(tikaConfig.getAbsolutePath()))
+ .addService(new
TikaGrpcServerImpl(tikaConfig.toAbsolutePath().toString()))
.build()
.start();
resources.register(server, Duration.ofSeconds(10));
@@ -232,7 +208,7 @@ public class TikaGrpcServerTest {
Server server = InProcessServerBuilder
.forName(serverName)
.directExecutor()
- .addService(new
TikaGrpcServerImpl(tikaConfig.getAbsolutePath()))
+ .addService(new
TikaGrpcServerImpl(tikaConfig.toAbsolutePath().toString()))
.build()
.start();
resources.register(server, Duration.ofSeconds(10));
diff --git a/tika-grpc/src/test/resources/tika-pipes-test-config.json b/tika-grpc/src/test/resources/tika-pipes-test-config.json
index 3dbff4c1a..f25957031 100644
--- a/tika-grpc/src/test/resources/tika-pipes-test-config.json
+++ b/tika-grpc/src/test/resources/tika-pipes-test-config.json
@@ -1,5 +1,6 @@
{
"pipes": {
+ "javaPath": "JAVA_PATH",
"numClients": 2,
"forkedJvmArgs": [
"-Xmx1g",
@@ -15,7 +16,7 @@
"fetchers": {
"nick1:is:cool:super/class
org.apache.tika.pipes.fetcher.fs.FileSystemFetcher": {
"file-system-fetcher": {
- "basePath":
"/home/ndipiazza/source/github/apache/tika/tika-grpc/target",
+ "basePath": "FETCHER_BASE_PATH",
"extractFileSystemMetadata": true
}
},
@@ -31,5 +32,5 @@
}
}
},
- "plugin-roots": ["/tmp/tika-test-plugins"]
+ "plugin-roots": "PLUGIN_ROOTS"
}
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/main/resources/plugin.properties b/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/main/resources/plugin.properties
index 8bf6adee4..3f0a9de8e 100644
--- a/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/main/resources/plugin.properties
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/main/resources/plugin.properties
@@ -15,7 +15,7 @@
# limitations under the License.
plugin.id=tika-pipes-http-plugin
-plugin.class=org.apache.tika.pipes.plugin.http.HttpFetcherPlugin
+plugin.class=org.apache.tika.pipes.plugin.http.HttpPipesPlugin
plugin.version=4.0.0-SNAPSHOT
plugin.provider=Apache Tika
plugin.description=Pipes for http