Copilot commented on code in PR #2655:
URL: https://github.com/apache/tika/pull/2655#discussion_r2874193483


##########
tika-e2e-tests/tika-grpc/src/test/java/org/apache/tika/pipes/ExternalTestBase.java:
##########
@@ -0,0 +1,346 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.time.Duration;
+import java.time.temporal.ChronoUnit;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import io.grpc.ManagedChannel;
+import io.grpc.ManagedChannelBuilder;
+import lombok.extern.slf4j.Slf4j;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.TestInstance;
+import org.testcontainers.containers.DockerComposeContainer;
+import org.testcontainers.containers.output.Slf4jLogConsumer;
+import org.testcontainers.containers.wait.strategy.Wait;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import org.apache.tika.FetchAndParseReply;
+
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+@Testcontainers
+@Slf4j
+@Tag("E2ETest")
+public abstract class ExternalTestBase {
+    public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+    public static final int MAX_STARTUP_TIMEOUT = 120;
+    public static final String GOV_DOCS_FOLDER = "/tika/govdocs1";
+    public static final File TEST_FOLDER = new File("target", "govdocs1");
+    public static final int GOV_DOCS_FROM_IDX = 
Integer.parseInt(System.getProperty("govdocs1.fromIndex", "1"));
+    public static final int GOV_DOCS_TO_IDX = 
Integer.parseInt(System.getProperty("govdocs1.toIndex", "1"));
+    public static final String DIGITAL_CORPORA_ZIP_FILES_URL = 
"https://corp.digitalcorpora.org/corpora/files/govdocs1/zipfiles";;
+    private static final boolean USE_LOCAL_SERVER = 
Boolean.parseBoolean(System.getProperty("tika.e2e.useLocalServer", "false"));
+    private static final int GRPC_PORT = 
Integer.parseInt(System.getProperty("tika.e2e.grpcPort", "50052"));
+    
+    public static DockerComposeContainer<?> composeContainer;
+    private static Process localGrpcProcess;
+
+    @BeforeAll
+    static void setup() throws Exception {
+        loadGovdocs1();
+        
+        if (USE_LOCAL_SERVER) {
+            startLocalGrpcServer();
+        } else {
+            startDockerGrpcServer();
+        }
+    }
+    
+    private static void startLocalGrpcServer() throws Exception {
+        log.info("Starting local tika-grpc server using Maven exec");
+        
+        Path tikaGrpcDir = findTikaGrpcDirectory();
+        Path configFile = 
Path.of("src/test/resources/tika-config.json").toAbsolutePath();
+        
+        if (!Files.exists(configFile)) {
+            throw new IllegalStateException("Config file not found: " + 
configFile);
+        }
+        
+        log.info("Using tika-grpc from: {}", tikaGrpcDir);
+        log.info("Using config file: {}", configFile);
+        
+        String javaHome = System.getProperty("java.home");
+        boolean isWindows = 
System.getProperty("os.name").toLowerCase(Locale.ROOT).contains("win");
+        String javaCmd = javaHome + (isWindows ? "\\bin\\java.exe" : 
"/bin/java");
+        String mvnCmd = tikaGrpcDir.getParent().resolve(isWindows ? "mvnw.cmd" 
: "mvnw").toString();
+        
+        ProcessBuilder pb = new ProcessBuilder(
+            mvnCmd,
+            "exec:exec",
+            "-Dexec.executable=" + javaCmd,
+            "-Dexec.args=" +
+                "--add-opens=java.base/java.lang=ALL-UNNAMED " +
+                "--add-opens=java.base/java.nio=ALL-UNNAMED " +
+                "--add-opens=java.base/java.util=ALL-UNNAMED " +
+                "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED " +
+                "-classpath %classpath " +
+                "org.apache.tika.pipes.grpc.TikaGrpcServer " +
+                "-c " + configFile + " " +
+                "-p " + GRPC_PORT
+        );
+        
+        pb.directory(tikaGrpcDir.toFile());
+        pb.redirectErrorStream(true);
+        pb.redirectOutput(ProcessBuilder.Redirect.PIPE);
+        
+        localGrpcProcess = pb.start();
+        
+        Thread logThread = new Thread(() -> {
+            try (BufferedReader reader = new BufferedReader(
+                    new InputStreamReader(localGrpcProcess.getInputStream(), 
StandardCharsets.UTF_8))) {
+                String line;
+                while ((line = reader.readLine()) != null) {
+                    log.info("tika-grpc: {}", line);
+                }
+            } catch (IOException e) {
+                log.error("Error reading server output", e);
+            }
+        });
+        logThread.setDaemon(true);
+        logThread.start();
+        
+        waitForServerReady();
+        
+        log.info("Local tika-grpc server started successfully on port {}", 
GRPC_PORT);
+    }
+    
+    private static Path findTikaGrpcDirectory() {
+        Path currentDir = Path.of("").toAbsolutePath();
+        Path tikaRootDir = currentDir;
+        
+        while (tikaRootDir != null && 
+               !(Files.exists(tikaRootDir.resolve("tika-grpc")) && 
+                 Files.exists(tikaRootDir.resolve("tika-e2e-tests")))) {
+            tikaRootDir = tikaRootDir.getParent();
+        }
+        
+        if (tikaRootDir == null) {
+            throw new IllegalStateException("Cannot find tika root directory. 
" +
+                "Current dir: " + currentDir);
+        }
+        
+        return tikaRootDir.resolve("tika-grpc");
+    }
+    
+    private static void waitForServerReady() throws Exception {
+        int maxAttempts = 60;
+        for (int i = 0; i < maxAttempts; i++) {
+            try {
+                ManagedChannel testChannel = ManagedChannelBuilder
+                    .forAddress("localhost", GRPC_PORT)
+                    .usePlaintext()
+                    .build();
+                
+                try {
+                    testChannel.getState(true);
+                    TimeUnit.MILLISECONDS.sleep(100);
+                    if 
(testChannel.getState(false).toString().contains("READY")) {
+                        log.info("gRPC server is ready!");
+                        return;
+                    }
+                } finally {
+                    testChannel.shutdown();
+                    testChannel.awaitTermination(1, TimeUnit.SECONDS);
+                }
+            } catch (Exception e) {
+                log.trace("gRPC server not ready yet: {}", e.getMessage());
+            }
+            TimeUnit.SECONDS.sleep(1);
+        }
+        
+        if (localGrpcProcess != null && localGrpcProcess.isAlive()) {
+            localGrpcProcess.destroyForcibly();
+        }
+        throw new RuntimeException("Local gRPC server failed to start within 
timeout");
+    }
+    
+    private static void startDockerGrpcServer() {
+        log.info("Starting Docker Compose tika-grpc server");
+        
+        String composeFilePath = 
System.getProperty("tika.docker.compose.file");
+        if (composeFilePath == null || composeFilePath.isBlank()) {
+            throw new IllegalStateException(
+                    "Docker Compose mode requires system property 
'tika.docker.compose.file' " +
+                    "pointing to a valid docker-compose.yml file.");
+        }
+        File composeFile = new File(composeFilePath);
+        if (!composeFile.isFile()) {
+            throw new IllegalStateException("Docker Compose file not found: " 
+ composeFile.getAbsolutePath());
+        }
+        composeContainer = new DockerComposeContainer<>(composeFile)
+                .withEnv("HOST_GOVDOCS1_DIR", TEST_FOLDER.getAbsolutePath())
+                .withStartupTimeout(Duration.of(MAX_STARTUP_TIMEOUT, 
ChronoUnit.SECONDS))
+                .withExposedService("tika-grpc", 50052, 
+                    Wait.forLogMessage(".*Server started.*\\n", 1))
+                .withLogConsumer("tika-grpc", new Slf4jLogConsumer(log));
+        
+        composeContainer.start();
+        
+        log.info("Docker Compose containers started successfully");
+    }
+
+    private static void loadGovdocs1() throws IOException, 
InterruptedException {
+        int retries = 3;
+        int attempt = 0;
+        while (true) {
+            try {
+                downloadAndUnzipGovdocs1(GOV_DOCS_FROM_IDX, GOV_DOCS_TO_IDX);
+                break;
+            } catch (IOException e) {
+                attempt++;
+                if (attempt >= retries) {
+                    throw e;
+                }
+                log.warn("Download attempt {} failed, retrying in 10 
seconds...", attempt, e);
+                TimeUnit.SECONDS.sleep(10);
+            }
+        }
+    }
+
+    @AfterAll
+    void close() {
+        if (USE_LOCAL_SERVER && localGrpcProcess != null) {
+            log.info("Stopping local gRPC server");
+            localGrpcProcess.destroy();
+            try {
+                if (!localGrpcProcess.waitFor(10, TimeUnit.SECONDS)) {
+                    localGrpcProcess.destroyForcibly();
+                }
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+                localGrpcProcess.destroyForcibly();
+            }
+        } else if (composeContainer != null) {
+            composeContainer.close();
+        }
+    }
+
+    public static void downloadAndUnzipGovdocs1(int fromIndex, int toIndex) 
throws IOException {
+        Path targetDir = TEST_FOLDER.toPath();
+        Files.createDirectories(targetDir);
+
+        for (int i = fromIndex; i <= toIndex; i++) {
+            String zipName = String.format(java.util.Locale.ROOT, "%03d.zip", 
i);
+            String url = DIGITAL_CORPORA_ZIP_FILES_URL + "/" + zipName;
+            Path zipPath = targetDir.resolve(zipName);
+            
+            if (Files.exists(zipPath)) {
+                log.info("{} already exists, skipping download", zipName);
+                continue;
+            }

Review Comment:
   `downloadAndUnzipGovdocs1()` skips *both* download and unzip when the zip 
file already exists. If a previous run downloaded the zip but was interrupted 
before extraction (or the extracted files were cleaned), subsequent runs will 
never unzip and the tests will run against an empty corpus. Consider unzipping 
when the zip exists but extracted files are missing (e.g., check for at least 
one non-zip file/dir), or always unzip into a deterministic output directory.



##########
tika-e2e-tests/tika-grpc/README.md:
##########
@@ -0,0 +1,144 @@
+# Tika gRPC End-to-End Tests
+
+End-to-end integration tests for Apache Tika gRPC Server using Testcontainers.
+
+## Overview
+
+This test module validates the functionality of Apache Tika gRPC Server by:
+- Starting a tika-grpc Docker container using Docker Compose
+- Loading test documents from the GovDocs1 corpus
+- Testing various fetchers (filesystem, Ignite config store, etc.)
+- Verifying parsing results and metadata extraction
+
+## Prerequisites
+
+- Java 17 or later
+- Maven 3.6 or later
+- Docker and Docker Compose
+- Internet connection (for downloading test documents)
+- Docker image `apache/tika-grpc:local` (see below)
+
+## Building
+
+```bash
+./mvnw clean install
+```

Review Comment:
   The README shows running `./mvnw ...` from within 
`tika-e2e-tests/tika-grpc/`, but this directory doesn't contain the Maven 
wrapper scripts. Please update the examples to use the repo-root wrapper via a 
relative path (e.g., `../../mvnw`) or use `mvn` explicitly so the documented 
commands work when executed as written.



##########
tika-e2e-tests/tika-grpc/src/test/java/org/apache/tika/pipes/ignite/IgniteConfigStoreTest.java:
##########
@@ -0,0 +1,591 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.ignite;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.time.Duration;
+import java.time.temporal.ChronoUnit;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Stream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import io.grpc.ManagedChannel;
+import io.grpc.ManagedChannelBuilder;
+import io.grpc.stub.StreamObserver;
+import lombok.extern.slf4j.Slf4j;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
+import org.junit.jupiter.api.condition.DisabledOnOs;
+import org.junit.jupiter.api.condition.OS;
+import org.testcontainers.containers.DockerComposeContainer;
+import org.testcontainers.containers.output.Slf4jLogConsumer;
+import org.testcontainers.containers.wait.strategy.Wait;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import org.apache.tika.FetchAndParseReply;
+import org.apache.tika.FetchAndParseRequest;
+import org.apache.tika.SaveFetcherReply;
+import org.apache.tika.SaveFetcherRequest;
+import org.apache.tika.TikaGrpc;
+import org.apache.tika.pipes.fetcher.fs.FileSystemFetcherConfig;
+
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+@Testcontainers
+@Slf4j
+@Tag("E2ETest")
+@DisabledOnOs(value = OS.WINDOWS, disabledReason = "Maven not on PATH and 
Docker/Testcontainers not supported on Windows CI")
+class IgniteConfigStoreTest {
+    
+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+    private static final int MAX_STARTUP_TIMEOUT = 120;
+    private static final File TEST_FOLDER = new File("target", "govdocs1");
+    private static final int GOV_DOCS_FROM_IDX = 
Integer.parseInt(System.getProperty("govdocs1.fromIndex", "1"));
+    private static final int GOV_DOCS_TO_IDX = 
Integer.parseInt(System.getProperty("govdocs1.toIndex", "1"));
+    private static final String DIGITAL_CORPORA_ZIP_FILES_URL = 
"https://corp.digitalcorpora.org/corpora/files/govdocs1/zipfiles";;
+    private static final boolean USE_LOCAL_SERVER = 
Boolean.parseBoolean(System.getProperty("tika.e2e.useLocalServer", "false"));

Review Comment:
   `tika.e2e.useLocalServer` is intended to default to local mode (per the 
module POM and PR description), but this test defaults the system property to 
`false`. That makes IDE runs unexpectedly try Docker unless the property is set 
manually. Please align the default here with the module default (`true`) or 
share a common helper for reading these properties.
   ```suggestion
       private static final boolean USE_LOCAL_SERVER = 
Boolean.parseBoolean(System.getProperty("tika.e2e.useLocalServer", "true"));
   ```



##########
tika-e2e-tests/tika-grpc/src/test/java/org/apache/tika/pipes/filesystem/FileSystemFetcherTest.java:
##########
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.filesystem;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Stream;
+
+import io.grpc.ManagedChannel;
+import io.grpc.stub.StreamObserver;
+import lombok.extern.slf4j.Slf4j;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.DisabledOnOs;
+import org.junit.jupiter.api.condition.OS;
+
+import org.apache.tika.FetchAndParseReply;
+import org.apache.tika.FetchAndParseRequest;
+import org.apache.tika.SaveFetcherReply;
+import org.apache.tika.SaveFetcherRequest;
+import org.apache.tika.TikaGrpc;
+import org.apache.tika.pipes.ExternalTestBase;
+import org.apache.tika.pipes.fetcher.fs.FileSystemFetcherConfig;
+
+@Slf4j
+@DisabledOnOs(value = OS.WINDOWS, disabledReason = "exec:exec classpath 
exceeds Windows CreateProcess command-line length limit")
+class FileSystemFetcherTest extends ExternalTestBase {
+    
+    @Test
+    void testFileSystemFetcher() throws Exception {
+        String fetcherId = "defaultFetcher";
+        ManagedChannel channel = getManagedChannel();
+        TikaGrpc.TikaBlockingStub blockingStub = 
TikaGrpc.newBlockingStub(channel);
+        TikaGrpc.TikaStub tikaStub = TikaGrpc.newStub(channel);
+

Review Comment:
   `ManagedChannel` is never shut down in this test. This can leak 
threads/sockets across the test JVM and may cause subsequent tests to hang or 
fail. Please wrap the test body in a try/finally (or try-with-resources via a 
helper) that calls `channel.shutdown()` and `awaitTermination(...)` (and 
`shutdownNow()` as a fallback).



##########
tika-e2e-tests/tika-grpc/src/test/java/org/apache/tika/pipes/ignite/IgniteConfigStoreTest.java:
##########
@@ -0,0 +1,591 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.ignite;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.time.Duration;
+import java.time.temporal.ChronoUnit;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Stream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import io.grpc.ManagedChannel;
+import io.grpc.ManagedChannelBuilder;
+import io.grpc.stub.StreamObserver;
+import lombok.extern.slf4j.Slf4j;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
+import org.junit.jupiter.api.condition.DisabledOnOs;
+import org.junit.jupiter.api.condition.OS;
+import org.testcontainers.containers.DockerComposeContainer;
+import org.testcontainers.containers.output.Slf4jLogConsumer;
+import org.testcontainers.containers.wait.strategy.Wait;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import org.apache.tika.FetchAndParseReply;
+import org.apache.tika.FetchAndParseRequest;
+import org.apache.tika.SaveFetcherReply;
+import org.apache.tika.SaveFetcherRequest;
+import org.apache.tika.TikaGrpc;
+import org.apache.tika.pipes.fetcher.fs.FileSystemFetcherConfig;
+
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+@Testcontainers
+@Slf4j
+@Tag("E2ETest")
+@DisabledOnOs(value = OS.WINDOWS, disabledReason = "Maven not on PATH and 
Docker/Testcontainers not supported on Windows CI")
+class IgniteConfigStoreTest {
+    
+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+    private static final int MAX_STARTUP_TIMEOUT = 120;
+    private static final File TEST_FOLDER = new File("target", "govdocs1");
+    private static final int GOV_DOCS_FROM_IDX = 
Integer.parseInt(System.getProperty("govdocs1.fromIndex", "1"));
+    private static final int GOV_DOCS_TO_IDX = 
Integer.parseInt(System.getProperty("govdocs1.toIndex", "1"));
+    private static final String DIGITAL_CORPORA_ZIP_FILES_URL = 
"https://corp.digitalcorpora.org/corpora/files/govdocs1/zipfiles";;
+    private static final boolean USE_LOCAL_SERVER = 
Boolean.parseBoolean(System.getProperty("tika.e2e.useLocalServer", "false"));
+    private static final int GRPC_PORT = 
Integer.parseInt(System.getProperty("tika.e2e.grpcPort", "50052"));
+    
+    private static DockerComposeContainer<?> igniteComposeContainer;
+    private static Process localGrpcProcess;
+    
+    @BeforeAll
+    static void setupIgnite() throws Exception {
+        if (USE_LOCAL_SERVER) {
+            try {
+                killProcessOnPort(GRPC_PORT);
+                killProcessOnPort(3344);
+                killProcessOnPort(10800);
+            } catch (Exception e) {
+                log.debug("No orphaned processes to clean up");
+            }
+        }
+        
+        if (!TEST_FOLDER.exists() || TEST_FOLDER.listFiles().length == 0) {
+            downloadAndUnzipGovdocs1(GOV_DOCS_FROM_IDX, GOV_DOCS_TO_IDX);
+        }
+        
+        if (USE_LOCAL_SERVER) {
+            startLocalGrpcServer();
+        } else {
+            startDockerGrpcServer();
+        }
+    }
+    
+    private static void startLocalGrpcServer() throws Exception {
+        log.info("Starting local tika-grpc server using Maven");
+        
+        Path currentDir = Path.of("").toAbsolutePath();
+        Path tikaRootDir = currentDir;
+        
+        while (tikaRootDir != null && 
+               !(Files.exists(tikaRootDir.resolve("tika-grpc")) && 
+                 Files.exists(tikaRootDir.resolve("tika-e2e-tests")))) {
+            tikaRootDir = tikaRootDir.getParent();
+        }
+        
+        if (tikaRootDir == null) {
+            throw new IllegalStateException("Cannot find tika root directory. 
" +
+                "Current dir: " + currentDir + ". " +
+                "Please run from within the tika project.");
+        }
+        
+        Path tikaGrpcDir = tikaRootDir.resolve("tika-grpc");
+        if (!Files.exists(tikaGrpcDir)) {
+            throw new IllegalStateException("Cannot find tika-grpc directory 
at: " + tikaGrpcDir);
+        }
+        
+        String configFileName = "tika-config-ignite-local.json";
+        Path configFile = Path.of("src/test/resources/" + 
configFileName).toAbsolutePath();
+        
+        if (!Files.exists(configFile)) {
+            throw new IllegalStateException("Config file not found: " + 
configFile);
+        }
+        
+        log.info("Tika root: {}", tikaRootDir);
+        log.info("Using tika-grpc from: {}", tikaGrpcDir);
+        log.info("Using config file: {}", configFile);
+        
+        // Use mvn exec:exec to run as external process (not exec:java which 
breaks ServiceLoader)
+        String javaHome = System.getProperty("java.home");
+        boolean isWindows = 
System.getProperty("os.name").toLowerCase(Locale.ROOT).contains("win");
+        String javaCmd = javaHome + (isWindows ? "\\bin\\java.exe" : 
"/bin/java");
+        String mvnCmd = tikaRootDir.resolve(isWindows ? "mvnw.cmd" : 
"mvnw").toString();
+        
+        ProcessBuilder pb = new ProcessBuilder(
+            mvnCmd,
+            "exec:exec",
+            "-Dexec.executable=" + javaCmd,
+            "-Dexec.args=" +
+                "--add-opens=java.base/java.lang=ALL-UNNAMED " +
+                "--add-opens=java.base/java.lang.invoke=ALL-UNNAMED " +
+                "--add-opens=java.base/java.lang.reflect=ALL-UNNAMED " +
+                "--add-opens=java.base/java.io=ALL-UNNAMED " +
+                "--add-opens=java.base/java.nio=ALL-UNNAMED " +
+                "--add-opens=java.base/java.math=ALL-UNNAMED " +
+                "--add-opens=java.base/java.util=ALL-UNNAMED " +
+                "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED " +
+                "--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED 
" +
+                "--add-opens=java.base/java.util.concurrent.locks=ALL-UNNAMED 
" +
+                "--add-opens=java.base/java.time=ALL-UNNAMED " +
+                "--add-opens=java.base/jdk.internal.misc=ALL-UNNAMED " +
+                "--add-opens=java.base/jdk.internal.access=ALL-UNNAMED " +
+                "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED " +
+                
"--add-opens=java.management/com.sun.jmx.mbeanserver=ALL-UNNAMED " +
+                
"--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED " +
+                "-Dio.netty.tryReflectionSetAccessible=true " +
+                "-Dignite.work.dir=" + 
tikaGrpcDir.resolve("target/ignite-work") + " " +
+                "-classpath %classpath " +
+                "org.apache.tika.pipes.grpc.TikaGrpcServer " +
+                "-c " + configFile + " " +

Review Comment:
   In `-Dexec.args`, both `-Dignite.work.dir=...` and `-c <configFile>` inject 
absolute paths into a single space-delimited string. If the workspace path 
contains spaces, the child JVM will receive broken arguments and startup will 
fail. Please ensure these paths are quoted/escaped within `exec.args` (or 
otherwise passed safely) so the test is robust on typical developer paths.
   ```suggestion
                   "-Dignite.work.dir=\"" + 
tikaGrpcDir.resolve("target/ignite-work") + "\" " +
                   "-classpath %classpath " +
                   "org.apache.tika.pipes.grpc.TikaGrpcServer " +
                   "-c \"" + configFile + "\" " +
   ```



##########
tika-e2e-tests/README.md:
##########
@@ -0,0 +1,59 @@
+# Apache Tika End-to-End Tests
+
+End-to-end integration tests for Apache Tika components.
+
+## Overview
+
+This module contains standalone end-to-end (E2E) tests for various Apache Tika 
distribution formats and deployment modes. Unlike unit and integration tests in 
the main Tika build, these E2E tests validate complete deployment scenarios 
using Docker containers and real-world test data.
+
+**Note:** This module is included in the main Tika build under the `e2e` Maven 
profile (`-Pe2e`). Run `mvn test -Pe2e` from the repo root to execute these 
tests.
+
+## Test Modules
+
+- **tika-grpc** - E2E tests for tika-grpc server
+
+## Prerequisites
+
+- Java 17 or later
+- Maven 3.6 or later
+- Docker and Docker Compose
+- Internet connection (for downloading test documents)
+
+## Building All E2E Tests
+
+From this directory:
+
+```bash
+./mvnw clean install
+```

Review Comment:
   The commands in this README use `./mvnw`, but there is no Maven wrapper 
script in the `tika-e2e-tests/` directory. Because `./mvnw` is resolved 
relative to the current directory (not looked up on PATH), running these 
commands from this directory will fail as written. Please update the examples 
to use the repo-root wrapper via a relative path (e.g., `../mvnw`) or use 
`mvn` explicitly.



##########
tika-e2e-tests/tika-grpc/src/test/java/org/apache/tika/pipes/ExternalTestBase.java:
##########
@@ -0,0 +1,346 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.time.Duration;
+import java.time.temporal.ChronoUnit;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import io.grpc.ManagedChannel;
+import io.grpc.ManagedChannelBuilder;
+import lombok.extern.slf4j.Slf4j;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.TestInstance;
+import org.testcontainers.containers.DockerComposeContainer;
+import org.testcontainers.containers.output.Slf4jLogConsumer;
+import org.testcontainers.containers.wait.strategy.Wait;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import org.apache.tika.FetchAndParseReply;
+
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+@Testcontainers
+@Slf4j
+@Tag("E2ETest")
+public abstract class ExternalTestBase {
+    public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+    public static final int MAX_STARTUP_TIMEOUT = 120;
+    public static final String GOV_DOCS_FOLDER = "/tika/govdocs1";
+    public static final File TEST_FOLDER = new File("target", "govdocs1");
+    public static final int GOV_DOCS_FROM_IDX = 
Integer.parseInt(System.getProperty("govdocs1.fromIndex", "1"));
+    public static final int GOV_DOCS_TO_IDX = 
Integer.parseInt(System.getProperty("govdocs1.toIndex", "1"));
+    public static final String DIGITAL_CORPORA_ZIP_FILES_URL = 
"https://corp.digitalcorpora.org/corpora/files/govdocs1/zipfiles";
+    private static final boolean USE_LOCAL_SERVER = 
Boolean.parseBoolean(System.getProperty("tika.e2e.useLocalServer", "false"));
+    private static final int GRPC_PORT = 
Integer.parseInt(System.getProperty("tika.e2e.grpcPort", "50052"));
+    
+    public static DockerComposeContainer<?> composeContainer;
+    private static Process localGrpcProcess;
+
+    @BeforeAll
+    static void setup() throws Exception {
+        loadGovdocs1();
+        

Review Comment:
   `setup()` always calls `loadGovdocs1()` in `@BeforeAll`, which downloads and 
unzips full GovDocs1 zip batches from the public internet. With the CI 
workflows now running `-Pe2e`, this introduces a large external dependency and 
can significantly slow down PR builds or make them flaky, even when 
`corpa.numdocs` is small, because the full zip is still downloaded. Consider 
switching CI to a small committed fixture corpus, adding an opt-in flag for 
GovDocs downloads, or introducing caching/pre-fetching so PR builds don't 
depend on a large network download.
   ```suggestion
       private static final boolean DOWNLOAD_GOVDOCS1 = Boolean.parseBoolean(
               System.getProperty("tika.e2e.downloadGovdocs1", "true"));
       
       public static DockerComposeContainer<?> composeContainer;
       private static Process localGrpcProcess;
   
       @BeforeAll
       static void setup() throws Exception {
           if (DOWNLOAD_GOVDOCS1) {
               loadGovdocs1();
           } else {
               log.info("Skipping GovDocs1 download because system property 
'tika.e2e.downloadGovdocs1' is set to false");
           }
   ```



##########
tika-e2e-tests/tika-grpc/src/test/java/org/apache/tika/pipes/ignite/IgniteConfigStoreTest.java:
##########
@@ -0,0 +1,591 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.ignite;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.time.Duration;
+import java.time.temporal.ChronoUnit;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Stream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import io.grpc.ManagedChannel;
+import io.grpc.ManagedChannelBuilder;
+import io.grpc.stub.StreamObserver;
+import lombok.extern.slf4j.Slf4j;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
+import org.junit.jupiter.api.condition.DisabledOnOs;
+import org.junit.jupiter.api.condition.OS;
+import org.testcontainers.containers.DockerComposeContainer;
+import org.testcontainers.containers.output.Slf4jLogConsumer;
+import org.testcontainers.containers.wait.strategy.Wait;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import org.apache.tika.FetchAndParseReply;
+import org.apache.tika.FetchAndParseRequest;
+import org.apache.tika.SaveFetcherReply;
+import org.apache.tika.SaveFetcherRequest;
+import org.apache.tika.TikaGrpc;
+import org.apache.tika.pipes.fetcher.fs.FileSystemFetcherConfig;
+
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+@Testcontainers
+@Slf4j
+@Tag("E2ETest")
+@DisabledOnOs(value = OS.WINDOWS, disabledReason = "Maven not on PATH and 
Docker/Testcontainers not supported on Windows CI")
+class IgniteConfigStoreTest {
+    
+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+    private static final int MAX_STARTUP_TIMEOUT = 120;
+    private static final File TEST_FOLDER = new File("target", "govdocs1");
+    private static final int GOV_DOCS_FROM_IDX = 
Integer.parseInt(System.getProperty("govdocs1.fromIndex", "1"));
+    private static final int GOV_DOCS_TO_IDX = 
Integer.parseInt(System.getProperty("govdocs1.toIndex", "1"));
+    private static final String DIGITAL_CORPORA_ZIP_FILES_URL = 
"https://corp.digitalcorpora.org/corpora/files/govdocs1/zipfiles";
+    private static final boolean USE_LOCAL_SERVER = 
Boolean.parseBoolean(System.getProperty("tika.e2e.useLocalServer", "false"));
+    private static final int GRPC_PORT = 
Integer.parseInt(System.getProperty("tika.e2e.grpcPort", "50052"));
+    
+    private static DockerComposeContainer<?> igniteComposeContainer;
+    private static Process localGrpcProcess;
+    
+    @BeforeAll
+    static void setupIgnite() throws Exception {
+        if (USE_LOCAL_SERVER) {
+            try {
+                killProcessOnPort(GRPC_PORT);
+                killProcessOnPort(3344);
+                killProcessOnPort(10800);
+            } catch (Exception e) {
+                log.debug("No orphaned processes to clean up");
+            }
+        }
+        
+        if (!TEST_FOLDER.exists() || TEST_FOLDER.listFiles().length == 0) {
+            downloadAndUnzipGovdocs1(GOV_DOCS_FROM_IDX, GOV_DOCS_TO_IDX);
+        }
+        
+        if (USE_LOCAL_SERVER) {
+            startLocalGrpcServer();
+        } else {
+            startDockerGrpcServer();
+        }
+    }
+    
+    private static void startLocalGrpcServer() throws Exception {
+        log.info("Starting local tika-grpc server using Maven");
+        
+        Path currentDir = Path.of("").toAbsolutePath();
+        Path tikaRootDir = currentDir;
+        
+        while (tikaRootDir != null && 
+               !(Files.exists(tikaRootDir.resolve("tika-grpc")) && 
+                 Files.exists(tikaRootDir.resolve("tika-e2e-tests")))) {
+            tikaRootDir = tikaRootDir.getParent();
+        }
+        
+        if (tikaRootDir == null) {
+            throw new IllegalStateException("Cannot find tika root directory. 
" +
+                "Current dir: " + currentDir + ". " +
+                "Please run from within the tika project.");
+        }
+        
+        Path tikaGrpcDir = tikaRootDir.resolve("tika-grpc");
+        if (!Files.exists(tikaGrpcDir)) {
+            throw new IllegalStateException("Cannot find tika-grpc directory 
at: " + tikaGrpcDir);
+        }
+        
+        String configFileName = "tika-config-ignite-local.json";
+        Path configFile = Path.of("src/test/resources/" + 
configFileName).toAbsolutePath();
+        
+        if (!Files.exists(configFile)) {
+            throw new IllegalStateException("Config file not found: " + 
configFile);
+        }
+        
+        log.info("Tika root: {}", tikaRootDir);
+        log.info("Using tika-grpc from: {}", tikaGrpcDir);
+        log.info("Using config file: {}", configFile);
+        
+        // Use mvn exec:exec to run as external process (not exec:java which 
breaks ServiceLoader)
+        String javaHome = System.getProperty("java.home");
+        boolean isWindows = 
System.getProperty("os.name").toLowerCase(Locale.ROOT).contains("win");
+        String javaCmd = javaHome + (isWindows ? "\\bin\\java.exe" : 
"/bin/java");
+        String mvnCmd = tikaRootDir.resolve(isWindows ? "mvnw.cmd" : 
"mvnw").toString();
+        
+        ProcessBuilder pb = new ProcessBuilder(
+            mvnCmd,
+            "exec:exec",
+            "-Dexec.executable=" + javaCmd,
+            "-Dexec.args=" +
+                "--add-opens=java.base/java.lang=ALL-UNNAMED " +
+                "--add-opens=java.base/java.lang.invoke=ALL-UNNAMED " +
+                "--add-opens=java.base/java.lang.reflect=ALL-UNNAMED " +
+                "--add-opens=java.base/java.io=ALL-UNNAMED " +
+                "--add-opens=java.base/java.nio=ALL-UNNAMED " +
+                "--add-opens=java.base/java.math=ALL-UNNAMED " +
+                "--add-opens=java.base/java.util=ALL-UNNAMED " +
+                "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED " +
+                "--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED 
" +
+                "--add-opens=java.base/java.util.concurrent.locks=ALL-UNNAMED 
" +
+                "--add-opens=java.base/java.time=ALL-UNNAMED " +
+                "--add-opens=java.base/jdk.internal.misc=ALL-UNNAMED " +
+                "--add-opens=java.base/jdk.internal.access=ALL-UNNAMED " +
+                "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED " +
+                
"--add-opens=java.management/com.sun.jmx.mbeanserver=ALL-UNNAMED " +
+                
"--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED " +
+                "-Dio.netty.tryReflectionSetAccessible=true " +
+                "-Dignite.work.dir=" + 
tikaGrpcDir.resolve("target/ignite-work") + " " +
+                "-classpath %classpath " +
+                "org.apache.tika.pipes.grpc.TikaGrpcServer " +
+                "-c " + configFile + " " +
+                "-p " + GRPC_PORT
+        );
+        
+        pb.directory(tikaGrpcDir.toFile());
+        pb.redirectErrorStream(true);
+        pb.redirectOutput(ProcessBuilder.Redirect.PIPE);
+        
+        localGrpcProcess = pb.start();
+        
+        final boolean[] igniteStarted = {false};
+        
+        Thread logThread = new Thread(() -> {
+            try (java.io.BufferedReader reader = new java.io.BufferedReader(
+                    new 
java.io.InputStreamReader(localGrpcProcess.getInputStream(), 
java.nio.charset.StandardCharsets.UTF_8))) {
+                String line;
+                while ((line = reader.readLine()) != null) {
+                    log.info("tika-grpc: {}", line);
+                    
+                    if (line.contains("Ignite server started") ||
+                        line.contains("Table") && line.contains("created 
successfully") ||
+                        line.contains("Server started, listening on")) {
+                        synchronized (igniteStarted) {
+                            igniteStarted[0] = true;
+                            igniteStarted.notifyAll();
+                        }
+                    }
+                }
+            } catch (IOException e) {
+                log.error("Error reading server output", e);
+            }
+        });
+        logThread.setDaemon(true);
+        logThread.start();
+        
+        try {
+            org.awaitility.Awaitility.await()
+                .atMost(java.time.Duration.ofSeconds(180))
+                .pollInterval(java.time.Duration.ofSeconds(2))
+                .until(() -> {
+                    boolean igniteReady;
+                    synchronized (igniteStarted) {
+                        igniteReady = igniteStarted[0];
+                    }
+                    
+                    if (!igniteReady) {
+                        log.debug("Waiting for Ignite to start...");
+                        return false;
+                    }
+                    
+                    try {
+                        ManagedChannel testChannel = ManagedChannelBuilder
+                            .forAddress("localhost", GRPC_PORT)
+                            .usePlaintext()
+                            .build();
+                        
+                        try {
+                            io.grpc.health.v1.HealthGrpc.HealthBlockingStub 
healthStub = 
+                                
io.grpc.health.v1.HealthGrpc.newBlockingStub(testChannel)
+                                    .withDeadlineAfter(2, TimeUnit.SECONDS);
+                            
+                            io.grpc.health.v1.HealthCheckResponse response = 
healthStub.check(
+                                
io.grpc.health.v1.HealthCheckRequest.getDefaultInstance());
+                            
+                            boolean serving = response.getStatus() == 
+                                
io.grpc.health.v1.HealthCheckResponse.ServingStatus.SERVING;
+                            
+                            if (serving) {
+                                log.info("gRPC server is healthy and 
serving!");
+                                return true;
+                            } else {
+                                log.debug("gRPC server responding but not 
serving yet: {}", response.getStatus());
+                                return false;
+                            }
+                        } finally {
+                            testChannel.shutdown();
+                            testChannel.awaitTermination(1, TimeUnit.SECONDS);
+                        }
+                    } catch (io.grpc.StatusRuntimeException e) {
+                        if (e.getStatus().getCode() == 
io.grpc.Status.Code.UNIMPLEMENTED) {
+                            // Health check not implemented, just verify 
channel works
+                            log.info("Health check not available, assuming 
server is ready");
+                            return true;
+                        }
+                        log.debug("gRPC server not ready yet: {}", 
e.getMessage());
+                        return false;
+                    } catch (Exception e) {
+                        log.debug("gRPC server not ready yet: {}", 
e.getMessage());
+                        return false;
+                    }
+                });
+            
+            log.info("Both gRPC server and Ignite are ready!");
+        } catch (org.awaitility.core.ConditionTimeoutException e) {
+            if (localGrpcProcess.isAlive()) {
+                localGrpcProcess.destroyForcibly();
+            }
+            throw new RuntimeException("Local gRPC server or Ignite failed to 
start within timeout", e);
+        }
+        
+        log.info("Local tika-grpc server started successfully on port {}", 
GRPC_PORT);
+    }
+    
+    
+    private static void startDockerGrpcServer() {
+        String composeFilePath = 
System.getProperty("tika.docker.compose.ignite.file");
+        if (composeFilePath == null || composeFilePath.isBlank()) {
+            throw new IllegalStateException(
+                    "Docker Compose mode requires system property 
'tika.docker.compose.ignite.file' " +
+                    "pointing to a valid docker-compose-ignite.yml file.");
+        }
+        File composeFile = new File(composeFilePath);
+        if (!composeFile.isFile()) {
+            throw new IllegalStateException("Docker Compose file not found: " 
+ composeFile.getAbsolutePath());
+        }
+        igniteComposeContainer = new DockerComposeContainer<>(composeFile)
+                .withEnv("HOST_GOVDOCS1_DIR", TEST_FOLDER.getAbsolutePath())
+                .withStartupTimeout(Duration.of(MAX_STARTUP_TIMEOUT, 
ChronoUnit.SECONDS))
+                .withExposedService("tika-grpc", 50052,
+                    Wait.forLogMessage(".*Server started.*\\n", 1))
+                .withLogConsumer("tika-grpc", new Slf4jLogConsumer(log));
+        
+        igniteComposeContainer.start();
+    }
+    
+    @AfterAll
+    static void teardownIgnite() {
+        if (USE_LOCAL_SERVER && localGrpcProcess != null) {
+            log.info("Stopping local gRPC server and all child processes");
+            
+            try {
+                long mvnPid = localGrpcProcess.pid();
+                log.info("Maven process PID: {}", mvnPid);
+                localGrpcProcess.destroy();
+                
+                if (!localGrpcProcess.waitFor(10, TimeUnit.SECONDS)) {
+                    log.warn("Process didn't stop gracefully, forcing 
shutdown");
+                    localGrpcProcess.destroyForcibly();
+                    localGrpcProcess.waitFor(5, TimeUnit.SECONDS);
+                }
+                
+                Thread.sleep(2000);
+                
+                try {
+                    killProcessOnPort(GRPC_PORT);
+                    killProcessOnPort(3344);
+                    killProcessOnPort(10800);
+                } catch (Exception e) {
+                    log.debug("Error killing processes on ports (may already 
be stopped): {}", e.getMessage());
+                }
+                
+                log.info("Local gRPC server stopped");
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+                localGrpcProcess.destroyForcibly();
+            }
+        } else if (igniteComposeContainer != null) {
+            igniteComposeContainer.close();
+        }
+    }
+    
+    private static void killProcessOnPort(int port) throws IOException, 
InterruptedException {
+        ProcessBuilder findPb = new ProcessBuilder("lsof", "-ti", ":" + port);
+        findPb.redirectErrorStream(true);
+        Process findProcess = findPb.start();
+        
+        try (java.io.BufferedReader reader = new java.io.BufferedReader(
+                new java.io.InputStreamReader(findProcess.getInputStream(), 
java.nio.charset.StandardCharsets.UTF_8))) {
+            String pidStr = reader.readLine();
+            if (pidStr != null && !pidStr.trim().isEmpty()) {
+                long pid = Long.parseLong(pidStr.trim());
+                long myPid = ProcessHandle.current().pid();
+                
+                // Don't kill ourselves or our parent
+                if (pid == myPid || isParentProcess(pid)) {
+                    log.debug("Skipping kill of PID {} on port {} (test 
process or parent)", pid, port);
+                    return;
+                }
+                
+                log.info("Found process {} listening on port {}, killing it", 
pid, port);
+                
+                ProcessBuilder killPb = new ProcessBuilder("kill", 
String.valueOf(pid));
+                Process killProcess = killPb.start();
+                killProcess.waitFor(2, TimeUnit.SECONDS);
+                
+                Thread.sleep(1000);
+                ProcessBuilder forceKillPb = new ProcessBuilder("kill", "-9", 
String.valueOf(pid));
+                Process forceKillProcess = forceKillPb.start();
+                forceKillProcess.waitFor(2, TimeUnit.SECONDS);
+            }
+        }
+        
+        findProcess.waitFor(2, TimeUnit.SECONDS);
+    }
+    
+    private static boolean isParentProcess(long pid) {
+        try {
+            ProcessHandle current = ProcessHandle.current();
+            while (current.parent().isPresent()) {
+                current = current.parent().get();
+                if (current.pid() == pid) {
+                    return true;
+                }
+            }
+        } catch (Exception e) {
+            log.debug("Error checking parent process", e);
+        }
+        return false;
+    }
+    
+    @Test
+    void testIgniteConfigStore() throws Exception {
+        String fetcherId = "dynamicIgniteFetcher";
+        ManagedChannel channel = getManagedChannelForIgnite();
+        
+        try {
+            TikaGrpc.TikaBlockingStub blockingStub = 
TikaGrpc.newBlockingStub(channel);
+            TikaGrpc.TikaStub tikaStub = TikaGrpc.newStub(channel);
+
+            FileSystemFetcherConfig config = new FileSystemFetcherConfig();
+            String basePath = USE_LOCAL_SERVER ? TEST_FOLDER.getAbsolutePath() 
: "/tika/govdocs1";
+            config.setBasePath(basePath);
+            
+            String configJson = OBJECT_MAPPER.writeValueAsString(config);
+            log.info("Creating fetcher with Ignite ConfigStore (basePath={}): 
{}", basePath, configJson);
+            
+            SaveFetcherReply saveReply = 
blockingStub.saveFetcher(SaveFetcherRequest
+                    .newBuilder()
+                    .setFetcherId(fetcherId)
+                    
.setFetcherClass("org.apache.tika.pipes.fetcher.fs.FileSystemFetcher")
+                    .setFetcherConfigJson(configJson)
+                    .build());
+            
+            log.info("Fetcher saved to Ignite: {}", saveReply.getFetcherId());
+
+            List<FetchAndParseReply> successes = 
Collections.synchronizedList(new ArrayList<>());
+            List<FetchAndParseReply> errors = Collections.synchronizedList(new 
ArrayList<>());
+
+            CountDownLatch countDownLatch = new CountDownLatch(1);
+            StreamObserver<FetchAndParseRequest>
+                    requestStreamObserver = 
tikaStub.fetchAndParseBiDirectionalStreaming(new StreamObserver<>() {
+                @Override
+                public void onNext(FetchAndParseReply fetchAndParseReply) {
+                    log.debug("Reply from fetch-and-parse - key={}, 
status={}", 
+                        fetchAndParseReply.getFetchKey(), 
fetchAndParseReply.getStatus());
+                    if 
("FETCH_AND_PARSE_EXCEPTION".equals(fetchAndParseReply.getStatus())) {
+                        errors.add(fetchAndParseReply);
+                    } else {
+                        successes.add(fetchAndParseReply);
+                    }
+                }
+
+                @Override
+                public void onError(Throwable throwable) {
+                    log.error("Received an error", throwable);
+                    Assertions.fail(throwable);
+                    countDownLatch.countDown();
+                }
+
+                @Override
+                public void onCompleted() {
+                    log.info("Finished streaming fetch and parse replies");
+                    countDownLatch.countDown();
+                }
+            });
+
+            int maxDocs = Integer.parseInt(System.getProperty("corpa.numdocs", 
"-1"));
+            log.info("Document limit: {}", maxDocs == -1 ? "unlimited" : 
maxDocs);
+            
+            try (Stream<Path> paths = Files.walk(TEST_FOLDER.toPath())) {
+                Stream<Path> fileStream = paths.filter(Files::isRegularFile);
+                
+                if (maxDocs > 0) {
+                    fileStream = fileStream.limit(maxDocs);
+                }
+                
+                fileStream.forEach(file -> {
+                    try {
+                        String relPath = 
TEST_FOLDER.toPath().relativize(file).toString();
+                        requestStreamObserver.onNext(FetchAndParseRequest
+                                .newBuilder()
+                                .setFetcherId(fetcherId)
+                                .setFetchKey(relPath)
+                                .build());
+                    } catch (Exception e) {
+                        throw new RuntimeException(e);
+                    }
+                });
+            }
+            log.info("Done submitting files to Ignite-backed fetcher {}", 
fetcherId);
+
+            requestStreamObserver.onCompleted();
+
+            try {
+                if (!countDownLatch.await(3, TimeUnit.MINUTES)) {
+                    log.error("Timed out waiting for parse to complete");
+                    Assertions.fail("Timed out waiting for parsing to 
complete");
+                }
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+                Assertions.fail("Interrupted while waiting for parsing to 
complete");
+            }
+            
+            if (maxDocs == -1) {
+                assertAllFilesFetched(TEST_FOLDER.toPath(), successes, errors);
+            } else {
+                int totalProcessed = successes.size() + errors.size();
+                log.info("Processed {} documents with Ignite ConfigStore 
(limit was {})", 
+                    totalProcessed, maxDocs);
+                Assertions.assertTrue(totalProcessed <= maxDocs, 
+                    "Should not process more than " + maxDocs + " documents");
+                Assertions.assertTrue(totalProcessed > 0, 
+                    "Should have processed at least one document");
+            }
+            
+            log.info("Ignite ConfigStore test completed successfully - {} 
successes, {} errors", 
+                successes.size(), errors.size());
+        } finally {
+            channel.shutdown();
+            try {
+                if (!channel.awaitTermination(5, TimeUnit.SECONDS)) {
+                    channel.shutdownNow();
+                }
+            } catch (InterruptedException e) {
+                channel.shutdownNow();
+                Thread.currentThread().interrupt();
+            }
+        }
+    }
+    
+    private static void downloadAndUnzipGovdocs1(int fromIndex, int toIndex) 
throws IOException {
+        Path targetDir = TEST_FOLDER.toPath();
+        Files.createDirectories(targetDir);
+
+        for (int i = fromIndex; i <= toIndex; i++) {
+            String zipName = String.format(java.util.Locale.ROOT, "%03d.zip", 
i);
+            String url = DIGITAL_CORPORA_ZIP_FILES_URL + "/" + zipName;
+            Path zipPath = targetDir.resolve(zipName);
+            
+            if (Files.exists(zipPath)) {
+                log.info("{} already exists, skipping download", zipName);
+                continue;
+            }
+
+            log.info("Downloading {} from {}...", zipName, url);
+            try (InputStream in = new URL(url).openStream()) {
+                Files.copy(in, zipPath, StandardCopyOption.REPLACE_EXISTING);
+            }
+

Review Comment:
   `downloadAndUnzipGovdocs1()` skips unzipping if the zip already exists. If a 
prior run left only the zip (or extracted content was cleaned), this will leave 
`target/govdocs1` without any documents and the test will fail. Please unzip 
when the zip exists but extracted files are missing, or always run the unzip 
step (idempotently) even when download is skipped.
   ```suggestion
   
               if (Files.exists(zipPath)) {
                   log.info("{} already exists, skipping download", zipName);
               } else {
                   log.info("Downloading {} from {}...", zipName, url);
                   try (InputStream in = new URL(url).openStream()) {
                       Files.copy(in, zipPath, 
StandardCopyOption.REPLACE_EXISTING);
                   }
               }
   ```



##########
tika-e2e-tests/tika-grpc/src/test/java/org/apache/tika/pipes/ignite/IgniteConfigStoreTest.java:
##########
@@ -0,0 +1,591 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.ignite;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.time.Duration;
+import java.time.temporal.ChronoUnit;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Stream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import io.grpc.ManagedChannel;
+import io.grpc.ManagedChannelBuilder;
+import io.grpc.stub.StreamObserver;
+import lombok.extern.slf4j.Slf4j;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
+import org.junit.jupiter.api.condition.DisabledOnOs;
+import org.junit.jupiter.api.condition.OS;
+import org.testcontainers.containers.DockerComposeContainer;
+import org.testcontainers.containers.output.Slf4jLogConsumer;
+import org.testcontainers.containers.wait.strategy.Wait;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import org.apache.tika.FetchAndParseReply;
+import org.apache.tika.FetchAndParseRequest;
+import org.apache.tika.SaveFetcherReply;
+import org.apache.tika.SaveFetcherRequest;
+import org.apache.tika.TikaGrpc;
+import org.apache.tika.pipes.fetcher.fs.FileSystemFetcherConfig;
+
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+@Testcontainers
+@Slf4j
+@Tag("E2ETest")
+@DisabledOnOs(value = OS.WINDOWS, disabledReason = "Maven not on PATH and 
Docker/Testcontainers not supported on Windows CI")
+class IgniteConfigStoreTest {
+    
+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+    private static final int MAX_STARTUP_TIMEOUT = 120;
+    private static final File TEST_FOLDER = new File("target", "govdocs1");
+    private static final int GOV_DOCS_FROM_IDX = 
Integer.parseInt(System.getProperty("govdocs1.fromIndex", "1"));
+    private static final int GOV_DOCS_TO_IDX = 
Integer.parseInt(System.getProperty("govdocs1.toIndex", "1"));
+    private static final String DIGITAL_CORPORA_ZIP_FILES_URL = 
"https://corp.digitalcorpora.org/corpora/files/govdocs1/zipfiles";
+    private static final boolean USE_LOCAL_SERVER = 
Boolean.parseBoolean(System.getProperty("tika.e2e.useLocalServer", "false"));
+    private static final int GRPC_PORT = 
Integer.parseInt(System.getProperty("tika.e2e.grpcPort", "50052"));
+    
+    private static DockerComposeContainer<?> igniteComposeContainer;
+    private static Process localGrpcProcess;
+    
+    @BeforeAll
+    static void setupIgnite() throws Exception {
+        if (USE_LOCAL_SERVER) {
+            try {
+                killProcessOnPort(GRPC_PORT);
+                killProcessOnPort(3344);
+                killProcessOnPort(10800);
+            } catch (Exception e) {
+                log.debug("No orphaned processes to clean up");
+            }
+        }
+        
+        if (!TEST_FOLDER.exists() || TEST_FOLDER.listFiles().length == 0) {
+            downloadAndUnzipGovdocs1(GOV_DOCS_FROM_IDX, GOV_DOCS_TO_IDX);
+        }
+        
+        if (USE_LOCAL_SERVER) {
+            startLocalGrpcServer();
+        } else {
+            startDockerGrpcServer();
+        }
+    }
+    
+    private static void startLocalGrpcServer() throws Exception {
+        log.info("Starting local tika-grpc server using Maven");
+        
+        Path currentDir = Path.of("").toAbsolutePath();
+        Path tikaRootDir = currentDir;
+        
+        while (tikaRootDir != null && 
+               !(Files.exists(tikaRootDir.resolve("tika-grpc")) && 
+                 Files.exists(tikaRootDir.resolve("tika-e2e-tests")))) {
+            tikaRootDir = tikaRootDir.getParent();
+        }
+        
+        if (tikaRootDir == null) {
+            throw new IllegalStateException("Cannot find tika root directory. 
" +
+                "Current dir: " + currentDir + ". " +
+                "Please run from within the tika project.");
+        }
+        
+        Path tikaGrpcDir = tikaRootDir.resolve("tika-grpc");
+        if (!Files.exists(tikaGrpcDir)) {
+            throw new IllegalStateException("Cannot find tika-grpc directory 
at: " + tikaGrpcDir);
+        }
+        
+        String configFileName = "tika-config-ignite-local.json";
+        Path configFile = Path.of("src/test/resources/" + 
configFileName).toAbsolutePath();
+        
+        if (!Files.exists(configFile)) {
+            throw new IllegalStateException("Config file not found: " + 
configFile);
+        }
+        
+        log.info("Tika root: {}", tikaRootDir);
+        log.info("Using tika-grpc from: {}", tikaGrpcDir);
+        log.info("Using config file: {}", configFile);
+        
+        // Use mvn exec:exec to run as external process (not exec:java which 
breaks ServiceLoader)
+        String javaHome = System.getProperty("java.home");
+        boolean isWindows = 
System.getProperty("os.name").toLowerCase(Locale.ROOT).contains("win");
+        String javaCmd = javaHome + (isWindows ? "\\bin\\java.exe" : 
"/bin/java");
+        String mvnCmd = tikaRootDir.resolve(isWindows ? "mvnw.cmd" : 
"mvnw").toString();
+        
+        ProcessBuilder pb = new ProcessBuilder(
+            mvnCmd,
+            "exec:exec",
+            "-Dexec.executable=" + javaCmd,
+            "-Dexec.args=" +
+                "--add-opens=java.base/java.lang=ALL-UNNAMED " +
+                "--add-opens=java.base/java.lang.invoke=ALL-UNNAMED " +
+                "--add-opens=java.base/java.lang.reflect=ALL-UNNAMED " +
+                "--add-opens=java.base/java.io=ALL-UNNAMED " +
+                "--add-opens=java.base/java.nio=ALL-UNNAMED " +
+                "--add-opens=java.base/java.math=ALL-UNNAMED " +
+                "--add-opens=java.base/java.util=ALL-UNNAMED " +
+                "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED " +
+                "--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED 
" +
+                "--add-opens=java.base/java.util.concurrent.locks=ALL-UNNAMED 
" +
+                "--add-opens=java.base/java.time=ALL-UNNAMED " +
+                "--add-opens=java.base/jdk.internal.misc=ALL-UNNAMED " +
+                "--add-opens=java.base/jdk.internal.access=ALL-UNNAMED " +
+                "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED " +
+                
"--add-opens=java.management/com.sun.jmx.mbeanserver=ALL-UNNAMED " +
+                
"--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED " +
+                "-Dio.netty.tryReflectionSetAccessible=true " +
+                "-Dignite.work.dir=" + 
tikaGrpcDir.resolve("target/ignite-work") + " " +
+                "-classpath %classpath " +
+                "org.apache.tika.pipes.grpc.TikaGrpcServer " +
+                "-c " + configFile + " " +
+                "-p " + GRPC_PORT
+        );
+        
+        pb.directory(tikaGrpcDir.toFile());
+        pb.redirectErrorStream(true);
+        pb.redirectOutput(ProcessBuilder.Redirect.PIPE);
+        
+        localGrpcProcess = pb.start();
+        
+        final boolean[] igniteStarted = {false};
+        
+        Thread logThread = new Thread(() -> {
+            try (java.io.BufferedReader reader = new java.io.BufferedReader(
+                    new 
java.io.InputStreamReader(localGrpcProcess.getInputStream(), 
java.nio.charset.StandardCharsets.UTF_8))) {
+                String line;
+                while ((line = reader.readLine()) != null) {
+                    log.info("tika-grpc: {}", line);
+                    
+                    if (line.contains("Ignite server started") ||
+                        line.contains("Table") && line.contains("created 
successfully") ||
+                        line.contains("Server started, listening on")) {
+                        synchronized (igniteStarted) {
+                            igniteStarted[0] = true;
+                            igniteStarted.notifyAll();
+                        }
+                    }
+                }
+            } catch (IOException e) {
+                log.error("Error reading server output", e);
+            }
+        });
+        logThread.setDaemon(true);
+        logThread.start();
+        
+        try {
+            org.awaitility.Awaitility.await()
+                .atMost(java.time.Duration.ofSeconds(180))
+                .pollInterval(java.time.Duration.ofSeconds(2))
+                .until(() -> {
+                    boolean igniteReady;
+                    synchronized (igniteStarted) {
+                        igniteReady = igniteStarted[0];
+                    }
+                    
+                    if (!igniteReady) {
+                        log.debug("Waiting for Ignite to start...");
+                        return false;
+                    }
+                    
+                    try {
+                        ManagedChannel testChannel = ManagedChannelBuilder
+                            .forAddress("localhost", GRPC_PORT)
+                            .usePlaintext()
+                            .build();
+                        
+                        try {
+                            io.grpc.health.v1.HealthGrpc.HealthBlockingStub 
healthStub = 
+                                
io.grpc.health.v1.HealthGrpc.newBlockingStub(testChannel)
+                                    .withDeadlineAfter(2, TimeUnit.SECONDS);
+                            
+                            io.grpc.health.v1.HealthCheckResponse response = 
healthStub.check(
+                                
io.grpc.health.v1.HealthCheckRequest.getDefaultInstance());
+                            
+                            boolean serving = response.getStatus() == 
+                                
io.grpc.health.v1.HealthCheckResponse.ServingStatus.SERVING;
+                            
+                            if (serving) {
+                                log.info("gRPC server is healthy and 
serving!");
+                                return true;
+                            } else {
+                                log.debug("gRPC server responding but not 
serving yet: {}", response.getStatus());
+                                return false;
+                            }
+                        } finally {
+                            testChannel.shutdown();
+                            testChannel.awaitTermination(1, TimeUnit.SECONDS);
+                        }
+                    } catch (io.grpc.StatusRuntimeException e) {
+                        if (e.getStatus().getCode() == 
io.grpc.Status.Code.UNIMPLEMENTED) {
+                            // Health check not implemented, just verify 
channel works
+                            log.info("Health check not available, assuming 
server is ready");
+                            return true;
+                        }
+                        log.debug("gRPC server not ready yet: {}", 
e.getMessage());
+                        return false;
+                    } catch (Exception e) {
+                        log.debug("gRPC server not ready yet: {}", 
e.getMessage());
+                        return false;
+                    }
+                });
+            
+            log.info("Both gRPC server and Ignite are ready!");
+        } catch (org.awaitility.core.ConditionTimeoutException e) {
+            if (localGrpcProcess.isAlive()) {
+                localGrpcProcess.destroyForcibly();
+            }
+            throw new RuntimeException("Local gRPC server or Ignite failed to 
start within timeout", e);
+        }
+        
+        log.info("Local tika-grpc server started successfully on port {}", 
GRPC_PORT);
+    }
+    
+    
+    private static void startDockerGrpcServer() {
+        String composeFilePath = 
System.getProperty("tika.docker.compose.ignite.file");
+        if (composeFilePath == null || composeFilePath.isBlank()) {
+            throw new IllegalStateException(
+                    "Docker Compose mode requires system property 
'tika.docker.compose.ignite.file' " +
+                    "pointing to a valid docker-compose-ignite.yml file.");
+        }
+        File composeFile = new File(composeFilePath);
+        if (!composeFile.isFile()) {
+            throw new IllegalStateException("Docker Compose file not found: " 
+ composeFile.getAbsolutePath());
+        }
+        igniteComposeContainer = new DockerComposeContainer<>(composeFile)
+                .withEnv("HOST_GOVDOCS1_DIR", TEST_FOLDER.getAbsolutePath())
+                .withStartupTimeout(Duration.of(MAX_STARTUP_TIMEOUT, 
ChronoUnit.SECONDS))
+                .withExposedService("tika-grpc", 50052,
+                    Wait.forLogMessage(".*Server started.*\\n", 1))
+                .withLogConsumer("tika-grpc", new Slf4jLogConsumer(log));
+        
+        igniteComposeContainer.start();
+    }
+    
+    @AfterAll
+    static void teardownIgnite() {
+        if (USE_LOCAL_SERVER && localGrpcProcess != null) {
+            log.info("Stopping local gRPC server and all child processes");
+            
+            try {
+                long mvnPid = localGrpcProcess.pid();
+                log.info("Maven process PID: {}", mvnPid);
+                localGrpcProcess.destroy();
+                
+                if (!localGrpcProcess.waitFor(10, TimeUnit.SECONDS)) {
+                    log.warn("Process didn't stop gracefully, forcing 
shutdown");
+                    localGrpcProcess.destroyForcibly();
+                    localGrpcProcess.waitFor(5, TimeUnit.SECONDS);
+                }
+                
+                Thread.sleep(2000);
+                
+                try {
+                    killProcessOnPort(GRPC_PORT);
+                    killProcessOnPort(3344);
+                    killProcessOnPort(10800);
+                } catch (Exception e) {
+                    log.debug("Error killing processes on ports (may already 
be stopped): {}", e.getMessage());
+                }
+                
+                log.info("Local gRPC server stopped");
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+                localGrpcProcess.destroyForcibly();
+            }
+        } else if (igniteComposeContainer != null) {
+            igniteComposeContainer.close();
+        }
+    }
+    
+    private static void killProcessOnPort(int port) throws IOException, 
InterruptedException {
+        ProcessBuilder findPb = new ProcessBuilder("lsof", "-ti", ":" + port);
+        findPb.redirectErrorStream(true);
+        Process findProcess = findPb.start();

Review Comment:
   `killProcessOnPort()` shells out to `lsof` and `kill`, which are not 
guaranteed to exist on all Linux/macOS CI images or developer machines. This 
makes local-server mode brittle and can fail to clean up ports when needed. 
Prefer avoiding port-killing entirely by choosing an ephemeral free port for 
gRPC/Ignite (or retrying with a different port) and ensuring the spawned 
process is terminated via `ProcessHandle` rather than external commands.



##########
tika-e2e-tests/tika-grpc/src/test/java/org/apache/tika/pipes/ExternalTestBase.java:
##########
@@ -0,0 +1,346 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.time.Duration;
+import java.time.temporal.ChronoUnit;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import io.grpc.ManagedChannel;
+import io.grpc.ManagedChannelBuilder;
+import lombok.extern.slf4j.Slf4j;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.TestInstance;
+import org.testcontainers.containers.DockerComposeContainer;
+import org.testcontainers.containers.output.Slf4jLogConsumer;
+import org.testcontainers.containers.wait.strategy.Wait;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import org.apache.tika.FetchAndParseReply;
+
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+@Testcontainers
+@Slf4j
+@Tag("E2ETest")
+public abstract class ExternalTestBase {
+    public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+    public static final int MAX_STARTUP_TIMEOUT = 120;
+    public static final String GOV_DOCS_FOLDER = "/tika/govdocs1";
+    public static final File TEST_FOLDER = new File("target", "govdocs1");
+    public static final int GOV_DOCS_FROM_IDX = 
Integer.parseInt(System.getProperty("govdocs1.fromIndex", "1"));
+    public static final int GOV_DOCS_TO_IDX = 
Integer.parseInt(System.getProperty("govdocs1.toIndex", "1"));
+    public static final String DIGITAL_CORPORA_ZIP_FILES_URL = 
"https://corp.digitalcorpora.org/corpora/files/govdocs1/zipfiles";
+    private static final boolean USE_LOCAL_SERVER = 
Boolean.parseBoolean(System.getProperty("tika.e2e.useLocalServer", "false"));
+    private static final int GRPC_PORT = 
Integer.parseInt(System.getProperty("tika.e2e.grpcPort", "50052"));
+    
+    public static DockerComposeContainer<?> composeContainer;
+    private static Process localGrpcProcess;
+
+    @BeforeAll
+    static void setup() throws Exception {
+        loadGovdocs1();
+        
+        if (USE_LOCAL_SERVER) {
+            startLocalGrpcServer();
+        } else {
+            startDockerGrpcServer();
+        }
+    }
+    
+    private static void startLocalGrpcServer() throws Exception {
+        log.info("Starting local tika-grpc server using Maven exec");
+        
+        Path tikaGrpcDir = findTikaGrpcDirectory();
+        Path configFile = 
Path.of("src/test/resources/tika-config.json").toAbsolutePath();
+        
+        if (!Files.exists(configFile)) {
+            throw new IllegalStateException("Config file not found: " + 
configFile);
+        }
+        
+        log.info("Using tika-grpc from: {}", tikaGrpcDir);
+        log.info("Using config file: {}", configFile);
+        
+        String javaHome = System.getProperty("java.home");
+        boolean isWindows = 
System.getProperty("os.name").toLowerCase(Locale.ROOT).contains("win");
+        String javaCmd = javaHome + (isWindows ? "\\bin\\java.exe" : 
"/bin/java");
+        String mvnCmd = tikaGrpcDir.getParent().resolve(isWindows ? "mvnw.cmd" 
: "mvnw").toString();
+        
+        ProcessBuilder pb = new ProcessBuilder(
+            mvnCmd,
+            "exec:exec",
+            "-Dexec.executable=" + javaCmd,
+            "-Dexec.args=" +
+                "--add-opens=java.base/java.lang=ALL-UNNAMED " +
+                "--add-opens=java.base/java.nio=ALL-UNNAMED " +
+                "--add-opens=java.base/java.util=ALL-UNNAMED " +
+                "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED " +
+                "-classpath %classpath " +
+                "org.apache.tika.pipes.grpc.TikaGrpcServer " +
+                "-c " + configFile + " " +
+                "-p " + GRPC_PORT
+        );
+        
+        pb.directory(tikaGrpcDir.toFile());
+        pb.redirectErrorStream(true);
+        pb.redirectOutput(ProcessBuilder.Redirect.PIPE);
+        
+        localGrpcProcess = pb.start();
+        
+        Thread logThread = new Thread(() -> {
+            try (BufferedReader reader = new BufferedReader(
+                    new InputStreamReader(localGrpcProcess.getInputStream(), 
StandardCharsets.UTF_8))) {
+                String line;
+                while ((line = reader.readLine()) != null) {
+                    log.info("tika-grpc: {}", line);
+                }
+            } catch (IOException e) {
+                log.error("Error reading server output", e);
+            }
+        });
+        logThread.setDaemon(true);
+        logThread.start();
+        
+        waitForServerReady();
+        
+        log.info("Local tika-grpc server started successfully on port {}", 
GRPC_PORT);
+    }
+    
+    private static Path findTikaGrpcDirectory() {
+        Path currentDir = Path.of("").toAbsolutePath();
+        Path tikaRootDir = currentDir;
+        
+        while (tikaRootDir != null && 
+               !(Files.exists(tikaRootDir.resolve("tika-grpc")) && 
+                 Files.exists(tikaRootDir.resolve("tika-e2e-tests")))) {
+            tikaRootDir = tikaRootDir.getParent();
+        }
+        
+        if (tikaRootDir == null) {
+            throw new IllegalStateException("Cannot find tika root directory. 
" +
+                "Current dir: " + currentDir);
+        }
+        
+        return tikaRootDir.resolve("tika-grpc");
+    }
+    
+    private static void waitForServerReady() throws Exception {
+        int maxAttempts = 60;
+        for (int i = 0; i < maxAttempts; i++) {
+            try {
+                ManagedChannel testChannel = ManagedChannelBuilder
+                    .forAddress("localhost", GRPC_PORT)
+                    .usePlaintext()
+                    .build();
+                
+                try {
+                    testChannel.getState(true);
+                    TimeUnit.MILLISECONDS.sleep(100);
+                    if 
(testChannel.getState(false).toString().contains("READY")) {
+                        log.info("gRPC server is ready!");
+                        return;
+                    }

Review Comment:
   `waitForServerReady()` only checks that *something* on `GRPC_PORT` reaches 
READY, but it doesn't verify the spawned `localGrpcProcess` is still alive or 
that the process you started is the one answering on the port. If the port is 
already in use, the test can falsely succeed and then talk to the wrong 
service. Consider (1) failing fast if the port is already bound before 
starting, and/or (2) checking `localGrpcProcess.isAlive()` during polling and 
performing a gRPC health check / known Tika RPC call to validate the endpoint.



##########
tika-e2e-tests/pom.xml:
##########
@@ -0,0 +1,174 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
https://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>org.apache.tika</groupId>
+        <artifactId>tika-parent</artifactId>
+        <version>${revision}</version>
+        <relativePath>../tika-parent/pom.xml</relativePath>
+    </parent>
+
+    <artifactId>tika-e2e-tests</artifactId>
+    <packaging>pom</packaging>
+    <name>Apache Tika End-to-End Tests</name>
+    <description>End-to-end integration tests for Apache Tika 
components</description>
+
+    <properties>
+        <maven.compiler.source>17</maven.compiler.source>
+        <maven.compiler.target>17</maven.compiler.target>
+        <maven.compiler.release>17</maven.compiler.release>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+
+        <!-- Tika version -->
+        <tika.version>${revision}</tika.version>
+
+        <!-- Test dependencies -->
+        <junit.version>5.11.4</junit.version>
+        <testcontainers.version>2.0.3</testcontainers.version>
+
+        <!-- Logging -->
+        <slf4j.version>2.0.16</slf4j.version>
+        <log4j.version>2.25.3</log4j.version>
+

Review Comment:
   This module overrides dependency versions that are already centrally managed 
by `tika-parent` (e.g., JUnit, SLF4J, Log4j2). That risks diverging from the 
rest of the build (and can reintroduce dependency convergence/security issues). 
Prefer inheriting the parent-managed versions (e.g., `${junit6.version}`, 
`${slf4j.version}`, `${log4j2.version}`) and only add extra alignment where 
strictly needed for Ignite.
   ```suggestion
           <testcontainers.version>2.0.3</testcontainers.version>
   ```



##########
tika-e2e-tests/tika-grpc/README.md:
##########
@@ -0,0 +1,144 @@
+# Tika gRPC End-to-End Tests
+
+End-to-end integration tests for Apache Tika gRPC Server using Testcontainers.
+
+## Overview
+
+This test module validates the functionality of Apache Tika gRPC Server by:
+- Starting a tika-grpc Docker container using Docker Compose
+- Loading test documents from the GovDocs1 corpus
+- Testing various fetchers (filesystem, Ignite config store, etc.)
+- Verifying parsing results and metadata extraction
+
+## Prerequisites
+
+- Java 17 or later
+- Maven 3.6 or later
+- Docker and Docker Compose
+- Internet connection (for downloading test documents)

Review Comment:
   Prerequisites list Docker/Docker Compose as required, but local-server mode 
is the default and doesn't need Docker. Please clarify Docker is only needed 
when running with `-Dtika.e2e.useLocalServer=false` (and when providing the 
compose-file system property).



##########
tika-e2e-tests/tika-grpc/src/test/java/org/apache/tika/pipes/ExternalTestBase.java:
##########
@@ -0,0 +1,346 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.time.Duration;
+import java.time.temporal.ChronoUnit;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import io.grpc.ManagedChannel;
+import io.grpc.ManagedChannelBuilder;
+import lombok.extern.slf4j.Slf4j;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.TestInstance;
+import org.testcontainers.containers.DockerComposeContainer;
+import org.testcontainers.containers.output.Slf4jLogConsumer;
+import org.testcontainers.containers.wait.strategy.Wait;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import org.apache.tika.FetchAndParseReply;
+
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+@Testcontainers
+@Slf4j
+@Tag("E2ETest")
+public abstract class ExternalTestBase {
+    public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+    public static final int MAX_STARTUP_TIMEOUT = 120;
+    public static final String GOV_DOCS_FOLDER = "/tika/govdocs1";
+    public static final File TEST_FOLDER = new File("target", "govdocs1");
+    public static final int GOV_DOCS_FROM_IDX = 
Integer.parseInt(System.getProperty("govdocs1.fromIndex", "1"));
+    public static final int GOV_DOCS_TO_IDX = 
Integer.parseInt(System.getProperty("govdocs1.toIndex", "1"));
+    public static final String DIGITAL_CORPORA_ZIP_FILES_URL = 
"https://corp.digitalcorpora.org/corpora/files/govdocs1/zipfiles";
+    private static final boolean USE_LOCAL_SERVER = 
Boolean.parseBoolean(System.getProperty("tika.e2e.useLocalServer", "false"));
+    private static final int GRPC_PORT = 
Integer.parseInt(System.getProperty("tika.e2e.grpcPort", "50052"));
+    
+    public static DockerComposeContainer<?> composeContainer;
+    private static Process localGrpcProcess;
+
+    @BeforeAll
+    static void setup() throws Exception {
+        loadGovdocs1();
+        
+        if (USE_LOCAL_SERVER) {
+            startLocalGrpcServer();
+        } else {
+            startDockerGrpcServer();
+        }
+    }
+    
+    private static void startLocalGrpcServer() throws Exception {
+        log.info("Starting local tika-grpc server using Maven exec");
+        
+        Path tikaGrpcDir = findTikaGrpcDirectory();
+        Path configFile = 
Path.of("src/test/resources/tika-config.json").toAbsolutePath();
+        
+        if (!Files.exists(configFile)) {
+            throw new IllegalStateException("Config file not found: " + 
configFile);
+        }
+        
+        log.info("Using tika-grpc from: {}", tikaGrpcDir);
+        log.info("Using config file: {}", configFile);
+        
+        String javaHome = System.getProperty("java.home");
+        boolean isWindows = 
System.getProperty("os.name").toLowerCase(Locale.ROOT).contains("win");
+        String javaCmd = javaHome + (isWindows ? "\\bin\\java.exe" : 
"/bin/java");
+        String mvnCmd = tikaGrpcDir.getParent().resolve(isWindows ? "mvnw.cmd" 
: "mvnw").toString();
+        
+        ProcessBuilder pb = new ProcessBuilder(
+            mvnCmd,
+            "exec:exec",
+            "-Dexec.executable=" + javaCmd,
+            "-Dexec.args=" +
+                "--add-opens=java.base/java.lang=ALL-UNNAMED " +
+                "--add-opens=java.base/java.nio=ALL-UNNAMED " +
+                "--add-opens=java.base/java.util=ALL-UNNAMED " +
+                "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED " +
+                "-classpath %classpath " +
+                "org.apache.tika.pipes.grpc.TikaGrpcServer " +
+                "-c " + configFile + " " +
+                "-p " + GRPC_PORT

Review Comment:
   `-Dexec.args` is assembled as a single space-delimited string, but 
`configFile` is an absolute path that may contain spaces. In that case the 
exec-maven-plugin will split the path into multiple JVM args and the server 
will fail to start. Please quote/escape file paths (and any other 
potentially-spaced values) inside `exec.args`, or switch to an approach that 
passes arguments as a list rather than one string.
   ```suggestion
   
           String execArgs = String.join(" ",
                   "--add-opens=java.base/java.lang=ALL-UNNAMED",
                   "--add-opens=java.base/java.nio=ALL-UNNAMED",
                   "--add-opens=java.base/java.util=ALL-UNNAMED",
                   "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED",
                   "-classpath",
                   "%classpath",
                   "org.apache.tika.pipes.grpc.TikaGrpcServer",
                   "-c",
                   "\"" + configFile + "\"",
                   "-p",
                   String.valueOf(GRPC_PORT)
           );
   
           ProcessBuilder pb = new ProcessBuilder(
               mvnCmd,
               "exec:exec",
               "-Dexec.executable=" + javaCmd,
               "-Dexec.args=" + execArgs
   ```



##########
tika-e2e-tests/tika-grpc/src/test/java/org/apache/tika/pipes/ignite/IgniteConfigStoreTest.java:
##########
@@ -0,0 +1,591 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.ignite;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.time.Duration;
+import java.time.temporal.ChronoUnit;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Stream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import io.grpc.ManagedChannel;
+import io.grpc.ManagedChannelBuilder;
+import io.grpc.stub.StreamObserver;
+import lombok.extern.slf4j.Slf4j;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
+import org.junit.jupiter.api.condition.DisabledOnOs;
+import org.junit.jupiter.api.condition.OS;
+import org.testcontainers.containers.DockerComposeContainer;
+import org.testcontainers.containers.output.Slf4jLogConsumer;
+import org.testcontainers.containers.wait.strategy.Wait;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import org.apache.tika.FetchAndParseReply;
+import org.apache.tika.FetchAndParseRequest;
+import org.apache.tika.SaveFetcherReply;
+import org.apache.tika.SaveFetcherRequest;
+import org.apache.tika.TikaGrpc;
+import org.apache.tika.pipes.fetcher.fs.FileSystemFetcherConfig;
+
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+@Testcontainers
+@Slf4j
+@Tag("E2ETest")
+@DisabledOnOs(value = OS.WINDOWS, disabledReason = "Maven not on PATH and Docker/Testcontainers not supported on Windows CI")

Review Comment:
   The `@DisabledOnOs` reason mentions "Maven not on PATH", but this test now 
uses `mvnw`/`mvnw.cmd` (and the real blockers appear to be Unix-only 
`lsof/kill` usage and/or CI environment constraints). Please update the 
disabledReason to reflect the actual reason for disabling on Windows so it 
remains accurate over time.
   ```suggestion
   @DisabledOnOs(value = OS.WINDOWS,
           disabledReason = "Relies on Unix-only lsof/kill usage and Docker/Testcontainers are not supported in Windows CI")
   ```



##########
tika-e2e-tests/tika-grpc/README.md:
##########
@@ -0,0 +1,144 @@
+# Tika gRPC End-to-End Tests
+
+End-to-end integration tests for Apache Tika gRPC Server using Testcontainers.
+
+## Overview
+
+This test module validates the functionality of Apache Tika gRPC Server by:
+- Starting a tika-grpc Docker container using Docker Compose
+- Loading test documents from the GovDocs1 corpus
+- Testing various fetchers (filesystem, Ignite config store, etc.)
+- Verifying parsing results and metadata extraction

Review Comment:
   The overview implies Docker Compose is always used ("Starting a tika-grpc 
Docker container...") but the module defaults to local-server mode and Docker 
mode is optional/explicit. Please adjust the overview wording to describe both 
modes and avoid implying Docker is required for the default run.



##########
tika-e2e-tests/pom.xml:
##########
@@ -0,0 +1,174 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>org.apache.tika</groupId>
+        <artifactId>tika-parent</artifactId>
+        <version>${revision}</version>
+        <relativePath>../tika-parent/pom.xml</relativePath>
+    </parent>
+
+    <artifactId>tika-e2e-tests</artifactId>
+    <packaging>pom</packaging>
+    <name>Apache Tika End-to-End Tests</name>
+    <description>End-to-end integration tests for Apache Tika 
components</description>
+
+    <properties>
+        <maven.compiler.source>17</maven.compiler.source>
+        <maven.compiler.target>17</maven.compiler.target>
+        <maven.compiler.release>17</maven.compiler.release>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+
+        <!-- Tika version -->
+        <tika.version>${revision}</tika.version>
+
+        <!-- Test dependencies -->
+        <junit.version>5.11.4</junit.version>
+        <testcontainers.version>2.0.3</testcontainers.version>
+
+        <!-- Logging -->
+        <slf4j.version>2.0.16</slf4j.version>
+        <log4j.version>2.25.3</log4j.version>
+
+        <!-- Other -->
+        <lombok.version>1.18.32</lombok.version>
+        <jackson.version>2.18.2</jackson.version>

Review Comment:
   `<jackson.version>` is pinned to 2.18.2 here, but the rest of the project 
manages Jackson via `tika-parent` (currently `${jackson.version}` is newer). 
Using an older Jackson just for this module can create confusing classpath 
differences and may miss important fixes. Prefer using the parent Jackson 
version unless there is a confirmed Ignite-specific incompatibility (and 
document it if so).
   ```suggestion
   
   ```



##########
tika-e2e-tests/README.md:
##########
@@ -0,0 +1,59 @@
+# Apache Tika End-to-End Tests
+
+End-to-end integration tests for Apache Tika components.
+
+## Overview
+
+This module contains standalone end-to-end (E2E) tests for various Apache Tika 
distribution formats and deployment modes. Unlike unit and integration tests in 
the main Tika build, these E2E tests validate complete deployment scenarios 
using Docker containers and real-world test data.
+
+**Note:** This module is included in the main Tika build under the `e2e` Maven 
profile (`-Pe2e`). Run `mvn test -Pe2e` from the repo root to execute these 
tests.
+
+## Test Modules
+
+- **tika-grpc** - E2E tests for tika-grpc server
+
+## Prerequisites
+
+- Java 17 or later
+- Maven 3.6 or later
+- Docker and Docker Compose
+- Internet connection (for downloading test documents)
+

Review Comment:
   This README lists Docker/Docker Compose as prerequisites, but the module is 
configured to run in local-server mode by default 
(`tika.e2e.useLocalServer=true`) where Docker isn't required. Please clarify 
that Docker is only needed when running in Docker Compose mode (and point to 
the system property needed to enable it).



##########
.github/workflows/main-jdk17-build.yml:
##########
@@ -44,4 +44,4 @@ jobs:
           java-version: ${{ matrix.java }}
           cache: 'maven'
       - name: Build with Maven

Review Comment:
   This workflow now runs with `-Pe2e`, which will execute the new E2E tests on 
every PR/push. Those tests currently download and unzip GovDocs1 from the 
public internet in `@BeforeAll`, which can add significant time and flakiness 
to CI. Consider running `-Pe2e` in a separate job (or scheduled/nightly), or 
switch the E2E corpus to a small committed fixture / cached artifact so the 
main build isn't gated on a large external download.
   ```suggestion
         - name: Build with Maven
           run: mvn clean apache-rat:check test install javadoc:aggregate -Pci -B "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
   
     e2e-tests:
       runs-on: ubuntu-latest
       timeout-minutes: 120
       needs: build
       if: github.event_name == 'push'
       strategy:
         matrix:
           java: [ '17' ]
   
       steps:
         - uses: actions/checkout@v4
         - name: Set up JDK ${{ matrix.java }} for E2E tests
           uses: actions/setup-java@v4
           with:
             distribution: 'temurin'
             java-version: ${{ matrix.java }}
             cache: 'maven'
         - name: Run E2E tests with Maven
   ```



##########
tika-e2e-tests/tika-grpc/src/test/java/org/apache/tika/pipes/ExternalTestBase.java:
##########
@@ -0,0 +1,346 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.time.Duration;
+import java.time.temporal.ChronoUnit;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import io.grpc.ManagedChannel;
+import io.grpc.ManagedChannelBuilder;
+import lombok.extern.slf4j.Slf4j;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.TestInstance;
+import org.testcontainers.containers.DockerComposeContainer;
+import org.testcontainers.containers.output.Slf4jLogConsumer;
+import org.testcontainers.containers.wait.strategy.Wait;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import org.apache.tika.FetchAndParseReply;
+
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+@Testcontainers
+@Slf4j
+@Tag("E2ETest")
+public abstract class ExternalTestBase {
+    public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+    public static final int MAX_STARTUP_TIMEOUT = 120;
+    public static final String GOV_DOCS_FOLDER = "/tika/govdocs1";
+    public static final File TEST_FOLDER = new File("target", "govdocs1");
+    public static final int GOV_DOCS_FROM_IDX = 
Integer.parseInt(System.getProperty("govdocs1.fromIndex", "1"));
+    public static final int GOV_DOCS_TO_IDX = 
Integer.parseInt(System.getProperty("govdocs1.toIndex", "1"));
+    public static final String DIGITAL_CORPORA_ZIP_FILES_URL = 
"https://corp.digitalcorpora.org/corpora/files/govdocs1/zipfiles";
+    private static final boolean USE_LOCAL_SERVER = 
Boolean.parseBoolean(System.getProperty("tika.e2e.useLocalServer", "false"));

Review Comment:
   `tika.e2e.useLocalServer` is documented and configured in the module POM as 
defaulting to `true`, but this code defaults it to `false` when the property 
isn't set (e.g., IDE runs). To keep behavior consistent across Maven/CI/IDE, 
consider defaulting this system property to `true` here as well (or 
centralizing the default in one place).
   ```suggestion
       private static final boolean USE_LOCAL_SERVER = 
Boolean.parseBoolean(System.getProperty("tika.e2e.useLocalServer", "true"));
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to