This is an automated email from the ASF dual-hosted git repository.

tballison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 0cbdb26e24 TIKA-4740 -- fix flaky windows test
0cbdb26e24 is described below

commit 0cbdb26e2483de21a0dc4d6f5b559a024d5f4d55
Author: tallison <[email protected]>
AuthorDate: Tue May 26 20:40:47 2026 -0400

    TIKA-4740 -- fix flaky windows test
---
 docs/modules/ROOT/nav.adoc                         |   1 +
 .../tika/pipes/core/PerClientServerManager.java    |  12 +-
 .../apache/tika/pipes/core/ServerProcessIO.java    | 160 +++++++++++++++++++++
 .../tika/pipes/core/SharedServerManager.java       |  12 +-
 4 files changed, 182 insertions(+), 3 deletions(-)

diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc
index 5e29f171d1..2fa628fef4 100644
--- a/docs/modules/ROOT/nav.adoc
+++ b/docs/modules/ROOT/nav.adoc
@@ -31,6 +31,7 @@
 ** xref:pipes/unpack-config.adoc[Extracting Embedded Bytes]
 ** xref:pipes/timeouts.adoc[Timeouts]
 ** xref:pipes/cpu-sizing.adoc[Forked-JVM CPU Sizing]
+** xref:pipes/troubleshooting.adoc[Troubleshooting]
 ** xref:pipes/plugins/index.adoc[Plugins]
 *** xref:pipes/plugins/filesystem.adoc[File System]
 *** xref:pipes/plugins/s3.adoc[Amazon S3]
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PerClientServerManager.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PerClientServerManager.java
index 2f085ed198..3b53cecc41 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PerClientServerManager.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PerClientServerManager.java
@@ -231,6 +231,7 @@ public class PerClientServerManager implements ServerManager {
                     int exitValue = process.exitValue();
                     LOG.error("clientId={}: Process exited with code {} before connecting to socket",
                             clientId, exitValue);
+                    ServerProcessIO.surfaceCrashDiagnostics(LOG, "clientId=" + clientId, tmpDir);
                     // Always treat pre-connect death as retryable.
                     // The only non-retryable paths are:
                     // 1. pb.start() fails (can't launch process) - handled in startServer()
@@ -244,6 +245,7 @@ public class PerClientServerManager implements ServerManager {
                 long elapsed = System.currentTimeMillis() - startTime;
                 if (elapsed > SOCKET_CONNECT_TIMEOUT_MS) {
                     LOG.error("clientId={}: Timed out waiting for server to connect after {}ms", clientId, elapsed);
+                    ServerProcessIO.surfaceCrashDiagnostics(LOG, "clientId=" + clientId, tmpDir);
                     throw new ServerInitializationException(
                             "Server did not connect within " + SOCKET_CONNECT_TIMEOUT_MS + "ms");
                 }
@@ -268,7 +270,15 @@ public class PerClientServerManager implements ServerManager {
 
         tmpDir = Files.createTempDirectory("pipes-server-" + clientId + "-");
         ProcessBuilder pb = new ProcessBuilder(getCommandline());
-        pb.inheritIO();
+        // Run the child in tmpDir so any hs_err_pid<N>.log JVM crash log lands
+        // where surfaceCrashDiagnostics() looks for it. Redirect stdio to per-
+        // server files instead of inheriting the parent JVM's handles -- on
+        // Windows inheritIO() duplicates surefire's stderr handle into the
+        // child, blocking the controller's pipe reader past parent exit and
+        // hanging CI.
+        pb.directory(tmpDir.toFile());
+        pb.redirectOutput(ServerProcessIO.stdoutLog(tmpDir));
+        pb.redirectError(ServerProcessIO.stderrLog(tmpDir));
 
         try {
             process = pb.start();
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/ServerProcessIO.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/ServerProcessIO.java
new file mode 100644
index 0000000000..34054b8642
--- /dev/null
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/ServerProcessIO.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.core;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.StandardCopyOption;
+import java.util.stream.Stream;
+
+import org.slf4j.Logger;
+
+/**
+ * Helpers for routing child pipes-server JVM stdout/stderr to per-server log
+ * files in the manager's temp dir, and for surfacing those files (and any
+ * native JVM crash logs) via the parent's SLF4J logger when the child exits
+ * abnormally.
+ * <p>
+ * Background: previously the managers used {@code pb.inheritIO()} /
+ * {@code Redirect.INHERIT}, which duplicated the parent JVM's stdio handles
+ * into the child. On Windows that leaks the parent JVM's stderr handle past
+ * the parent JVM's own lifetime -- a surefire pipe reader thread on the
+ * controller side then blocks forever waiting for EOF, hanging CI.
+ */
+final class ServerProcessIO {
+
+    /** System property opt-in: when set, child log files and any hs_err
+     *  crash logs are copied here before tmpDir cleanup. */
+    static final String LOG_DIR_PROPERTY = "tika.pipes.server.logDir";
+
+    static final String STDOUT_LOG = "server-stdout.log";
+    static final String STDERR_LOG = "server-stderr.log";
+
+    private static final int TAIL_BYTES = 64 * 1024;
+
+    private ServerProcessIO() {
+    }
+
+    static File stdoutLog(Path tmpDir) {
+        return tmpDir.resolve(STDOUT_LOG).toFile();
+    }
+
+    static File stderrLog(Path tmpDir) {
+        return tmpDir.resolve(STDERR_LOG).toFile();
+    }
+
+    /**
+     * Emits the child's stderr tail and any {@code hs_err_pid<N>.log} JVM
+     * crash logs via {@code log.error} so they show up in the parent's log
+     * output. Call this on every abnormal-exit path before {@code tmpDir}
+     * gets deleted, otherwise the diagnostics disappear with the temp dir.
+     * <p>
+     * If {@code tika.pipes.server.logDir} is set, the same files are also
+     * copied to that directory for post-mortem inspection.
+     */
+    static void surfaceCrashDiagnostics(Logger log, String contextLabel, Path tmpDir) {
+        if (tmpDir == null || !Files.isDirectory(tmpDir)) {
+            return;
+        }
+        Path stderr = tmpDir.resolve(STDERR_LOG);
+        if (Files.isRegularFile(stderr)) {
+            String tail = readTail(stderr);
+            if (!tail.isEmpty()) {
+                log.error("{}: child stderr tail:\n{}", contextLabel, tail);
+            }
+        }
+        try (Stream<Path> entries = Files.list(tmpDir)) {
+            entries.filter(ServerProcessIO::isJvmCrashLog).forEach(p -> {
+                try {
+                    log.error("{}: JVM crash log {}:\n{}", contextLabel,
+                            p.getFileName(),
+                            Files.readString(p, StandardCharsets.UTF_8));
+                } catch (IOException e) {
+                    log.warn("{}: failed to read JVM crash log {}: {}",
+                            contextLabel, p.getFileName(), e.toString());
+                }
+            });
+        } catch (IOException e) {
+            log.warn("{}: failed to list tmpDir for hs_err logs: {}",
+                    contextLabel, e.toString());
+        }
+
+        String persistDir = System.getProperty(LOG_DIR_PROPERTY);
+        if (persistDir != null && !persistDir.isBlank()) {
+            persistCrashFiles(log, contextLabel, tmpDir, Paths.get(persistDir));
+        }
+    }
+
+    private static boolean isJvmCrashLog(Path p) {
+        String name = p.getFileName().toString();
+        return name.startsWith("hs_err_pid") && name.endsWith(".log");
+    }
+
+    private static void persistCrashFiles(Logger log, String contextLabel,
+                                          Path tmpDir, Path dest) {
+        try {
+            Files.createDirectories(dest);
+        } catch (IOException e) {
+            log.warn("{}: failed to create persist dir {}: {}",
+                    contextLabel, dest, e.toString());
+            return;
+        }
+        String stamp = Long.toString(System.currentTimeMillis());
+        try (Stream<Path> entries = Files.list(tmpDir)) {
+            entries.filter(p -> {
+                String name = p.getFileName().toString();
+                return name.equals(STDOUT_LOG) || name.equals(STDERR_LOG)
+                        || isJvmCrashLog(p);
+            }).forEach(p -> {
+                Path target = dest.resolve(stamp + "-" + p.getFileName());
+                try {
+                    Files.copy(p, target, StandardCopyOption.REPLACE_EXISTING);
+                    log.info("{}: persisted {} to {}", contextLabel,
+                            p.getFileName(), target);
+                } catch (IOException e) {
+                    log.warn("{}: failed to copy {} to {}: {}", contextLabel,
+                            p.getFileName(), target, e.toString());
+                }
+            });
+        } catch (IOException e) {
+            log.warn("{}: failed to enumerate tmpDir for persistence: {}",
+                    contextLabel, e.toString());
+        }
+    }
+
+    private static String readTail(Path file) {
+        try (RandomAccessFile raf = new RandomAccessFile(file.toFile(), "r")) {
+            long len = raf.length();
+            long start = Math.max(0, len - TAIL_BYTES);
+            raf.seek(start);
+            byte[] buf = new byte[(int) (len - start)];
+            raf.readFully(buf);
+            String s = new String(buf, StandardCharsets.UTF_8);
+            if (start > 0) {
+                s = "...[truncated, showing last " + TAIL_BYTES + " bytes]...\n" + s;
+            }
+            return s;
+        } catch (IOException e) {
+            return "";
+        }
+    }
+}
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/SharedServerManager.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/SharedServerManager.java
index c5b2126ada..3778b082cf 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/SharedServerManager.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/SharedServerManager.java
@@ -283,9 +283,15 @@ public class SharedServerManager implements ServerManager {
         // eliminating the TOCTOU race between probing a free port and binding it.
         pb.environment().put("TIKA_PIPES_PORT", "0");
         pb.environment().put("TIKA_PIPES_AUTH_TOKEN", HexFormat.of().formatHex(token));
-        // Redirect stderr to inherit, capture stdout to read the READY signal
+        // Run the child in tmpDir so any hs_err_pid<N>.log JVM crash log
+        // lands where surfaceCrashDiagnostics() looks for it. Keep stdout on
+        // a parent-owned pipe so we can read the READY:port signal. Redirect
+        // stderr to a file rather than INHERIT -- on Windows, inheriting
+        // stderr duplicates surefire's stderr handle into the child, blocking
+        // the controller's pipe reader past parent exit and hanging CI.
+        pb.directory(tmpDir.toFile());
         pb.redirectErrorStream(false);
-        pb.redirectError(ProcessBuilder.Redirect.INHERIT);
+        pb.redirectError(ServerProcessIO.stderrLog(tmpDir));
 
         try {
             process = pb.start();
@@ -316,6 +322,7 @@ public class SharedServerManager implements ServerManager {
                 if (!process.isAlive()) {
                     int exitValue = process.exitValue();
                     LOG.error("Shared server process exited with code {} before becoming ready", exitValue);
+                    ServerProcessIO.surfaceCrashDiagnostics(LOG, "shared-server", tmpDir);
                     throw new ServerInitializationException(
                             "Shared server failed to start (exit code " + exitValue + "). Check JVM arguments and classpath.");
                 }
@@ -324,6 +331,7 @@ public class SharedServerManager implements ServerManager {
                 long elapsed = System.currentTimeMillis() - startTime;
                 if (elapsed > STARTUP_TIMEOUT_MS) {
                     LOG.error("Timed out waiting for shared server to start after {}ms", elapsed);
+                    ServerProcessIO.surfaceCrashDiagnostics(LOG, "shared-server", tmpDir);
                     destroyProcessUnsafe();
                     throw new ServerInitializationException(
                             "Shared server did not start within " + STARTUP_TIMEOUT_MS + "ms");

Reply via email to