This is an automated email from the ASF dual-hosted git repository.

bneradt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/master by this push:
     new 444a446046 Add backtrace to crash logs (#12775)
444a446046 is described below

commit 444a446046736fa6f43993c6c73ba1517e92c598
Author: Brian Neradt <[email protected]>
AuthorDate: Wed Jan 21 14:35:38 2026 -0600

    Add backtrace to crash logs (#12775)
    
    Send crash information from the signal handler to traffic_crashlog and
    record it in crash logs.
    
    This also:
    
    * Adds a 10-second timeout around backtrace collection to prevent the
      crashlog helper from hanging indefinitely if ptrace/waitpid blocks.
      This addresses a TODO comment that has been in the code for a while.
    * Removes the "Memory Regions" section from the crash logs. These are
      very rarely useful and otherwise add significant noise to the crash
      log. A post-mortem gdb session on a core should be able to get this
      information if it is necessary in a particular instance.
---
 .../command-line/traffic_crashlog.en.rst           |   6 +-
 src/traffic_crashlog/backtrace.cc                  | 100 +++++++++++++------
 src/traffic_crashlog/procinfo.cc                   |   6 --
 src/traffic_crashlog/traffic_crashlog.cc           | 110 +++++++++++++++++----
 src/traffic_crashlog/traffic_crashlog.h            |  21 ++--
 src/traffic_server/Crash.cc                        |  22 +++++
 tests/CMakeLists.txt                               |   1 +
 .../pluginTest/crash_test/CMakeLists.txt           |  18 ++++
 .../gold_tests/pluginTest/crash_test/crash_test.cc | 105 ++++++++++++++++++++
 .../pluginTest/crash_test/crash_test.test.py       | 101 +++++++++++++++++++
 tests/tools/condwait                               |  14 ++-
 11 files changed, 441 insertions(+), 63 deletions(-)

diff --git a/doc/appendices/command-line/traffic_crashlog.en.rst 
b/doc/appendices/command-line/traffic_crashlog.en.rst
index 11a7f1c504..1ccd86977d 100644
--- a/doc/appendices/command-line/traffic_crashlog.en.rst
+++ b/doc/appendices/command-line/traffic_crashlog.en.rst
@@ -37,6 +37,8 @@ for users to run directly.
 
 When :ref:`traffic_server` starts, it will launch a :program:`traffic_crashlog`
 process and keep it stopped, activating it only if a crash occurs.
+The crash report includes backtraces for all threads, with the crashing thread
+listed first.
 
 Options
 =======
@@ -48,8 +50,8 @@ Options
     This option specifies the host triple for the process that
     :program:`traffic_crashlog` should examine. If a supported host
     triple is specified, :program:`traffic_crashlog` expects to
-    receive a ``siginfo_t`` structure on it's standard input,
-    followed by a ``ucontext_t``.
+    receive the crashing thread ID, then a ``siginfo_t`` structure on
+    its standard input, followed by a ``ucontext_t``.
 
 .. option:: --target PID
 
diff --git a/src/traffic_crashlog/backtrace.cc 
b/src/traffic_crashlog/backtrace.cc
index ab0ae983b3..27b40d720d 100644
--- a/src/traffic_crashlog/backtrace.cc
+++ b/src/traffic_crashlog/backtrace.cc
@@ -30,6 +30,8 @@
  ****************************************************************************/
 #include "tscore/ink_config.h"
 
+#include <sys/types.h>
+
 #if TS_USE_REMOTE_UNWINDING
 #include "tscore/Diags.h"
 
@@ -38,7 +40,6 @@
 #include <libunwind.h>
 #include <libunwind-ptrace.h>
 #if defined(__FreeBSD__)
-#include <sys/types.h>
 #define __WALL        P_ALL
 #define PTRACE_ATTACH PT_ATTACH
 #define PTRACE_DETACH PT_DETACH
@@ -56,6 +57,7 @@ using threadlist = std::vector<pid_t>;
 
 DbgCtl dbg_ctl_backtrace{"backtrace"};
 
+/** Enumerate all threads for a given process by reading /proc/<pid>/task. */
 static threadlist
 threads_for_process(pid_t proc)
 {
@@ -104,58 +106,73 @@ backtrace_for_thread(pid_t threadid, TextBuffer &text)
   unw_cursor_t     cursor;
   void            *ap     = nullptr;
   pid_t            target = -1;
-  unsigned         level  = 0;
+  bool             attached{false};
+  unsigned         level = 0;
+  int              step_result;
 
   // First, attach to the child, causing it to stop.
   status = ptrace(PTRACE_ATTACH, threadid, 0, 0);
   if (status < 0) {
+    text.format("  [ptrace ATTACH failed: %s (%d)]\n", strerror(errno), errno);
     Dbg(dbg_ctl_backtrace, "ptrace(ATTACH, %ld) -> %s (%d)\n", (long)threadid, 
strerror(errno), errno);
     return;
   }
+  attached = true;
 
-  // Wait for it to stop (XXX should be a timed wait ...)
+  // Wait for it to stop. The caller uses alarm() to enforce a timeout.
   target = waitpid(threadid, &status, __WALL | WUNTRACED);
   Dbg(dbg_ctl_backtrace, "waited for target %ld, found PID %ld, %s\n", 
(long)threadid, (long)target,
       WIFSTOPPED(status) ? "STOPPED" : "???");
   if (target < 0) {
+    text.format("  [waitpid failed: %s (%d)]\n", strerror(errno), errno);
     goto done;
   }
 
   ap = _UPT_create(threadid);
   Dbg(dbg_ctl_backtrace, "created UPT %p", ap);
   if (ap == nullptr) {
+    text.format("  [_UPT_create failed]\n");
     goto done;
   }
 
   addr_space = unw_create_addr_space(&_UPT_accessors, 0 /* byteorder */);
   Dbg(dbg_ctl_backtrace, "created address space %p\n", addr_space);
   if (addr_space == nullptr) {
+    text.format("  [unw_create_addr_space failed]\n");
     goto done;
   }
 
   status = unw_init_remote(&cursor, addr_space, ap);
   Dbg(dbg_ctl_backtrace, "unw_init_remote(...) -> %d\n", status);
   if (status != 0) {
+    text.format("  [unw_init_remote failed: %d]\n", status);
     goto done;
   }
 
-  while (unw_step(&cursor) > 0) {
+  step_result = unw_step(&cursor);
+  if (step_result <= 0) {
+    text.format("  [unw_step returned %d on first call]\n", step_result);
+  }
+
+  while (step_result > 0) {
     unw_word_t ip;
-    unw_word_t offset;
+    unw_word_t offset = 0;
     char       buf[256];
 
     unw_get_reg(&cursor, UNW_REG_IP, &ip);
 
     if (unw_get_proc_name(&cursor, buf, sizeof(buf), &offset) == 0) {
-      int   status;
-      char *name = abi::__cxa_demangle(buf, nullptr, nullptr, &status);
-      text.format("%-4u 0x%016llx %s + %p\n", level, static_cast<unsigned long 
long>(ip), name ? name : buf, (void *)offset);
+      int   demangle_status;
+      char *name = abi::__cxa_demangle(buf, nullptr, nullptr, 
&demangle_status);
+      text.format("%-4u 0x%016llx %s + 0x%lx\n", level, static_cast<unsigned 
long long>(ip), name ? name : buf,
+                  static_cast<unsigned long>(offset));
       free(name);
     } else {
-      text.format("%-4u 0x%016llx 0x0 + %p\n", level, static_cast<unsigned 
long long>(ip), (void *)offset);
+      text.format("%-4u 0x%016llx <unknown>\n", level, static_cast<unsigned 
long long>(ip));
     }
 
     ++level;
+    step_result = unw_step(&cursor);
   }
 
 done:
@@ -167,38 +184,65 @@ done:
     _UPT_destroy(ap);
   }
 
-  status = ptrace(PTRACE_DETACH, target, NULL, DATA_NULL);
-  Dbg(dbg_ctl_backtrace, "ptrace(DETACH, %ld) -> %d (errno %d)\n", 
(long)target, status, errno);
+  if (attached) {
+    status = ptrace(PTRACE_DETACH, threadid, nullptr, DATA_NULL);
+    Dbg(dbg_ctl_backtrace, "ptrace(DETACH, %ld) -> %d (errno %d)\n", 
(long)threadid, status, errno);
+  }
+}
+
+/** Format a thread header with the thread name from /proc. */
+static void
+format_thread_header(pid_t threadid, const char *prefix, TextBuffer &text)
+{
+  ats_scoped_fd fd;
+  char          path[128];
+
+  snprintf(path, sizeof(path), "/proc/%ld/comm", static_cast<long>(threadid));
+  fd = open(path, O_RDONLY);
+  if (fd >= 0) {
+    text.format("%s (TID %ld, ", prefix, static_cast<long>(threadid));
+    text.readFromFD(fd);
+    text.chomp();
+    text.format("):\n");
+  } else {
+    text.format("%s (TID %ld):\n", prefix, static_cast<long>(threadid));
+  }
 }
 } // namespace
+
 int
-ServerBacktrace(unsigned /* options */, int pid, char **trace)
+ServerBacktrace(unsigned /* options */, pid_t pid, pid_t crashing_tid, char 
**trace)
 {
   *trace = nullptr;
 
   threadlist threads(threads_for_process(pid));
   TextBuffer text(0);
 
-  Dbg(dbg_ctl_backtrace, "tracing %zd threads for traffic_server PID %ld\n", 
threads.size(), (long)pid);
+  Dbg(dbg_ctl_backtrace, "tracing %zd threads for traffic_server PID %ld, 
crashing TID %ld\n", threads.size(),
+      static_cast<long>(pid), static_cast<long>(crashing_tid));
 
+  // First, trace the crashing thread.
+  if (crashing_tid > 0) {
+    Dbg(dbg_ctl_backtrace, "tracing crashing thread %ld\n", 
static_cast<long>(crashing_tid));
+    format_thread_header(crashing_tid, "Crashing Thread", text);
+    backtrace_for_thread(crashing_tid, text);
+    text.format("\n");
+  }
+
+  // Then trace all other threads.
+  bool printed_header = false;
   for (auto threadid : threads) {
-    Dbg(dbg_ctl_backtrace, "tracing thread %ld\n", (long)threadid);
-    // Get the thread name using /proc/PID/comm
-    ats_scoped_fd fd;
-    char          threadname[128];
-
-    snprintf(threadname, sizeof(threadname), "/proc/%ld/comm", 
static_cast<long>(threadid));
-    fd = open(threadname, O_RDONLY);
-    if (fd >= 0) {
-      text.format("Thread %ld, ", static_cast<long>(threadid));
-      text.readFromFD(fd);
-      text.chomp();
-    } else {
-      text.format("Thread %ld", static_cast<long>(threadid));
+    if (threadid == crashing_tid) {
+      continue;
     }
 
-    text.format(":\n");
+    if (!printed_header) {
+      text.format("Other Non-Crashing Threads:\n\n");
+      printed_header = true;
+    }
 
+    Dbg(dbg_ctl_backtrace, "tracing thread %ld\n", 
static_cast<long>(threadid));
+    format_thread_header(threadid, "Thread", text);
     backtrace_for_thread(threadid, text);
     text.format("\n");
   }
@@ -210,7 +254,7 @@ ServerBacktrace(unsigned /* options */, int pid, char 
**trace)
 #else /* TS_USE_REMOTE_UNWINDING */
 
 int
-ServerBacktrace([[maybe_unused]] unsigned options, [[maybe_unused]] int pid, 
char **trace)
+ServerBacktrace([[maybe_unused]] unsigned options, [[maybe_unused]] pid_t pid, 
[[maybe_unused]] pid_t crashing_tid, char **trace)
 {
   *trace = nullptr;
   return -1;
diff --git a/src/traffic_crashlog/procinfo.cc b/src/traffic_crashlog/procinfo.cc
index e0fb1ac5ee..3fe41f4f3a 100644
--- a/src/traffic_crashlog/procinfo.cc
+++ b/src/traffic_crashlog/procinfo.cc
@@ -66,12 +66,6 @@ write_procfd_file(const char *filename, const char *label, 
FILE *fp, const crash
   return !text.empty();
 }
 
-bool
-crashlog_write_regions(FILE *fp, const crashlog_target &target)
-{
-  return write_procfd_file("maps", "Memory Regions", fp, target);
-}
-
 bool
 crashlog_write_procstatus(FILE *fp, const crashlog_target &target)
 {
diff --git a/src/traffic_crashlog/traffic_crashlog.cc 
b/src/traffic_crashlog/traffic_crashlog.cc
index 9354c5f838..404b720255 100644
--- a/src/traffic_crashlog/traffic_crashlog.cc
+++ b/src/traffic_crashlog/traffic_crashlog.cc
@@ -32,8 +32,25 @@
 #include "tscore/BaseLogFile.h"
 #include "tscore/runroot.h"
 #include "iocore/eventsystem/RecProcess.h"
+
+#include <csignal>
 #include <unistd.h>
 
+namespace
+{
+// Timeout in seconds for backtrace collection. If ptrace/waitpid hangs, this
+// prevents the crashlog helper from blocking indefinitely.
+constexpr unsigned BACKTRACE_TIMEOUT_SECS = 10;
+
+volatile sig_atomic_t backtrace_timed_out = 0;
+
+void
+backtrace_alarm_handler(int /* sig */)
+{
+  backtrace_timed_out = 1;
+}
+} // namespace
+
 static int   syslog_mode  = false;
 static int   debug_mode   = false;
 static int   wait_mode    = false;
@@ -91,32 +108,65 @@ crashlog_open(const char *path)
   return (fd == -1) ? nullptr : fdopen(fd, "w");
 }
 
-extern int ServerBacktrace(unsigned /* options */, int pid, char **trace);
+extern int ServerBacktrace(unsigned /* options */, pid_t pid, pid_t 
crashing_tid, char **trace);
 
 bool
-crashlog_write_backtrace(FILE *fp, pid_t pid, const crashlog_target &)
+crashlog_write_backtrace(FILE *fp, const crashlog_target &target)
 {
-  char *trace = nullptr;
-  int   mgmterr;
+  char *trace   = nullptr;
+  int   mgmterr = -1;
+
+  if (target.pid > 0) {
+    // Set up a timeout to prevent indefinite hangs in ptrace/waitpid.
+    backtrace_timed_out = 0;
+    struct sigaction new_action;
+    struct sigaction old_action;
+    new_action.sa_handler = backtrace_alarm_handler;
+    sigemptyset(&new_action.sa_mask);
+    new_action.sa_flags = 0;
+    sigaction(SIGALRM, &new_action, &old_action);
+    alarm(BACKTRACE_TIMEOUT_SECS);
+
+    mgmterr = ServerBacktrace(0, target.pid, target.crashing_tid, &trace);
+
+    // Cancel the alarm and restore the old handler.
+    alarm(0);
+    sigaction(SIGALRM, &old_action, nullptr);
+
+    if (backtrace_timed_out) {
+      fprintf(fp, "Backtrace collection timed out after %u seconds\n", 
BACKTRACE_TIMEOUT_SECS);
+      free(trace);
+      return false;
+    }
+  }
 
   // NOTE: sometimes we can't get a backtrace because the ptrace attach will 
fail with
   // EPERM. I've seen this happen when a debugger is attached, which makes 
sense, but it
   // can also happen without a debugger. Possibly in that case, there is a 
race with the
   // kernel locking the process information?
 
-  if ((mgmterr = ServerBacktrace(0, static_cast<int>(pid), &trace)) != 0) {
-    fprintf(fp, "Unable to retrieve backtrace: %d\n", mgmterr);
-    return false;
+  if (mgmterr == 0 && trace != nullptr) {
+    // ServerBacktrace succeeded - this gives us backtraces for all threads.
+    fprintf(fp, "%s", trace);
+    free(trace);
+    return true;
   }
 
-  if (trace == nullptr) {
-    fprintf(fp, "Unable to retrieve backtrace: trace is null\n");
-    return false;
+  // ServerBacktrace failed. Fall back to the in-process backtrace from the 
crashing thread.
+  if ((target.flags & CRASHLOG_HAVE_BACKTRACE) && !target.backtrace.empty()) {
+    fprintf(fp, "Crashing Thread Backtrace:\n%s", target.backtrace.c_str());
+    return true;
   }
 
-  fprintf(fp, "%s", trace);
-  free(trace);
-  return true;
+  // No backtrace available from either source.
+  if (mgmterr != 0) {
+    fprintf(fp, "Unable to retrieve backtrace: ServerBacktrace returned %d\n", 
mgmterr);
+  } else if (target.pid <= 0) {
+    fprintf(fp, "Unable to retrieve backtrace: process ID not available\n");
+  } else {
+    fprintf(fp, "Unable to retrieve backtrace: no backtrace data available\n");
+  }
+  return false;
 }
 
 void
@@ -200,14 +250,24 @@ main(int /* argc ATS_UNUSED */, const char **argv)
   Note("crashlog started, target=%ld, debug=%s syslog=%s, uid=%ld euid=%ld", 
static_cast<long>(target_pid),
        debug_mode ? "true" : "false", syslog_mode ? "true" : "false", 
(long)getuid(), (long)geteuid());
 
-  ink_zero(target);
   target.pid       = static_cast<pid_t>(target_pid);
   target.timestamp = timestamp();
 
-  if (host_triplet && strncmp(host_triplet, "x86_64-unknown-linux", 
sizeof("x86_64-unknown-linux") - 1) == 0) {
+  // Read crash context on Linux platforms. The siginfo_t and ucontext_t
+  // structures are platform-specific but should be defined for all Linux
+  // architectures.
+  if (host_triplet && (strstr(host_triplet, "linux") != nullptr || 
strstr(host_triplet, "Linux") != nullptr)) {
     ssize_t nbytes;
     target.flags |= CRASHLOG_HAVE_THREADINFO;
 
+    nbytes = read(STDIN_FILENO, &target.crashing_tid, 
sizeof(target.crashing_tid));
+    if (nbytes < static_cast<ssize_t>(sizeof(target.crashing_tid))) {
+      Warning("received %zd of %zu expected crashing thread ID bytes", nbytes, 
sizeof(target.crashing_tid));
+      target.flags &= ~CRASHLOG_HAVE_THREADINFO;
+    } else {
+      Note("received crashing thread ID: %ld", 
static_cast<long>(target.crashing_tid));
+    }
+
     nbytes = read(STDIN_FILENO, &target.siginfo, sizeof(target.siginfo));
     if (nbytes < static_cast<ssize_t>(sizeof(target.siginfo))) {
       Warning("received %zd of %zu expected signal info bytes", nbytes, 
sizeof(target.siginfo));
@@ -219,6 +279,21 @@ main(int /* argc ATS_UNUSED */, const char **argv)
       Warning("received %zd of %zu expected thread context bytes", nbytes, 
sizeof(target.ucontext));
       target.flags &= ~CRASHLOG_HAVE_THREADINFO;
     }
+
+    // Read the in-process backtrace from the crashing thread.
+    uint32_t bt_len = 0;
+    nbytes          = read(STDIN_FILENO, &bt_len, sizeof(bt_len));
+    if (nbytes == static_cast<ssize_t>(sizeof(bt_len)) && bt_len > 0 && bt_len 
< 1024 * 1024) {
+      target.backtrace.resize(bt_len);
+      nbytes = read(STDIN_FILENO, target.backtrace.data(), bt_len);
+      if (nbytes == static_cast<ssize_t>(bt_len)) {
+        target.flags |= CRASHLOG_HAVE_BACKTRACE;
+        Note("received %u bytes of in-process backtrace", bt_len);
+      } else {
+        Warning("received %zd of %u expected backtrace bytes", nbytes, bt_len);
+        target.backtrace.clear();
+      }
+    }
   }
 
   logname = crashlog_name();
@@ -245,16 +320,13 @@ main(int /* argc ATS_UNUSED */, const char **argv)
   crashlog_write_registers(fp, target);
 
   fprintf(fp, "\n");
-  crashlog_write_backtrace(fp, parent, target);
+  crashlog_write_backtrace(fp, target);
 
   fprintf(fp, "\n");
   crashlog_write_procstatus(fp, target);
   fprintf(fp, "\n");
   crashlog_write_proclimits(fp, target);
 
-  fprintf(fp, "\n");
-  crashlog_write_regions(fp, target);
-
   fprintf(fp, "\n");
   crashlog_exec_pgm(fp, target.pid);
 
diff --git a/src/traffic_crashlog/traffic_crashlog.h 
b/src/traffic_crashlog/traffic_crashlog.h
index 086c1fcda3..6c71f6858b 100644
--- a/src/traffic_crashlog/traffic_crashlog.h
+++ b/src/traffic_crashlog/traffic_crashlog.h
@@ -28,6 +28,8 @@
 #include "tscore/Diags.h"
 #include "tscore/TextBuffer.h"
 
+#include <string>
+
 // ucontext.h is deprecated on Darwin, and we really only need it on Linux, so 
only
 // include it if we are planning to use it.
 #if defined(__linux__)
@@ -49,17 +51,23 @@
 #endif
 
 #define CRASHLOG_HAVE_THREADINFO 0x1u
+#define CRASHLOG_HAVE_BACKTRACE  0x2u
 
 struct crashlog_target {
-  pid_t     pid;
-  siginfo_t siginfo;
+  pid_t     pid{0};
+  pid_t     crashing_tid{0};
+  siginfo_t siginfo{};
 #if defined(__linux__)
-  ucontext_t ucontext;
+  ucontext_t ucontext{};
 #else
-  char ucontext; // just a placeholder ...
+  char ucontext{}; // just a placeholder ...
 #endif
-  struct tm timestamp;
-  unsigned  flags;
+  struct tm timestamp {
+  };
+  unsigned flags{0};
+
+  // In-process backtrace from the crashing thread.
+  std::string backtrace;
 };
 
 bool crashlog_write_backtrace(FILE *, const crashlog_target &);
@@ -69,7 +77,6 @@ bool crashlog_write_proclimits(FILE *, const crashlog_target 
&);
 bool crashlog_write_procname(FILE *, const crashlog_target &);
 bool crashlog_write_procstatus(FILE *, const crashlog_target &);
 bool crashlog_write_records(FILE *, const crashlog_target &);
-bool crashlog_write_regions(FILE *, const crashlog_target &);
 bool crashlog_write_registers(FILE *, const crashlog_target &);
 bool crashlog_write_siginfo(FILE *, const crashlog_target &);
 bool crashlog_write_uname(FILE *, const crashlog_target &);
diff --git a/src/traffic_server/Crash.cc b/src/traffic_server/Crash.cc
index 5ee6791a0e..ff5b5e29b9 100644
--- a/src/traffic_server/Crash.cc
+++ b/src/traffic_server/Crash.cc
@@ -28,6 +28,13 @@
 #include "tscore/Version.h"
 #include "tscore/signals.h"
 
+#include <string>
+#include <unistd.h>
+#if defined(__linux__)
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#endif
+
 // ucontext.h is deprecated on Darwin, and we really only need it on Linux, so 
only
 // include it if we are planning to use it.
 #if defined(__linux__)
@@ -139,6 +146,12 @@ crash_logger_init(const char *user)
   crash_logger_pid = child;
   crash_logger_fd  = pipe[0];
 
+#if defined(__linux__)
+  // Allow the crash logger to ptrace us. Without this, Yama's ptrace_scope=1
+  // (the default on many distros) prevents a child process from tracing its 
parent.
+  prctl(PR_SET_PTRACER, crash_logger_pid, 0, 0, 0);
+#endif
+
   // Wait for the helper to stop
   if (waitpid(crash_logger_pid, &status, WUNTRACED) > 0) {
     Dbg(dbg_ctl_server, "waited on PID %ld, %s", (long)crash_logger_pid, 
WIFSTOPPED(status) ? "STOPPED" : "???");
@@ -165,8 +178,17 @@ crash_logger_invoke(int signo, siginfo_t *info, void *ctx)
     // Write the crashing thread information to the crash logger. While the 
siginfo_t is blesses by POSIX, the
     // ucontext_t can contain pointers, so it's highly platform dependent. On 
Linux with glibc, however, it is
     // a single memory block that we can just puke out.
+    pid_t crashing_tid = static_cast<pid_t>(syscall(SYS_gettid));
+    ATS_UNUSED_RETURN(write(crash_logger_fd, &crashing_tid, 
sizeof(crashing_tid)));
     ATS_UNUSED_RETURN(write(crash_logger_fd, info, sizeof(siginfo_t)));
     ATS_UNUSED_RETURN(write(crash_logger_fd, static_cast<ucontext_t *>(ctx), 
sizeof(ucontext_t)));
+
+    // Send zero-length backtrace. We cannot safely generate a backtrace here
+    // because backtrace() can acquire locks (e.g., in the dynamic linker) that
+    // the crashing thread might be holding, causing a deadlock. The crash
+    // logger will get the backtrace via ptrace from a separate process 
instead.
+    uint32_t bt_len = 0;
+    ATS_UNUSED_RETURN(write(crash_logger_fd, &bt_len, sizeof(bt_len)));
 #endif
 
     close(crash_logger_fd);
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 1718ab4911..95b6a0f331 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -39,6 +39,7 @@ add_subdirectory(tools/plugins)
 add_subdirectory(gold_tests/chunked_encoding)
 add_subdirectory(gold_tests/continuations/plugins)
 add_subdirectory(gold_tests/jsonrpc/plugins)
+add_subdirectory(gold_tests/pluginTest/crash_test)
 add_subdirectory(gold_tests/pluginTest/polite_hook_wait)
 add_subdirectory(gold_tests/pluginTest/tsapi)
 add_subdirectory(gold_tests/pluginTest/TSVConnFd)
diff --git a/tests/gold_tests/pluginTest/crash_test/CMakeLists.txt 
b/tests/gold_tests/pluginTest/crash_test/CMakeLists.txt
new file mode 100644
index 0000000000..c6c8ab89cc
--- /dev/null
+++ b/tests/gold_tests/pluginTest/crash_test/CMakeLists.txt
@@ -0,0 +1,18 @@
+#######################
+#
+#  Licensed to the Apache Software Foundation (ASF) under one or more 
contributor license
+#  agreements.  See the NOTICE file distributed with this work for additional 
information regarding
+#  copyright ownership.  The ASF licenses this file to you under the Apache 
License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with the 
License.  You may obtain
+#  a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software 
distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 
KIND, either express
+#  or implied. See the License for the specific language governing permissions 
and limitations under
+#  the License.
+#
+#######################
+
+add_autest_plugin(crash_test crash_test.cc)
diff --git a/tests/gold_tests/pluginTest/crash_test/crash_test.cc 
b/tests/gold_tests/pluginTest/crash_test/crash_test.cc
new file mode 100644
index 0000000000..9974c6fe3f
--- /dev/null
+++ b/tests/gold_tests/pluginTest/crash_test/crash_test.cc
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file crash_test.cc
+ * @brief A plugin that intentionally crashes traffic_server for testing
+ *        the crash log functionality.
+ *
+ * This plugin is for TESTING ONLY - do not use in production!
+ *
+ * When a request contains the header "X-Crash-Test: now", this plugin
+ * will dereference a null pointer, causing a SIGSEGV.
+ */
+
+#include <ts/ts.h>
+#include <cstring>
+#include <cstdlib>
+
+#define PLUGIN_NAME "crash_test"
+
+namespace
+{
+DbgCtl dbg_ctl{PLUGIN_NAME};
+
+int
+handle_read_request(TSCont /* contp */, TSEvent event, void *edata)
+{
+  TSHttpTxn txnp = static_cast<TSHttpTxn>(edata);
+
+  if (event != TS_EVENT_HTTP_READ_REQUEST_HDR) {
+    TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE);
+    return 0;
+  }
+
+  TSMBuffer bufp;
+  TSMLoc    hdr_loc;
+
+  if (TSHttpTxnClientReqGet(txnp, &bufp, &hdr_loc) != TS_SUCCESS) {
+    TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE);
+    return 0;
+  }
+
+  TSMLoc field_loc = TSMimeHdrFieldFind(bufp, hdr_loc, "X-Crash-Test", -1);
+  if (field_loc != TS_NULL_MLOC) {
+    int         value_len = 0;
+    char const *value     = TSMimeHdrFieldValueStringGet(bufp, hdr_loc, 
field_loc, 0, &value_len);
+
+    if (value != nullptr && value_len == 3 && strncmp(value, "now", 3) == 0) {
+      TSNote("Received crash trigger header - crashing now!");
+
+      // Intentionally crash by dereferencing a null pointer.
+      volatile int *null_ptr = nullptr;
+      *null_ptr              = 42;
+      TSNote("This should never be reached.");
+    }
+
+    TSHandleMLocRelease(bufp, hdr_loc, field_loc);
+  }
+
+  TSHandleMLocRelease(bufp, TS_NULL_MLOC, hdr_loc);
+  TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE);
+  return 0;
+}
+
+} // anonymous namespace
+
+void
+TSPluginInit(int /* argc */, char const ** /* argv */)
+{
+  Dbg(dbg_ctl, "initializing crash_test plugin");
+
+  TSPluginRegistrationInfo info;
+  info.plugin_name   = const_cast<char *>(PLUGIN_NAME);
+  info.vendor_name   = const_cast<char *>("Apache");
+  info.support_email = const_cast<char *>("[email protected]");
+
+  if (TSPluginRegister(&info) != TS_SUCCESS) {
+    TSError("[%s] Plugin registration failed", PLUGIN_NAME);
+    return;
+  }
+
+  TSCont contp = TSContCreate(handle_read_request, nullptr);
+  if (contp == nullptr) {
+    TSError("[%s] Failed to create continuation", PLUGIN_NAME);
+    return;
+  }
+
+  TSHttpHookAdd(TS_HTTP_READ_REQUEST_HDR_HOOK, contp);
+  Dbg(dbg_ctl, "crash_test plugin initialized - send 'X-Crash-Test: now' 
header to trigger crash");
+}
diff --git a/tests/gold_tests/pluginTest/crash_test/crash_test.test.py 
b/tests/gold_tests/pluginTest/crash_test/crash_test.test.py
new file mode 100644
index 0000000000..90da2057d5
--- /dev/null
+++ b/tests/gold_tests/pluginTest/crash_test/crash_test.test.py
@@ -0,0 +1,101 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+"""
+Test that crash logs are generated with backtraces when traffic_server crashes.
+
+This test intentionally crashes traffic_server using a plugin that dereferences
+a null pointer when it receives a specific header. It then verifies that:
+1. A crash log file was created
+2. The crash log contains thread information
+"""
+
+import os
+
+Test.Summary = '''
+Test crash log generation with backtrace.
+'''
+
+# Create an origin server for the test.
+server = Test.MakeOriginServer("server")
+
+request_header = {"headers": "GET / HTTP/1.1\r\nHost: example.com\r\n\r\n", 
"timestamp": "1469733493.993", "body": ""}
+response_header = {"headers": "HTTP/1.1 200 OK\r\nConnection: close\r\n\r\n", 
"timestamp": "1469733493.993", "body": "Hello"}
+server.addResponse("sessionlog.json", request_header, response_header)
+
+ts = Test.MakeATSProcess("ts")
+
+# We expect ATS to crash with SIGSEGV, allowing us to test crash logging.
+ts.ReturnCode = -11
+
+ts.Disk.records_config.update(
+    {
+        'proxy.config.proxy_name': 'test_proxy',
+        'proxy.config.url_remap.remap_required': 0,
+        'proxy.config.diags.debug.enabled': 1,
+        'proxy.config.diags.debug.tags': 'crash_test',
+        # Enable the crash log helper.
+        'proxy.config.crash_log_helper': 'traffic_crashlog',
+    })
+
+# Copy the crash_test plugin.
+plugin_path = os.path.join(Test.Variables.AtsBuildGoldTestsDir, 'pluginTest', 
'crash_test', '.libs', 'crash_test.so')
+ts.Setup.Copy(plugin_path, ts.Env['PROXY_CONFIG_PLUGIN_PLUGIN_DIR'])
+
+ts.Disk.plugin_config.AddLine("crash_test.so")
+
+ts.Disk.remap_config.AddLine(f"map / 
http://127.0.0.1:{server.Variables.Port}/")
+
+ts.Disk.diags_log.Content += Testers.ContainsExpression(
+    "Received crash trigger header - crashing now!", "Expect the log 
indicating the intentional crash.")
+ts.Disk.diags_log.Content += Testers.ExcludesExpression(
+    "This should never be reached.", "Expect to not see the log after the 
crash.")
+
+# Test 1: Make a normal request to verify the server is running.
+tr = Test.AddTestRun("Verify server is running")
+tr.Processes.Default.StartBefore(server)
+tr.Processes.Default.StartBefore(ts)
+tr.MakeCurlCommand(f'-s -o /dev/null -w "%{{http_code}}" 
http://127.0.0.1:{ts.Variables.port}/', ts=ts)
+tr.Processes.Default.ReturnCode = 0
+tr.Processes.Default.Streams.stdout = Testers.ContainsExpression("200", 
"Expected 200 OK response")
+tr.StillRunningAfter = ts
+tr.StillRunningAfter = server
+
+# Test 2: Send the crash trigger header.
+tr = Test.AddTestRun("Trigger crash")
+# The curl command should fail since ATS will crash.
+tr.MakeCurlCommand(f'-s -o /dev/null -H "X-Crash-Test: now" 
http://127.0.0.1:{ts.Variables.port}/', ts=ts)
+tr.Processes.Default.ReturnCode = 52
+
+# Test 3: Wait for a crash log to be created.
+tr = Test.AddTestRun("Wait for crash log")
+crash_log_glob = f'{ts.Variables.LOGDIR}/crash-*.log'
+# Wait up to 60 seconds for a crash log file to appear, then 1 extra second 
for it to be written.
+tr.Processes.Default.Command = 
(f"{os.path.join(Test.Variables.AtsTestToolsDir, 'condwait')} 60 1 -f 
'{crash_log_glob}'")
+tr.Processes.Default.ReturnCode = 0
+
+# Test 4: Verify crash log contains expected content.
+tr = Test.AddTestRun("Check crash log content")
+tr.Processes.Default.Command = (f'cat {ts.Variables.LOGDIR}/crash-*.log 2>&1')
+tr.Processes.Default.ReturnCode = 0
+# The crash log should contain signal information (always present).
+tr.Processes.Default.Streams.stdout += Testers.ContainsExpression(
+    "Segmentation fault", "Expected crash log to show segmentation fault 
signal")
+# The crash log should contain the crashing thread information first.
+# The crashing thread should be listed first.
+tr.Processes.Default.Streams.stdout += Testers.ContainsExpression("Crashing 
Thread", "Expected crashing thread backtrace first")
+# The other threads should be listed after.
+tr.Processes.Default.Streams.stdout += Testers.ContainsExpression(
+    "Other Non-Crashing Threads:", "Expected other non-crashing threads 
section")
diff --git a/tests/tools/condwait b/tests/tools/condwait
index 45e3eff7be..0f208f168f 100755
--- a/tests/tools/condwait
+++ b/tests/tools/condwait
@@ -22,6 +22,7 @@
 #  condwait [ MAX-WAIT [ POST-WAIT ] ] CONDITION
 #
 # CONDITION is the ('test' command) condition to wait for.  (It may contain 
white space.)
+# For file existence tests (-f, -e, -d), glob patterns are supported (e.g., -f 
/path/crash-*.log).
 #
 # MAX-WAIT is the maximum number of seconds to wait for the condition.  If it 
is omitted, it defaults to 60.
 #
@@ -58,9 +59,20 @@ if [[ "$1" = "" ]] ; then
     exit 1
 fi
 
+# Check if this is a simple file existence test (-f, -e, -d). If so, use ls -d
+# which handles glob patterns properly. Otherwise, use test for the condition.
+check_condition() {
+    if [[ "$1" = "-f" || "$1" = "-e" || "$1" = "-d" ]] && [[ $# -eq 2 ]]; then
+        # Use ls -d for file existence tests to support glob patterns.
+        ls -d $2 >/dev/null 2>&1
+    else
+        test $*
+    fi
+}
+
 while (( WAIT > 0 ))
 do
-    if test $*
+    if check_condition $*
     then
         if (( POST_WAIT > 0 ))
         then

Reply via email to