This is an automated email from the ASF dual-hosted git repository.
bneradt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/master by this push:
new 444a446046 Add backtrace to crash logs (#12775)
444a446046 is described below
commit 444a446046736fa6f43993c6c73ba1517e92c598
Author: Brian Neradt <[email protected]>
AuthorDate: Wed Jan 21 14:35:38 2026 -0600
Add backtrace to crash logs (#12775)
Send crash information from the signal handler to traffic_crashlog and
record it in crash logs.
This also:
* Adds a 10-second timeout around backtrace collection to prevent the
crashlog helper from hanging indefinitely if ptrace/waitpid blocks.
This addresses a TODO comment that has been in the code for a while.
* Removes the "Memory Regions" section from the crash logs. These are
very rarely useful and otherwise add significant noise to the crash
log. A post-mortem gdb session on a core should be able to get this
information if it is necessary in a particular instance.
---
.../command-line/traffic_crashlog.en.rst | 6 +-
src/traffic_crashlog/backtrace.cc | 100 +++++++++++++------
src/traffic_crashlog/procinfo.cc | 6 --
src/traffic_crashlog/traffic_crashlog.cc | 110 +++++++++++++++++----
src/traffic_crashlog/traffic_crashlog.h | 21 ++--
src/traffic_server/Crash.cc | 22 +++++
tests/CMakeLists.txt | 1 +
.../pluginTest/crash_test/CMakeLists.txt | 18 ++++
.../gold_tests/pluginTest/crash_test/crash_test.cc | 105 ++++++++++++++++++++
.../pluginTest/crash_test/crash_test.test.py | 101 +++++++++++++++++++
tests/tools/condwait | 14 ++-
11 files changed, 441 insertions(+), 63 deletions(-)
diff --git a/doc/appendices/command-line/traffic_crashlog.en.rst
b/doc/appendices/command-line/traffic_crashlog.en.rst
index 11a7f1c504..1ccd86977d 100644
--- a/doc/appendices/command-line/traffic_crashlog.en.rst
+++ b/doc/appendices/command-line/traffic_crashlog.en.rst
@@ -37,6 +37,8 @@ for users to run directly.
When :ref:`traffic_server` starts, it will launch a :program:`traffic_crashlog`
process and keep it stopped, activating it only if a crash occurs.
+The crash report includes backtraces for all threads, with the crashing thread
+listed first.
Options
=======
@@ -48,8 +50,8 @@ Options
This option specifies the host triple for the process that
:program:`traffic_crashlog` should examine. If a supported host
triple is specified, :program:`traffic_crashlog` expects to
- receive a ``siginfo_t`` structure on it's standard input,
- followed by a ``ucontext_t``.
+ receive the crashing thread ID, then a ``siginfo_t`` structure on
+ its standard input, followed by a ``ucontext_t``.
.. option:: --target PID
diff --git a/src/traffic_crashlog/backtrace.cc
b/src/traffic_crashlog/backtrace.cc
index ab0ae983b3..27b40d720d 100644
--- a/src/traffic_crashlog/backtrace.cc
+++ b/src/traffic_crashlog/backtrace.cc
@@ -30,6 +30,8 @@
****************************************************************************/
#include "tscore/ink_config.h"
+#include <sys/types.h>
+
#if TS_USE_REMOTE_UNWINDING
#include "tscore/Diags.h"
@@ -38,7 +40,6 @@
#include <libunwind.h>
#include <libunwind-ptrace.h>
#if defined(__FreeBSD__)
-#include <sys/types.h>
#define __WALL P_ALL
#define PTRACE_ATTACH PT_ATTACH
#define PTRACE_DETACH PT_DETACH
@@ -56,6 +57,7 @@ using threadlist = std::vector<pid_t>;
DbgCtl dbg_ctl_backtrace{"backtrace"};
+/** Enumerate all threads for a given process by reading /proc/<pid>/task. */
static threadlist
threads_for_process(pid_t proc)
{
@@ -104,58 +106,73 @@ backtrace_for_thread(pid_t threadid, TextBuffer &text)
unw_cursor_t cursor;
void *ap = nullptr;
pid_t target = -1;
- unsigned level = 0;
+ bool attached{false};
+ unsigned level = 0;
+ int step_result;
// First, attach to the child, causing it to stop.
status = ptrace(PTRACE_ATTACH, threadid, 0, 0);
if (status < 0) {
+ text.format(" [ptrace ATTACH failed: %s (%d)]\n", strerror(errno), errno);
Dbg(dbg_ctl_backtrace, "ptrace(ATTACH, %ld) -> %s (%d)\n", (long)threadid,
strerror(errno), errno);
return;
}
+ attached = true;
- // Wait for it to stop (XXX should be a timed wait ...)
+ // Wait for it to stop. The caller uses alarm() to enforce a timeout.
target = waitpid(threadid, &status, __WALL | WUNTRACED);
Dbg(dbg_ctl_backtrace, "waited for target %ld, found PID %ld, %s\n",
(long)threadid, (long)target,
WIFSTOPPED(status) ? "STOPPED" : "???");
if (target < 0) {
+ text.format(" [waitpid failed: %s (%d)]\n", strerror(errno), errno);
goto done;
}
ap = _UPT_create(threadid);
Dbg(dbg_ctl_backtrace, "created UPT %p", ap);
if (ap == nullptr) {
+ text.format(" [_UPT_create failed]\n");
goto done;
}
addr_space = unw_create_addr_space(&_UPT_accessors, 0 /* byteorder */);
Dbg(dbg_ctl_backtrace, "created address space %p\n", addr_space);
if (addr_space == nullptr) {
+ text.format(" [unw_create_addr_space failed]\n");
goto done;
}
status = unw_init_remote(&cursor, addr_space, ap);
Dbg(dbg_ctl_backtrace, "unw_init_remote(...) -> %d\n", status);
if (status != 0) {
+ text.format(" [unw_init_remote failed: %d]\n", status);
goto done;
}
- while (unw_step(&cursor) > 0) {
+ step_result = unw_step(&cursor);
+ if (step_result <= 0) {
+ text.format(" [unw_step returned %d on first call]\n", step_result);
+ }
+
+ while (step_result > 0) {
unw_word_t ip;
- unw_word_t offset;
+ unw_word_t offset = 0;
char buf[256];
unw_get_reg(&cursor, UNW_REG_IP, &ip);
if (unw_get_proc_name(&cursor, buf, sizeof(buf), &offset) == 0) {
- int status;
- char *name = abi::__cxa_demangle(buf, nullptr, nullptr, &status);
- text.format("%-4u 0x%016llx %s + %p\n", level, static_cast<unsigned long
long>(ip), name ? name : buf, (void *)offset);
+ int demangle_status;
+ char *name = abi::__cxa_demangle(buf, nullptr, nullptr,
&demangle_status);
+ text.format("%-4u 0x%016llx %s + 0x%lx\n", level, static_cast<unsigned
long long>(ip), name ? name : buf,
+ static_cast<unsigned long>(offset));
free(name);
} else {
- text.format("%-4u 0x%016llx 0x0 + %p\n", level, static_cast<unsigned
long long>(ip), (void *)offset);
+ text.format("%-4u 0x%016llx <unknown>\n", level, static_cast<unsigned
long long>(ip));
}
++level;
+ step_result = unw_step(&cursor);
}
done:
@@ -167,38 +184,65 @@ done:
_UPT_destroy(ap);
}
- status = ptrace(PTRACE_DETACH, target, NULL, DATA_NULL);
- Dbg(dbg_ctl_backtrace, "ptrace(DETACH, %ld) -> %d (errno %d)\n",
(long)target, status, errno);
+ if (attached) {
+ status = ptrace(PTRACE_DETACH, threadid, nullptr, DATA_NULL);
+ Dbg(dbg_ctl_backtrace, "ptrace(DETACH, %ld) -> %d (errno %d)\n",
(long)threadid, status, errno);
+ }
+}
+
+/** Format a thread header with the thread name from /proc. */
+static void
+format_thread_header(pid_t threadid, const char *prefix, TextBuffer &text)
+{
+ ats_scoped_fd fd;
+ char path[128];
+
+ snprintf(path, sizeof(path), "/proc/%ld/comm", static_cast<long>(threadid));
+ fd = open(path, O_RDONLY);
+ if (fd >= 0) {
+ text.format("%s (TID %ld, ", prefix, static_cast<long>(threadid));
+ text.readFromFD(fd);
+ text.chomp();
+ text.format("):\n");
+ } else {
+ text.format("%s (TID %ld):\n", prefix, static_cast<long>(threadid));
+ }
}
} // namespace
+
int
-ServerBacktrace(unsigned /* options */, int pid, char **trace)
+ServerBacktrace(unsigned /* options */, pid_t pid, pid_t crashing_tid, char
**trace)
{
*trace = nullptr;
threadlist threads(threads_for_process(pid));
TextBuffer text(0);
- Dbg(dbg_ctl_backtrace, "tracing %zd threads for traffic_server PID %ld\n",
threads.size(), (long)pid);
+ Dbg(dbg_ctl_backtrace, "tracing %zd threads for traffic_server PID %ld,
crashing TID %ld\n", threads.size(),
+ static_cast<long>(pid), static_cast<long>(crashing_tid));
+ // First, trace the crashing thread.
+ if (crashing_tid > 0) {
+ Dbg(dbg_ctl_backtrace, "tracing crashing thread %ld\n",
static_cast<long>(crashing_tid));
+ format_thread_header(crashing_tid, "Crashing Thread", text);
+ backtrace_for_thread(crashing_tid, text);
+ text.format("\n");
+ }
+
+ // Then trace all other threads.
+ bool printed_header = false;
for (auto threadid : threads) {
- Dbg(dbg_ctl_backtrace, "tracing thread %ld\n", (long)threadid);
- // Get the thread name using /proc/PID/comm
- ats_scoped_fd fd;
- char threadname[128];
-
- snprintf(threadname, sizeof(threadname), "/proc/%ld/comm",
static_cast<long>(threadid));
- fd = open(threadname, O_RDONLY);
- if (fd >= 0) {
- text.format("Thread %ld, ", static_cast<long>(threadid));
- text.readFromFD(fd);
- text.chomp();
- } else {
- text.format("Thread %ld", static_cast<long>(threadid));
+ if (threadid == crashing_tid) {
+ continue;
}
- text.format(":\n");
+ if (!printed_header) {
+ text.format("Other Non-Crashing Threads:\n\n");
+ printed_header = true;
+ }
+ Dbg(dbg_ctl_backtrace, "tracing thread %ld\n",
static_cast<long>(threadid));
+ format_thread_header(threadid, "Thread", text);
backtrace_for_thread(threadid, text);
text.format("\n");
}
@@ -210,7 +254,7 @@ ServerBacktrace(unsigned /* options */, int pid, char
**trace)
#else /* TS_USE_REMOTE_UNWINDING */
int
-ServerBacktrace([[maybe_unused]] unsigned options, [[maybe_unused]] int pid,
char **trace)
+ServerBacktrace([[maybe_unused]] unsigned options, [[maybe_unused]] pid_t pid,
[[maybe_unused]] pid_t crashing_tid, char **trace)
{
*trace = nullptr;
return -1;
diff --git a/src/traffic_crashlog/procinfo.cc b/src/traffic_crashlog/procinfo.cc
index e0fb1ac5ee..3fe41f4f3a 100644
--- a/src/traffic_crashlog/procinfo.cc
+++ b/src/traffic_crashlog/procinfo.cc
@@ -66,12 +66,6 @@ write_procfd_file(const char *filename, const char *label,
FILE *fp, const crash
return !text.empty();
}
-bool
-crashlog_write_regions(FILE *fp, const crashlog_target &target)
-{
- return write_procfd_file("maps", "Memory Regions", fp, target);
-}
-
bool
crashlog_write_procstatus(FILE *fp, const crashlog_target &target)
{
diff --git a/src/traffic_crashlog/traffic_crashlog.cc
b/src/traffic_crashlog/traffic_crashlog.cc
index 9354c5f838..404b720255 100644
--- a/src/traffic_crashlog/traffic_crashlog.cc
+++ b/src/traffic_crashlog/traffic_crashlog.cc
@@ -32,8 +32,25 @@
#include "tscore/BaseLogFile.h"
#include "tscore/runroot.h"
#include "iocore/eventsystem/RecProcess.h"
+
+#include <csignal>
#include <unistd.h>
+namespace
+{
+// Timeout in seconds for backtrace collection. If ptrace/waitpid hangs, this
+// prevents the crashlog helper from blocking indefinitely.
+constexpr unsigned BACKTRACE_TIMEOUT_SECS = 10;
+
+volatile sig_atomic_t backtrace_timed_out = 0;
+
+void
+backtrace_alarm_handler(int /* sig */)
+{
+ backtrace_timed_out = 1;
+}
+} // namespace
+
static int syslog_mode = false;
static int debug_mode = false;
static int wait_mode = false;
@@ -91,32 +108,65 @@ crashlog_open(const char *path)
return (fd == -1) ? nullptr : fdopen(fd, "w");
}
-extern int ServerBacktrace(unsigned /* options */, int pid, char **trace);
+extern int ServerBacktrace(unsigned /* options */, pid_t pid, pid_t
crashing_tid, char **trace);
bool
-crashlog_write_backtrace(FILE *fp, pid_t pid, const crashlog_target &)
+crashlog_write_backtrace(FILE *fp, const crashlog_target &target)
{
- char *trace = nullptr;
- int mgmterr;
+ char *trace = nullptr;
+ int mgmterr = -1;
+
+ if (target.pid > 0) {
+ // Set up a timeout to prevent indefinite hangs in ptrace/waitpid.
+ backtrace_timed_out = 0;
+ struct sigaction new_action;
+ struct sigaction old_action;
+ new_action.sa_handler = backtrace_alarm_handler;
+ sigemptyset(&new_action.sa_mask);
+ new_action.sa_flags = 0;
+ sigaction(SIGALRM, &new_action, &old_action);
+ alarm(BACKTRACE_TIMEOUT_SECS);
+
+ mgmterr = ServerBacktrace(0, target.pid, target.crashing_tid, &trace);
+
+ // Cancel the alarm and restore the old handler.
+ alarm(0);
+ sigaction(SIGALRM, &old_action, nullptr);
+
+ if (backtrace_timed_out) {
+ fprintf(fp, "Backtrace collection timed out after %u seconds\n",
BACKTRACE_TIMEOUT_SECS);
+ free(trace);
+ return false;
+ }
+ }
// NOTE: sometimes we can't get a backtrace because the ptrace attach will
fail with
// EPERM. I've seen this happen when a debugger is attached, which makes
sense, but it
// can also happen without a debugger. Possibly in that case, there is a
race with the
// kernel locking the process information?
- if ((mgmterr = ServerBacktrace(0, static_cast<int>(pid), &trace)) != 0) {
- fprintf(fp, "Unable to retrieve backtrace: %d\n", mgmterr);
- return false;
+ if (mgmterr == 0 && trace != nullptr) {
+ // ServerBacktrace succeeded - this gives us backtraces for all threads.
+ fprintf(fp, "%s", trace);
+ free(trace);
+ return true;
}
- if (trace == nullptr) {
- fprintf(fp, "Unable to retrieve backtrace: trace is null\n");
- return false;
+ // ServerBacktrace failed. Fall back to the in-process backtrace from the
crashing thread.
+ if ((target.flags & CRASHLOG_HAVE_BACKTRACE) && !target.backtrace.empty()) {
+ fprintf(fp, "Crashing Thread Backtrace:\n%s", target.backtrace.c_str());
+ return true;
}
- fprintf(fp, "%s", trace);
- free(trace);
- return true;
+ // No backtrace available from either source.
+ if (mgmterr != 0) {
+ fprintf(fp, "Unable to retrieve backtrace: ServerBacktrace returned %d\n",
mgmterr);
+ } else if (target.pid <= 0) {
+ fprintf(fp, "Unable to retrieve backtrace: process ID not available\n");
+ } else {
+ fprintf(fp, "Unable to retrieve backtrace: no backtrace data available\n");
+ }
+ return false;
}
void
@@ -200,14 +250,24 @@ main(int /* argc ATS_UNUSED */, const char **argv)
Note("crashlog started, target=%ld, debug=%s syslog=%s, uid=%ld euid=%ld",
static_cast<long>(target_pid),
debug_mode ? "true" : "false", syslog_mode ? "true" : "false",
(long)getuid(), (long)geteuid());
- ink_zero(target);
target.pid = static_cast<pid_t>(target_pid);
target.timestamp = timestamp();
- if (host_triplet && strncmp(host_triplet, "x86_64-unknown-linux",
sizeof("x86_64-unknown-linux") - 1) == 0) {
+ // Read crash context on Linux platforms. The siginfo_t and ucontext_t
+ // structures are platform-specific but should be defined for all Linux
+ // architectures.
+ if (host_triplet && (strstr(host_triplet, "linux") != nullptr ||
strstr(host_triplet, "Linux") != nullptr)) {
ssize_t nbytes;
target.flags |= CRASHLOG_HAVE_THREADINFO;
+ nbytes = read(STDIN_FILENO, &target.crashing_tid,
sizeof(target.crashing_tid));
+ if (nbytes < static_cast<ssize_t>(sizeof(target.crashing_tid))) {
+ Warning("received %zd of %zu expected crashing thread ID bytes", nbytes,
sizeof(target.crashing_tid));
+ target.flags &= ~CRASHLOG_HAVE_THREADINFO;
+ } else {
+ Note("received crashing thread ID: %ld",
static_cast<long>(target.crashing_tid));
+ }
+
nbytes = read(STDIN_FILENO, &target.siginfo, sizeof(target.siginfo));
if (nbytes < static_cast<ssize_t>(sizeof(target.siginfo))) {
Warning("received %zd of %zu expected signal info bytes", nbytes,
sizeof(target.siginfo));
@@ -219,6 +279,21 @@ main(int /* argc ATS_UNUSED */, const char **argv)
Warning("received %zd of %zu expected thread context bytes", nbytes,
sizeof(target.ucontext));
target.flags &= ~CRASHLOG_HAVE_THREADINFO;
}
+
+ // Read the in-process backtrace from the crashing thread.
+ uint32_t bt_len = 0;
+ nbytes = read(STDIN_FILENO, &bt_len, sizeof(bt_len));
+ if (nbytes == static_cast<ssize_t>(sizeof(bt_len)) && bt_len > 0 && bt_len
< 1024 * 1024) {
+ target.backtrace.resize(bt_len);
+ nbytes = read(STDIN_FILENO, target.backtrace.data(), bt_len);
+ if (nbytes == static_cast<ssize_t>(bt_len)) {
+ target.flags |= CRASHLOG_HAVE_BACKTRACE;
+ Note("received %u bytes of in-process backtrace", bt_len);
+ } else {
+ Warning("received %zd of %u expected backtrace bytes", nbytes, bt_len);
+ target.backtrace.clear();
+ }
+ }
}
logname = crashlog_name();
@@ -245,16 +320,13 @@ main(int /* argc ATS_UNUSED */, const char **argv)
crashlog_write_registers(fp, target);
fprintf(fp, "\n");
- crashlog_write_backtrace(fp, parent, target);
+ crashlog_write_backtrace(fp, target);
fprintf(fp, "\n");
crashlog_write_procstatus(fp, target);
fprintf(fp, "\n");
crashlog_write_proclimits(fp, target);
- fprintf(fp, "\n");
- crashlog_write_regions(fp, target);
-
fprintf(fp, "\n");
crashlog_exec_pgm(fp, target.pid);
diff --git a/src/traffic_crashlog/traffic_crashlog.h
b/src/traffic_crashlog/traffic_crashlog.h
index 086c1fcda3..6c71f6858b 100644
--- a/src/traffic_crashlog/traffic_crashlog.h
+++ b/src/traffic_crashlog/traffic_crashlog.h
@@ -28,6 +28,8 @@
#include "tscore/Diags.h"
#include "tscore/TextBuffer.h"
+#include <string>
+
// ucontext.h is deprecated on Darwin, and we really only need it on Linux, so
only
// include it if we are planning to use it.
#if defined(__linux__)
@@ -49,17 +51,23 @@
#endif
#define CRASHLOG_HAVE_THREADINFO 0x1u
+#define CRASHLOG_HAVE_BACKTRACE 0x2u
struct crashlog_target {
- pid_t pid;
- siginfo_t siginfo;
+ pid_t pid{0};
+ pid_t crashing_tid{0};
+ siginfo_t siginfo{};
#if defined(__linux__)
- ucontext_t ucontext;
+ ucontext_t ucontext{};
#else
- char ucontext; // just a placeholder ...
+ char ucontext{}; // just a placeholder ...
#endif
- struct tm timestamp;
- unsigned flags;
+ struct tm timestamp {
+ };
+ unsigned flags{0};
+
+ // In-process backtrace from the crashing thread.
+ std::string backtrace;
};
bool crashlog_write_backtrace(FILE *, const crashlog_target &);
@@ -69,7 +77,6 @@ bool crashlog_write_proclimits(FILE *, const crashlog_target
&);
bool crashlog_write_procname(FILE *, const crashlog_target &);
bool crashlog_write_procstatus(FILE *, const crashlog_target &);
bool crashlog_write_records(FILE *, const crashlog_target &);
-bool crashlog_write_regions(FILE *, const crashlog_target &);
bool crashlog_write_registers(FILE *, const crashlog_target &);
bool crashlog_write_siginfo(FILE *, const crashlog_target &);
bool crashlog_write_uname(FILE *, const crashlog_target &);
diff --git a/src/traffic_server/Crash.cc b/src/traffic_server/Crash.cc
index 5ee6791a0e..ff5b5e29b9 100644
--- a/src/traffic_server/Crash.cc
+++ b/src/traffic_server/Crash.cc
@@ -28,6 +28,13 @@
#include "tscore/Version.h"
#include "tscore/signals.h"
+#include <string>
+#include <unistd.h>
+#if defined(__linux__)
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#endif
+
// ucontext.h is deprecated on Darwin, and we really only need it on Linux, so
only
// include it if we are planning to use it.
#if defined(__linux__)
@@ -139,6 +146,12 @@ crash_logger_init(const char *user)
crash_logger_pid = child;
crash_logger_fd = pipe[0];
+#if defined(__linux__)
+ // Allow the crash logger to ptrace us. Without this, Yama's ptrace_scope=1
+ // (the default on many distros) prevents a child process from tracing its
parent.
+ prctl(PR_SET_PTRACER, crash_logger_pid, 0, 0, 0);
+#endif
+
// Wait for the helper to stop
if (waitpid(crash_logger_pid, &status, WUNTRACED) > 0) {
Dbg(dbg_ctl_server, "waited on PID %ld, %s", (long)crash_logger_pid,
WIFSTOPPED(status) ? "STOPPED" : "???");
@@ -165,8 +178,17 @@ crash_logger_invoke(int signo, siginfo_t *info, void *ctx)
// Write the crashing thread information to the crash logger. While the
siginfo_t is blesses by POSIX, the
// ucontext_t can contain pointers, so it's highly platform dependent. On
Linux with glibc, however, it is
// a single memory block that we can just puke out.
+ pid_t crashing_tid = static_cast<pid_t>(syscall(SYS_gettid));
+ ATS_UNUSED_RETURN(write(crash_logger_fd, &crashing_tid,
sizeof(crashing_tid)));
ATS_UNUSED_RETURN(write(crash_logger_fd, info, sizeof(siginfo_t)));
ATS_UNUSED_RETURN(write(crash_logger_fd, static_cast<ucontext_t *>(ctx),
sizeof(ucontext_t)));
+
+ // Send zero-length backtrace. We cannot safely generate a backtrace here
+ // because backtrace() can acquire locks (e.g., in the dynamic linker) that
+ // the crashing thread might be holding, causing a deadlock. The crash
+ // logger will get the backtrace via ptrace from a separate process
instead.
+ uint32_t bt_len = 0;
+ ATS_UNUSED_RETURN(write(crash_logger_fd, &bt_len, sizeof(bt_len)));
#endif
close(crash_logger_fd);
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 1718ab4911..95b6a0f331 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -39,6 +39,7 @@ add_subdirectory(tools/plugins)
add_subdirectory(gold_tests/chunked_encoding)
add_subdirectory(gold_tests/continuations/plugins)
add_subdirectory(gold_tests/jsonrpc/plugins)
+add_subdirectory(gold_tests/pluginTest/crash_test)
add_subdirectory(gold_tests/pluginTest/polite_hook_wait)
add_subdirectory(gold_tests/pluginTest/tsapi)
add_subdirectory(gold_tests/pluginTest/TSVConnFd)
diff --git a/tests/gold_tests/pluginTest/crash_test/CMakeLists.txt
b/tests/gold_tests/pluginTest/crash_test/CMakeLists.txt
new file mode 100644
index 0000000000..c6c8ab89cc
--- /dev/null
+++ b/tests/gold_tests/pluginTest/crash_test/CMakeLists.txt
@@ -0,0 +1,18 @@
+#######################
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
contributor license
+# agreements. See the NOTICE file distributed with this work for additional
information regarding
+# copyright ownership. The ASF licenses this file to you under the Apache
License, Version 2.0
+# (the "License"); you may not use this file except in compliance with the
License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express
+# or implied. See the License for the specific language governing permissions
and limitations under
+# the License.
+#
+#######################
+
+add_autest_plugin(crash_test crash_test.cc)
diff --git a/tests/gold_tests/pluginTest/crash_test/crash_test.cc
b/tests/gold_tests/pluginTest/crash_test/crash_test.cc
new file mode 100644
index 0000000000..9974c6fe3f
--- /dev/null
+++ b/tests/gold_tests/pluginTest/crash_test/crash_test.cc
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file crash_test.cc
+ * @brief A plugin that intentionally crashes traffic_server for testing
+ * the crash log functionality.
+ *
+ * This plugin is for TESTING ONLY - do not use in production!
+ *
+ * When a request contains the header "X-Crash-Test: now", this plugin
+ * will dereference a null pointer, causing a SIGSEGV.
+ */
+
+#include <ts/ts.h>
+#include <cstring>
+#include <cstdlib>
+
+#define PLUGIN_NAME "crash_test"
+
+namespace
+{
+DbgCtl dbg_ctl{PLUGIN_NAME};
+
+int
+handle_read_request(TSCont /* contp */, TSEvent event, void *edata)
+{
+ TSHttpTxn txnp = static_cast<TSHttpTxn>(edata);
+
+ if (event != TS_EVENT_HTTP_READ_REQUEST_HDR) {
+ TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE);
+ return 0;
+ }
+
+ TSMBuffer bufp;
+ TSMLoc hdr_loc;
+
+ if (TSHttpTxnClientReqGet(txnp, &bufp, &hdr_loc) != TS_SUCCESS) {
+ TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE);
+ return 0;
+ }
+
+ TSMLoc field_loc = TSMimeHdrFieldFind(bufp, hdr_loc, "X-Crash-Test", -1);
+ if (field_loc != TS_NULL_MLOC) {
+ int value_len = 0;
+ char const *value = TSMimeHdrFieldValueStringGet(bufp, hdr_loc,
field_loc, 0, &value_len);
+
+ if (value != nullptr && value_len == 3 && strncmp(value, "now", 3) == 0) {
+ TSNote("Received crash trigger header - crashing now!");
+
+ // Intentionally crash by dereferencing a null pointer.
+ volatile int *null_ptr = nullptr;
+ *null_ptr = 42;
+ TSNote("This should never be reached.");
+ }
+
+ TSHandleMLocRelease(bufp, hdr_loc, field_loc);
+ }
+
+ TSHandleMLocRelease(bufp, TS_NULL_MLOC, hdr_loc);
+ TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE);
+ return 0;
+}
+
+} // anonymous namespace
+
+void
+TSPluginInit(int /* argc */, char const ** /* argv */)
+{
+ Dbg(dbg_ctl, "initializing crash_test plugin");
+
+ TSPluginRegistrationInfo info;
+ info.plugin_name = const_cast<char *>(PLUGIN_NAME);
+ info.vendor_name = const_cast<char *>("Apache");
+ info.support_email = const_cast<char *>("[email protected]");
+
+ if (TSPluginRegister(&info) != TS_SUCCESS) {
+ TSError("[%s] Plugin registration failed", PLUGIN_NAME);
+ return;
+ }
+
+ TSCont contp = TSContCreate(handle_read_request, nullptr);
+ if (contp == nullptr) {
+ TSError("[%s] Failed to create continuation", PLUGIN_NAME);
+ return;
+ }
+
+ TSHttpHookAdd(TS_HTTP_READ_REQUEST_HDR_HOOK, contp);
+ Dbg(dbg_ctl, "crash_test plugin initialized - send 'X-Crash-Test: now'
header to trigger crash");
+}
diff --git a/tests/gold_tests/pluginTest/crash_test/crash_test.test.py
b/tests/gold_tests/pluginTest/crash_test/crash_test.test.py
new file mode 100644
index 0000000000..90da2057d5
--- /dev/null
+++ b/tests/gold_tests/pluginTest/crash_test/crash_test.test.py
@@ -0,0 +1,101 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Test that crash logs are generated with backtraces when traffic_server crashes.
+
+This test intentionally crashes traffic_server using a plugin that dereferences
+a null pointer when it receives a specific header. It then verifies that:
+1. A crash log file was created
+2. The crash log contains thread information
+"""
+
+import os
+
+Test.Summary = '''
+Test crash log generation with backtrace.
+'''
+
+# Create an origin server for the test.
+server = Test.MakeOriginServer("server")
+
+request_header = {"headers": "GET / HTTP/1.1\r\nHost: example.com\r\n\r\n",
"timestamp": "1469733493.993", "body": ""}
+response_header = {"headers": "HTTP/1.1 200 OK\r\nConnection: close\r\n\r\n",
"timestamp": "1469733493.993", "body": "Hello"}
+server.addResponse("sessionlog.json", request_header, response_header)
+
+ts = Test.MakeATSProcess("ts")
+
+# We expect ATS to crash with SIGSEGV, allowing us to test crash logging.
+ts.ReturnCode = -11
+
+ts.Disk.records_config.update(
+ {
+ 'proxy.config.proxy_name': 'test_proxy',
+ 'proxy.config.url_remap.remap_required': 0,
+ 'proxy.config.diags.debug.enabled': 1,
+ 'proxy.config.diags.debug.tags': 'crash_test',
+ # Enable the crash log helper.
+ 'proxy.config.crash_log_helper': 'traffic_crashlog',
+ })
+
+# Copy the crash_test plugin.
+plugin_path = os.path.join(Test.Variables.AtsBuildGoldTestsDir, 'pluginTest',
'crash_test', '.libs', 'crash_test.so')
+ts.Setup.Copy(plugin_path, ts.Env['PROXY_CONFIG_PLUGIN_PLUGIN_DIR'])
+
+ts.Disk.plugin_config.AddLine("crash_test.so")
+
+ts.Disk.remap_config.AddLine(f"map /
http://127.0.0.1:{server.Variables.Port}/")
+
+ts.Disk.diags_log.Content += Testers.ContainsExpression(
+ "Received crash trigger header - crashing now!", "Expect the log
indicating the intentional crash.")
+ts.Disk.diags_log.Content += Testers.ExcludesExpression(
+ "This should never be reached.", "Expect to not see the log after the
crash.")
+
+# Test 1: Make a normal request to verify the server is running.
+tr = Test.AddTestRun("Verify server is running")
+tr.Processes.Default.StartBefore(server)
+tr.Processes.Default.StartBefore(ts)
+tr.MakeCurlCommand(f'-s -o /dev/null -w "%{{http_code}}"
http://127.0.0.1:{ts.Variables.port}/', ts=ts)
+tr.Processes.Default.ReturnCode = 0
+tr.Processes.Default.Streams.stdout = Testers.ContainsExpression("200",
"Expected 200 OK response")
+tr.StillRunningAfter = ts
+tr.StillRunningAfter = server
+
+# Test 2: Send the crash trigger header.
+tr = Test.AddTestRun("Trigger crash")
+# The curl command should fail since ATS will crash.
+tr.MakeCurlCommand(f'-s -o /dev/null -H "X-Crash-Test: now"
http://127.0.0.1:{ts.Variables.port}/', ts=ts)
+tr.Processes.Default.ReturnCode = 52
+
+# Test 3: Wait for a crash log to be created.
+tr = Test.AddTestRun("Wait for crash log")
+crash_log_glob = f'{ts.Variables.LOGDIR}/crash-*.log'
+# Wait up to 60 seconds for a crash log file to appear, then 1 extra second
for it to be written.
+tr.Processes.Default.Command =
(f"{os.path.join(Test.Variables.AtsTestToolsDir, 'condwait')} 60 1 -f
'{crash_log_glob}'")
+tr.Processes.Default.ReturnCode = 0
+
+# Test 4: Verify crash log contains expected content.
+tr = Test.AddTestRun("Check crash log content")
+tr.Processes.Default.Command = (f'cat {ts.Variables.LOGDIR}/crash-*.log 2>&1')
+tr.Processes.Default.ReturnCode = 0
+# The crash log should contain signal information (always present).
+tr.Processes.Default.Streams.stdout += Testers.ContainsExpression(
+ "Segmentation fault", "Expected crash log to show segmentation fault
signal")
+# The crash log should contain the crashing thread information first.
+# The crashing thread should be listed first.
+tr.Processes.Default.Streams.stdout += Testers.ContainsExpression("Crashing
Thread", "Expected crashing thread backtrace first")
+# The other threads should be listed after.
+tr.Processes.Default.Streams.stdout += Testers.ContainsExpression(
+ "Other Non-Crashing Threads:", "Expected other non-crashing threads
section")
diff --git a/tests/tools/condwait b/tests/tools/condwait
index 45e3eff7be..0f208f168f 100755
--- a/tests/tools/condwait
+++ b/tests/tools/condwait
@@ -22,6 +22,7 @@
# condwait [ MAX-WAIT [ POST-WAIT ] ] CONDITION
#
# CONDITION is the ('test' command) condition to wait for. (It may contain
white space.)
+# For file existence tests (-f, -e, -d), glob patterns are supported (e.g., -f
/path/crash-*.log).
#
# MAX-WAIT is the maximum number of seconds to wait for the condition. If it
is omitted, it defaults to 60.
#
@@ -58,9 +59,20 @@ if [[ "$1" = "" ]] ; then
exit 1
fi
+# Check if this is a simple file existence test (-f, -e, -d). If so, use ls -d
+# which handles glob patterns properly. Otherwise, use test for the condition.
+check_condition() {
+ if [[ "$1" = "-f" || "$1" = "-e" || "$1" = "-d" ]] && [[ $# -eq 2 ]]; then
+ # Use ls -d for file existence tests to support glob patterns.
+ ls -d $2 >/dev/null 2>&1
+ else
+ test $*
+ fi
+}
+
while (( WAIT > 0 ))
do
- if test $*
+ if check_condition $*
then
if (( POST_WAIT > 0 ))
then