KUDU-2384. Don't collect thread stacks when running under debugger This changes the stack watchdog and the 'collect all stacks' utility code to not run when it appears that the process is running under a tracer such as gdb or strace. This makes debugging Kudu processes significantly easier because you don't need to know the magical incantation 'handle SIGUSR2 nostop noprint'.
I tested manually by running a master with --diagnostics-log-stack-traces-interval-ms=10 and attaching to it with gdb. With this patch, I didn't see any signals sent. This patch also changes the Env EIO fault injection to never inject failures on /proc/. This is to avoid a TSAN warning since KernelStackWatchdog would otherwise access the env_inject_eio_globs flag concurrent to tests modifying it. Change-Id: Ib17ffb9514e9b440b8d97843374d2a60913a398b Reviewed-on: http://gerrit.cloudera.org:8080/9835 Reviewed-by: Adar Dembo <a...@cloudera.com> Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/4db748f3 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/4db748f3 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/4db748f3 Branch: refs/heads/master Commit: 4db748f3021711a67017fe51f30097350f343101 Parents: 357ef6c Author: Todd Lipcon <t...@apache.org> Authored: Tue Mar 27 16:58:37 2018 -0700 Committer: Todd Lipcon <t...@apache.org> Committed: Mon Apr 2 05:04:29 2018 +0000 ---------------------------------------------------------------------- src/kudu/util/debug-util.cc | 5 +++++ src/kudu/util/debug-util.h | 18 ++++++++++++--- src/kudu/util/env_posix.cc | 17 +++++++++++--- src/kudu/util/kernel_stack_watchdog.cc | 7 ++++++ src/kudu/util/os-util.cc | 35 +++++++++++++++++++++++++++++ src/kudu/util/os-util.h | 7 +++++- 6 files changed, 82 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/4db748f3/src/kudu/util/debug-util.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/debug-util.cc b/src/kudu/util/debug-util.cc index 07703ea..93a9543 100644 --- a/src/kudu/util/debug-util.cc +++ b/src/kudu/util/debug-util.cc @@ -67,6 +67,7 @@ #include "kudu/util/errno.h" #include "kudu/util/faststring.h" #include "kudu/util/monotime.h" +#include "kudu/util/os-util.h" #include "kudu/util/scoped_cleanup.h" #include "kudu/util/thread.h" @@ -724,6 +725,10 @@ bool StackTrace::LessThan(const StackTrace& s) const { } Status StackTraceSnapshot::SnapshotAllStacks() { + if (IsBeingDebugged()) { + return Status::Incomplete("not collecting stack trace since debugger or strace is attached"); + } + vector<pid_t> tids; RETURN_NOT_OK_PREPEND(ListThreads(&tids), "could not list threads"); http://git-wip-us.apache.org/repos/asf/kudu/blob/4db748f3/src/kudu/util/debug-util.h ---------------------------------------------------------------------- diff --git a/src/kudu/util/debug-util.h b/src/kudu/util/debug-util.h index 44fccc3..e8c94ea 100644 --- a/src/kudu/util/debug-util.h +++ b/src/kudu/util/debug-util.h @@ -68,10 +68,20 @@ Status SetStackTraceSignal(int signum); // thread. It requires that the target thread has not blocked POSIX signals. If // it has, an error message will be returned. // -// This function is thread-safe but coarsely synchronized: only one "dumper" thread -// may be active at a time. +// This function is thread-safe. +// +// NOTE: if Kudu is running inside a debugger, this can be annoying to a developer since +// it internally uses signals that will cause the debugger to stop. Consider checking +// 'IsBeingDebugged()' from os-util.h before using this function for non-critical use +// cases. std::string DumpThreadStack(int64_t tid); +// Capture the thread stack of another thread +// +// NOTE: if Kudu is running inside a debugger, this can be annoying to a developer since +// it internally uses signals that will cause the debugger to stop. Consider checking +// 'IsBeingDebugged()' from os-util.h before using this function for non-critical use +// cases. Status GetThreadStack(int64_t tid, StackTrace* stack); // Return the current stack trace, stringified. @@ -230,7 +240,9 @@ class StackTraceSnapshot { capture_thread_names_ = c; } - // Snapshot the stack traces of all threads in the process. + // Snapshot the stack traces of all threads in the process. This may return a bad + // Status in the case that stack traces aren't supported on the platform, or if + // the process is running inside a debugger. // // NOTE: this may take some time and should not be called in a latency-sensitive // context. http://git-wip-us.apache.org/repos/asf/kudu/blob/4db748f3/src/kudu/util/env_posix.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/env_posix.cc b/src/kudu/util/env_posix.cc index 9ba583b..d180cb3 100644 --- a/src/kudu/util/env_posix.cc +++ b/src/kudu/util/env_posix.cc @@ -47,6 +47,7 @@ #include "kudu/gutil/stringprintf.h" #include "kudu/gutil/strings/split.h" #include "kudu/gutil/strings/substitute.h" +#include "kudu/gutil/strings/util.h" #include "kudu/util/array_view.h" #include "kudu/util/debug/trace_event.h" #include "kudu/util/env.h" @@ -132,10 +133,20 @@ typedef struct xfs_flock64 { #define MAYBE_RETURN_EIO(filename_expr, error_expr) do { \ const string& f_ = (filename_expr); \ MAYBE_RETURN_FAILURE(FLAGS_env_inject_eio, \ - StringMatchesGlob(f_, FLAGS_env_inject_eio_globs) ? (error_expr) : Status::OK()) \ + ShouldInject(f_, FLAGS_env_inject_eio_globs) ? (error_expr) : Status::OK()) \ } while (0); -bool StringMatchesGlob(const string& candidate, const string& glob_patterns) { +bool ShouldInject(const string& candidate, const string& glob_patterns) { + // Never inject on /proc/ file accesses regardless of the configured flag, + // since it's not possible for /proc to "go bad". + // + // NB: it's important that this is done here _before_ consulting glob_patterns + // since some background threads read /proc/ after gflags have already been + // destructed. + if (HasPrefixString(candidate, "/proc/")) { + return false; + } + vector<string> globs = strings::Split(glob_patterns, ",", strings::SkipEmpty()); for (const auto& glob : globs) { if (fnmatch(glob.c_str(), candidate.c_str(), 0) == 0) { @@ -1339,7 +1350,7 @@ class PosixEnv : public Env { virtual Status LockFile(const std::string& fname, FileLock** lock) OVERRIDE { TRACE_EVENT1("io", "PosixEnv::LockFile", "path", fname); MAYBE_RETURN_EIO(fname, IOError(Env::kInjectedFailureStatusMsg, EIO)); - if (StringMatchesGlob(fname, FLAGS_env_inject_lock_failure_globs)) { + if (ShouldInject(fname, FLAGS_env_inject_lock_failure_globs)) { return IOError("lock " + fname, EAGAIN); } ThreadRestrictions::AssertIOAllowed(); http://git-wip-us.apache.org/repos/asf/kudu/blob/4db748f3/src/kudu/util/kernel_stack_watchdog.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/kernel_stack_watchdog.cc b/src/kudu/util/kernel_stack_watchdog.cc index 2b890dc..27a259c 100644 --- a/src/kudu/util/kernel_stack_watchdog.cc +++ b/src/kudu/util/kernel_stack_watchdog.cc @@ -38,6 +38,7 @@ #include "kudu/util/fault_injection.h" #include "kudu/util/flag_tags.h" #include "kudu/util/monotime.h" +#include "kudu/util/os-util.h" #include "kudu/util/status.h" #include "kudu/util/thread.h" @@ -135,6 +136,12 @@ void KernelStackWatchdog::RunThread() { break; } + // Don't send signals while the debugger is running, since it makes it hard to + // use. + if (IsBeingDebugged()) { + continue; + } + // Prevent threads from deleting their TLS objects between the snapshot loop and the sending of // signals. This makes it safe for us to access their TLS. // http://git-wip-us.apache.org/repos/asf/kudu/blob/4db748f3/src/kudu/util/os-util.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/os-util.cc b/src/kudu/util/os-util.cc index d29eaff..d4ab2c7 100644 --- a/src/kudu/util/os-util.cc +++ b/src/kudu/util/os-util.cc @@ -33,11 +33,17 @@ #include <cstddef> #include <fstream> #include <string> +#include <utility> #include <vector> #include "kudu/gutil/strings/numbers.h" #include "kudu/gutil/strings/split.h" +#include "kudu/gutil/strings/stringpiece.h" #include "kudu/gutil/strings/substitute.h" +#include "kudu/gutil/strings/util.h" +#include "kudu/util/env.h" +#include "kudu/util/faststring.h" +#include "kudu/util/logging.h" using std::ifstream; using std::istreambuf_iterator; @@ -142,5 +148,34 @@ void DisableCoreDumps() { } } +bool IsBeingDebugged() { +#ifndef __linux__ + return false; +#else + // Look for the TracerPid line in /proc/self/status. + // If this is non-zero, we are being ptraced, which is indicative of gdb or strace + // being attached. + faststring buf; + Status s = ReadFileToString(Env::Default(), "/proc/self/status", &buf); + if (!s.ok()) { + KLOG_FIRST_N(WARNING, 1) << "could not read /proc/self/status: " << s.ToString(); + return false; + } + StringPiece buf_sp(reinterpret_cast<const char*>(buf.data()), buf.size()); + vector<StringPiece> lines = Split(buf_sp, "\n"); + for (const auto& l : lines) { + if (!HasPrefixString(l, "TracerPid:")) continue; + std::pair<StringPiece, StringPiece> key_val = Split(l, "\t"); + int64_t tracer_pid = -1; + if (!safe_strto64(key_val.second.data(), key_val.second.size(), &tracer_pid)) { + KLOG_FIRST_N(WARNING, 1) << "Invalid line in /proc/self/status: " << l; + return false; + } + return tracer_pid != 0; + } + KLOG_FIRST_N(WARNING, 1) << "Could not find TracerPid line in /proc/self/status"; + return false; +#endif // __linux__ +} } // namespace kudu http://git-wip-us.apache.org/repos/asf/kudu/blob/4db748f3/src/kudu/util/os-util.h ---------------------------------------------------------------------- diff --git a/src/kudu/util/os-util.h b/src/kudu/util/os-util.h index 04ba8f8..c79d579 100644 --- a/src/kudu/util/os-util.h +++ b/src/kudu/util/os-util.h @@ -27,7 +27,7 @@ #include <cstdint> #include <string> -#include <type_traits> +#include <type_traits> // IWYU pragma: keep #include "kudu/util/status.h" @@ -71,6 +71,11 @@ Status GetThreadStats(int64_t tid, ThreadStats* stats); // want to generate a core dump from an "expected" crash. void DisableCoreDumps(); +// Return true if this process appears to be running under a debugger or strace. +// +// This may return false on unsupported (non-Linux) platforms. +bool IsBeingDebugged(); + } // namespace kudu #endif /* KUDU_UTIL_OS_UTIL_H */