KUDU-2384. Don't collect thread stacks when running under debugger

This changes the stack watchdog and the 'collect all stacks' utility
code to not run when it appears that the process is running under a
tracer such as gdb or strace. This makes debugging Kudu processes
significantly easier because you don't need to know the magical
incantation 'handle SIGUSR2 nostop noprint'.

I tested manually by running a master with
--diagnostics-log-stack-traces-interval-ms=10 and attaching to it with
gdb. With this patch, I didn't see any signals sent.

This patch also changes the Env EIO fault injection to never inject
failures on /proc/. This is to avoid a TSAN warning since
KernelStackWatchdog would otherwise access the env_inject_eio_globs flag
concurrent to tests modifying it.

Change-Id: Ib17ffb9514e9b440b8d97843374d2a60913a398b
Reviewed-on: http://gerrit.cloudera.org:8080/9835
Reviewed-by: Adar Dembo <a...@cloudera.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/4db748f3
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/4db748f3
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/4db748f3

Branch: refs/heads/master
Commit: 4db748f3021711a67017fe51f30097350f343101
Parents: 357ef6c
Author: Todd Lipcon <t...@apache.org>
Authored: Tue Mar 27 16:58:37 2018 -0700
Committer: Todd Lipcon <t...@apache.org>
Committed: Mon Apr 2 05:04:29 2018 +0000

----------------------------------------------------------------------
 src/kudu/util/debug-util.cc            |  5 +++++
 src/kudu/util/debug-util.h             | 18 ++++++++++++---
 src/kudu/util/env_posix.cc             | 17 +++++++++++---
 src/kudu/util/kernel_stack_watchdog.cc |  7 ++++++
 src/kudu/util/os-util.cc               | 35 +++++++++++++++++++++++++++++
 src/kudu/util/os-util.h                |  7 +++++-
 6 files changed, 82 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/4db748f3/src/kudu/util/debug-util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/debug-util.cc b/src/kudu/util/debug-util.cc
index 07703ea..93a9543 100644
--- a/src/kudu/util/debug-util.cc
+++ b/src/kudu/util/debug-util.cc
@@ -67,6 +67,7 @@
 #include "kudu/util/errno.h"
 #include "kudu/util/faststring.h"
 #include "kudu/util/monotime.h"
+#include "kudu/util/os-util.h"
 #include "kudu/util/scoped_cleanup.h"
 #include "kudu/util/thread.h"
 
@@ -724,6 +725,10 @@ bool StackTrace::LessThan(const StackTrace& s) const {
 }
 
 Status StackTraceSnapshot::SnapshotAllStacks() {
+  if (IsBeingDebugged()) {
+    return Status::Incomplete("not collecting stack trace since debugger or 
strace is attached");
+  }
+
   vector<pid_t> tids;
   RETURN_NOT_OK_PREPEND(ListThreads(&tids), "could not list threads");
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/4db748f3/src/kudu/util/debug-util.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/debug-util.h b/src/kudu/util/debug-util.h
index 44fccc3..e8c94ea 100644
--- a/src/kudu/util/debug-util.h
+++ b/src/kudu/util/debug-util.h
@@ -68,10 +68,20 @@ Status SetStackTraceSignal(int signum);
 // thread. It requires that the target thread has not blocked POSIX signals. If
 // it has, an error message will be returned.
 //
-// This function is thread-safe but coarsely synchronized: only one "dumper" 
thread
-// may be active at a time.
+// This function is thread-safe.
+//
+// NOTE: if Kudu is running inside a debugger, this can be annoying to a 
developer since
+// it internally uses signals that will cause the debugger to stop. Consider 
checking
+// 'IsBeingDebugged()' from os-util.h before using this function for 
non-critical use
+// cases.
 std::string DumpThreadStack(int64_t tid);
 
+// Capture the thread stack of another thread
+//
+// NOTE: if Kudu is running inside a debugger, this can be annoying to a 
developer since
+// it internally uses signals that will cause the debugger to stop. Consider 
checking
+// 'IsBeingDebugged()' from os-util.h before using this function for 
non-critical use
+// cases.
 Status GetThreadStack(int64_t tid, StackTrace* stack);
 
 // Return the current stack trace, stringified.
@@ -230,7 +240,9 @@ class StackTraceSnapshot {
     capture_thread_names_ = c;
   }
 
-  // Snapshot the stack traces of all threads in the process.
+  // Snapshot the stack traces of all threads in the process. This may return 
a bad
+  // Status in the case that stack traces aren't supported on the platform, or 
if
+  // the process is running inside a debugger.
   //
   // NOTE: this may take some time and should not be called in a 
latency-sensitive
   // context.

http://git-wip-us.apache.org/repos/asf/kudu/blob/4db748f3/src/kudu/util/env_posix.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/env_posix.cc b/src/kudu/util/env_posix.cc
index 9ba583b..d180cb3 100644
--- a/src/kudu/util/env_posix.cc
+++ b/src/kudu/util/env_posix.cc
@@ -47,6 +47,7 @@
 #include "kudu/gutil/stringprintf.h"
 #include "kudu/gutil/strings/split.h"
 #include "kudu/gutil/strings/substitute.h"
+#include "kudu/gutil/strings/util.h"
 #include "kudu/util/array_view.h"
 #include "kudu/util/debug/trace_event.h"
 #include "kudu/util/env.h"
@@ -132,10 +133,20 @@ typedef struct xfs_flock64 {
 #define MAYBE_RETURN_EIO(filename_expr, error_expr) do { \
   const string& f_ = (filename_expr); \
   MAYBE_RETURN_FAILURE(FLAGS_env_inject_eio, \
-      StringMatchesGlob(f_, FLAGS_env_inject_eio_globs) ? (error_expr) : 
Status::OK()) \
+      ShouldInject(f_, FLAGS_env_inject_eio_globs) ? (error_expr) : 
Status::OK()) \
 } while (0);
 
-bool StringMatchesGlob(const string& candidate, const string& glob_patterns) {
+bool ShouldInject(const string& candidate, const string& glob_patterns) {
+  // Never inject on /proc/ file accesses regardless of the configured flag,
+  // since it's not possible for /proc to "go bad".
+  //
+  // NB: it's important that this is done here _before_ consulting 
glob_patterns
+  // since some background threads read /proc/ after gflags have already been
+  // destructed.
+  if (HasPrefixString(candidate, "/proc/")) {
+    return false;
+  }
+
   vector<string> globs = strings::Split(glob_patterns, ",", 
strings::SkipEmpty());
   for (const auto& glob : globs) {
     if (fnmatch(glob.c_str(), candidate.c_str(), 0) == 0) {
@@ -1339,7 +1350,7 @@ class PosixEnv : public Env {
   virtual Status LockFile(const std::string& fname, FileLock** lock) OVERRIDE {
     TRACE_EVENT1("io", "PosixEnv::LockFile", "path", fname);
     MAYBE_RETURN_EIO(fname, IOError(Env::kInjectedFailureStatusMsg, EIO));
-    if (StringMatchesGlob(fname, FLAGS_env_inject_lock_failure_globs)) {
+    if (ShouldInject(fname, FLAGS_env_inject_lock_failure_globs)) {
       return IOError("lock " + fname, EAGAIN);
     }
     ThreadRestrictions::AssertIOAllowed();

http://git-wip-us.apache.org/repos/asf/kudu/blob/4db748f3/src/kudu/util/kernel_stack_watchdog.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/kernel_stack_watchdog.cc 
b/src/kudu/util/kernel_stack_watchdog.cc
index 2b890dc..27a259c 100644
--- a/src/kudu/util/kernel_stack_watchdog.cc
+++ b/src/kudu/util/kernel_stack_watchdog.cc
@@ -38,6 +38,7 @@
 #include "kudu/util/fault_injection.h"
 #include "kudu/util/flag_tags.h"
 #include "kudu/util/monotime.h"
+#include "kudu/util/os-util.h"
 #include "kudu/util/status.h"
 #include "kudu/util/thread.h"
 
@@ -135,6 +136,12 @@ void KernelStackWatchdog::RunThread() {
       break;
     }
 
+    // Don't send signals while the debugger is running, since it makes it 
hard to
+    // use.
+    if (IsBeingDebugged()) {
+      continue;
+    }
+
     // Prevent threads from deleting their TLS objects between the snapshot 
loop and the sending of
     // signals. This makes it safe for us to access their TLS.
     //

http://git-wip-us.apache.org/repos/asf/kudu/blob/4db748f3/src/kudu/util/os-util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/os-util.cc b/src/kudu/util/os-util.cc
index d29eaff..d4ab2c7 100644
--- a/src/kudu/util/os-util.cc
+++ b/src/kudu/util/os-util.cc
@@ -33,11 +33,17 @@
 #include <cstddef>
 #include <fstream>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include "kudu/gutil/strings/numbers.h"
 #include "kudu/gutil/strings/split.h"
+#include "kudu/gutil/strings/stringpiece.h"
 #include "kudu/gutil/strings/substitute.h"
+#include "kudu/gutil/strings/util.h"
+#include "kudu/util/env.h"
+#include "kudu/util/faststring.h"
+#include "kudu/util/logging.h"
 
 using std::ifstream;
 using std::istreambuf_iterator;
@@ -142,5 +148,34 @@ void DisableCoreDumps() {
   }
 }
 
+bool IsBeingDebugged() {
+#ifndef __linux__
+  return false;
+#else
+  // Look for the TracerPid line in /proc/self/status.
+  // If this is non-zero, we are being ptraced, which is indicative of gdb or 
strace
+  // being attached.
+  faststring buf;
+  Status s = ReadFileToString(Env::Default(), "/proc/self/status", &buf);
+  if (!s.ok()) {
+    KLOG_FIRST_N(WARNING, 1) << "could not read /proc/self/status: " << 
s.ToString();
+    return false;
+  }
+  StringPiece buf_sp(reinterpret_cast<const char*>(buf.data()), buf.size());
+  vector<StringPiece> lines = Split(buf_sp, "\n");
+  for (const auto& l : lines) {
+    if (!HasPrefixString(l, "TracerPid:")) continue;
+    std::pair<StringPiece, StringPiece> key_val = Split(l, "\t");
+    int64_t tracer_pid = -1;
+    if (!safe_strto64(key_val.second.data(), key_val.second.size(), 
&tracer_pid)) {
+      KLOG_FIRST_N(WARNING, 1) << "Invalid line in /proc/self/status: " << l;
+      return false;
+    }
+    return tracer_pid != 0;
+  }
+  KLOG_FIRST_N(WARNING, 1) << "Could not find TracerPid line in 
/proc/self/status";
+  return false;
+#endif // __linux__
+}
 
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/4db748f3/src/kudu/util/os-util.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/os-util.h b/src/kudu/util/os-util.h
index 04ba8f8..c79d579 100644
--- a/src/kudu/util/os-util.h
+++ b/src/kudu/util/os-util.h
@@ -27,7 +27,7 @@
 
 #include <cstdint>
 #include <string>
-#include <type_traits>
+#include <type_traits> // IWYU pragma: keep
 
 #include "kudu/util/status.h"
 
@@ -71,6 +71,11 @@ Status GetThreadStats(int64_t tid, ThreadStats* stats);
 // want to generate a core dump from an "expected" crash.
 void DisableCoreDumps();
 
+// Return true if this process appears to be running under a debugger or 
strace.
+//
+// This may return false on unsupported (non-Linux) platforms.
+bool IsBeingDebugged();
+
 } // namespace kudu
 
 #endif /* KUDU_UTIL_OS_UTIL_H */

Reply via email to