KUDU-2297 (part 4): periodically dump stacks to diagnostics log

This modifies the diagnostics log to periodically dump stack traces.
This is slightly complicated by the fact that symbolized stack traces
can be relatively large. So, we separate the logging of symbols and
stack traces. When an address first appears in a log file, it is logged
as part of a symbol line. Later logs of the same address do not need
to re-log the symbol.

With this, a typical stack trace dump of an idle tserver is about 4KB
pre-compression, and a 'symbols' dump is about 6KB. So logging stacks
reasonably often should not use much disk space or IO.

Currently this is enabled on the same interval as the metrics log, but
only if a new experimental flag --diagnostics-log-stack-traces is
enabled. I'm planning to move it to a different flag in a later commit,
but wanted to keep this one simple and incremental.

Change-Id: Ic32abf2c48ac6a5f3c384e58838b34671bcaf147
Reviewed-on: http://gerrit.cloudera.org:8080/9330
Tested-by: Todd Lipcon <t...@apache.org>
Reviewed-by: Mike Percy <mpe...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/09298f3b
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/09298f3b
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/09298f3b

Branch: refs/heads/master
Commit: 09298f3b6756f10a7d598b4de77676d49c38f117
Parents: 831483b
Author: Todd Lipcon <t...@apache.org>
Authored: Wed Feb 14 16:17:43 2018 -0800
Committer: Todd Lipcon <t...@apache.org>
Committed: Fri Feb 23 23:06:40 2018 +0000

----------------------------------------------------------------------
 src/kudu/server/diagnostics_log.cc | 152 +++++++++++++++++++++++++++++++-
 src/kudu/server/diagnostics_log.h  |   6 ++
 src/kudu/util/debug-util.cc        |  36 ++++----
 src/kudu/util/debug-util.h         |  19 +++-
 src/kudu/util/rolling_log-test.cc  |  16 ++--
 src/kudu/util/rolling_log.cc       |  13 +--
 src/kudu/util/rolling_log.h        |  36 +++++---
 7 files changed, 236 insertions(+), 42 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/09298f3b/src/kudu/server/diagnostics_log.cc
----------------------------------------------------------------------
diff --git a/src/kudu/server/diagnostics_log.cc 
b/src/kudu/server/diagnostics_log.cc
index 0246347..ab47d8a 100644
--- a/src/kudu/server/diagnostics_log.cc
+++ b/src/kudu/server/diagnostics_log.cc
@@ -22,14 +22,22 @@
 #include <ostream>
 #include <string>
 #include <utility>
+#include <vector>
 
+#include <gflags/gflags.h>
 #include <glog/logging.h>
+#include <sparsehash/dense_hash_set>
 
+#include "kudu/gutil/map-util.h"
 #include "kudu/gutil/ref_counted.h"
+#include "kudu/gutil/stringprintf.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/gutil/walltime.h"
+#include "kudu/util/array_view.h"
 #include "kudu/util/condition_variable.h"
+#include "kudu/util/debug-util.h"
 #include "kudu/util/env.h"
+#include "kudu/util/flag_tags.h"
 #include "kudu/util/jsonwriter.h"
 #include "kudu/util/logging.h"
 #include "kudu/util/metrics.h"
@@ -41,17 +49,60 @@
 
 using std::string;
 using std::unique_ptr;
+using std::vector;
 using strings::Substitute;
 
+// GLog already implements symbolization. Just import their hidden symbol.
+namespace google {
+// Symbolizes a program counter.  On success, returns true and write the
+// symbol name to "out".  The symbol name is demangled if possible
+// (supports symbols generated by GCC 3.x or newer).  Otherwise,
+// returns false.
+bool Symbolize(void *pc, char *out, int out_size);
+}
+
+
+// TODO(todd): this is a placeholder flag. This should actually be an interval.
+// Tagging as experimental and disabling by default while this work is in flux.
+DEFINE_bool(diagnostics_log_stack_traces, false,
+            "Whether to periodically log stack traces to the diagnostics 
log.");
+TAG_FLAG(diagnostics_log_stack_traces, runtime);
+TAG_FLAG(diagnostics_log_stack_traces, experimental);
+
 namespace kudu {
 namespace server {
 
+// Track which symbols have been emitted to the log already.
+class DiagnosticsLog::SymbolSet {
+ public:
+  SymbolSet() {
+    set_.set_empty_key(nullptr);
+  }
+
+  // Return true if the addr was added, false if it already existed.
+  bool Add(void* addr) {
+    return InsertIfNotPresent(&set_, addr);
+  }
+
+  void ResetIfLogRolled(int roll_count) {
+    if (roll_count_ != roll_count) {
+      roll_count_ = roll_count;
+      set_.clear();
+    }
+  }
+
+ private:
+  int roll_count_ = 0;
+  google::dense_hash_set<void*> set_;
+};
+
 DiagnosticsLog::DiagnosticsLog(string log_dir,
                                MetricRegistry* metric_registry) :
     log_dir_(std::move(log_dir)),
     metric_registry_(metric_registry),
     wake_(&lock_),
-    metrics_log_interval_(MonoDelta::FromSeconds(60)) {
+    metrics_log_interval_(MonoDelta::FromSeconds(60)),
+    symbols_(new SymbolSet()) {
 }
 DiagnosticsLog::~DiagnosticsLog() {
   Stop();
@@ -111,10 +162,109 @@ void DiagnosticsLog::RunThread() {
       } else {
         next_log = MonoTime::Now() + metrics_log_interval_;
       }
+
+      if (FLAGS_diagnostics_log_stack_traces) {
+        s = LogStacks();
+        if (!s.ok()) {
+          WARN_NOT_OK(s, "Unable to collect stacks to diagnostics log");
+          next_log = MonoTime::Now() + kWaitBetweenFailures;
+        } else {
+          next_log = MonoTime::Now() + metrics_log_interval_;
+        }
+      }
     }
   }
 }
 
+Status DiagnosticsLog::LogStacks() {
+  StackTraceSnapshot snap;
+  snap.set_capture_thread_names(false);
+  RETURN_NOT_OK(snap.SnapshotAllStacks());
+
+  std::ostringstream buf;
+  MicrosecondsInt64 now = GetCurrentTimeMicros();
+
+  // Because symbols are potentially long strings, and likely to be
+  // very repetitive, we do a sort of dictionary encoding here. When
+  // we roll a file, we clear our symbol table. Then, within that
+  // file, the first time we see any address, we add it to the table
+  // and make sure it is output in a 'symbols' record. Subsequent
+  // repetitions of the same address do not need to re-output the
+  // symbol.
+  symbols_->ResetIfLogRolled(log_->roll_count());
+  vector<std::pair<void*, string>> new_symbols;
+  snap.VisitGroups([&](ArrayView<StackTraceSnapshot::ThreadInfo> group) {
+      const StackTrace& stack = group[0].stack;
+      for (int i = 0; i < stack.num_frames(); i++) {
+        void* addr = stack.frame(i);
+        if (symbols_->Add(addr)) {
+          char buf[1024];
+          // Subtract 1 from the address before symbolizing, because the
+          // address on the stack is actually the return address of the 
function
+          // call rather than the address of the call instruction itself.
+          if (google::Symbolize(static_cast<char*>(addr) - 1, buf, 
sizeof(buf))) {
+            new_symbols.emplace_back(addr, buf);
+          }
+          // If symbolization fails, don't bother adding it. Readers of the log
+          // will just see that it's missing from the symbol map and should 
handle that
+          // as an unknown symbol.
+        }
+      }
+    });
+  if (!new_symbols.empty()) {
+    buf << "I" << FormatTimestampForLog(now)
+        << " symbols " << now << " ";
+    JsonWriter jw(&buf, JsonWriter::COMPACT);
+    jw.StartObject();
+    for (auto& p : new_symbols) {
+      jw.String(StringPrintf("%p", p.first));
+      jw.String(p.second);
+    }
+    jw.EndObject();
+    buf << "\n";
+  }
+
+  buf << "I" << FormatTimestampForLog(now) << " stacks " << now << " ";
+  JsonWriter jw(&buf, JsonWriter::COMPACT);
+  jw.StartObject();
+  jw.String("reason");
+  jw.String("periodic");
+  jw.String("groups");
+  jw.StartArray();
+  snap.VisitGroups([&](ArrayView<StackTraceSnapshot::ThreadInfo> group) {
+      jw.StartObject();
+
+      jw.String("tids");
+      jw.StartArray();
+      for (auto& t : group) {
+        // TODO(todd): should we also output the thread names, perhaps in
+        // a sort of dictionary fashion? It's more instructive but in
+        // practice the stack traces should usually indicate the work
+        // that's being done, anyway, which is enough to tie back to the
+        // thread. The TID is smaller and useful for correlating against
+        // messages in the normal glog as well.
+        jw.Int64(t.tid);
+      }
+      jw.EndArray();
+
+      jw.String("stack");
+      jw.StartArray();
+      const StackTrace& stack = group[0].stack;
+      for (int i = 0; i < stack.num_frames(); i++) {
+        jw.String(StringPrintf("%p", stack.frame(i)));
+      }
+      jw.EndArray();
+      jw.EndObject();
+    });
+  jw.EndArray(); // array of thread groups
+  jw.EndObject(); // end of top-level object
+  buf << "\n";
+
+  RETURN_NOT_OK(log_->Append(buf.str()));
+
+  return Status::OK();
+}
+
 Status DiagnosticsLog::LogMetrics() {
   MetricJsonOptions opts;
   opts.include_raw_histograms = true;

http://git-wip-us.apache.org/repos/asf/kudu/blob/09298f3b/src/kudu/server/diagnostics_log.h
----------------------------------------------------------------------
diff --git a/src/kudu/server/diagnostics_log.h 
b/src/kudu/server/diagnostics_log.h
index 75e1a69..258e2be 100644
--- a/src/kudu/server/diagnostics_log.h
+++ b/src/kudu/server/diagnostics_log.h
@@ -46,8 +46,11 @@ class DiagnosticsLog {
   void Stop();
 
  private:
+  class SymbolSet;
+
   void RunThread();
   Status LogMetrics();
+  Status LogStacks();
 
   const std::string log_dir_;
   const MetricRegistry* metric_registry_;
@@ -63,6 +66,9 @@ class DiagnosticsLog {
 
   int64_t metrics_epoch_ = 0;
 
+  // Out-of-line this internal data to keep the header smaller.
+  std::unique_ptr<SymbolSet> symbols_;
+
   DISALLOW_COPY_AND_ASSIGN(DiagnosticsLog);
 };
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/09298f3b/src/kudu/util/debug-util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/debug-util.cc b/src/kudu/util/debug-util.cc
index e2654f4..5c28132 100644
--- a/src/kudu/util/debug-util.cc
+++ b/src/kudu/util/debug-util.cc
@@ -719,23 +719,25 @@ Status StackTraceSnapshot::SnapshotAllStacks() {
   }
 
   // Now collect the thread names while we are waiting on stack trace 
collection.
-  for (auto& info : infos_) {
-    if (!info.status.ok()) continue;
-
-    // Get the thread's name by reading proc.
-    // TODO(todd): should we have the dumped thread fill in its own name using
-    // prctl to avoid having to open and read /proc? Or maybe we should use the
-    // Kudu ThreadMgr to get the thread names for the cases where we are using
-    // the kudu::Thread wrapper at least.
-    faststring buf;
-    Status s = ReadFileToString(Env::Default(),
-                                strings::Substitute("/proc/self/task/$0/comm", 
info.tid),
-                                &buf);
-    if (!s.ok()) {
-      info.thread_name = "<unknown name>";
-    }  else {
-      info.thread_name = buf.ToString();
-      StripTrailingNewline(&info.thread_name);
+  if (capture_thread_names_) {
+    for (auto& info : infos_) {
+      if (!info.status.ok()) continue;
+
+      // Get the thread's name by reading proc.
+      // TODO(todd): should we have the dumped thread fill in its own name 
using
+      // prctl to avoid having to open and read /proc? Or maybe we should use 
the
+      // Kudu ThreadMgr to get the thread names for the cases where we are 
using
+      // the kudu::Thread wrapper at least.
+      faststring buf;
+      Status s = ReadFileToString(Env::Default(),
+                                  
strings::Substitute("/proc/self/task/$0/comm", info.tid),
+                                  &buf);
+      if (!s.ok()) {
+        info.thread_name = "<unknown name>";
+      }  else {
+        info.thread_name = buf.ToString();
+        StripTrailingNewline(&info.thread_name);
+      }
     }
   }
   num_failed_ = 0;

http://git-wip-us.apache.org/repos/asf/kudu/blob/09298f3b/src/kudu/util/debug-util.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/debug-util.h b/src/kudu/util/debug-util.h
index 4679f36..193788e 100644
--- a/src/kudu/util/debug-util.h
+++ b/src/kudu/util/debug-util.h
@@ -25,6 +25,8 @@
 #include <string>
 #include <vector>
 
+#include <glog/logging.h>
+
 #include "kudu/gutil/macros.h"
 #include "kudu/gutil/strings/fastmem.h"
 #include "kudu/util/status.h"
@@ -144,6 +146,14 @@ class StackTrace {
   // This function is async-safe.
   void Collect(int skip_frames = 0);
 
+  int num_frames() const {
+    return num_frames_;
+  }
+
+  void* frame(int i) const {
+    DCHECK_LE(i, num_frames_);
+    return frames_[i];
+  }
 
   enum Flags {
     // Do not fix up the addresses on the stack to try to point to the 'call'
@@ -202,7 +212,8 @@ class StackTraceSnapshot {
     Status status;
 
     // The name of the thread.
-    // May be missing if 'status' is not OK.
+    // May be missing if 'status' is not OK or if thread name collection was
+    // disabled.
     std::string thread_name;
 
     // The current stack trace of the thread.
@@ -211,6 +222,10 @@ class StackTraceSnapshot {
   };
   using VisitorFunc = std::function<void(ArrayView<ThreadInfo> group)>;
 
+  void set_capture_thread_names(bool c) {
+    capture_thread_names_ = c;
+  }
+
   // Snapshot the stack traces of all threads in the process.
   //
   // NOTE: this may take some time and should not be called in a 
latency-sensitive
@@ -239,6 +254,8 @@ class StackTraceSnapshot {
   std::vector<StackTraceSnapshot::ThreadInfo> infos_;
   std::vector<StackTraceCollector> collectors_;
   int num_failed_ = 0;
+
+  bool capture_thread_names_ = true;
 };
 
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/09298f3b/src/kudu/util/rolling_log-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/rolling_log-test.cc 
b/src/kudu/util/rolling_log-test.cc
index 5c4c13b..f4f8186 100644
--- a/src/kudu/util/rolling_log-test.cc
+++ b/src/kudu/util/rolling_log-test.cc
@@ -19,6 +19,7 @@
 
 #include <unistd.h>
 
+#include <algorithm>
 #include <cstdint>
 #include <string>
 #include <vector>
@@ -68,6 +69,7 @@ class RollingLogTest : public KuduTest {
       ASSERT_TRUE(HasSuffixString(child, pid_suffix) ||
                   HasSuffixString(child, pid_suffix + ".gz")) << "bad child: " 
<< child;
     }
+    std::sort(children->begin(), children->end());
     ASSERT_EQ(children->size(), expected_count) << *children;
   }
 
@@ -78,27 +80,29 @@ class RollingLogTest : public KuduTest {
 TEST_F(RollingLogTest, TestLog) {
   RollingLog log(env_, log_dir_, "mylog");
   log.SetCompressionEnabled(false);
-  log.SetSizeLimitBytes(100);
+  log.SetRollThresholdBytes(100);
 
   // Before writing anything, we shouldn't open a log file.
   vector<string> children;
   NO_FATALS(AssertLogCount(0, &children));
 
   // Appending some data should write a new segment.
-  ASSERT_OK(log.Append("Hello world\n"));
+  const string kTestString = "Hello world\n";
+  ASSERT_OK(log.Append(kTestString));
   NO_FATALS(AssertLogCount(1, &children));
 
   for (int i = 0; i < 10; i++) {
-    ASSERT_OK(log.Append("Hello world\n"));
+    ASSERT_OK(log.Append(kTestString));
   }
   NO_FATALS(AssertLogCount(2, &children));
 
   faststring data;
   string path = JoinPathSegments(log_dir_, children[0]);
   ASSERT_OK(ReadFileToString(env_, path, &data));
-  ASSERT_TRUE(HasPrefixString(data.ToString(), "Hello world\n"))
+  ASSERT_TRUE(HasPrefixString(data.ToString(), kTestString))
     << "Data missing";
-  ASSERT_LE(data.size(), 100) << "Size limit not respected";
+  ASSERT_LE(data.size(), 100 + kTestString.length())
+      << "Roll threshold not respected";
 }
 
 // Test with compression on.
@@ -128,7 +132,7 @@ TEST_F(RollingLogTest, TestCompression) {
 TEST_F(RollingLogTest, TestFileCountLimit) {
   RollingLog log(env_, log_dir_, "mylog");
   ASSERT_OK(log.Open());
-  log.SetSizeLimitBytes(100);
+  log.SetRollThresholdBytes(100);
   log.SetMaxNumSegments(3);
 
   for (int i = 0; i < 100; i++) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/09298f3b/src/kudu/util/rolling_log.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/rolling_log.cc b/src/kudu/util/rolling_log.cc
index a508e91..50f9fbd 100644
--- a/src/kudu/util/rolling_log.cc
+++ b/src/kudu/util/rolling_log.cc
@@ -47,7 +47,7 @@ using std::string;
 using std::unique_ptr;
 using strings::Substitute;
 
-static const int kDefaultSizeLimitBytes = 64 * 1024 * 1024; // 64MB
+static const int kDefaultRollThresholdBytes = 64 * 1024 * 1024; // 64MB
 
 DECLARE_int32(max_log_files);
 
@@ -57,7 +57,7 @@ RollingLog::RollingLog(Env* env, string log_dir, string 
log_name)
     : env_(env),
       log_dir_(std::move(log_dir)),
       log_name_(std::move(log_name)),
-      size_limit_bytes_(kDefaultSizeLimitBytes),
+      roll_threshold_bytes_(kDefaultRollThresholdBytes),
       max_num_segments_(FLAGS_max_log_files),
       compress_after_close_(true) {}
 
@@ -65,9 +65,9 @@ RollingLog::~RollingLog() {
   WARN_NOT_OK(Close(), "Unable to close RollingLog");
 }
 
-void RollingLog::SetSizeLimitBytes(int64_t size) {
+void RollingLog::SetRollThresholdBytes(int64_t size) {
   CHECK_GT(size, 0);
-  size_limit_bytes_ = size;
+  roll_threshold_bytes_ = size;
 }
 
 void RollingLog::SetMaxNumSegments(int num_segments) {
@@ -189,11 +189,12 @@ Status RollingLog::Append(StringPiece s) {
     RETURN_NOT_OK_PREPEND(Open(), "Unable to open log");
   }
 
-  if (file_->Size() + s.size() > size_limit_bytes_) {
+  RETURN_NOT_OK(file_->Append(s));
+  if (file_->Size() > roll_threshold_bytes_) {
     RETURN_NOT_OK_PREPEND(Close(), "Unable to roll log");
+    roll_count_++;
     RETURN_NOT_OK_PREPEND(Open(), "Unable to roll log");
   }
-  RETURN_NOT_OK(file_->Append(s));
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/09298f3b/src/kudu/util/rolling_log.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/rolling_log.h b/src/kudu/util/rolling_log.h
index 897572a..0bb6755 100644
--- a/src/kudu/util/rolling_log.h
+++ b/src/kudu/util/rolling_log.h
@@ -22,6 +22,7 @@
 #include <string>
 
 #include "kudu/gutil/macros.h"
+#include "kudu/gutil/port.h"
 #include "kudu/gutil/strings/stringpiece.h"
 #include "kudu/util/status.h"
 
@@ -63,9 +64,17 @@ class RollingLog {
   // the log as necessary if it is not open.
   Status Open();
 
-  // Set the pre-compression size limit for the current and any future log 
files.
-  // Note that this is the limit on a single segment of the log, not the total.
-  void SetSizeLimitBytes(int64_t size);
+  // Set the pre-compression size threshold at which the log file will be 
rolled.
+  // If the log is already open, this applies for the the current and any 
future
+  // log file.
+  //
+  // NOTE: This is the limit on a single segment of the log, not a limit on 
the total
+  // size of the log.
+  //
+  // NOTE: The threshold is checked _after_ each call to Append(). So, the 
size of
+  // the log may overshoot this threshold by as much as the size of a single 
appended
+  // message.
+  void SetRollThresholdBytes(int64_t size);
 
   // Set the total number of log segments to be retained. When the log is 
rolled,
   // old segments are removed to achieve the targeted number of segments.
@@ -77,17 +86,20 @@ class RollingLog {
 
   // Append the given data to the current log file.
   //
-  // If appending this data would cross the configured file size limit, a new 
file
-  // is created and the data is appended there.
-  //
-  // Note that this is a synchronous API and causes potentially-blocking IO on 
the
-  // current thread. However, this does not fsync() or otherwise ensure 
durability
-  // of the appended data.
-  Status Append(StringPiece data);
+  // If, after appending this data, the file size has crossed the configured 
roll
+  // threshold, a new empty log file is created. Note that this is a 
synchronous API and
+  // causes potentially-blocking IO on the current thread. However, this does 
not fsync()
+  // or otherwise ensure durability of the appended data.
+  Status Append(StringPiece data) WARN_UNUSED_RESULT;
 
   // Close the log.
   Status Close();
 
+  // Return the number of times this log has rolled since it was first opened.
+  int roll_count() const {
+    return roll_count_;
+  }
+
  private:
   std::string GetLogFileName(int sequence) const;
 
@@ -101,12 +113,14 @@ class RollingLog {
   const std::string log_dir_;
   const std::string log_name_;
 
-  int64_t size_limit_bytes_;
+  int64_t roll_threshold_bytes_;
   int max_num_segments_;
 
   std::unique_ptr<WritableFile> file_;
   bool compress_after_close_;
 
+  int roll_count_ = 0;
+
   DISALLOW_COPY_AND_ASSIGN(RollingLog);
 };
 

Reply via email to