Repository: kudu
Updated Branches:
  refs/heads/master 1f2617674 -> 1cf8ad189


KUDU-2291 (part 7): fix race in stack trace collection

This fixes a narrow race in which StackTraceCollector could return
'TimedOut' but in fact the stack trace would have been collected.

See the comments for details.

Change-Id: Id1af0ffa02efb8151f1f3f84dd142d91066cc1f8
Reviewed-on: http://gerrit.cloudera.org:8080/9406
Reviewed-by: Mike Percy <mpe...@apache.org>
Tested-by: Todd Lipcon <t...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/a3ecd1a9
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/a3ecd1a9
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/a3ecd1a9

Branch: refs/heads/master
Commit: a3ecd1a99a8da094da12d5b94efa8e5cf40debad
Parents: 1f26176
Author: Todd Lipcon <t...@apache.org>
Authored: Wed Feb 21 16:03:47 2018 -0800
Committer: Todd Lipcon <t...@apache.org>
Committed: Fri Feb 23 02:37:13 2018 +0000

----------------------------------------------------------------------
 src/kudu/util/debug-util.cc | 22 +++++++++++++++-------
 src/kudu/util/debug-util.h  |  5 ++++-
 2 files changed, 19 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/a3ecd1a9/src/kudu/util/debug-util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/debug-util.cc b/src/kudu/util/debug-util.cc
index 8fd1575..0af7abe 100644
--- a/src/kudu/util/debug-util.cc
+++ b/src/kudu/util/debug-util.cc
@@ -329,7 +329,7 @@ StackTraceCollector::~StackTraceCollector() {
 }
 
 #ifdef __linux__
-void StackTraceCollector::RevokeSigData() {
+bool StackTraceCollector::RevokeSigData() {
   // We have several cases to consider. Though it involves some simple code
   // duplication, each case is handled separately for clarity.
 
@@ -338,7 +338,7 @@ void StackTraceCollector::RevokeSigData() {
   if (sig_data_->result_ready.complete()) {
     delete sig_data_;
     sig_data_ = nullptr;
-    return;
+    return true;
   }
 
   // 2) Timed out, but signal still pending and signal handler not yet invoked.
@@ -368,7 +368,7 @@ void StackTraceCollector::RevokeSigData() {
                   << "to thread " << tid_;
     ANNOTATE_LEAKING_OBJECT_PTR(sig_data_);
     sig_data_ = nullptr;
-    return;
+    return false;
   }
 
   // In case (3), the signal handler started running but isn't complete yet.
@@ -378,6 +378,7 @@ void StackTraceCollector::RevokeSigData() {
 
   delete sig_data_;
   sig_data_ = nullptr;
+  return true;
 }
 
 
@@ -434,9 +435,15 @@ Status StackTraceCollector::AwaitCollection(MonoTime 
deadline) {
   // The main reason that a thread would not respond is that it has blocked 
signals. For
   // example, glibc's timer_thread doesn't respond to our signal, so we always 
time out
   // on that one.
-  bool got_result = sig_data_->result_ready.WaitUntil(deadline);
-  RevokeSigData();
-  if (!got_result) {
+  ignore_result(sig_data_->result_ready.WaitUntil(deadline));
+
+  // Whether or not we timed out above, revoke the signal data structure.
+  // It's possible that the above 'Wait' times out but it succeeds exactly
+  // after that timeout. In that case, RevokeSigData() will return true
+  // and we can return a successful result, because the destination stack trace
+  // has in fact been populated.
+  bool completed = RevokeSigData();
+  if (!completed) {
     return Status::TimedOut("thread did not respond: maybe it is blocking 
signals");
   }
 
@@ -450,7 +457,8 @@ Status StackTraceCollector::TriggerAsync(int64_t tid_, 
StackTrace* stack) {
 Status StackTraceCollector::AwaitCollection() {
   return Status::NotSupported("unsupported platform");
 }
-void StackTraceCollector::RevokeSigData() {
+bool StackTraceCollector::RevokeSigData() {
+  return false;
 }
 #endif // __linux__
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/a3ecd1a9/src/kudu/util/debug-util.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/debug-util.h b/src/kudu/util/debug-util.h
index 637e49f..8ed8f43 100644
--- a/src/kudu/util/debug-util.h
+++ b/src/kudu/util/debug-util.h
@@ -214,8 +214,11 @@ class StackTraceCollector {
   // Safely sets 'sig_data_' back to nullptr after having sent an asynchronous
   // stack trace request. See implementation for details.
   //
+  // Returns true if the stack trace was collected before revocation
+  // and false if it was not.
+  //
   // POSTCONDITION: sig_data_ == nullptr
-  void RevokeSigData();
+  bool RevokeSigData();
 
   int64_t tid_ = 0;
   stack_trace_internal::SignalData* sig_data_ = nullptr;

Reply via email to