IMPALA-5567: race in fragment instance teardown

The bug is that PlanRootSink::GetNext() calls
RuntimeState::CheckQueryState() was called concurrently with
RuntimeState::ReleaseResources() and got a reference

The other callsites of CheckQueryState() are safe because they are in two
categories:
* ExecNodes or DataSink methods executed by fragment instance execution
  threads, which must terminate before the runtime state resources are
  released.
* In FeSupport where ReleaseResources() called after CheckQueryState()

There is no need to asynchronously check for memory limit
exceeded in PlanRootSink::GetNext() since that method does
not allocate tracked memory that could push the query over
the memory limit.

Change-Id: If567e734042b5f2b82323368dd536dbf3bdf4744
Reviewed-on: http://gerrit.cloudera.org:8080/7275
Reviewed-by: Tim Armstrong <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/dbd596be
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/dbd596be
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/dbd596be

Branch: refs/heads/master
Commit: dbd596bebbd4ed2e4a34ca0aa14a8a9bfcca8c62
Parents: ae5a877
Author: Tim Armstrong <[email protected]>
Authored: Fri Jun 23 10:29:59 2017 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Wed Jun 28 03:10:54 2017 +0000

----------------------------------------------------------------------
 be/src/exec/plan-root-sink.cc   | 4 ++--
 be/src/runtime/runtime-state.cc | 5 +++--
 be/src/runtime/runtime-state.h  | 4 ++--
 3 files changed, 7 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/dbd596be/be/src/exec/plan-root-sink.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/plan-root-sink.cc b/be/src/exec/plan-root-sink.cc
index 1cba3d5..2e8408a 100644
--- a/be/src/exec/plan-root-sink.cc
+++ b/be/src/exec/plan-root-sink.cc
@@ -142,12 +142,12 @@ Status PlanRootSink::GetNext(
   while (!eos_ && results_ != nullptr && !sender_done_) consumer_cv_.wait(l);
 
   *eos = eos_;
-  return state->CheckQueryState();
+  return state->GetQueryStatus();
 }
 
 void PlanRootSink::GetRowValue(
     TupleRow* row, vector<void*>* result, vector<int>* scales) {
-  DCHECK(result->size() >= output_expr_evals_.size());
+  DCHECK_GE(result->size(), output_expr_evals_.size());
   for (int i = 0; i < output_expr_evals_.size(); ++i) {
     (*result)[i] = output_expr_evals_[i]->GetValue(row);
     (*scales)[i] = output_expr_evals_[i]->output_scale();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/dbd596be/be/src/runtime/runtime-state.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/runtime-state.cc b/be/src/runtime/runtime-state.cc
index 1224345..b05fc6b 100644
--- a/be/src/runtime/runtime-state.cc
+++ b/be/src/runtime/runtime-state.cc
@@ -240,8 +240,8 @@ void RuntimeState::SetMemLimitExceeded(MemTracker* tracker,
 }
 
 Status RuntimeState::CheckQueryState() {
-  if (instance_mem_tracker_ != nullptr
-      && UNLIKELY(instance_mem_tracker_->AnyLimitExceeded())) {
+  DCHECK(instance_mem_tracker_ != nullptr);
+  if (UNLIKELY(instance_mem_tracker_->AnyLimitExceeded())) {
     SetMemLimitExceeded(instance_mem_tracker_.get());
   }
   return GetQueryStatus();
@@ -261,6 +261,7 @@ void RuntimeState::UnregisterReaderContexts() {
 }
 
 void RuntimeState::ReleaseResources() {
+  // TODO: IMPALA-5587: control structures (e.g. MemTrackers) shouldn't be 
destroyed here.
   UnregisterReaderContexts();
   if (filter_bank_ != nullptr) filter_bank_->Close();
   if (resource_pool_ != nullptr) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/dbd596be/be/src/runtime/runtime-state.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/runtime-state.h b/be/src/runtime/runtime-state.h
index d70459f..186d540 100644
--- a/be/src/runtime/runtime-state.h
+++ b/be/src/runtime/runtime-state.h
@@ -293,8 +293,8 @@ class RuntimeState {
 
   /// Returns a non-OK status if query execution should stop (e.g., the query 
was
   /// cancelled or a mem limit was exceeded). Exec nodes should check this 
periodically so
-  /// execution doesn't continue if the query terminates abnormally. This can 
be called
-  /// after ReleaseResources().
+  /// execution doesn't continue if the query terminates abnormally. This 
should not be
+  /// called after ReleaseResources().
   Status CheckQueryState();
 
   /// Create a codegen object accessible via codegen() if it doesn't exist 
already.

Reply via email to