IMPALA-5567: race in fragment instance teardown The bug is that PlanRootSink::GetNext() calls RuntimeState::CheckQueryState() was called concurrently with RuntimeState::ReleaseResources() and got a reference
The other callsites of CheckQueryState() are safe because they are in two categories: * ExecNodes or DataSink methods executed by fragment instance execution threads, which must terminate before the runtime state resources are released. * In FeSupport where ReleaseResources() called after CheckQueryState() There is no need to asynchronously check for memory limit exceeded in PlanRootSink::GetNext() since that method does not allocate tracked memory that could push the query over the memory limit. Change-Id: If567e734042b5f2b82323368dd536dbf3bdf4744 Reviewed-on: http://gerrit.cloudera.org:8080/7275 Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/dbd596be Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/dbd596be Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/dbd596be Branch: refs/heads/master Commit: dbd596bebbd4ed2e4a34ca0aa14a8a9bfcca8c62 Parents: ae5a877 Author: Tim Armstrong <[email protected]> Authored: Fri Jun 23 10:29:59 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Wed Jun 28 03:10:54 2017 +0000 ---------------------------------------------------------------------- be/src/exec/plan-root-sink.cc | 4 ++-- be/src/runtime/runtime-state.cc | 5 +++-- be/src/runtime/runtime-state.h | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/dbd596be/be/src/exec/plan-root-sink.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/plan-root-sink.cc b/be/src/exec/plan-root-sink.cc index 1cba3d5..2e8408a 100644 --- a/be/src/exec/plan-root-sink.cc +++ b/be/src/exec/plan-root-sink.cc @@ -142,12 +142,12 @@ Status PlanRootSink::GetNext( while (!eos_ && results_ != nullptr && !sender_done_) consumer_cv_.wait(l); *eos = eos_; - return state->CheckQueryState(); + return state->GetQueryStatus(); } void PlanRootSink::GetRowValue( TupleRow* row, vector<void*>* result, vector<int>* scales) { - DCHECK(result->size() >= output_expr_evals_.size()); + DCHECK_GE(result->size(), output_expr_evals_.size()); for (int i = 0; i < output_expr_evals_.size(); ++i) { (*result)[i] = output_expr_evals_[i]->GetValue(row); (*scales)[i] = output_expr_evals_[i]->output_scale(); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/dbd596be/be/src/runtime/runtime-state.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/runtime-state.cc b/be/src/runtime/runtime-state.cc index 1224345..b05fc6b 100644 --- a/be/src/runtime/runtime-state.cc +++ b/be/src/runtime/runtime-state.cc @@ -240,8 +240,8 @@ void RuntimeState::SetMemLimitExceeded(MemTracker* tracker, } Status RuntimeState::CheckQueryState() { - if (instance_mem_tracker_ != nullptr - && UNLIKELY(instance_mem_tracker_->AnyLimitExceeded())) { + DCHECK(instance_mem_tracker_ != nullptr); + if (UNLIKELY(instance_mem_tracker_->AnyLimitExceeded())) { SetMemLimitExceeded(instance_mem_tracker_.get()); } return GetQueryStatus(); @@ -261,6 +261,7 @@ void RuntimeState::UnregisterReaderContexts() { } void RuntimeState::ReleaseResources() { + // TODO: IMPALA-5587: control structures (e.g. MemTrackers) shouldn't be destroyed here. UnregisterReaderContexts(); if (filter_bank_ != nullptr) filter_bank_->Close(); if (resource_pool_ != nullptr) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/dbd596be/be/src/runtime/runtime-state.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/runtime-state.h b/be/src/runtime/runtime-state.h index d70459f..186d540 100644 --- a/be/src/runtime/runtime-state.h +++ b/be/src/runtime/runtime-state.h @@ -293,8 +293,8 @@ class RuntimeState { /// Returns a non-OK status if query execution should stop (e.g., the query was /// cancelled or a mem limit was exceeded). Exec nodes should check this periodically so - /// execution doesn't continue if the query terminates abnormally. This can be called - /// after ReleaseResources(). + /// execution doesn't continue if the query terminates abnormally. This should not be + /// called after ReleaseResources(). Status CheckQueryState(); /// Create a codegen object accessible via codegen() if it doesn't exist already.
