Repository: impala
Updated Branches:
  refs/heads/master b0027575c -> 7376ca29b


IMPALA-6595: fix crash in NljBuilder::Close()

The bug is that the right child of a blocking join node could be closed
before the builder if an error was encountered when sending a batch to
the sink. This hits a DCHECK because Buffers owned by the sink may still
be accounted against the child node.

Testing:
Added the test that originally triggered the problem. It reproduced the
failure when based on the IMPALA-4835 patch, but I can't reproduce
the failure after rebase onto master.

Change-Id: Ie46b87a4889d7cee907124796c830db41125cf15
Reviewed-on: http://gerrit.cloudera.org:8080/9493
Tested-by: Impala Public Jenkins
Reviewed-by: Tim Armstrong <tarmstr...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/7376ca29
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/7376ca29
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/7376ca29

Branch: refs/heads/master
Commit: 7376ca29b43bfc3e68f6730c9527a97c688b7e38
Parents: b002757
Author: Tim Armstrong <tarmstr...@cloudera.com>
Authored: Sat Mar 3 16:44:00 2018 -0800
Committer: Tim Armstrong <tarmstr...@cloudera.com>
Committed: Tue Mar 6 16:12:05 2018 +0000

----------------------------------------------------------------------
 be/src/exec/blocking-join-node.cc                    |  5 +++--
 be/src/exec/data-sink.h                              |  1 +
 be/src/exec/exec-node.h                              |  3 +--
 be/src/exec/nested-loop-join-node.cc                 |  6 +++++-
 .../QueryTest/single-node-nlj-exhaustive.test        | 15 ++++++++++++++-
 5 files changed, 24 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/7376ca29/be/src/exec/blocking-join-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/blocking-join-node.cc 
b/be/src/exec/blocking-join-node.cc
index e8c0948..cfaf91a 100644
--- a/be/src/exec/blocking-join-node.cc
+++ b/be/src/exec/blocking-join-node.cc
@@ -159,9 +159,10 @@ void BlockingJoinNode::ProcessBuildInputAsync(
   // probed. BlockingJoinNode::Open() will return failure as soon as 
child(0)->Open()
   // completes.
   if (CanCloseBuildEarly() || !status->ok()) {
-    // Release resources in 'build_batch_' before closing the children as some 
of the
-    // resources are still accounted towards the children node.
+    // Release resources in 'build_batch_' and 'build_sink' before closing the 
children
+    // as some of the resources are still accounted towards the children node.
     build_batch_.reset();
+    if (!status->ok()) build_sink->Close(state);
     child(1)->Close(state);
   }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/7376ca29/be/src/exec/data-sink.h
----------------------------------------------------------------------
diff --git a/be/src/exec/data-sink.h b/be/src/exec/data-sink.h
index 23df628..d2e80af 100644
--- a/be/src/exec/data-sink.h
+++ b/be/src/exec/data-sink.h
@@ -99,6 +99,7 @@ class DataSink {
   const std::vector<ScalarExprEvaluator*>& output_expr_evals() const {
     return output_expr_evals_;
   }
+  bool is_closed() const { return closed_; }
 
  protected:
   /// Set to true after Close() has been called. Subclasses should check and 
set this in

http://git-wip-us.apache.org/repos/asf/impala/blob/7376ca29/be/src/exec/exec-node.h
----------------------------------------------------------------------
diff --git a/be/src/exec/exec-node.h b/be/src/exec/exec-node.h
index 187d9a7..302f2e5 100644
--- a/be/src/exec/exec-node.h
+++ b/be/src/exec/exec-node.h
@@ -210,6 +210,7 @@ class ExecNode {
   MemTracker* expr_mem_tracker() { return expr_mem_tracker_.get(); }
   MemPool* expr_perm_pool() { return expr_perm_pool_.get(); }
   MemPool* expr_results_pool() { return expr_results_pool_.get(); }
+  bool is_closed() const { return is_closed_; }
 
   /// Return true if codegen was disabled by the planner for this ExecNode. 
Does not
   /// check to see if codegen was enabled for the enclosing fragment.
@@ -335,8 +336,6 @@ class ExecNode {
   /// reservations pool in Close().
   BufferPool::ClientHandle buffer_pool_client_;
 
-  bool is_closed() const { return is_closed_; }
-
   /// Pointer to the containing SubplanNode or NULL if not inside a subplan.
   /// Set by SubplanNode::Init(). Not owned.
   SubplanNode* containing_subplan_;

http://git-wip-us.apache.org/repos/asf/impala/blob/7376ca29/be/src/exec/nested-loop-join-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/nested-loop-join-node.cc 
b/be/src/exec/nested-loop-join-node.cc
index c5ecf53..e0a375b 100644
--- a/be/src/exec/nested-loop-join-node.cc
+++ b/be/src/exec/nested-loop-join-node.cc
@@ -138,7 +138,11 @@ void NestedLoopJoinNode::Close(RuntimeState* state) {
   if (is_closed()) return;
   ScalarExprEvaluator::Close(join_conjunct_evals_, state);
   ScalarExpr::Close(join_conjuncts_);
-  if (builder_ != NULL) builder_->Close(state);
+  if (builder_ != NULL) {
+    // IMPALA-6595: builder must be closed before child.
+    DCHECK(builder_->is_closed() || !children_[1]->is_closed());
+    builder_->Close(state);
+  }
   build_batches_ = NULL;
   if (matching_build_rows_ != NULL) {
     mem_tracker()->Release(matching_build_rows_->MemUsage());

http://git-wip-us.apache.org/repos/asf/impala/blob/7376ca29/testdata/workloads/functional-query/queries/QueryTest/single-node-nlj-exhaustive.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/single-node-nlj-exhaustive.test
 
b/testdata/workloads/functional-query/queries/QueryTest/single-node-nlj-exhaustive.test
index a6b3cae..e194465 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/single-node-nlj-exhaustive.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/single-node-nlj-exhaustive.test
@@ -6,7 +6,7 @@ select straight_join * from (values(1 id), (2), (3)) v1,
 (select *, count(*) over() from tpch.lineitem where l_orderkey < 100000) v2
 order by id, l_orderkey, l_partkey, l_suppkey, l_linenumber
 limit 5
-----RESULTS
+---- RESULTS
 
1,1,2132,4633,4,28.00,28955.64,0.09,0.06,'N','O','1996-04-21','1996-03-30','1996-05-16','NONE','AIR','lites.
 fluffily even de',100382
 
1,1,15635,638,6,32.00,49620.16,0.07,0.02,'N','O','1996-01-30','1996-02-07','1996-02-03','DELIVER
 IN PERSON','MAIL','arefully slyly ex',100382
 
1,1,24027,1534,5,24.00,22824.48,0.10,0.04,'N','O','1996-03-30','1996-03-14','1996-04-01','NONE','FOB','
 pending foxes. slyly re',100382
@@ -15,3 +15,16 @@ limit 5
 ---- TYPES
 
tinyint,bigint,bigint,bigint,int,decimal,decimal,decimal,decimal,string,string,string,string,string,string,string,string,bigint
 ====
+---- QUERY
+# IMPALA-6595: same query as above, but tuned to trigger a different bug where 
we hit
+# "Memory Limit Exceeded" when adding a batch to NljBuilder.
+# TODO: after IMPALA-4835 goes in, retune this query.
+set buffer_pool_limit=50m;
+set mem_limit=70m;
+select straight_join * from (values(1 id), (2), (3)) v1,
+(select *, count(*) over() from tpch.lineitem where l_orderkey < 100000) v2
+order by id, l_orderkey, l_partkey, l_suppkey, l_linenumber
+limit 5
+---- CATCH
+Memory limit exceeded
+====

Reply via email to