impala git commit: IMPALA-6423: HDFS scanner doesn't check RuntimeState::is_cancelled()
Repository: impala Updated Branches: refs/heads/2.x b961de23d -> b696fb948 IMPALA-6423: HDFS scanner doesn't check RuntimeState::is_cancelled() There are cancellation checks in HDFS scanner code that are using ScannerContext::cancelled(), however, if an internal cancellation happens then RuntimeState::is_cancelled() should also be checked to detect these. As a solution I extended ScannerContext::cancelled() to check RuntimeState::is_cancelled() as well. Creating automatic tests for this is difficult as the result of the query is actually the same with and without this change. With manual tests I verifyed that in case an internal cancel happens (I made the exchange node fail during a select query) this change prevents CommitRows() being executed multiple times even if the query had been cancelled before the first call. Change-Id: I3eb960d9d6f2dde4b2cb4988aa06288d11802b9a Reviewed-on: http://gerrit.cloudera.org:8080/9352 Reviewed-by: Tim ArmstrongTested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/b696fb94 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/b696fb94 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/b696fb94 Branch: refs/heads/2.x Commit: b696fb9484832b81db04c60d36e63357d7a421f9 Parents: b961de2 Author: Gabor Kaszab Authored: Wed Feb 14 12:39:03 2018 +0100 Committer: Impala Public Jenkins Committed: Tue Feb 20 01:10:15 2018 + -- be/src/exec/scanner-context.cc | 1 + be/src/exec/scanner-context.h | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/impala/blob/b696fb94/be/src/exec/scanner-context.cc -- diff --git a/be/src/exec/scanner-context.cc b/be/src/exec/scanner-context.cc index 0311f0e..abdde07 100644 --- a/be/src/exec/scanner-context.cc +++ b/be/src/exec/scanner-context.cc @@ -330,6 +330,7 @@ void ScannerContext::Stream::ReturnIoBuffer() { } bool ScannerContext::cancelled() const { + if (state_->is_cancelled()) return true; if (!scan_node_->HasRowBatchQueue()) return false; return static_cast (scan_node_)->done(); } http://git-wip-us.apache.org/repos/asf/impala/blob/b696fb94/be/src/exec/scanner-context.h -- diff --git a/be/src/exec/scanner-context.h b/be/src/exec/scanner-context.h index 09a4bdc..a131d3f 100644 --- a/be/src/exec/scanner-context.h +++ b/be/src/exec/scanner-context.h @@ -359,8 +359,9 @@ class ScannerContext { /// context. Stream* AddStream(io::ScanRange* range); - /// Returns false if scan_node_ is multi-threaded and has been cancelled. - /// Always returns false if the scan_node_ is not multi-threaded. + /// Returns true if RuntimeState::is_cancelled() is true, or if scan node is not + /// multi-threaded and is done (finished, cancelled or reached it's limit). + /// In all other cases returns false. bool cancelled() const; HdfsPartitionDescriptor* partition_descriptor() { return partition_desc_; }
impala git commit: IMPALA-6423: HDFS scanner doesn't check RuntimeState::is_cancelled()
Repository: impala Updated Branches: refs/heads/master 3a049a530 -> 8302608bd IMPALA-6423: HDFS scanner doesn't check RuntimeState::is_cancelled() There are cancellation checks in HDFS scanner code that are using ScannerContext::cancelled(), however, if an internal cancellation happens then RuntimeState::is_cancelled() should also be checked to detect these. As a solution I extended ScannerContext::cancelled() to check RuntimeState::is_cancelled() as well. Creating automatic tests for this is difficult as the result of the query is actually the same with and without this change. With manual tests I verifyed that in case an internal cancel happens (I made the exchange node fail during a select query) this change prevents CommitRows() being executed multiple times even if the query had been cancelled before the first call. Change-Id: I3eb960d9d6f2dde4b2cb4988aa06288d11802b9a Reviewed-on: http://gerrit.cloudera.org:8080/9352 Reviewed-by: Tim ArmstrongTested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/8302608b Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/8302608b Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/8302608b Branch: refs/heads/master Commit: 8302608bdff15d6e8af324fc885dd0a8c2b44a5d Parents: 3a049a5 Author: Gabor Kaszab Authored: Wed Feb 14 12:39:03 2018 +0100 Committer: Impala Public Jenkins Committed: Tue Feb 20 00:45:48 2018 + -- be/src/exec/scanner-context.cc | 1 + be/src/exec/scanner-context.h | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/impala/blob/8302608b/be/src/exec/scanner-context.cc -- diff --git a/be/src/exec/scanner-context.cc b/be/src/exec/scanner-context.cc index 0311f0e..abdde07 100644 --- a/be/src/exec/scanner-context.cc +++ b/be/src/exec/scanner-context.cc @@ -330,6 +330,7 @@ void ScannerContext::Stream::ReturnIoBuffer() { } bool ScannerContext::cancelled() const { + if (state_->is_cancelled()) return true; if (!scan_node_->HasRowBatchQueue()) return false; return static_cast (scan_node_)->done(); } http://git-wip-us.apache.org/repos/asf/impala/blob/8302608b/be/src/exec/scanner-context.h -- diff --git a/be/src/exec/scanner-context.h b/be/src/exec/scanner-context.h index 09a4bdc..a131d3f 100644 --- a/be/src/exec/scanner-context.h +++ b/be/src/exec/scanner-context.h @@ -359,8 +359,9 @@ class ScannerContext { /// context. Stream* AddStream(io::ScanRange* range); - /// Returns false if scan_node_ is multi-threaded and has been cancelled. - /// Always returns false if the scan_node_ is not multi-threaded. + /// Returns true if RuntimeState::is_cancelled() is true, or if scan node is not + /// multi-threaded and is done (finished, cancelled or reached it's limit). + /// In all other cases returns false. bool cancelled() const; HdfsPartitionDescriptor* partition_descriptor() { return partition_desc_; }