This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 5748241 [Bug-fix] When query cancel, transfer_thread does not
continue to schedule scanner_thread (#5768)
5748241 is described below
commit 5748241dabc4777fdbe58e12c047e99fa02ab7ef
Author: Xinyi Zou <[email protected]>
AuthorDate: Wed May 19 09:26:58 2021 +0800
[Bug-fix] When query cancel, transfer_thread does not continue to schedule
scanner_thread (#5768)
The cause of the problem is that after query cancel,
OlapScanNode::transfer_thread still continues to schedule
OlapScanNode::scanner_thread until all tasks are scheduled.
Although each task does not scan data and exits quickly, it still consumes
a lot of resources.
(Guess)This may be the cause of the BUG (#5767) causing the I/O to be full.
So after query cancel, immediately exit the scheduling loop in
transfer_thread, and after waiting for
the end of all scanner_threads, transfer_thread will also exit.
---
be/src/exec/olap_scan_node.cpp | 16 ++++++++++++++++
be/src/exec/olap_scan_node.h | 6 ++++--
2 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp
index b1099fb..9ab9e0a 100644
--- a/be/src/exec/olap_scan_node.cpp
+++ b/be/src/exec/olap_scan_node.cpp
@@ -1252,6 +1252,13 @@ void OlapScanNode::transfer_thread(RuntimeState* state) {
}
// read from scanner
while (LIKELY(status.ok())) {
+ // When query cancel, _transfer_done is set to true at
OlapScanNode::close,
+ // and the loop is exited at this time, and the current thread exits
after
+ // waiting for _running_thread to decrease to 0.
+ if (UNLIKELY(_transfer_done)) {
+ LOG(INFO) << "Transfer thread cancelled, wait for the end of scan
thread.";
+ break;
+ }
int assigned_thread_num = 0;
// copy to local
{
@@ -1361,6 +1368,15 @@ void OlapScanNode::transfer_thread(RuntimeState* state) {
}
void OlapScanNode::scanner_thread(OlapScanner* scanner) {
+ if (UNLIKELY(_transfer_done)) {
+ _scanner_done = true;
+ std::unique_lock<std::mutex> l(_scan_batches_lock);
+ _running_thread--;
+ _scan_batch_added_cv.notify_one();
+ _scan_thread_exit_cv.notify_one();
+ LOG(INFO) << "Scan thread cancelled, cause query done, scan thread
started to exit";
+ return;
+ }
int64_t wait_time = scanner->update_wait_worker_timer();
// Do not use ScopedTimer. There is no guarantee that, the counter
// (_scan_cpu_timer, the class member) is not destroyed after
`_running_thread==0`.
diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h
index e643e1b..f0c6695 100644
--- a/be/src/exec/olap_scan_node.h
+++ b/be/src/exec/olap_scan_node.h
@@ -18,6 +18,7 @@
#ifndef DORIS_BE_SRC_QUERY_EXEC_OLAP_SCAN_NODE_H
#define DORIS_BE_SRC_QUERY_EXEC_OLAP_SCAN_NODE_H
+#include <atomic>
#include <boost/thread.hpp>
#include <boost/variant/static_visitor.hpp>
#include <condition_variable>
@@ -245,8 +246,9 @@ private:
int _max_materialized_row_batches;
bool _start;
- bool _scanner_done;
- bool _transfer_done;
+ // Used in Scan thread to ensure thread-safe
+ std::atomic_bool _scanner_done;
+ std::atomic_bool _transfer_done;
size_t _direct_conjunct_size;
int _total_assign_num;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]