This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch dev-1.1.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/dev-1.1.1 by this push:
     new e95427b75b [bugfix1.1.1] fix bug of vhash join build (#10954)
e95427b75b is described below

commit e95427b75b2a56dba99f8f0dabe8a08d05f36efa
Author: TengJianPing <[email protected]>
AuthorDate: Mon Jul 18 14:21:30 2022 +0800

    [bugfix1.1.1] fix bug of vhash join build (#10954)
    
    Merge fix of #10614 to branch dev-1.1.1.
---
 be/src/vec/exec/join/vhash_join_node.cpp | 24 +++++++++++++++++++-----
 be/src/vec/exec/join/vhash_join_node.h   |  2 ++
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/be/src/vec/exec/join/vhash_join_node.cpp 
b/be/src/vec/exec/join/vhash_join_node.cpp
index 422b77b801..0309b4fa94 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -18,6 +18,7 @@
 #include "vec/exec/join/vhash_join_node.h"
 
 #include "gen_cpp/PlanNodes_types.h"
+#include "gutil/strings/substitute.h"
 #include "runtime/mem_tracker.h"
 #include "runtime/runtime_filter_mgr.h"
 #include "util/defer_op.h"
@@ -707,7 +708,7 @@ HashJoinNode::HashJoinNode(ObjectPool* pool, const 
TPlanNode& tnode, const Descr
     // avoid vector expansion changing the block addresses.
     // one block can store 4g of data, so _build_blocks can store 128*4g of data.
     // if the probe data is bigger than 512g, the runtime filter may core dump 
 when inserting data.
-    _build_blocks.reserve(128);
+    _build_blocks.reserve(_MAX_BUILD_BLOCK_COUNT);
 }
 
 HashJoinNode::~HashJoinNode() = default;
@@ -1082,6 +1083,9 @@ Status HashJoinNode::_hash_table_build(RuntimeState* 
state) {
     int64_t last_mem_used = 0;
     bool eos = false;
 
+    // make one block for each 4 gigabytes
+    constexpr static auto BUILD_BLOCK_MAX_SIZE = 4 * 1024UL * 1024UL * 1024UL;
+
     Block block;
     while (!eos) {
         block.clear_column_data();
@@ -1096,9 +1100,12 @@ Status HashJoinNode::_hash_table_build(RuntimeState* 
state) {
             mutable_block.merge(block);
         }
 
-        // make one block for each 4 gigabytes
-        constexpr static auto BUILD_BLOCK_MAX_SIZE = 4 * 1024UL * 1024UL * 
1024UL;
         if (UNLIKELY(_mem_used - last_mem_used > BUILD_BLOCK_MAX_SIZE)) {
+            if (_build_blocks.size() == _MAX_BUILD_BLOCK_COUNT) {
+                return Status::NotSupported(
+                        strings::Substitute("data size of right table in hash 
join > $0",
+                                            BUILD_BLOCK_MAX_SIZE * 
_MAX_BUILD_BLOCK_COUNT));
+            }
             _build_blocks.emplace_back(mutable_block.to_block());
             // TODO: Rethink — maybe we should do the process after we receive all 
 build blocks ?
             // which is better.
@@ -1111,8 +1118,15 @@ Status HashJoinNode::_hash_table_build(RuntimeState* 
state) {
         }
     }
 
-    _build_blocks.emplace_back(mutable_block.to_block());
-    RETURN_IF_ERROR(_process_build_block(state, _build_blocks[index], index));
+    if (!mutable_block.empty()) {
+        if (_build_blocks.size() == _MAX_BUILD_BLOCK_COUNT) {
+            return Status::NotSupported(
+                    strings::Substitute("data size of right table in hash join 
> $0",
+                                        BUILD_BLOCK_MAX_SIZE * 
_MAX_BUILD_BLOCK_COUNT));
+        }
+        _build_blocks.emplace_back(mutable_block.to_block());
+        RETURN_IF_ERROR(_process_build_block(state, _build_blocks[index], 
index));
+    }
     RETURN_IF_LIMIT_EXCEEDED(state, "Hash join, while constructing the hash 
table.");
 
     return std::visit(
diff --git a/be/src/vec/exec/join/vhash_join_node.h 
b/be/src/vec/exec/join/vhash_join_node.h
index ec27589325..5b2025b336 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -262,6 +262,8 @@ private:
 
     static std::vector<uint16_t> _convert_block_to_null(Block& block);
 
+    static const int _MAX_BUILD_BLOCK_COUNT = 128;
+
     template <class HashTableContext, bool ignore_null, bool build_unique>
     friend struct ProcessHashTableBuild;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to