This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch dev-1.1.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/dev-1.1.1 by this push:
new e95427b75b [bugfix1.1.1] fix bug of vhash join build (#10954)
e95427b75b is described below
commit e95427b75b2a56dba99f8f0dabe8a08d05f36efa
Author: TengJianPing <[email protected]>
AuthorDate: Mon Jul 18 14:21:30 2022 +0800
[bugfix1.1.1] fix bug of vhash join build (#10954)
Merge fix of #10614 to branch dev-1.1.1.
---
be/src/vec/exec/join/vhash_join_node.cpp | 24 +++++++++++++++++++-----
be/src/vec/exec/join/vhash_join_node.h | 2 ++
2 files changed, 21 insertions(+), 5 deletions(-)
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp
b/be/src/vec/exec/join/vhash_join_node.cpp
index 422b77b801..0309b4fa94 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -18,6 +18,7 @@
#include "vec/exec/join/vhash_join_node.h"
#include "gen_cpp/PlanNodes_types.h"
+#include "gutil/strings/substitute.h"
#include "runtime/mem_tracker.h"
#include "runtime/runtime_filter_mgr.h"
#include "util/defer_op.h"
@@ -707,7 +708,7 @@ HashJoinNode::HashJoinNode(ObjectPool* pool, const
TPlanNode& tnode, const Descr
// avoid vector expand change block address.
// one block can store 4g data, _build_blocks can store 128*4g data.
// if probe data is bigger than 512g, the runtime filter may core dump
when inserting data.
- _build_blocks.reserve(128);
+ _build_blocks.reserve(_MAX_BUILD_BLOCK_COUNT);
}
HashJoinNode::~HashJoinNode() = default;
@@ -1082,6 +1083,9 @@ Status HashJoinNode::_hash_table_build(RuntimeState*
state) {
int64_t last_mem_used = 0;
bool eos = false;
+ // make one block for each 4 gigabytes
+ constexpr static auto BUILD_BLOCK_MAX_SIZE = 4 * 1024UL * 1024UL * 1024UL;
+
Block block;
while (!eos) {
block.clear_column_data();
@@ -1096,9 +1100,12 @@ Status HashJoinNode::_hash_table_build(RuntimeState*
state) {
mutable_block.merge(block);
}
- // make one block for each 4 gigabytes
- constexpr static auto BUILD_BLOCK_MAX_SIZE = 4 * 1024UL * 1024UL *
1024UL;
if (UNLIKELY(_mem_used - last_mem_used > BUILD_BLOCK_MAX_SIZE)) {
+ if (_build_blocks.size() == _MAX_BUILD_BLOCK_COUNT) {
+ return Status::NotSupported(
+ strings::Substitute("data size of right table in hash
join > $0",
+ BUILD_BLOCK_MAX_SIZE *
_MAX_BUILD_BLOCK_COUNT));
+ }
_build_blocks.emplace_back(mutable_block.to_block());
// TODO: Rethink whether we should do the process after we receive all
build blocks?
// which is better.
@@ -1111,8 +1118,15 @@ Status HashJoinNode::_hash_table_build(RuntimeState*
state) {
}
}
- _build_blocks.emplace_back(mutable_block.to_block());
- RETURN_IF_ERROR(_process_build_block(state, _build_blocks[index], index));
+ if (!mutable_block.empty()) {
+ if (_build_blocks.size() == _MAX_BUILD_BLOCK_COUNT) {
+ return Status::NotSupported(
+ strings::Substitute("data size of right table in hash join
> $0",
+ BUILD_BLOCK_MAX_SIZE *
_MAX_BUILD_BLOCK_COUNT));
+ }
+ _build_blocks.emplace_back(mutable_block.to_block());
+ RETURN_IF_ERROR(_process_build_block(state, _build_blocks[index],
index));
+ }
RETURN_IF_LIMIT_EXCEEDED(state, "Hash join, while constructing the hash
table.");
return std::visit(
diff --git a/be/src/vec/exec/join/vhash_join_node.h
b/be/src/vec/exec/join/vhash_join_node.h
index ec27589325..5b2025b336 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -262,6 +262,8 @@ private:
static std::vector<uint16_t> _convert_block_to_null(Block& block);
+ static const int _MAX_BUILD_BLOCK_COUNT = 128;
+
template <class HashTableContext, bool ignore_null, bool build_unique>
friend struct ProcessHashTableBuild;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]