This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch dev_join
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/dev_join by this push:
new 8b6c0951c6d update
8b6c0951c6d is described below
commit 8b6c0951c6d4526118acb248f0bdb83c40ce0ffa
Author: BiteTheDDDDt <[email protected]>
AuthorDate: Tue Oct 17 20:24:02 2023 +0800
update
---
be/src/vec/common/hash_table/hash_map.h | 10 ++++------
be/src/vec/exec/join/vhash_join_node.h | 11 -----------
2 files changed, 4 insertions(+), 17 deletions(-)
diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/hash_map.h
index ac0db0795bc..85110deba62 100644
--- a/be/src/vec/common/hash_table/hash_map.h
+++ b/be/src/vec/common/hash_table/hash_map.h
@@ -210,19 +210,17 @@ public:
using HashMapTable<Key, Cell, Hash, Grower, Allocator>::HashMapTable;
- void expanse_for_add_elem(size_t num_elem) {
- bucket_size = calc_bucket_size(num_elem + 1);
- first.resize(bucket_size, 0);
- }
-
static uint32_t calc_bucket_size(size_t num_elem) {
size_t expect_bucket_size = static_cast<size_t>(num_elem) + (num_elem - 1) / 7;
return phmap::priv::NormalizeCapacity(expect_bucket_size) + 1;
}
void build(const Key* __restrict keys, const size_t* __restrict hash_values, int num_elem) {
- build_keys = keys;
+ bucket_size = calc_bucket_size(num_elem + 1);
+ first.resize(bucket_size, 0);
next.resize(num_elem);
+
+ build_keys = keys;
for (size_t i = 1; i < num_elem; i++) {
uint32_t bucket_num = hash_values[i] & (bucket_size - 1);
next[i] = first[bucket_num];
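Taken together, the first hunk folds the old expanse_for_add_elem() step into build(): the bucket array is now always sized from the number of build rows immediately before the chains are threaded. Below is a rough standalone sketch of that build path, not the Doris implementation: ChainedTableSketch, calc_bucket_size_sketch, and the round-up-to-power-of-two loop are assumptions standing in for the table's real member types and for phmap::priv::NormalizeCapacity.

#include <cstddef>
#include <cstdint>
#include <vector>

// Stand-in for phmap::priv::NormalizeCapacity(n) + 1: round the expected
// size up to a power of two so "hash & (bucket_size - 1)" replaces a modulo.
static uint32_t calc_bucket_size_sketch(size_t num_elem) {
    // ~1/7 extra slots, as in the patch; consistent with a 7/8 max load factor.
    size_t expect = num_elem + (num_elem - 1) / 7;
    size_t cap = 1;
    while (cap < expect) {
        cap <<= 1;
    }
    return static_cast<uint32_t>(cap);
}

struct ChainedTableSketch {
    std::vector<uint32_t> first; // bucket -> index of the newest key in it
    std::vector<uint32_t> next;  // key index -> next older key in the same bucket
    uint32_t bucket_size = 0;

    // Mirrors the new build(): size the buckets up front, then thread each
    // key into its bucket's chain. Index 0 serves as the end-of-chain
    // sentinel, which is why the loop starts at i = 1.
    void build(const size_t* hash_values, size_t num_elem) {
        bucket_size = calc_bucket_size_sketch(num_elem + 1);
        first.assign(bucket_size, 0);
        next.resize(num_elem);
        for (size_t i = 1; i < num_elem; i++) {
            uint32_t bucket_num = hash_values[i] & (bucket_size - 1);
            next[i] = first[bucket_num];
            first[bucket_num] = static_cast<uint32_t>(i);
        }
    }
};

Sizing first once per build also removes any need for callers to pre-expand the table, which is exactly what the second hunk below deletes.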
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index fffa9e5a2b8..ef5a61eae17 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -145,17 +145,6 @@ struct ProcessHashTableBuild {
SCOPED_TIMER(_parent->_build_table_insert_timer);
hash_table_ctx.hash_table->reset_resize_timer();
- // Only when build_unique is false do we need to expand the hash table before inserting data:
- // 1. When there are few duplicate keys, reducing the number of hash table resizes
- //    can improve performance to a certain extent, about 2%-5%.
- // 2. When there are many duplicate keys, the filled buckets can be far fewer than
- //    the buckets built, which may waste a lot of memory.
- // TODO: use the NDV of the key column from optimizer statistics for the expansion
- if (!_parent->build_unique()) {
- RETURN_IF_CATCH_EXCEPTION(hash_table_ctx.hash_table->expanse_for_add_elem(
- std::min<int>(_rows, config::hash_table_pre_expanse_max_rows)));
- }
-
vector<int>& inserted_rows = _parent->_inserted_rows[&_acquired_block];
bool has_runtime_filter = !_parent->runtime_filter_descs().empty();
if (has_runtime_filter) {
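With build() sizing the buckets itself, the caller-side pre-expansion removed above becomes redundant: neither the build_unique() distinction nor the config::hash_table_pre_expanse_max_rows cap affects the table layout any more. For reference, probing the structure built by the earlier sketch is a plain chain walk; this hypothetical find() (the keys array and the equality comparison are illustrative, not Doris's probe code) shows how first and next are consumed:

// Walk the chain for one hash/key pair; returns the matching build-side
// index, or 0 (the reserved sentinel) when the bucket holds no equal key.
template <typename Key>
uint32_t find(const ChainedTableSketch& t, const Key* keys, size_t hash, const Key& key) {
    for (uint32_t i = t.first[hash & (t.bucket_size - 1)]; i != 0; i = t.next[i]) {
        if (keys[i] == key) {
            return i;
        }
    }
    return 0;
}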
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]