This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch dev_join
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/dev_join by this push:
new 8b6c0951c6d update
8b6c0951c6d is described below
commit 8b6c0951c6d4526118acb248f0bdb83c40ce0ffa
Author: BiteTheDDDDt <[email protected]>
AuthorDate: Tue Oct 17 20:24:02 2023 +0800
update
---
be/src/vec/common/hash_table/hash_map.h | 10 ++++------
be/src/vec/exec/join/vhash_join_node.h | 11 -----------
2 files changed, 4 insertions(+), 17 deletions(-)
diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/hash_map.h
index ac0db0795bc..85110deba62 100644
--- a/be/src/vec/common/hash_table/hash_map.h
+++ b/be/src/vec/common/hash_table/hash_map.h
@@ -210,19 +210,17 @@ public:
using HashMapTable<Key, Cell, Hash, Grower, Allocator>::HashMapTable;
- void expanse_for_add_elem(size_t num_elem) {
- bucket_size = calc_bucket_size(num_elem + 1);
- first.resize(bucket_size, 0);
- }
-
static uint32_t calc_bucket_size(size_t num_elem) {
size_t expect_bucket_size = static_cast<size_t>(num_elem) + (num_elem - 1) / 7;
return phmap::priv::NormalizeCapacity(expect_bucket_size) + 1;
}
void build(const Key* __restrict keys, const size_t* __restrict hash_values, int num_elem) {
- build_keys = keys;
+ bucket_size = calc_bucket_size(num_elem + 1);
+ first.resize(bucket_size, 0);
next.resize(num_elem);
+
+ build_keys = keys;
for (size_t i = 1; i < num_elem; i++) {
uint32_t bucket_num = hash_values[i] & (bucket_size - 1);
next[i] = first[bucket_num];
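Taken together, the first hunk folds the old expanse_for_add_elem() step into build(): the bucket array is now always sized from the number of build rows immediately before the chains are threaded. Below is a rough standalone sketch of that build path, not the Doris implementation: ChainedTableSketch, calc_bucket_size_sketch, and the round-up-to-power-of-two loop are assumptions standing in for the table's real member types and for phmap::priv::NormalizeCapacity.

#include <cstddef>
#include <cstdint>
#include <vector>

// Stand-in for phmap::priv::NormalizeCapacity(n) + 1: round the expected
// size up to a power of two so "hash & (bucket_size - 1)" replaces a modulo.
static uint32_t calc_bucket_size_sketch(size_t num_elem) {
    // ~1/7 extra slots, as in the patch; consistent with a 7/8 max load factor.
    size_t expect = num_elem + (num_elem - 1) / 7;
    size_t cap = 1;
    while (cap < expect) {
        cap <<= 1;
    }
    return static_cast<uint32_t>(cap);
}

struct ChainedTableSketch {
    std::vector<uint32_t> first; // bucket -> index of the newest key in it
    std::vector<uint32_t> next;  // key index -> next older key in the same bucket
    uint32_t bucket_size = 0;

    // Mirrors the new build(): size the buckets up front, then thread each
    // key into its bucket's chain. Index 0 serves as the end-of-chain
    // sentinel, which is why the loop starts at i = 1.
    void build(const size_t* hash_values, size_t num_elem) {
        bucket_size = calc_bucket_size_sketch(num_elem + 1);
        first.assign(bucket_size, 0);
        next.resize(num_elem);
        for (size_t i = 1; i < num_elem; i++) {
            uint32_t bucket_num = hash_values[i] & (bucket_size - 1);
            next[i] = first[bucket_num];
            first[bucket_num] = static_cast<uint32_t>(i);
        }
    }
};

Sizing first once per build also removes any need for callers to pre-expand the table, which is exactly what the second hunk below deletes.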
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index fffa9e5a2b8..ef5a61eae17 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -145,17 +145,6 @@ struct ProcessHashTableBuild {
SCOPED_TIMER(_parent->_build_table_insert_timer);
hash_table_ctx.hash_table->reset_resize_timer();
- // Only when build_unique is false do we need to expand the hash table before inserting data:
- // 1. When there are few duplicate keys, reducing the number of hash table resizes
- //    can improve performance to a certain extent, about 2%-5%.
- // 2. When there are many duplicate keys, the filled buckets can be far fewer than
- //    the buckets built, which may waste a lot of memory.
- // TODO: use the NDV of the key column from optimizer statistics for the expansion
- if (!_parent->build_unique()) {
- RETURN_IF_CATCH_EXCEPTION(hash_table_ctx.hash_table->expanse_for_add_elem(
- std::min<int>(_rows, config::hash_table_pre_expanse_max_rows)));
- }
-
vector<int>& inserted_rows = _parent->_inserted_rows[&_acquired_block];
bool has_runtime_filter = !_parent->runtime_filter_descs().empty();
if (has_runtime_filter) {
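With build() sizing the buckets itself, the caller-side pre-expansion removed above becomes redundant: neither the build_unique() distinction nor the config::hash_table_pre_expanse_max_rows cap affects the table layout any more. For reference, probing the structure built by the earlier sketch is a plain chain walk; this hypothetical find() (the keys array and the equality comparison are illustrative, not Doris's probe code) shows how first and next are consumed:

// Walk the chain for one hash/key pair; returns the matching build-side
// index, or 0 (the reserved sentinel) when the bucket holds no equal key.
template <typename Key>
uint32_t find(const ChainedTableSketch& t, const Key* keys, size_t hash, const Key& key) {
    for (uint32_t i = t.first[hash & (t.bucket_size - 1)]; i != 0; i = t.next[i]) {
        if (keys[i] == key) {
            return i;
        }
    }
    return 0;
}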
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]