This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 17df7d5f435 [Chore](hash-table) remove some unused code of PartitionedHashMap (#43215)
17df7d5f435 is described below
commit 17df7d5f4355a810bcc79c437955d94e676514df
Author: Pxl <[email protected]>
AuthorDate: Tue Nov 5 11:34:40 2024 +0800
[Chore](hash-table) remove some unused code of PartitionedHashMap (#43215)
remove some unused code of PartitionedHashMap
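For context before the diff: the code removed here implemented a convert-on-threshold
two-level table. A plain level0 table was used until its bucket count crossed a
configured threshold, at which point its contents were redistributed into
1ULL << BITS_FOR_SUB_TABLE (16) sub tables selected from the high bits of the hash.
A rough standalone sketch of that mechanism, condensed from the deleted headers
(std::unordered_map stands in for the Doris Impl table type, and the threshold
check is simplified; this is illustrative, not the removed implementation):

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <unordered_map>

    static constexpr size_t BITS_FOR_SUB_TABLE = 4;
    static constexpr size_t NUM_LEVEL1_SUB_TABLES = 1ULL << BITS_FOR_SUB_TABLE; // 16

    struct TinyPartitionedMap {
        std::unordered_map<uint32_t, int> level0;                        // single table
        std::unordered_map<uint32_t, int> level1[NUM_LEVEL1_SUB_TABLES]; // sub tables
        bool is_partitioned = false;
        size_t threshold = 0; // 0 disables conversion, mirroring _partitioned_threshold

        // Same routing idea as the removed get_sub_table_from_hash:
        // the high BITS_FOR_SUB_TABLE bits of a 32-bit hash pick the sub table.
        static size_t sub_table_from_hash(uint32_t h) {
            return (h >> (32 - BITS_FOR_SUB_TABLE)) & (NUM_LEVEL1_SUB_TABLES - 1);
        }

        void insert(uint32_t key, int value) {
            if (!is_partitioned && threshold > 0 && level0.size() + 1 >= threshold) {
                // convert_to_partitioned: redistribute level0 into the sub tables.
                for (const auto& [k, v] : level0) {
                    uint32_t kh = static_cast<uint32_t>(std::hash<uint32_t> {}(k));
                    level1[sub_table_from_hash(kh)][k] = v;
                }
                level0.clear();
                is_partitioned = true;
            }
            uint32_t h = static_cast<uint32_t>(std::hash<uint32_t> {}(key));
            if (is_partitioned) {
                level1[sub_table_from_hash(h)][key] = value;
            } else {
                level0[key] = value;
            }
        }
    };

With that removed, the remaining tables simply resize in place; the level0/level1
split and the mid-insert conversion both go away, as the diff below shows.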
---
be/src/vec/common/hash_table/hash_map.h | 3 -
be/src/vec/common/hash_table/hash_map_context.h | 1 -
be/src/vec/common/hash_table/hash_table.h | 45 +-
.../vec/common/hash_table/partitioned_hash_map.h | 60 ---
.../vec/common/hash_table/partitioned_hash_table.h | 550 ---------------------
be/src/vec/common/hash_table/ph_hash_map.h | 48 +-
6 files changed, 9 insertions(+), 698 deletions(-)
diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/hash_map.h
index 018d134d875..448ddd5b7c5 100644
--- a/be/src/vec/common/hash_table/hash_map.h
+++ b/be/src/vec/common/hash_table/hash_map.h
@@ -198,9 +198,6 @@ template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,
          typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator>
using HashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator>;
-template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>>
-using NormalHashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash>;
-
template <typename Key, typename Hash = DefaultHash<Key>>
using JoinHashMap = JoinHashTable<Key, Hash>;
diff --git a/be/src/vec/common/hash_table/hash_map_context.h b/be/src/vec/common/hash_table/hash_map_context.h
index 5354155c529..16a793d7500 100644
--- a/be/src/vec/common/hash_table/hash_map_context.h
+++ b/be/src/vec/common/hash_table/hash_map_context.h
@@ -27,7 +27,6 @@
#include "vec/common/arena.h"
#include "vec/common/assert_cast.h"
#include "vec/common/columns_hashing.h"
-#include "vec/common/hash_table/partitioned_hash_map.h"
#include "vec/common/hash_table/string_hash_map.h"
#include "vec/common/string_ref.h"
#include "vec/common/typeid_cast.h"
diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h
index 490cd501692..809868e2bee 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -419,28 +419,12 @@ protected:
    Cell* buf = nullptr; /// A piece of memory for all elements except the element with zero key.
Grower grower;
int64_t _resize_timer_ns;
-    // the bucket count threshold above which it's converted to partitioned hash table
-    // > 0: enable convert dynamically
-    // 0: convert is disabled
-    int _partitioned_threshold = 0;
-    // if need resize and bucket count after resize will be >= _partitioned_threshold,
-    // this flag is set to true, and resize does not actually happen,
-    // PartitionedHashTable will convert this hash table to partitioned hash table
-    bool _need_partition = false;
//factor that will trigger growing the hash table on insert.
static constexpr float MAX_BUCKET_OCCUPANCY_FRACTION = 0.5f;
mutable size_t collisions = 0;
-    void set_partitioned_threshold(int threshold) { _partitioned_threshold = threshold; }
-
-    bool check_if_need_partition(size_t bucket_count) {
-        return _partitioned_threshold > 0 && bucket_count >= _partitioned_threshold;
-    }
-
-    bool need_partition() { return _need_partition; }
-
    /// Find a cell with the same key or an empty cell, starting from the specified position and further along the collision resolution chain.
    size_t ALWAYS_INLINE find_cell(const Key& x, size_t hash_value, size_t place_value) const {
while (!buf[place_value].is_zero(*this) &&
@@ -609,8 +593,6 @@ public:
std::swap(buf, rhs.buf);
std::swap(m_size, rhs.m_size);
std::swap(grower, rhs.grower);
- std::swap(_need_partition, rhs._need_partition);
- std::swap(_partitioned_threshold, rhs._partitioned_threshold);
        Hash::operator=(std::move(rhs));      // NOLINT(bugprone-use-after-move)
        Allocator::operator=(std::move(rhs)); // NOLINT(bugprone-use-after-move)
@@ -740,12 +722,10 @@ protected:
throw;
}
-            if (LIKELY(!_need_partition)) {
-                // The hash table was rehashed, so we have to re-find the key.
-                size_t new_place = find_cell(key, hash_value, grower.place(hash_value));
-                assert(!buf[new_place].is_zero(*this));
-                it = &buf[new_place];
-            }
+            // The hash table was rehashed, so we have to re-find the key.
+            size_t new_place = find_cell(key, hash_value, grower.place(hash_value));
+            assert(!buf[new_place].is_zero(*this));
+            it = &buf[new_place];
}
}
@@ -776,12 +756,10 @@ protected:
throw;
}
-            if (LIKELY(!_need_partition)) {
-                // The hash table was rehashed, so we have to re-find the key.
-                size_t new_place = find_cell(key, hash_value, grower.place(hash_value));
-                assert(!buf[new_place].is_zero(*this));
-                it = &buf[new_place];
-            }
+            // The hash table was rehashed, so we have to re-find the key.
+            size_t new_place = find_cell(key, hash_value, grower.place(hash_value));
+            assert(!buf[new_place].is_zero(*this));
+            it = &buf[new_place];
}
}
@@ -1060,13 +1038,6 @@ private:
} else
new_grower.increase_size();
-        // new bucket count exceed partitioned hash table bucket count threshold,
-        // don't resize and set need partition flag
-        if (check_if_need_partition(new_grower.buf_size())) {
-            _need_partition = true;
-            return;
-        }
-
        /// Expand the space.
        buf = reinterpret_cast<Cell*>(Allocator::realloc(buf, get_buffer_size_in_bytes(),
                                                         new_grower.buf_size() * sizeof(Cell)));
diff --git a/be/src/vec/common/hash_table/partitioned_hash_map.h b/be/src/vec/common/hash_table/partitioned_hash_map.h
deleted file mode 100644
index a2db6fece35..00000000000
--- a/be/src/vec/common/hash_table/partitioned_hash_map.h
+++ /dev/null
@@ -1,60 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/TwoLevelHashMap.h
-// and modified by Doris
-#pragma once
-
-#include "vec/common/hash_table/hash_map.h"
-#include "vec/common/hash_table/partitioned_hash_table.h"
-#include "vec/common/hash_table/ph_hash_map.h"
-namespace doris {
-template <typename ImplTable>
-class PartitionedHashMapTable : public PartitionedHashTable<ImplTable> {
-public:
- using Impl = ImplTable;
- using Base = PartitionedHashTable<ImplTable>;
- using Key = typename ImplTable::key_type;
- using LookupResult = typename Impl::LookupResult;
-
- auto& ALWAYS_INLINE operator[](const Key& x) {
- LookupResult it;
- bool inserted = false;
- this->emplace(x, it, inserted);
-
- if (inserted) {
- new (lookup_result_get_mapped(it)) Base::mapped_type();
- }
-
- return *lookup_result_get_mapped(it);
- }
-
- template <typename Func>
- void for_each_mapped(Func&& func) {
- for (auto& v : *this) {
- func(v.get_second());
- }
- }
-};
-
-template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>>
-using PartitionedHashMap =
-        PartitionedHashMapTable<HashMap<Key, Mapped, Hash, PartitionedHashTableGrower<>>>;
-
-template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>>
-using PHNormalHashMap = PHHashMap<Key, Mapped, Hash, false>;
-} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/common/hash_table/partitioned_hash_table.h b/be/src/vec/common/hash_table/partitioned_hash_table.h
deleted file mode 100644
index c6a19b36d3a..00000000000
--- a/be/src/vec/common/hash_table/partitioned_hash_table.h
+++ /dev/null
@@ -1,550 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/TwoLevelHashTable.h
-// and modified by Doris
-#pragma once
-
-#include "vec/common/hash_table/hash_table.h"
-
-/** Partitioned hash table.
-  * Represents 16 (or 1ULL << BITS_FOR_SUB_TABLE) small hash tables (sub table count of the first level).
-  * To determine which one to use, one of the bytes of the hash function is taken.
-  *
-  * Usually works a little slower than a simple hash table.
-  * However, it has advantages in some cases:
-  * - if you need to merge two hash tables together, then you can easily parallelize it by sub tables;
-  * - delay during resizes is amortized, since the small hash tables will be resized separately;
-  * - in theory, resizes are cache-local in a larger range of sizes.
-  */
-
-template <size_t initial_size_degree = 8>
-struct PartitionedHashTableGrower : public HashTableGrowerWithPrecalculation<initial_size_degree> {
-    /// Increase the size of the hash table.
-    void increase_size() { this->increase_size_degree(this->size_degree() >= 15 ? 1 : 2); }
-};
-
-template <typename Impl, size_t BITS_FOR_SUB_TABLE = 4>
-class PartitionedHashTable : private boost::noncopyable, Impl::Hash {
-public:
- using key_type = typename Impl::key_type;
- using mapped_type = typename Impl::mapped_type;
- using value_type = typename Impl::value_type;
- using cell_type = typename Impl::cell_type;
- using Key = typename Impl::key_type;
-
- using LookupResult = typename Impl::LookupResult;
- using ConstLookupResult = typename Impl::ConstLookupResult;
-
-protected:
- using Self = PartitionedHashTable;
-
-private:
- static constexpr size_t NUM_LEVEL1_SUB_TABLES = 1ULL << BITS_FOR_SUB_TABLE;
- static constexpr size_t MAX_SUB_TABLE = NUM_LEVEL1_SUB_TABLES - 1;
-
- //factor that will trigger growing the hash table on insert.
- static constexpr float MAX_SUB_TABLE_OCCUPANCY_FRACTION = 0.5f;
-
- Impl level0_sub_table;
- Impl level1_sub_tables[NUM_LEVEL1_SUB_TABLES];
-
- bool _is_partitioned = false;
-
- int64_t _convert_timer_ns = 0;
-
-public:
- PartitionedHashTable() = default;
-
-    PartitionedHashTable(PartitionedHashTable&& rhs) { *this = std::move(rhs); }
-
- PartitionedHashTable& operator=(PartitionedHashTable&& rhs) {
- std::swap(_is_partitioned, rhs._is_partitioned);
-
- level0_sub_table = std::move(rhs.level0_sub_table);
- for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) {
- level1_sub_tables[i] = std::move(rhs.level1_sub_tables[i]);
- }
-
- return *this;
- }
-
- size_t hash(const Key& x) const { return level0_sub_table.hash(x); }
-
- float get_factor() const { return MAX_SUB_TABLE_OCCUPANCY_FRACTION; }
-
- int64_t get_convert_timer_value() const { return _convert_timer_ns; }
-
- bool should_be_shrink(int64_t valid_row) const {
- if (_is_partitioned) {
- return false;
- } else {
- return level0_sub_table.should_be_shrink(valid_row);
- }
- }
-
- size_t size() {
- size_t count = 0;
- if (_is_partitioned) {
- for (auto i = 0u; i < this->NUM_LEVEL1_SUB_TABLES; ++i) {
- count += this->level1_sub_tables[i].size();
- }
- } else {
- count = level0_sub_table.size();
- }
- return count;
- }
-
-    void init_buf_size(size_t reserve_for_num_elements) {
-        if (_is_partitioned) {
-            for (auto& impl : level1_sub_tables) {
-                impl.init_buf_size(reserve_for_num_elements / NUM_LEVEL1_SUB_TABLES);
-            }
-        } else {
-            if (level0_sub_table.check_if_need_partition(reserve_for_num_elements)) {
-                level0_sub_table.clear_and_shrink();
-                _is_partitioned = true;
-
-                for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) {
-                    level1_sub_tables[i].init_buf_size(reserve_for_num_elements /
-                                                       NUM_LEVEL1_SUB_TABLES);
-                }
-            } else {
-                level0_sub_table.init_buf_size(reserve_for_num_elements);
-            }
-        }
-    }
-
- void delete_zero_key(Key key) {
- if (_is_partitioned) {
- const auto key_hash = hash(key);
- size_t sub_table_idx = get_sub_table_from_hash(key_hash);
- level1_sub_tables[sub_table_idx].delete_zero_key(key);
- } else {
- level0_sub_table.delete_zero_key(key);
- }
- }
-
- int64_t get_collisions() const {
- size_t collisions = level0_sub_table.get_collisions();
- for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; i++) {
- collisions += level1_sub_tables[i].get_collisions();
- }
- return collisions;
- }
-
- size_t get_buffer_size_in_bytes() const {
- if (_is_partitioned) {
- size_t buff_size = 0;
-            for (const auto& impl : level1_sub_tables) buff_size += impl.get_buffer_size_in_bytes();
- return buff_size;
- } else {
- return level0_sub_table.get_buffer_size_in_bytes();
- }
- }
-
- size_t get_buffer_size_in_cells() const {
- if (_is_partitioned) {
- size_t buff_size = 0;
-            for (const auto& impl : level1_sub_tables) buff_size += impl.get_buffer_size_in_cells();
- return buff_size;
- } else {
- return level0_sub_table.get_buffer_size_in_cells();
- }
- }
-
- std::vector<size_t> get_buffer_sizes_in_cells() const {
- std::vector<size_t> sizes;
- if (_is_partitioned) {
- for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) {
-                sizes.push_back(level1_sub_tables[i].get_buffer_size_in_cells());
- }
- } else {
- sizes.push_back(level0_sub_table.get_buffer_size_in_cells());
- }
- return sizes;
- }
-
- void reset_resize_timer() {
- if (_is_partitioned) {
- for (auto& impl : level1_sub_tables) {
- impl.reset_resize_timer();
- }
- } else {
- level0_sub_table.reset_resize_timer();
- }
- }
- int64_t get_resize_timer_value() const {
- if (_is_partitioned) {
- int64_t resize_timer_ns = 0;
- for (const auto& impl : level1_sub_tables) {
- resize_timer_ns += impl.get_resize_timer_value();
- }
- return resize_timer_ns;
- } else {
- return level0_sub_table.get_resize_timer_value();
- }
- }
-
- bool has_null_key_data() const { return false; }
- template <typename MappedType>
- char* get_null_key_data() {
- return nullptr;
- }
-
-protected:
-    typename Impl::iterator begin_of_next_non_empty_sub_table_idx(size_t& sub_table_idx) {
-        while (sub_table_idx != NUM_LEVEL1_SUB_TABLES && level1_sub_tables[sub_table_idx].empty())
-            ++sub_table_idx;
-
-        if (sub_table_idx != NUM_LEVEL1_SUB_TABLES) return level1_sub_tables[sub_table_idx].begin();
-
- --sub_table_idx;
- return level1_sub_tables[MAX_SUB_TABLE].end();
- }
-
- typename Impl::const_iterator begin_of_next_non_empty_sub_table_idx(
- size_t& sub_table_idx) const {
-        while (sub_table_idx != NUM_LEVEL1_SUB_TABLES && level1_sub_tables[sub_table_idx].empty())
-            ++sub_table_idx;
-
-        if (sub_table_idx != NUM_LEVEL1_SUB_TABLES) return level1_sub_tables[sub_table_idx].begin();
-
- --sub_table_idx;
- return level1_sub_tables[MAX_SUB_TABLE].end();
- }
-
-public:
- void set_partitioned_threshold(int threshold) {
- level0_sub_table.set_partitioned_threshold(threshold);
- }
-
- class iterator /// NOLINT
- {
- Self* container {};
- size_t sub_table_idx {};
- typename Impl::iterator current_it {};
-
- friend class PartitionedHashTable;
-
-        iterator(Self* container_, size_t sub_table_idx_, typename Impl::iterator current_it_)
-                : container(container_), sub_table_idx(sub_table_idx_), current_it(current_it_) {}
-
- public:
- iterator() = default;
-
- bool operator==(const iterator& rhs) const {
-            return sub_table_idx == rhs.sub_table_idx && current_it == rhs.current_it;
- }
- bool operator!=(const iterator& rhs) const { return !(*this == rhs); }
-
- iterator& operator++() {
- ++current_it;
- if (container->_is_partitioned) {
-                if (current_it == container->level1_sub_tables[sub_table_idx].end()) {
-                    ++sub_table_idx;
-                    current_it = container->begin_of_next_non_empty_sub_table_idx(sub_table_idx);
-                }
- }
- }
-
- return *this;
- }
-
- auto& operator*() { return *current_it; }
- auto* operator->() { return current_it.get_ptr(); }
-
- auto* get_ptr() { return current_it.get_ptr(); }
- size_t get_hash() { return current_it.get_hash(); }
- };
-
- class const_iterator /// NOLINT
- {
- Self* container {};
- size_t sub_table_idx {};
- typename Impl::const_iterator current_it {};
-
- friend class PartitionedHashTable;
-
- const_iterator(Self* container_, size_t sub_table_idx_,
- typename Impl::const_iterator current_it_)
-                : container(container_), sub_table_idx(sub_table_idx_), current_it(current_it_) {}
-
- public:
- const_iterator() = default;
- const_iterator(const iterator& rhs)
- : container(rhs.container),
- sub_table_idx(rhs.sub_table_idx),
- current_it(rhs.current_it) {} /// NOLINT
-
- bool operator==(const const_iterator& rhs) const {
-            return sub_table_idx == rhs.sub_table_idx && current_it == rhs.current_it;
-        }
-        bool operator!=(const const_iterator& rhs) const { return !(*this == rhs); }
-
- const_iterator& operator++() {
- ++current_it;
- if (container->_is_partitioned) {
-                if (current_it == container->level1_sub_tables[sub_table_idx].end()) {
-                    ++sub_table_idx;
-                    current_it = container->begin_of_next_non_empty_sub_table_idx(sub_table_idx);
-                }
- }
- }
-
- return *this;
- }
-
- const auto& operator*() const { return *current_it; }
- const auto* operator->() const { return current_it->get_ptr(); }
-
- const auto* get_ptr() const { return current_it.get_ptr(); }
- size_t get_hash() const { return current_it.get_hash(); }
- };
-
- const_iterator begin() const {
- if (_is_partitioned) {
- size_t sub_table_idx = 0;
- typename Impl::const_iterator impl_it =
- begin_of_next_non_empty_sub_table_idx(sub_table_idx);
- return {this, sub_table_idx, impl_it};
- } else {
- return {this, NUM_LEVEL1_SUB_TABLES, level0_sub_table.begin()};
- }
- }
-
- iterator begin() {
- if (_is_partitioned) {
- size_t sub_table_idx = 0;
-            typename Impl::iterator impl_it = begin_of_next_non_empty_sub_table_idx(sub_table_idx);
- return {this, sub_table_idx, impl_it};
- } else {
- return {this, NUM_LEVEL1_SUB_TABLES, level0_sub_table.begin()};
- }
- }
-
- const_iterator end() const {
- if (_is_partitioned) {
-            return {this, MAX_SUB_TABLE, level1_sub_tables[MAX_SUB_TABLE].end()};
- } else {
- return {this, NUM_LEVEL1_SUB_TABLES, level0_sub_table.end()};
- }
- }
- iterator end() {
- if (_is_partitioned) {
-            return {this, MAX_SUB_TABLE, level1_sub_tables[MAX_SUB_TABLE].end()};
- } else {
- return {this, NUM_LEVEL1_SUB_TABLES, level0_sub_table.end()};
- }
- }
-
-    /// Insert a value. In the case of any more complex values, it is better to use the `emplace` function.
- std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type& x) {
- size_t hash_value = hash(cell_type::get_key(x));
-
- std::pair<LookupResult, bool> res;
- emplace(cell_type::get_key(x), res.first, res.second, hash_value);
-
-        if (res.second) insert_set_mapped(lookup_result_get_mapped(res.first), x);
-
- return res;
- }
-
- void expanse_for_add_elem(size_t num_elem) {
- if (_is_partitioned) {
-            size_t num_elem_per_sub_table =
-                    (num_elem + NUM_LEVEL1_SUB_TABLES - 1) / NUM_LEVEL1_SUB_TABLES;
-            for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) {
-                level1_sub_tables[i].expanse_for_add_elem(num_elem_per_sub_table);
- }
- } else {
- level0_sub_table.expanse_for_add_elem(num_elem);
- if (UNLIKELY(level0_sub_table.need_partition())) {
- convert_to_partitioned();
- }
- }
- }
-
- template <bool READ>
- void ALWAYS_INLINE prefetch(const Key& key, size_t hash_value) {
- if (_is_partitioned) {
- const auto sub_table_idx = get_sub_table_from_hash(hash_value);
-            level1_sub_tables[sub_table_idx].template prefetch<READ>(hash_value);
- } else {
- level0_sub_table.template prefetch<READ>(hash_value);
- }
- }
-
-    /** Insert the key,
-      * return an iterator to a position that can be used for `placement new` of value,
-      * as well as the flag - whether a new key was inserted.
-      *
-      * You have to make `placement new` values if you inserted a new key,
-      * since when destroying a hash table, the destructor will be invoked for it!
- *
- * Example usage:
- *
- * Map::iterator it;
- * bool inserted;
- * map.emplace(key, it, inserted);
- * if (inserted)
- * new(&it->second) Mapped(value);
- */
- template <typename KeyHolder>
-    void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, bool& inserted) {
- size_t hash_value = hash(key_holder);
- emplace(key_holder, it, inserted, hash_value);
- }
-
- /// Same, but with a precalculated values of hash function.
- template <typename KeyHolder>
-    void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, bool& inserted,
-                               size_t hash_value) {
- if (_is_partitioned) {
- size_t sub_table_idx = get_sub_table_from_hash(hash_value);
-            level1_sub_tables[sub_table_idx].emplace(key_holder, it, inserted, hash_value);
- } else {
- level0_sub_table.emplace(key_holder, it, inserted, hash_value);
- if (UNLIKELY(level0_sub_table.need_partition())) {
- convert_to_partitioned();
-
-                // The hash table was converted to partitioned, so we have to re-find the key.
-                size_t sub_table_id = get_sub_table_from_hash(hash_value);
-                it = level1_sub_tables[sub_table_id].find(key_holder, hash_value);
- }
- }
- }
-
- template <typename KeyHolder>
-    void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, size_t hash_value,
-                               bool& inserted) {
- emplace(key_holder, it, inserted, hash_value);
- }
-
- template <typename KeyHolder, typename Func>
-    void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, Func&& f) {
- size_t hash_value = hash(key_holder);
- lazy_emplace(key_holder, it, hash_value, std::forward<Func>(f));
- }
-
- template <typename KeyHolder, typename Func>
-    void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, size_t hash_value,
-                                    Func&& f) {
- if (_is_partitioned) {
- size_t sub_table_idx = get_sub_table_from_hash(hash_value);
-            level1_sub_tables[sub_table_idx].lazy_emplace(key_holder, it, hash_value,
-                                                          std::forward<Func>(f));
- } else {
-            level0_sub_table.lazy_emplace(key_holder, it, hash_value, std::forward<Func>(f));
- if (UNLIKELY(level0_sub_table.need_partition())) {
- convert_to_partitioned();
-
-                // The hash table was converted to partitioned, so we have to re-find the key.
-                size_t sub_table_id = get_sub_table_from_hash(hash_value);
-                it = level1_sub_tables[sub_table_id].find(key_holder, hash_value);
- }
- }
- }
-
- LookupResult ALWAYS_INLINE find(Key x, size_t hash_value) {
- if (_is_partitioned) {
- size_t sub_table_idx = get_sub_table_from_hash(hash_value);
- return level1_sub_tables[sub_table_idx].find(x, hash_value);
- } else {
- return level0_sub_table.find(x, hash_value);
- }
- }
-
- ConstLookupResult ALWAYS_INLINE find(Key x, size_t hash_value) const {
-        return const_cast<std::decay_t<decltype(*this)>*>(this)->find(x, hash_value);
- }
-
- LookupResult ALWAYS_INLINE find(Key x) { return find(x, hash(x)); }
-
-    ConstLookupResult ALWAYS_INLINE find(Key x) const { return find(x, hash(x)); }
-
- size_t size() const {
- if (_is_partitioned) {
- size_t res = 0;
- for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) {
- res += level1_sub_tables[i].size();
- }
- return res;
- } else {
- return level0_sub_table.size();
- }
- }
-
- std::vector<size_t> sizes() const {
- std::vector<size_t> sizes;
- if (_is_partitioned) {
- for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) {
- sizes.push_back(level1_sub_tables[i].size());
- }
- } else {
- sizes.push_back(level0_sub_table.size());
- }
- return sizes;
- }
-
- bool empty() const {
- if (_is_partitioned) {
- for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i)
- if (!level1_sub_tables[i].empty()) return false;
- return true;
- } else {
- return level0_sub_table.empty();
- }
- }
-
- bool add_elem_size_overflow(size_t row) const {
-        return !_is_partitioned && level0_sub_table.add_elem_size_overflow(row);
- }
-
-private:
- void convert_to_partitioned() {
- SCOPED_RAW_TIMER(&_convert_timer_ns);
-
- DCHECK(!_is_partitioned);
- _is_partitioned = true;
-
- auto bucket_count = level0_sub_table.get_buffer_size_in_cells();
- for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) {
-            level1_sub_tables[i] = std::move(Impl(bucket_count / NUM_LEVEL1_SUB_TABLES));
- }
-
- auto it = level0_sub_table.begin();
-
-        /// It is assumed that the zero key (stored separately) is first in iteration order.
-        if (it != level0_sub_table.end() && it.get_ptr()->is_zero(level0_sub_table)) {
- insert(it->get_value());
- ++it;
- }
-
- for (; it != level0_sub_table.end(); ++it) {
- const auto* cell = it.get_ptr();
- size_t hash_value = cell->get_hash(level0_sub_table);
- size_t sub_table_idx = get_sub_table_from_hash(hash_value);
-            level1_sub_tables[sub_table_idx].insert_unique_non_zero(cell, hash_value);
- }
-
- level0_sub_table.clear_and_shrink();
- }
-
- /// NOTE Bad for hash tables with more than 2^32 cells.
- static size_t get_sub_table_from_hash(size_t hash_value) {
- return (hash_value >> (32 - BITS_FOR_SUB_TABLE)) & MAX_SUB_TABLE;
- }
-};
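To make the NOTE above concrete: with BITS_FOR_SUB_TABLE = 4 the formula reads
bits 28..31 of the hash, so only the low 32 bits ever influence routing, and a
hash of 0xDEADBEEF lands in sub table 0xD (13). A tiny self-contained check of
that arithmetic (hypothetical test code, not part of the tree):

    #include <cassert>
    #include <cstddef>

    static constexpr size_t BITS_FOR_SUB_TABLE = 4;
    static constexpr size_t MAX_SUB_TABLE = (1ULL << BITS_FOR_SUB_TABLE) - 1;

    // Same formula as the removed PartitionedHashTable::get_sub_table_from_hash.
    static size_t get_sub_table_from_hash(size_t hash_value) {
        return (hash_value >> (32 - BITS_FOR_SUB_TABLE)) & MAX_SUB_TABLE;
    }

    int main() {
        assert(get_sub_table_from_hash(0xDEADBEEFULL) == 0xD); // bits 28..31 are 0xD
        assert(get_sub_table_from_hash(0x00000001ULL) == 0x0); // small hashes go to table 0
        return 0;
    }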
diff --git a/be/src/vec/common/hash_table/ph_hash_map.h b/be/src/vec/common/hash_table/ph_hash_map.h
index 50cf218dc87..de320425223 100644
--- a/be/src/vec/common/hash_table/ph_hash_map.h
+++ b/be/src/vec/common/hash_table/ph_hash_map.h
@@ -30,8 +30,7 @@ ALWAYS_INLINE inline auto lookup_result_get_mapped(std::pair<const Key, Mapped>*
return &(it->second);
}
-template <typename Key, typename Mapped, typename HashMethod = DefaultHash<Key>,
-          bool PartitionedHashTable = false>
+template <typename Key, typename Mapped, typename HashMethod = DefaultHash<Key>>
class PHHashMap : private boost::noncopyable {
public:
using Self = PHHashMap;
@@ -58,9 +57,6 @@ public:
PHHashMap& operator=(PHHashMap&& rhs) {
_hash_map.clear();
_hash_map = std::move(rhs._hash_map);
- std::swap(_need_partition, rhs._need_partition);
- std::swap(_partitioned_threshold, rhs._partitioned_threshold);
-
return *this;
}
@@ -130,19 +126,11 @@ public:
inserted = true;
ctor(key_holder, nullptr);
});
-
- if constexpr (PartitionedHashTable) {
- _check_if_need_partition();
- }
}
template <typename KeyHolder, typename Func>
    void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, Func&& f) {
        it = &*_hash_map.lazy_emplace(key_holder, [&](const auto& ctor) { f(ctor, key_holder); });
-
- if constexpr (PartitionedHashTable) {
- _check_if_need_partition();
- }
}
template <typename KeyHolder>
@@ -157,10 +145,6 @@ public:
ctor(key, mapped_type());
}
});
-
- if constexpr (PartitionedHashTable) {
- _check_if_need_partition();
- }
}
template <typename KeyHolder, typename Func>
@@ -168,10 +152,6 @@ public:
Func&& f) {
        it = &*_hash_map.lazy_emplace_with_hash(key, hash_value,
                                                [&](const auto& ctor) { f(ctor, key, key); });
-
- if constexpr (PartitionedHashTable) {
- _check_if_need_partition();
- }
}
    void ALWAYS_INLINE insert(const Key& key, size_t hash_value, const Mapped& value) {
@@ -225,18 +205,6 @@ public:
}
bool has_null_key_data() const { return false; }
- bool need_partition() { return _need_partition; }
-
-    void set_partitioned_threshold(int threshold) { _partitioned_threshold = threshold; }
-
- bool check_if_need_partition(size_t bucket_count) {
- if constexpr (PartitionedHashTable) {
-            return _partitioned_threshold > 0 && bucket_count >= _partitioned_threshold;
- } else {
- return false;
- }
- }
-
bool empty() const { return _hash_map.empty(); }
void clear_and_shrink() { _hash_map.clear(); }
@@ -244,19 +212,5 @@ public:
void expanse_for_add_elem(size_t num_elem) { _hash_map.reserve(num_elem); }
private:
- void _check_if_need_partition() {
- if (UNLIKELY(check_if_need_partition(_hash_map.size() + 1))) {
- _need_partition = add_elem_size_overflow(1);
- }
- }
-
HashMapImpl _hash_map;
-    // the bucket count threshold above which it's converted to partitioned hash table
-    // > 0: enable convert dynamically
-    // 0: convert is disabled
-    int _partitioned_threshold = 0;
-    // if need resize and bucket count after resize will be >= _partitioned_threshold,
-    // this flag is set to true, and resize does not actually happen,
-    // PartitionedHashTable will convert this hash table to partitioned hash table
-    bool _need_partition;
};
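One thing this cleanup does not change: the emplace contract quoted in the
deleted header's comment (construct the mapped value with placement new only
when `inserted` comes back true) still describes how the remaining tables are
used. A minimal stand-in illustrating that contract; TinyTable below is
hypothetical demo code, not the Doris API:

    #include <cassert>
    #include <cstddef>
    #include <functional>
    #include <new>

    // Fixed-capacity, open-addressed, no deletion: just enough to show the
    // contract. emplace() hands back a slot; the caller placement-news the
    // mapped value only when `inserted` is true.
    template <typename Key, typename Mapped, size_t N = 16>
    struct TinyTable {
        struct Cell {
            Key key {};
            alignas(Mapped) unsigned char mapped[sizeof(Mapped)];
            bool used = false;
        };
        Cell cells[N] {};

        using LookupResult = Mapped*;

        void emplace(const Key& key, LookupResult& it, bool& inserted) {
            size_t i = std::hash<Key> {}(key) % N;
            while (cells[i].used && !(cells[i].key == key)) {
                i = (i + 1) % N; // linear probing; assumes the table never fills up
            }
            inserted = !cells[i].used;
            if (inserted) {
                cells[i].used = true;
                cells[i].key = key;
            }
            it = reinterpret_cast<Mapped*>(cells[i].mapped);
        }
    };

    int main() {
        TinyTable<int, int> map;
        TinyTable<int, int>::LookupResult it = nullptr;
        bool inserted = false;

        map.emplace(42, it, inserted);
        if (inserted) {
            new (it) int(7); // placement new only on first insert
        }
        assert(*it == 7);

        map.emplace(42, it, inserted); // same key again: inserted == false
        assert(!inserted && *it == 7);
        return 0;
    }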
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]