This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 17df7d5f435 [Chore](hash-table) remove some unused code of PartitionedHashMap (#43215)
17df7d5f435 is described below
commit 17df7d5f4355a810bcc79c437955d94e676514df
Author: Pxl <[email protected]>
AuthorDate: Tue Nov 5 11:34:40 2024 +0800
[Chore](hash-table) remove some unused code of PartitionedHashMap (#43215)
remove some unused code of PartitionedHashMap
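For context before the diff: the code removed here implemented a convert-on-threshold
two-level table. A plain level0 table was used until its bucket count crossed a
configured threshold, at which point its contents were redistributed into
1ULL << BITS_FOR_SUB_TABLE (16) sub tables selected from the high bits of the hash.
A rough standalone sketch of that mechanism, condensed from the deleted headers
(std::unordered_map stands in for the Doris Impl table type, and the threshold
check is simplified; this is illustrative, not the removed implementation):

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <unordered_map>

    static constexpr size_t BITS_FOR_SUB_TABLE = 4;
    static constexpr size_t NUM_LEVEL1_SUB_TABLES = 1ULL << BITS_FOR_SUB_TABLE; // 16

    struct TinyPartitionedMap {
        std::unordered_map<uint32_t, int> level0;                        // single table
        std::unordered_map<uint32_t, int> level1[NUM_LEVEL1_SUB_TABLES]; // sub tables
        bool is_partitioned = false;
        size_t threshold = 0; // 0 disables conversion, mirroring _partitioned_threshold

        // Same routing idea as the removed get_sub_table_from_hash:
        // the high BITS_FOR_SUB_TABLE bits of a 32-bit hash pick the sub table.
        static size_t sub_table_from_hash(uint32_t h) {
            return (h >> (32 - BITS_FOR_SUB_TABLE)) & (NUM_LEVEL1_SUB_TABLES - 1);
        }

        void insert(uint32_t key, int value) {
            if (!is_partitioned && threshold > 0 && level0.size() + 1 >= threshold) {
                // convert_to_partitioned: redistribute level0 into the sub tables.
                for (const auto& [k, v] : level0) {
                    uint32_t kh = static_cast<uint32_t>(std::hash<uint32_t> {}(k));
                    level1[sub_table_from_hash(kh)][k] = v;
                }
                level0.clear();
                is_partitioned = true;
            }
            uint32_t h = static_cast<uint32_t>(std::hash<uint32_t> {}(key));
            if (is_partitioned) {
                level1[sub_table_from_hash(h)][key] = value;
            } else {
                level0[key] = value;
            }
        }
    };

With that removed, the remaining tables simply resize in place; the level0/level1
split and the mid-insert conversion both go away, as the diff below shows.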
---
be/src/vec/common/hash_table/hash_map.h | 3 -
be/src/vec/common/hash_table/hash_map_context.h | 1 -
be/src/vec/common/hash_table/hash_table.h | 45 +-
.../vec/common/hash_table/partitioned_hash_map.h | 60 ---
.../vec/common/hash_table/partitioned_hash_table.h | 550 ---------------------
be/src/vec/common/hash_table/ph_hash_map.h | 48 +-
6 files changed, 9 insertions(+), 698 deletions(-)
diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/hash_map.h
index 018d134d875..448ddd5b7c5 100644
--- a/be/src/vec/common/hash_table/hash_map.h
+++ b/be/src/vec/common/hash_table/hash_map.h
@@ -198,9 +198,6 @@ template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,
          typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator>
using HashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator>;
-template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>>
-using NormalHashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash>;
-
template <typename Key, typename Hash = DefaultHash<Key>>
using JoinHashMap = JoinHashTable<Key, Hash>;
diff --git a/be/src/vec/common/hash_table/hash_map_context.h b/be/src/vec/common/hash_table/hash_map_context.h
index 5354155c529..16a793d7500 100644
--- a/be/src/vec/common/hash_table/hash_map_context.h
+++ b/be/src/vec/common/hash_table/hash_map_context.h
@@ -27,7 +27,6 @@
#include "vec/common/arena.h"
#include "vec/common/assert_cast.h"
#include "vec/common/columns_hashing.h"
-#include "vec/common/hash_table/partitioned_hash_map.h"
#include "vec/common/hash_table/string_hash_map.h"
#include "vec/common/string_ref.h"
#include "vec/common/typeid_cast.h"
diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h
index 490cd501692..809868e2bee 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -419,28 +419,12 @@ protected:
    Cell* buf = nullptr; /// A piece of memory for all elements except the element with zero key.
Grower grower;
int64_t _resize_timer_ns;
-    // the bucket count threshold above which it's converted to partitioned hash table
-    // > 0: enable convert dynamically
-    // 0: convert is disabled
-    int _partitioned_threshold = 0;
-    // if need resize and bucket count after resize will be >= _partitioned_threshold,
-    // this flag is set to true, and resize does not actually happen,
-    // PartitionedHashTable will convert this hash table to partitioned hash table
-    bool _need_partition = false;
//factor that will trigger growing the hash table on insert.
static constexpr float MAX_BUCKET_OCCUPANCY_FRACTION = 0.5f;
mutable size_t collisions = 0;
-    void set_partitioned_threshold(int threshold) { _partitioned_threshold = threshold; }
-
-    bool check_if_need_partition(size_t bucket_count) {
-        return _partitioned_threshold > 0 && bucket_count >= _partitioned_threshold;
-    }
-
-    bool need_partition() { return _need_partition; }
-
    /// Find a cell with the same key or an empty cell, starting from the specified position and further along the collision resolution chain.
    size_t ALWAYS_INLINE find_cell(const Key& x, size_t hash_value, size_t place_value) const {
while (!buf[place_value].is_zero(*this) &&
@@ -609,8 +593,6 @@ public:
std::swap(buf, rhs.buf);
std::swap(m_size, rhs.m_size);
std::swap(grower, rhs.grower);
- std::swap(_need_partition, rhs._need_partition);
- std::swap(_partitioned_threshold, rhs._partitioned_threshold);
        Hash::operator=(std::move(rhs));      // NOLINT(bugprone-use-after-move)
        Allocator::operator=(std::move(rhs)); // NOLINT(bugprone-use-after-move)
@@ -740,12 +722,10 @@ protected:
throw;
}
-            if (LIKELY(!_need_partition)) {
-                // The hash table was rehashed, so we have to re-find the key.
-                size_t new_place = find_cell(key, hash_value, grower.place(hash_value));
-                assert(!buf[new_place].is_zero(*this));
-                it = &buf[new_place];
-            }
+            // The hash table was rehashed, so we have to re-find the key.
+            size_t new_place = find_cell(key, hash_value, grower.place(hash_value));
+            assert(!buf[new_place].is_zero(*this));
+            it = &buf[new_place];
}
}
@@ -776,12 +756,10 @@ protected:
throw;
}
-            if (LIKELY(!_need_partition)) {
-                // The hash table was rehashed, so we have to re-find the key.
-                size_t new_place = find_cell(key, hash_value, grower.place(hash_value));
-                assert(!buf[new_place].is_zero(*this));
-                it = &buf[new_place];
-            }
+            // The hash table was rehashed, so we have to re-find the key.
+            size_t new_place = find_cell(key, hash_value, grower.place(hash_value));
+            assert(!buf[new_place].is_zero(*this));
+            it = &buf[new_place];
}
}
@@ -1060,13 +1038,6 @@ private:
} else
new_grower.increase_size();
-        // new bucket count exceed partitioned hash table bucket count threshold,
-        // don't resize and set need partition flag
-        if (check_if_need_partition(new_grower.buf_size())) {
-            _need_partition = true;
-            return;
-        }
-
        /// Expand the space.
        buf = reinterpret_cast<Cell*>(Allocator::realloc(buf, get_buffer_size_in_bytes(),
                                                         new_grower.buf_size() * sizeof(Cell)));
diff --git a/be/src/vec/common/hash_table/partitioned_hash_map.h b/be/src/vec/common/hash_table/partitioned_hash_map.h
deleted file mode 100644
index a2db6fece35..00000000000
--- a/be/src/vec/common/hash_table/partitioned_hash_map.h
+++ /dev/null
@@ -1,60 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/TwoLevelHashMap.h
-// and modified by Doris
-#pragma once
-
-#include "vec/common/hash_table/hash_map.h"
-#include "vec/common/hash_table/partitioned_hash_table.h"
-#include "vec/common/hash_table/ph_hash_map.h"
-namespace doris {
-template <typename ImplTable>
-class PartitionedHashMapTable : public PartitionedHashTable<ImplTable> {
-public:
- using Impl = ImplTable;
- using Base = PartitionedHashTable<ImplTable>;
- using Key = typename ImplTable::key_type;
- using LookupResult = typename Impl::LookupResult;
-
- auto& ALWAYS_INLINE operator[](const Key& x) {
- LookupResult it;
- bool inserted = false;
- this->emplace(x, it, inserted);
-
- if (inserted) {
- new (lookup_result_get_mapped(it)) Base::mapped_type();
- }
-
- return *lookup_result_get_mapped(it);
- }
-
- template <typename Func>
- void for_each_mapped(Func&& func) {
- for (auto& v : *this) {
- func(v.get_second());
- }
- }
-};
-
-template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>>
-using PartitionedHashMap =
-        PartitionedHashMapTable<HashMap<Key, Mapped, Hash, PartitionedHashTableGrower<>>>;
-
-template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>>
-using PHNormalHashMap = PHHashMap<Key, Mapped, Hash, false>;
-} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/common/hash_table/partitioned_hash_table.h b/be/src/vec/common/hash_table/partitioned_hash_table.h
deleted file mode 100644
index c6a19b36d3a..00000000000
--- a/be/src/vec/common/hash_table/partitioned_hash_table.h
+++ /dev/null
@@ -1,550 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/TwoLevelHashTable.h
-// and modified by Doris
-#pragma once
-
-#include "vec/common/hash_table/hash_table.h"
-
-/** Partitioned hash table.
-  * Represents 16 (or 1ULL << BITS_FOR_SUB_TABLE) small hash tables (sub table count of the first level).
-  * To determine which one to use, one of the bytes of the hash function is taken.
-  *
-  * Usually works a little slower than a simple hash table.
-  * However, it has advantages in some cases:
-  * - if you need to merge two hash tables together, then you can easily parallelize it by sub tables;
-  * - delay during resizes is amortized, since the small hash tables will be resized separately;
-  * - in theory, resizes are cache-local in a larger range of sizes.
-  */
-
-template <size_t initial_size_degree = 8>
-struct PartitionedHashTableGrower : public HashTableGrowerWithPrecalculation<initial_size_degree> {
-    /// Increase the size of the hash table.
-    void increase_size() { this->increase_size_degree(this->size_degree() >= 15 ? 1 : 2); }
-};
-
-template <typename Impl, size_t BITS_FOR_SUB_TABLE = 4>
-class PartitionedHashTable : private boost::noncopyable, Impl::Hash {
-public:
- using key_type = typename Impl::key_type;
- using mapped_type = typename Impl::mapped_type;
- using value_type = typename Impl::value_type;
- using cell_type = typename Impl::cell_type;
- using Key = typename Impl::key_type;
-
- using LookupResult = typename Impl::LookupResult;
- using ConstLookupResult = typename Impl::ConstLookupResult;
-
-protected:
- using Self = PartitionedHashTable;
-
-private:
- static constexpr size_t NUM_LEVEL1_SUB_TABLES = 1ULL << BITS_FOR_SUB_TABLE;
- static constexpr size_t MAX_SUB_TABLE = NUM_LEVEL1_SUB_TABLES - 1;
-
- //factor that will trigger growing the hash table on insert.
- static constexpr float MAX_SUB_TABLE_OCCUPANCY_FRACTION = 0.5f;
-
- Impl level0_sub_table;
- Impl level1_sub_tables[NUM_LEVEL1_SUB_TABLES];
-
- bool _is_partitioned = false;
-
- int64_t _convert_timer_ns = 0;
-
-public:
- PartitionedHashTable() = default;
-
-    PartitionedHashTable(PartitionedHashTable&& rhs) { *this = std::move(rhs); }
-
- PartitionedHashTable& operator=(PartitionedHashTable&& rhs) {
- std::swap(_is_partitioned, rhs._is_partitioned);
-
- level0_sub_table = std::move(rhs.level0_sub_table);
- for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) {
- level1_sub_tables[i] = std::move(rhs.level1_sub_tables[i]);
- }
-
- return *this;
- }
-
- size_t hash(const Key& x) const { return level0_sub_table.hash(x); }
-
- float get_factor() const { return MAX_SUB_TABLE_OCCUPANCY_FRACTION; }
-
- int64_t get_convert_timer_value() const { return _convert_timer_ns; }
-
- bool should_be_shrink(int64_t valid_row) const {
- if (_is_partitioned) {
- return false;
- } else {
- return level0_sub_table.should_be_shrink(valid_row);
- }
- }
-
- size_t size() {
- size_t count = 0;
- if (_is_partitioned) {
- for (auto i = 0u; i < this->NUM_LEVEL1_SUB_TABLES; ++i) {
- count += this->level1_sub_tables[i].size();
- }
- } else {
- count = level0_sub_table.size();
- }
- return count;
- }
-
-    void init_buf_size(size_t reserve_for_num_elements) {
-        if (_is_partitioned) {
-            for (auto& impl : level1_sub_tables) {
-                impl.init_buf_size(reserve_for_num_elements / NUM_LEVEL1_SUB_TABLES);
-            }
-        } else {
-            if (level0_sub_table.check_if_need_partition(reserve_for_num_elements)) {
-                level0_sub_table.clear_and_shrink();
-                _is_partitioned = true;
-
-                for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) {
-                    level1_sub_tables[i].init_buf_size(reserve_for_num_elements /
-                                                       NUM_LEVEL1_SUB_TABLES);
-                }
-            } else {
-                level0_sub_table.init_buf_size(reserve_for_num_elements);
-            }
-        }
-    }
-
- void delete_zero_key(Key key) {
- if (_is_partitioned) {
- const auto key_hash = hash(key);
- size_t sub_table_idx = get_sub_table_from_hash(key_hash);
- level1_sub_tables[sub_table_idx].delete_zero_key(key);
- } else {
- level0_sub_table.delete_zero_key(key);
- }
- }
-
- int64_t get_collisions() const {
- size_t collisions = level0_sub_table.get_collisions();
- for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; i++) {
- collisions += level1_sub_tables[i].get_collisions();
- }
- return collisions;
- }
-
- size_t get_buffer_size_in_bytes() const {
- if (_is_partitioned) {
- size_t buff_size = 0;
-            for (const auto& impl : level1_sub_tables) buff_size += impl.get_buffer_size_in_bytes();
- return buff_size;
- } else {
- return level0_sub_table.get_buffer_size_in_bytes();
- }
- }
-
- size_t get_buffer_size_in_cells() const {
- if (_is_partitioned) {
- size_t buff_size = 0;
-            for (const auto& impl : level1_sub_tables) buff_size += impl.get_buffer_size_in_cells();
- return buff_size;
- } else {
- return level0_sub_table.get_buffer_size_in_cells();
- }
- }
-
- std::vector<size_t> get_buffer_sizes_in_cells() const {
- std::vector<size_t> sizes;
- if (_is_partitioned) {
- for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) {
-                sizes.push_back(level1_sub_tables[i].get_buffer_size_in_cells());
- }
- } else {
- sizes.push_back(level0_sub_table.get_buffer_size_in_cells());
- }
- return sizes;
- }
-
- void reset_resize_timer() {
- if (_is_partitioned) {
- for (auto& impl : level1_sub_tables) {
- impl.reset_resize_timer();
- }
- } else {
- level0_sub_table.reset_resize_timer();
- }
- }
- int64_t get_resize_timer_value() const {
- if (_is_partitioned) {
- int64_t resize_timer_ns = 0;
- for (const auto& impl : level1_sub_tables) {
- resize_timer_ns += impl.get_resize_timer_value();
- }
- return resize_timer_ns;
- } else {
- return level0_sub_table.get_resize_timer_value();
- }
- }
-
- bool has_null_key_data() const { return false; }
- template <typename MappedType>
- char* get_null_key_data() {
- return nullptr;
- }
-
-protected:
-    typename Impl::iterator begin_of_next_non_empty_sub_table_idx(size_t& sub_table_idx) {
-        while (sub_table_idx != NUM_LEVEL1_SUB_TABLES && level1_sub_tables[sub_table_idx].empty())
-            ++sub_table_idx;
-
-        if (sub_table_idx != NUM_LEVEL1_SUB_TABLES) return level1_sub_tables[sub_table_idx].begin();
-
- --sub_table_idx;
- return level1_sub_tables[MAX_SUB_TABLE].end();
- }
-
- typename Impl::const_iterator begin_of_next_non_empty_sub_table_idx(
- size_t& sub_table_idx) const {
-        while (sub_table_idx != NUM_LEVEL1_SUB_TABLES && level1_sub_tables[sub_table_idx].empty())
-            ++sub_table_idx;
-
-        if (sub_table_idx != NUM_LEVEL1_SUB_TABLES) return level1_sub_tables[sub_table_idx].begin();
-
- --sub_table_idx;
- return level1_sub_tables[MAX_SUB_TABLE].end();
- }
-
-public:
- void set_partitioned_threshold(int threshold) {
- level0_sub_table.set_partitioned_threshold(threshold);
- }
-
- class iterator /// NOLINT
- {
- Self* container {};
- size_t sub_table_idx {};
- typename Impl::iterator current_it {};
-
- friend class PartitionedHashTable;
-
-        iterator(Self* container_, size_t sub_table_idx_, typename Impl::iterator current_it_)
-                : container(container_), sub_table_idx(sub_table_idx_), current_it(current_it_) {}
-
- public:
- iterator() = default;
-
- bool operator==(const iterator& rhs) const {
-            return sub_table_idx == rhs.sub_table_idx && current_it == rhs.current_it;
- }
- bool operator!=(const iterator& rhs) const { return !(*this == rhs); }
-
- iterator& operator++() {
- ++current_it;
- if (container->_is_partitioned) {
-                if (current_it == container->level1_sub_tables[sub_table_idx].end()) {
-                    ++sub_table_idx;
-                    current_it = container->begin_of_next_non_empty_sub_table_idx(sub_table_idx);
-                }
- }
- }
-
- return *this;
- }
-
- auto& operator*() { return *current_it; }
- auto* operator->() { return current_it.get_ptr(); }
-
- auto* get_ptr() { return current_it.get_ptr(); }
- size_t get_hash() { return current_it.get_hash(); }
- };
-
- class const_iterator /// NOLINT
- {
- Self* container {};
- size_t sub_table_idx {};
- typename Impl::const_iterator current_it {};
-
- friend class PartitionedHashTable;
-
- const_iterator(Self* container_, size_t sub_table_idx_,
- typename Impl::const_iterator current_it_)
-                : container(container_), sub_table_idx(sub_table_idx_), current_it(current_it_) {}
-
- public:
- const_iterator() = default;
- const_iterator(const iterator& rhs)
- : container(rhs.container),
- sub_table_idx(rhs.sub_table_idx),
- current_it(rhs.current_it) {} /// NOLINT
-
- bool operator==(const const_iterator& rhs) const {
-            return sub_table_idx == rhs.sub_table_idx && current_it == rhs.current_it;
-        }
-        bool operator!=(const const_iterator& rhs) const { return !(*this == rhs); }
-
- const_iterator& operator++() {
- ++current_it;
- if (container->_is_partitioned) {
-                if (current_it == container->level1_sub_tables[sub_table_idx].end()) {
-                    ++sub_table_idx;
-                    current_it = container->begin_of_next_non_empty_sub_table_idx(sub_table_idx);
-                }
- }
- }
-
- return *this;
- }
-
- const auto& operator*() const { return *current_it; }
- const auto* operator->() const { return current_it->get_ptr(); }
-
- const auto* get_ptr() const { return current_it.get_ptr(); }
- size_t get_hash() const { return current_it.get_hash(); }
- };
-
- const_iterator begin() const {
- if (_is_partitioned) {
- size_t sub_table_idx = 0;
- typename Impl::const_iterator impl_it =
- begin_of_next_non_empty_sub_table_idx(sub_table_idx);
- return {this, sub_table_idx, impl_it};
- } else {
- return {this, NUM_LEVEL1_SUB_TABLES, level0_sub_table.begin()};
- }
- }
-
- iterator begin() {
- if (_is_partitioned) {
- size_t sub_table_idx = 0;
-            typename Impl::iterator impl_it = begin_of_next_non_empty_sub_table_idx(sub_table_idx);
- return {this, sub_table_idx, impl_it};
- } else {
- return {this, NUM_LEVEL1_SUB_TABLES, level0_sub_table.begin()};
- }
- }
-
- const_iterator end() const {
- if (_is_partitioned) {
-            return {this, MAX_SUB_TABLE, level1_sub_tables[MAX_SUB_TABLE].end()};
- } else {
- return {this, NUM_LEVEL1_SUB_TABLES, level0_sub_table.end()};
- }
- }
- iterator end() {
- if (_is_partitioned) {
-            return {this, MAX_SUB_TABLE, level1_sub_tables[MAX_SUB_TABLE].end()};
- } else {
- return {this, NUM_LEVEL1_SUB_TABLES, level0_sub_table.end()};
- }
- }
-
-    /// Insert a value. In the case of any more complex values, it is better to use the `emplace` function.
- std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type& x) {
- size_t hash_value = hash(cell_type::get_key(x));
-
- std::pair<LookupResult, bool> res;
- emplace(cell_type::get_key(x), res.first, res.second, hash_value);
-
-        if (res.second) insert_set_mapped(lookup_result_get_mapped(res.first), x);
-
- return res;
- }
-
- void expanse_for_add_elem(size_t num_elem) {
- if (_is_partitioned) {
-            size_t num_elem_per_sub_table =
-                    (num_elem + NUM_LEVEL1_SUB_TABLES - 1) / NUM_LEVEL1_SUB_TABLES;
-            for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) {
-                level1_sub_tables[i].expanse_for_add_elem(num_elem_per_sub_table);
- }
- } else {
- level0_sub_table.expanse_for_add_elem(num_elem);
- if (UNLIKELY(level0_sub_table.need_partition())) {
- convert_to_partitioned();
- }
- }
- }
-
- template <bool READ>
- void ALWAYS_INLINE prefetch(const Key& key, size_t hash_value) {
- if (_is_partitioned) {
- const auto sub_table_idx = get_sub_table_from_hash(hash_value);
-            level1_sub_tables[sub_table_idx].template prefetch<READ>(hash_value);
- } else {
- level0_sub_table.template prefetch<READ>(hash_value);
- }
- }
-
-    /** Insert the key,
-      * return an iterator to a position that can be used for `placement new` of value,
-      * as well as the flag - whether a new key was inserted.
-      *
-      * You have to make `placement new` values if you inserted a new key,
-      * since when destroying a hash table, the destructor will be invoked for it!
- *
- * Example usage:
- *
- * Map::iterator it;
- * bool inserted;
- * map.emplace(key, it, inserted);
- * if (inserted)
- * new(&it->second) Mapped(value);
- */
- template <typename KeyHolder>
-    void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, bool& inserted) {
- size_t hash_value = hash(key_holder);
- emplace(key_holder, it, inserted, hash_value);
- }
-
- /// Same, but with a precalculated values of hash function.
- template <typename KeyHolder>
-    void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, bool& inserted,
-                               size_t hash_value) {
- if (_is_partitioned) {
- size_t sub_table_idx = get_sub_table_from_hash(hash_value);
-            level1_sub_tables[sub_table_idx].emplace(key_holder, it, inserted, hash_value);
- } else {
- level0_sub_table.emplace(key_holder, it, inserted, hash_value);
- if (UNLIKELY(level0_sub_table.need_partition())) {
- convert_to_partitioned();
-
-                // The hash table was converted to partitioned, so we have to re-find the key.
-                size_t sub_table_id = get_sub_table_from_hash(hash_value);
-                it = level1_sub_tables[sub_table_id].find(key_holder, hash_value);
- }
- }
- }
-
- template <typename KeyHolder>
-    void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, size_t hash_value,
-                               bool& inserted) {
- emplace(key_holder, it, inserted, hash_value);
- }
-
- template <typename KeyHolder, typename Func>
-    void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, Func&& f) {
- size_t hash_value = hash(key_holder);
- lazy_emplace(key_holder, it, hash_value, std::forward<Func>(f));
- }
-
- template <typename KeyHolder, typename Func>
-    void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, size_t hash_value,
-                                    Func&& f) {
- if (_is_partitioned) {
- size_t sub_table_idx = get_sub_table_from_hash(hash_value);
-            level1_sub_tables[sub_table_idx].lazy_emplace(key_holder, it, hash_value,
-                                                          std::forward<Func>(f));
- } else {
-            level0_sub_table.lazy_emplace(key_holder, it, hash_value, std::forward<Func>(f));
- if (UNLIKELY(level0_sub_table.need_partition())) {
- convert_to_partitioned();
-
-                // The hash table was converted to partitioned, so we have to re-find the key.
-                size_t sub_table_id = get_sub_table_from_hash(hash_value);
-                it = level1_sub_tables[sub_table_id].find(key_holder, hash_value);
- }
- }
- }
-
- LookupResult ALWAYS_INLINE find(Key x, size_t hash_value) {
- if (_is_partitioned) {
- size_t sub_table_idx = get_sub_table_from_hash(hash_value);
- return level1_sub_tables[sub_table_idx].find(x, hash_value);
- } else {
- return level0_sub_table.find(x, hash_value);
- }
- }
-
- ConstLookupResult ALWAYS_INLINE find(Key x, size_t hash_value) const {
-        return const_cast<std::decay_t<decltype(*this)>*>(this)->find(x, hash_value);
- }
-
- LookupResult ALWAYS_INLINE find(Key x) { return find(x, hash(x)); }
-
-    ConstLookupResult ALWAYS_INLINE find(Key x) const { return find(x, hash(x)); }
-
- size_t size() const {
- if (_is_partitioned) {
- size_t res = 0;
- for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) {
- res += level1_sub_tables[i].size();
- }
- return res;
- } else {
- return level0_sub_table.size();
- }
- }
-
- std::vector<size_t> sizes() const {
- std::vector<size_t> sizes;
- if (_is_partitioned) {
- for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) {
- sizes.push_back(level1_sub_tables[i].size());
- }
- } else {
- sizes.push_back(level0_sub_table.size());
- }
- return sizes;
- }
-
- bool empty() const {
- if (_is_partitioned) {
- for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i)
- if (!level1_sub_tables[i].empty()) return false;
- return true;
- } else {
- return level0_sub_table.empty();
- }
- }
-
- bool add_elem_size_overflow(size_t row) const {
-        return !_is_partitioned && level0_sub_table.add_elem_size_overflow(row);
- }
-
-private:
- void convert_to_partitioned() {
- SCOPED_RAW_TIMER(&_convert_timer_ns);
-
- DCHECK(!_is_partitioned);
- _is_partitioned = true;
-
- auto bucket_count = level0_sub_table.get_buffer_size_in_cells();
- for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) {
-            level1_sub_tables[i] = std::move(Impl(bucket_count / NUM_LEVEL1_SUB_TABLES));
- }
-
- auto it = level0_sub_table.begin();
-
-        /// It is assumed that the zero key (stored separately) is first in iteration order.
-        if (it != level0_sub_table.end() && it.get_ptr()->is_zero(level0_sub_table)) {
- insert(it->get_value());
- ++it;
- }
-
- for (; it != level0_sub_table.end(); ++it) {
- const auto* cell = it.get_ptr();
- size_t hash_value = cell->get_hash(level0_sub_table);
- size_t sub_table_idx = get_sub_table_from_hash(hash_value);
-            level1_sub_tables[sub_table_idx].insert_unique_non_zero(cell, hash_value);
- }
-
- level0_sub_table.clear_and_shrink();
- }
-
- /// NOTE Bad for hash tables with more than 2^32 cells.
- static size_t get_sub_table_from_hash(size_t hash_value) {
- return (hash_value >> (32 - BITS_FOR_SUB_TABLE)) & MAX_SUB_TABLE;
- }
-};
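To make the NOTE above concrete: with BITS_FOR_SUB_TABLE = 4 the formula reads
bits 28..31 of the hash, so only the low 32 bits ever influence routing, and a
hash of 0xDEADBEEF lands in sub table 0xD (13). A tiny self-contained check of
that arithmetic (hypothetical test code, not part of the tree):

    #include <cassert>
    #include <cstddef>

    static constexpr size_t BITS_FOR_SUB_TABLE = 4;
    static constexpr size_t MAX_SUB_TABLE = (1ULL << BITS_FOR_SUB_TABLE) - 1;

    // Same formula as the removed PartitionedHashTable::get_sub_table_from_hash.
    static size_t get_sub_table_from_hash(size_t hash_value) {
        return (hash_value >> (32 - BITS_FOR_SUB_TABLE)) & MAX_SUB_TABLE;
    }

    int main() {
        assert(get_sub_table_from_hash(0xDEADBEEFULL) == 0xD); // bits 28..31 are 0xD
        assert(get_sub_table_from_hash(0x00000001ULL) == 0x0); // small hashes go to table 0
        return 0;
    }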
diff --git a/be/src/vec/common/hash_table/ph_hash_map.h b/be/src/vec/common/hash_table/ph_hash_map.h
index 50cf218dc87..de320425223 100644
--- a/be/src/vec/common/hash_table/ph_hash_map.h
+++ b/be/src/vec/common/hash_table/ph_hash_map.h
@@ -30,8 +30,7 @@ ALWAYS_INLINE inline auto lookup_result_get_mapped(std::pair<const Key, Mapped>*
return &(it->second);
}
-template <typename Key, typename Mapped, typename HashMethod = DefaultHash<Key>,
-          bool PartitionedHashTable = false>
+template <typename Key, typename Mapped, typename HashMethod = DefaultHash<Key>>
class PHHashMap : private boost::noncopyable {
public:
using Self = PHHashMap;
@@ -58,9 +57,6 @@ public:
PHHashMap& operator=(PHHashMap&& rhs) {
_hash_map.clear();
_hash_map = std::move(rhs._hash_map);
- std::swap(_need_partition, rhs._need_partition);
- std::swap(_partitioned_threshold, rhs._partitioned_threshold);
-
return *this;
}
@@ -130,19 +126,11 @@ public:
inserted = true;
ctor(key_holder, nullptr);
});
-
- if constexpr (PartitionedHashTable) {
- _check_if_need_partition();
- }
}
template <typename KeyHolder, typename Func>
    void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, Func&& f) {
        it = &*_hash_map.lazy_emplace(key_holder, [&](const auto& ctor) { f(ctor, key_holder); });
-
- if constexpr (PartitionedHashTable) {
- _check_if_need_partition();
- }
}
template <typename KeyHolder>
@@ -157,10 +145,6 @@ public:
ctor(key, mapped_type());
}
});
-
- if constexpr (PartitionedHashTable) {
- _check_if_need_partition();
- }
}
template <typename KeyHolder, typename Func>
@@ -168,10 +152,6 @@ public:
Func&& f) {
        it = &*_hash_map.lazy_emplace_with_hash(key, hash_value,
                                                [&](const auto& ctor) { f(ctor, key, key); });
-
- if constexpr (PartitionedHashTable) {
- _check_if_need_partition();
- }
}
    void ALWAYS_INLINE insert(const Key& key, size_t hash_value, const Mapped& value) {
@@ -225,18 +205,6 @@ public:
}
bool has_null_key_data() const { return false; }
- bool need_partition() { return _need_partition; }
-
-    void set_partitioned_threshold(int threshold) { _partitioned_threshold = threshold; }
-
- bool check_if_need_partition(size_t bucket_count) {
- if constexpr (PartitionedHashTable) {
-            return _partitioned_threshold > 0 && bucket_count >= _partitioned_threshold;
- } else {
- return false;
- }
- }
-
bool empty() const { return _hash_map.empty(); }
void clear_and_shrink() { _hash_map.clear(); }
@@ -244,19 +212,5 @@ public:
void expanse_for_add_elem(size_t num_elem) { _hash_map.reserve(num_elem); }
private:
- void _check_if_need_partition() {
- if (UNLIKELY(check_if_need_partition(_hash_map.size() + 1))) {
- _need_partition = add_elem_size_overflow(1);
- }
- }
-
HashMapImpl _hash_map;
-    // the bucket count threshold above which it's converted to partitioned hash table
-    // > 0: enable convert dynamically
-    // 0: convert is disabled
-    int _partitioned_threshold = 0;
-    // if need resize and bucket count after resize will be >= _partitioned_threshold,
-    // this flag is set to true, and resize does not actually happen,
-    // PartitionedHashTable will convert this hash table to partitioned hash table
-    bool _need_partition;
};
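One thing this cleanup does not change: the emplace contract quoted in the
deleted header's comment (construct the mapped value with placement new only
when `inserted` comes back true) still describes how the remaining tables are
used. A minimal stand-in illustrating that contract; TinyTable below is
hypothetical demo code, not the Doris API:

    #include <cassert>
    #include <cstddef>
    #include <functional>
    #include <new>

    // Fixed-capacity, open-addressed, no deletion: just enough to show the
    // contract. emplace() hands back a slot; the caller placement-news the
    // mapped value only when `inserted` is true.
    template <typename Key, typename Mapped, size_t N = 16>
    struct TinyTable {
        struct Cell {
            Key key {};
            alignas(Mapped) unsigned char mapped[sizeof(Mapped)];
            bool used = false;
        };
        Cell cells[N] {};

        using LookupResult = Mapped*;

        void emplace(const Key& key, LookupResult& it, bool& inserted) {
            size_t i = std::hash<Key> {}(key) % N;
            while (cells[i].used && !(cells[i].key == key)) {
                i = (i + 1) % N; // linear probing; assumes the table never fills up
            }
            inserted = !cells[i].used;
            if (inserted) {
                cells[i].used = true;
                cells[i].key = key;
            }
            it = reinterpret_cast<Mapped*>(cells[i].mapped);
        }
    };

    int main() {
        TinyTable<int, int> map;
        TinyTable<int, int>::LookupResult it = nullptr;
        bool inserted = false;

        map.emplace(42, it, inserted);
        if (inserted) {
            new (it) int(7); // placement new only on first insert
        }
        assert(*it == 7);

        map.emplace(42, it, inserted); // same key again: inserted == false
        assert(!inserted && *it == 7);
        return 0;
    }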
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]