This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b3fb0e10b52 [Chore](join) split out join hash map from hash map
(#30280)
b3fb0e10b52 is described below
commit b3fb0e10b52b91892fcb746a1f7ee2684e815a08
Author: Pxl <[email protected]>
AuthorDate: Thu Jan 25 10:08:25 2024 +0800
[Chore](join) split out join hash map from hash map (#30280)
split out join hash map from hash map
---
be/src/pipeline/exec/hashjoin_build_sink.cpp | 2 +-
be/src/pipeline/pipeline_x/dependency.h | 27 +-
be/src/vec/common/hash_table/hash_map.h | 376 +--------------------
be/src/vec/common/hash_table/hash_map_context.h | 56 ++-
.../hash_table/{hash_map.h => join_hash_table.h} | 276 +++------------
.../vec/common/hash_table/partitioned_hash_map.h | 3 +-
be/src/vec/common/hash_table/string_hash_map.h | 2 +
be/src/vec/exec/join/vhash_join_node.cpp | 2 +-
be/src/vec/exec/vset_operation_node.cpp | 16 +-
be/src/vec/exec/vset_operation_node.h | 21 +-
10 files changed, 122 insertions(+), 659 deletions(-)
diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp
b/be/src/pipeline/exec/hashjoin_build_sink.cpp
index f02e203c783..2711b0d8852 100644
--- a/be/src/pipeline/exec/hashjoin_build_sink.cpp
+++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp
@@ -364,7 +364,7 @@ void
HashJoinBuildSinkLocalState::_hash_table_init(RuntimeState* state) {
}
return;
}
- if (!try_get_hash_map_context_fixed<JoinFixedHashMap,
HashCRC32, RowRefListType>(
+ if (!try_get_hash_map_context_fixed<JoinHashMap, HashCRC32,
RowRefListType>(
*_shared_state->hash_table_variants,
_build_expr_ctxs)) {
_shared_state->hash_table_variants
->emplace<vectorized::SerializedHashTableContext<RowRefListType>>();
diff --git a/be/src/pipeline/pipeline_x/dependency.h
b/be/src/pipeline/pipeline_x/dependency.h
index 59f9fee3775..8a58973be35 100644
--- a/be/src/pipeline/pipeline_x/dependency.h
+++ b/be/src/pipeline/pipeline_x/dependency.h
@@ -35,6 +35,7 @@
#include "vec/common/hash_table/hash_map_context_creator.h"
#include "vec/common/sort/partition_sorter.h"
#include "vec/common/sort/sorter.h"
+#include "vec/core/types.h"
#include "vec/exec/join/process_hash_table_probe.h"
#include "vec/exec/join/vhash_join_node.h"
#include "vec/exec/vaggregation_node.h"
@@ -524,24 +525,22 @@ public:
/// called in setup_local_state
void hash_table_init() {
+ using namespace vectorized;
if (child_exprs_lists[0].size() == 1 && (!build_not_ignore_null[0])) {
// Single column optimization
switch (child_exprs_lists[0][0]->root()->result_type()) {
case TYPE_BOOLEAN:
case TYPE_TINYINT:
- hash_table_variants->emplace<
-
vectorized::I8HashTableContext<vectorized::RowRefListWithFlags>>();
+
hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt8>>();
break;
case TYPE_SMALLINT:
- hash_table_variants->emplace<
-
vectorized::I16HashTableContext<vectorized::RowRefListWithFlags>>();
+
hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt16>>();
break;
case TYPE_INT:
case TYPE_FLOAT:
case TYPE_DATEV2:
case TYPE_DECIMAL32:
- hash_table_variants->emplace<
-
vectorized::I32HashTableContext<vectorized::RowRefListWithFlags>>();
+
hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt32>>();
break;
case TYPE_BIGINT:
case TYPE_DOUBLE:
@@ -549,27 +548,21 @@ public:
case TYPE_DATE:
case TYPE_DECIMAL64:
case TYPE_DATETIMEV2:
- hash_table_variants->emplace<
-
vectorized::I64HashTableContext<vectorized::RowRefListWithFlags>>();
+
hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt64>>();
break;
case TYPE_LARGEINT:
case TYPE_DECIMALV2:
case TYPE_DECIMAL128I:
- hash_table_variants->emplace<
-
vectorized::I128HashTableContext<vectorized::RowRefListWithFlags>>();
+
hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt128>>();
break;
default:
- hash_table_variants->emplace<
-
vectorized::SerializedHashTableContext<vectorized::RowRefListWithFlags>>();
+ hash_table_variants->emplace<SetSerializedHashTableContext>();
}
return;
}
-
- if (!try_get_hash_map_context_fixed<JoinFixedHashMap, HashCRC32,
- vectorized::RowRefListWithFlags>(
+ if (!try_get_hash_map_context_fixed<NormalHashMap, HashCRC32,
RowRefListWithFlags>(
*hash_table_variants, child_exprs_lists[0])) {
- hash_table_variants->emplace<
-
vectorized::SerializedHashTableContext<vectorized::RowRefListWithFlags>>();
+ hash_table_variants->emplace<SetSerializedHashTableContext>();
}
}
};
diff --git a/be/src/vec/common/hash_table/hash_map.h
b/be/src/vec/common/hash_table/hash_map.h
index 382f46acb74..d10b24ade21 100644
--- a/be/src/vec/common/hash_table/hash_map.h
+++ b/be/src/vec/common/hash_table/hash_map.h
@@ -27,7 +27,9 @@
#include "vec/common/hash_table/hash.h"
#include "vec/common/hash_table/hash_table.h"
#include "vec/common/hash_table/hash_table_allocator.h"
+#include "vec/common/hash_table/join_hash_table.h"
+namespace doris {
/** NOTE HashMap could only be used for memmoveable (position independent)
types.
* Example: std::string is not position independent in libstdc++ with C++11
ABI or in libc++.
* Also, key in hash table must be of type, that zero bytes is compared
equals to zero key.
@@ -192,379 +194,15 @@ public:
bool has_null_key_data() const { return false; }
};
-template <typename Key, typename Cell, typename Hash = DefaultHash<Key>,
- typename Grower = HashTableGrower<>, typename Allocator =
HashTableAllocator>
-class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower,
Allocator> {
-public:
- using Self = JoinHashMapTable;
- using Base = HashMapTable<Key, Cell, Hash, Grower, Allocator>;
-
- using key_type = Key;
- using value_type = typename Cell::value_type;
- using mapped_type = typename Cell::Mapped;
-
- using LookupResult = typename Base::LookupResult;
-
- static uint32_t calc_bucket_size(size_t num_elem) {
- size_t expect_bucket_size = num_elem + (num_elem - 1) / 7;
- return phmap::priv::NormalizeCapacity(expect_bucket_size) + 1;
- }
-
- size_t get_byte_size() const {
- auto cal_vector_mem = [](const auto& vec) { return vec.capacity() *
sizeof(vec[0]); };
- return cal_vector_mem(visited) + cal_vector_mem(first) +
cal_vector_mem(next);
- }
-
- template <int JoinOpType>
- void prepare_build(size_t num_elem, int batch_size, bool has_null_key) {
- _has_null_key = has_null_key;
-
- // the first row in build side is not really from build side table
- _empty_build_side = num_elem <= 1;
- max_batch_size = batch_size;
- bucket_size = calc_bucket_size(num_elem + 1);
- first.resize(bucket_size + 1);
- next.resize(num_elem);
-
- if constexpr (JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
- JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN ||
- JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
- JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
- visited.resize(num_elem);
- }
- }
-
- uint32_t get_bucket_size() const { return bucket_size; }
-
- size_t size() const { return Base::size() == 0 ? next.size() :
Base::size(); }
-
- std::vector<uint8_t>& get_visited() { return visited; }
-
- void build(const Key* __restrict keys, const uint32_t* __restrict
bucket_nums,
- size_t num_elem) {
- build_keys = keys;
- for (size_t i = 1; i < num_elem; i++) {
- uint32_t bucket_num = bucket_nums[i];
- next[i] = first[bucket_num];
- first[bucket_num] = i;
- }
- first[bucket_size] = 0; // index = bucket_num means null
- }
-
- template <int JoinOpType, bool with_other_conjuncts, bool is_mark_join,
bool need_judge_null>
- auto find_batch(const Key* __restrict keys, const uint32_t* __restrict
build_idx_map,
- int probe_idx, uint32_t build_idx, int probe_rows,
- uint32_t* __restrict probe_idxs, bool& probe_visited,
- uint32_t* __restrict build_idxs,
- doris::vectorized::ColumnFilterHelper* mark_column) {
- if constexpr (JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN)
{
- if (_empty_build_side) {
- return _process_null_aware_left_anti_join_for_empty_build_side<
- JoinOpType, with_other_conjuncts, is_mark_join>(
- probe_idx, probe_rows, probe_idxs, build_idxs,
mark_column);
- }
- }
-
- if constexpr (with_other_conjuncts) {
- return _find_batch_conjunct<JoinOpType>(keys, build_idx_map,
probe_idx, build_idx,
- probe_rows, probe_idxs,
build_idxs);
- }
-
- if constexpr (is_mark_join) {
- return _find_batch_mark<JoinOpType>(keys, build_idx_map,
probe_idx, probe_rows,
- probe_idxs, build_idxs,
mark_column);
- }
-
- if constexpr (JoinOpType == doris::TJoinOp::INNER_JOIN ||
- JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
- JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
- JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN) {
- return _find_batch_inner_outer_join<JoinOpType>(keys,
build_idx_map, probe_idx,
- build_idx,
probe_rows, probe_idxs,
- probe_visited,
build_idxs);
- }
- if constexpr (JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN ||
- JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ||
- JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN)
{
- return _find_batch_left_semi_anti<JoinOpType, need_judge_null>(
- keys, build_idx_map, probe_idx, probe_rows, probe_idxs);
- }
- if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
- JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
- return _find_batch_right_semi_anti(keys, build_idx_map, probe_idx,
probe_rows);
- }
- return std::tuple {0, 0U, 0};
- }
-
- template <int JoinOpType>
- bool iterate_map(std::vector<uint32_t>& build_idxs) const {
- const auto batch_size = max_batch_size;
- const auto elem_num = visited.size();
- int count = 0;
- build_idxs.resize(batch_size);
-
- while (count < batch_size && iter_idx < elem_num) {
- const auto matched = visited[iter_idx];
- build_idxs[count] = iter_idx;
- if constexpr (JoinOpType != doris::TJoinOp::RIGHT_SEMI_JOIN) {
- count += !matched;
- } else {
- count += matched;
- }
- iter_idx++;
- }
-
- build_idxs.resize(count);
- return iter_idx >= elem_num;
- }
-
- bool has_null_key() { return _has_null_key; }
-
- void pre_build_idxs(std::vector<uint32>& bucksets, const uint8_t*
null_map) {
- if (null_map) {
- first[bucket_size] = bucket_size; // distinguish between not
matched and null
- }
-
- for (uint32_t i = 0; i < bucksets.size(); i++) {
- bucksets[i] = first[bucksets[i]];
- }
- }
-
-private:
- // only LEFT_ANTI_JOIN/LEFT_SEMI_JOIN/NULL_AWARE_LEFT_ANTI_JOIN/CROSS_JOIN
support mark join
- template <int JoinOpType>
- auto _find_batch_mark(const Key* __restrict keys, const uint32_t*
__restrict build_idx_map,
- int probe_idx, int probe_rows, uint32_t* __restrict
probe_idxs,
- uint32_t* __restrict build_idxs,
- doris::vectorized::ColumnFilterHelper* mark_column) {
- auto matched_cnt = 0;
- const auto batch_size = max_batch_size;
-
- while (probe_idx < probe_rows && matched_cnt < batch_size) {
- auto build_idx = build_idx_map[probe_idx] == bucket_size ? 0 :
build_idx_map[probe_idx];
-
- while (build_idx && keys[probe_idx] != build_keys[build_idx]) {
- build_idx = next[build_idx];
- }
-
- if (build_idx_map[probe_idx] == bucket_size) {
- // mark result as null when probe row is null
- mark_column->insert_null();
- } else {
- bool matched = JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ?
build_idx != 0
- :
build_idx == 0;
- if (!matched && _has_null_key) {
- mark_column->insert_null();
- } else {
- mark_column->insert_value(matched);
- }
- }
-
- probe_idxs[matched_cnt] = probe_idx++;
- build_idxs[matched_cnt] = build_idx;
- matched_cnt++;
- }
- return std::tuple {probe_idx, 0U, matched_cnt};
- }
-
- template <int JoinOpType, bool with_other_conjuncts, bool is_mark_join>
- auto _process_null_aware_left_anti_join_for_empty_build_side(
- int probe_idx, int probe_rows, uint32_t* __restrict probe_idxs,
- uint32_t* __restrict build_idxs,
doris::vectorized::ColumnFilterHelper* mark_column) {
- static_assert(JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN);
- auto matched_cnt = 0;
- const auto batch_size = max_batch_size;
-
- while (probe_idx < probe_rows && matched_cnt < batch_size) {
- probe_idxs[matched_cnt] = probe_idx++;
- if constexpr (is_mark_join) {
- build_idxs[matched_cnt] = 0;
- }
- ++matched_cnt;
- }
-
- if constexpr (is_mark_join && !with_other_conjuncts) {
- mark_column->resize_fill(matched_cnt, 1);
- }
-
- return std::tuple {probe_idx, 0U, matched_cnt};
- }
-
- auto _find_batch_right_semi_anti(const Key* __restrict keys,
- const uint32_t* __restrict build_idx_map,
int probe_idx,
- int probe_rows) {
- while (probe_idx < probe_rows) {
- auto build_idx = build_idx_map[probe_idx];
-
- while (build_idx) {
- if (!visited[build_idx] && keys[probe_idx] ==
build_keys[build_idx]) {
- visited[build_idx] = 1;
- }
- build_idx = next[build_idx];
- }
- probe_idx++;
- }
- return std::tuple {probe_idx, 0U, 0};
- }
-
- template <int JoinOpType, bool need_judge_null>
- auto _find_batch_left_semi_anti(const Key* __restrict keys,
- const uint32_t* __restrict build_idx_map,
int probe_idx,
- int probe_rows, uint32_t* __restrict
probe_idxs) {
- auto matched_cnt = 0;
- const auto batch_size = max_batch_size;
-
- while (probe_idx < probe_rows && matched_cnt < batch_size) {
- if constexpr (need_judge_null) {
- if (build_idx_map[probe_idx] == bucket_size) {
- probe_idx++;
- continue;
- }
- }
-
- auto build_idx = build_idx_map[probe_idx];
-
- while (build_idx && keys[probe_idx] != build_keys[build_idx]) {
- build_idx = next[build_idx];
- }
- bool matched =
- JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ? build_idx
!= 0 : build_idx == 0;
- probe_idxs[matched_cnt] = probe_idx++;
- matched_cnt += matched;
- }
- return std::tuple {probe_idx, 0U, matched_cnt};
- }
-
- template <int JoinOpType>
- auto _find_batch_conjunct(const Key* __restrict keys, const uint32_t*
__restrict build_idx_map,
- int probe_idx, uint32_t build_idx, int
probe_rows,
- uint32_t* __restrict probe_idxs, uint32_t*
__restrict build_idxs) {
- auto matched_cnt = 0;
- const auto batch_size = max_batch_size;
-
- auto do_the_probe = [&]() {
- while (build_idx && matched_cnt < batch_size) {
- if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
- JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
- if (!visited[build_idx] && keys[probe_idx] ==
build_keys[build_idx]) {
- probe_idxs[matched_cnt] = probe_idx;
- build_idxs[matched_cnt] = build_idx;
- matched_cnt++;
- }
- } else if (keys[probe_idx] == build_keys[build_idx]) {
- build_idxs[matched_cnt] = build_idx;
- probe_idxs[matched_cnt] = probe_idx;
- matched_cnt++;
- }
- build_idx = next[build_idx];
- }
-
- if constexpr (JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
- JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
- JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ||
- JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN ||
- JoinOpType ==
doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
- // may over batch_size when emplace 0 into build_idxs
- if (!build_idx) {
- probe_idxs[matched_cnt] = probe_idx;
- build_idxs[matched_cnt] = 0;
- matched_cnt++;
- }
- }
-
- probe_idx++;
- };
-
- if (build_idx) {
- do_the_probe();
- }
-
- while (probe_idx < probe_rows && matched_cnt < batch_size) {
- build_idx = build_idx_map[probe_idx];
- do_the_probe();
- }
-
- probe_idx -= (build_idx != 0);
- return std::tuple {probe_idx, build_idx, matched_cnt};
- }
-
- template <int JoinOpType>
- auto _find_batch_inner_outer_join(const Key* __restrict keys,
- const uint32_t* __restrict
build_idx_map, int probe_idx,
- uint32_t build_idx, int probe_rows,
- uint32_t* __restrict probe_idxs, bool&
probe_visited,
- uint32_t* __restrict build_idxs) {
- auto matched_cnt = 0;
- const auto batch_size = max_batch_size;
-
- auto do_the_probe = [&]() {
- while (build_idx && matched_cnt < batch_size) {
- if (keys[probe_idx] == build_keys[build_idx]) {
- probe_idxs[matched_cnt] = probe_idx;
- build_idxs[matched_cnt] = build_idx;
- matched_cnt++;
- if constexpr (JoinOpType ==
doris::TJoinOp::RIGHT_OUTER_JOIN ||
- JoinOpType ==
doris::TJoinOp::FULL_OUTER_JOIN) {
- if (!visited[build_idx]) {
- visited[build_idx] = 1;
- }
- }
- }
- build_idx = next[build_idx];
- }
-
- if constexpr (JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
- JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN) {
- // `(!matched_cnt || probe_idxs[matched_cnt - 1] !=
probe_idx)` means not match one build side
- probe_visited |= (matched_cnt && probe_idxs[matched_cnt - 1]
== probe_idx);
- if (!build_idx) {
- if (!probe_visited) {
- probe_idxs[matched_cnt] = probe_idx;
- build_idxs[matched_cnt] = 0;
- matched_cnt++;
- }
- probe_visited = false;
- }
- }
- probe_idx++;
- };
-
- if (build_idx) {
- do_the_probe();
- }
-
- while (probe_idx < probe_rows && matched_cnt < batch_size) {
- build_idx = build_idx_map[probe_idx];
- do_the_probe();
- }
-
- probe_idx -= (build_idx != 0);
- return std::tuple {probe_idx, build_idx, matched_cnt};
- }
-
- const Key* __restrict build_keys;
- std::vector<uint8_t> visited;
-
- uint32_t bucket_size = 1;
- int max_batch_size = 4064;
-
- std::vector<uint32_t> first = {0};
- std::vector<uint32_t> next = {0};
-
- // use in iter hash map
- mutable uint32_t iter_idx = 1;
- Cell cell;
- doris::vectorized::Arena* pool;
- bool _has_null_key = false;
- bool _empty_build_side = true;
-};
-
template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,
typename Grower = HashTableGrower<>, typename Allocator =
HashTableAllocator>
using HashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash,
Grower, Allocator>;
template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>>
-using JoinFixedHashMap = JoinHashMapTable<Key, HashMapCell<Key, Mapped, Hash>,
Hash>;
+using NormalHashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash>;
+
+template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>>
+using JoinHashMap = JoinHashTable<Key, Hash>;
template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,
typename Grower = HashTableGrower<>, typename Allocator =
HashTableAllocator>
@@ -577,3 +215,5 @@ using HashMapWithStackMemory = HashMapTable<
HashTableGrower<initial_size_degree>,
HashTableAllocatorWithStackMemory<(1ULL << initial_size_degree) *
sizeof(HashMapCellWithSavedHash<Key,
Mapped, Hash>)>>;
+
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/common/hash_table/hash_map_context.h
b/be/src/vec/common/hash_table/hash_map_context.h
index d96aa2d7c65..f8861ccfcd7 100644
--- a/be/src/vec/common/hash_table/hash_map_context.h
+++ b/be/src/vec/common/hash_table/hash_map_context.h
@@ -31,6 +31,7 @@
#include "vec/common/hash_table/string_hash_map.h"
#include "vec/common/string_ref.h"
#include "vec/core/types.h"
+#include "vec/exec/join/join_op.h"
#include "vec/utils/util.hpp"
namespace doris::vectorized {
@@ -41,15 +42,13 @@ template <typename Base>
struct DataWithNullKey;
template <typename HashMap>
-struct MethodBase {
+struct MethodBaseInner {
using Key = typename HashMap::key_type;
using Mapped = typename HashMap::mapped_type;
using Value = typename HashMap::value_type;
- using Iterator = typename HashMap::iterator;
using HashMapType = HashMap;
std::shared_ptr<HashMap> hash_table;
- Iterator iterator;
bool inited_iterator = false;
Key* keys = nullptr;
Arena arena;
@@ -58,21 +57,14 @@ struct MethodBase {
// use in join case
std::vector<uint32_t> bucket_nums;
- MethodBase() { hash_table.reset(new HashMap()); }
- virtual ~MethodBase() = default;
+ MethodBaseInner() { hash_table.reset(new HashMap()); }
+ virtual ~MethodBaseInner() = default;
virtual void reset() {
arena.clear();
inited_iterator = false;
}
- void init_iterator() {
- if (!inited_iterator) {
- inited_iterator = true;
- iterator = hash_table->begin();
- }
- }
-
virtual void init_serialized_keys(const ColumnRawPtrs& key_columns, size_t
num_rows,
const uint8_t* null_map = nullptr, bool
is_join = false,
bool is_build = false, uint32_t
bucket_size = 0) = 0;
@@ -170,6 +162,29 @@ struct MethodBase {
size_t num_rows) = 0;
};
+template <typename T>
+concept IteratoredMap = requires(T* map) { typename T::iterator; };
+
+template <typename HashMap>
+struct MethodBase : public MethodBaseInner<HashMap> {
+ using Iterator = void*;
+ Iterator iterator;
+ void init_iterator() { MethodBaseInner<HashMap>::inited_iterator = true; }
+};
+
+template <IteratoredMap HashMap>
+struct MethodBase<HashMap> : public MethodBaseInner<HashMap> {
+ using Iterator = typename HashMap::iterator;
+ using Base = MethodBaseInner<HashMap>;
+ Iterator iterator;
+ void init_iterator() {
+ if (!Base::inited_iterator) {
+ Base::inited_iterator = true;
+ iterator = Base::hash_table->begin();
+ }
+ }
+};
+
template <typename TData>
struct MethodSerialized : public MethodBase<TData> {
using Base = MethodBase<TData>;
@@ -555,14 +570,23 @@ struct MethodSingleNullableColumn : public
SingleColumnMethod {
};
template <typename RowRefListType>
-using SerializedHashTableContext =
MethodSerialized<JoinFixedHashMap<StringRef, RowRefListType>>;
+using SerializedHashTableContext = MethodSerialized<JoinHashMap<StringRef,
RowRefListType>>;
template <class T, typename RowRefListType>
using PrimaryTypeHashTableContext =
- MethodOneNumber<T, JoinFixedHashMap<T, RowRefListType, HashCRC32<T>>>;
+ MethodOneNumber<T, JoinHashMap<T, RowRefListType, HashCRC32<T>>>;
template <class Key, bool has_null, typename Value>
-using FixedKeyHashTableContext =
- MethodKeysFixed<JoinFixedHashMap<Key, Value, HashCRC32<Key>>,
has_null>;
+using FixedKeyHashTableContext = MethodKeysFixed<JoinHashMap<Key, Value,
HashCRC32<Key>>, has_null>;
+
+template <class Key, bool has_null>
+using SetFixedKeyHashTableContext =
+ MethodKeysFixed<HashMap<Key, RowRefListWithFlags, HashCRC32<Key>>,
has_null>;
+
+template <class T>
+using SetPrimaryTypeHashTableContext =
+ MethodOneNumber<T, HashMap<T, RowRefListWithFlags, HashCRC32<T>>>;
+
+using SetSerializedHashTableContext = MethodSerialized<HashMap<StringRef,
RowRefListWithFlags>>;
} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/common/hash_table/hash_map.h
b/be/src/vec/common/hash_table/join_hash_table.h
similarity index 53%
copy from be/src/vec/common/hash_table/hash_map.h
copy to be/src/vec/common/hash_table/join_hash_table.h
index 382f46acb74..b190d3d89ce 100644
--- a/be/src/vec/common/hash_table/hash_map.h
+++ b/be/src/vec/common/hash_table/join_hash_table.h
@@ -14,196 +14,24 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
-// This file is copied from
-//
https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/HashMap.h
-// and modified by Doris
#pragma once
#include <gen_cpp/PlanNodes_types.h>
-#include "common/compiler_util.h"
#include "vec/columns/column_filter_helper.h"
#include "vec/common/hash_table/hash.h"
#include "vec/common/hash_table/hash_table.h"
#include "vec/common/hash_table/hash_table_allocator.h"
-/** NOTE HashMap could only be used for memmoveable (position independent)
types.
- * Example: std::string is not position independent in libstdc++ with C++11
ABI or in libc++.
- * Also, key in hash table must be of type, that zero bytes is compared
equals to zero key.
- */
-
-struct NoInitTag {};
-
-/// A pair that does not initialize the elements, if not needed.
-template <typename First, typename Second>
-struct PairNoInit {
- First first;
- Second second;
-
- PairNoInit() {}
-
- template <typename First_>
- PairNoInit(First_&& first_, NoInitTag) :
first(std::forward<First_>(first_)) {}
-
- template <typename First_, typename Second_>
- PairNoInit(First_&& first_, Second_&& second_)
- : first(std::forward<First_>(first_)),
second(std::forward<Second_>(second_)) {}
-};
-
-template <typename Key, typename TMapped, typename Hash, typename TState =
HashTableNoState>
-struct HashMapCell {
- using Mapped = TMapped;
- using State = TState;
-
- using value_type = PairNoInit<Key, Mapped>;
- using mapped_type = Mapped;
- using key_type = Key;
-
- value_type value;
-
- HashMapCell() = default;
- HashMapCell(const Key& key_, const State&) : value(key_, NoInitTag()) {}
- HashMapCell(const Key& key_, const Mapped& mapped_) : value(key_, mapped_)
{}
- HashMapCell(const value_type& value_, const State&) : value(value_) {}
-
- const Key& get_first() const { return value.first; }
- Mapped& get_second() { return value.second; }
- const Mapped& get_second() const { return value.second; }
-
- const value_type& get_value() const { return value; }
-
- static const Key& get_key(const value_type& value) { return value.first; }
- Mapped& get_mapped() { return value.second; }
- const Mapped& get_mapped() const { return value.second; }
-
- bool key_equals(const Key& key_) const { return value.first == key_; }
- bool key_equals(const Key& key_, size_t /*hash_*/) const { return
value.first == key_; }
- bool key_equals(const Key& key_, size_t /*hash_*/, const State& /*state*/)
const {
- return value.first == key_;
- }
-
- void set_hash(size_t /*hash_value*/) {}
- size_t get_hash(const Hash& hash) const { return hash(value.first); }
-
- bool is_zero(const State& state) const { return is_zero(value.first,
state); }
- static bool is_zero(const Key& key, const State& /*state*/) { return
ZeroTraits::check(key); }
-
- /// Set the key value to zero.
- void set_zero() { ZeroTraits::set(value.first); }
-
- /// Do I need to store the zero key separately (that is, can a zero key be
inserted into the hash table).
- static constexpr bool need_zero_value_storage = true;
-
- void set_mapped(const value_type& value_) { value.second = value_.second; }
-};
-
-template <typename Key, typename Mapped, typename Hash, typename State>
-ALWAYS_INLINE inline auto lookup_result_get_key(HashMapCell<Key, Mapped, Hash,
State>* cell) {
- return &cell->get_first();
-}
-
-template <typename Key, typename Mapped, typename Hash, typename State>
-ALWAYS_INLINE inline auto lookup_result_get_mapped(HashMapCell<Key, Mapped,
Hash, State>* cell) {
- return &cell->get_second();
-}
-
-template <typename Key, typename TMapped, typename Hash, typename TState =
HashTableNoState>
-struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash,
TState> {
- using Base = HashMapCell<Key, TMapped, Hash, TState>;
-
- size_t saved_hash;
-
- using Base::Base;
-
- bool key_equals(const Key& key_) const { return this->value.first == key_;
}
- bool key_equals(const Key& key_, size_t hash_) const {
- return saved_hash == hash_ && this->value.first == key_;
- }
- bool key_equals(const Key& key_, size_t hash_, const typename
Base::State&) const {
- return key_equals(key_, hash_);
- }
-
- void set_hash(size_t hash_value) { saved_hash = hash_value; }
- size_t get_hash(const Hash& /*hash_function*/) const { return saved_hash; }
-};
-
-template <typename Key, typename Mapped, typename Hash, typename State>
-ALWAYS_INLINE inline auto lookup_result_get_key(
- HashMapCellWithSavedHash<Key, Mapped, Hash, State>* cell) {
- return &cell->get_first();
-}
-
-template <typename Key, typename Mapped, typename Hash, typename State>
-ALWAYS_INLINE inline auto lookup_result_get_mapped(
- HashMapCellWithSavedHash<Key, Mapped, Hash, State>* cell) {
- return &cell->get_second();
-}
-
-template <typename Key, typename Cell, typename Hash = DefaultHash<Key>,
- typename Grower = HashTableGrower<>, typename Allocator =
HashTableAllocator>
-class HashMapTable : public HashTable<Key, Cell, Hash, Grower, Allocator> {
-public:
- using Self = HashMapTable;
- using Base = HashTable<Key, Cell, Hash, Grower, Allocator>;
-
- using key_type = Key;
- using value_type = typename Cell::value_type;
- using mapped_type = typename Cell::Mapped;
-
- using LookupResult = typename Base::LookupResult;
-
- using HashTable<Key, Cell, Hash, Grower, Allocator>::HashTable;
-
- /// Call func(Mapped &) for each hash map element.
- template <typename Func>
- void for_each_mapped(Func&& func) {
- for (auto& v : *this) func(v.get_second());
- }
-
- mapped_type& ALWAYS_INLINE operator[](Key x) {
- typename HashMapTable::LookupResult it;
- bool inserted;
- this->emplace(x, it, inserted);
-
- /** It may seem that initialization is not necessary for POD-types (or
__has_trivial_constructor),
- * since the hash table memory is initially initialized with zeros.
- * But, in fact, an empty cell may not be initialized with zeros in
the following cases:
- * - ZeroValueStorage (it only zeros the key);
- * - after resizing and moving a part of the cells to the new half of
the hash table, the old cells also have only the key to zero.
- *
- * On performance, there is almost always no difference, due to the
fact that it->second is usually assigned immediately
- * after calling `operator[]`, and since `operator[]` is inlined,
the compiler removes unnecessary initialization.
- *
- * Sometimes due to initialization, the performance even grows. This
occurs in code like `++map[key]`.
- * When we do the initialization, for new cells, it's enough to make
`store 1` right away.
- * And if we did not initialize, then even though there was zero in
the cell,
- * the compiler can not guess about this, and generates the `load`,
`increment`, `store` code.
- */
- if (inserted) new (lookup_result_get_mapped(it)) mapped_type();
-
- return *lookup_result_get_mapped(it);
- }
-
- template <typename MappedType>
- char* get_null_key_data() {
- return nullptr;
- }
- bool has_null_key_data() const { return false; }
-};
-
-template <typename Key, typename Cell, typename Hash = DefaultHash<Key>,
- typename Grower = HashTableGrower<>, typename Allocator =
HashTableAllocator>
-class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower,
Allocator> {
+namespace doris {
+template <typename Key, typename Hash = DefaultHash<Key>>
+class JoinHashTable {
public:
- using Self = JoinHashMapTable;
- using Base = HashMapTable<Key, Cell, Hash, Grower, Allocator>;
-
using key_type = Key;
- using value_type = typename Cell::value_type;
- using mapped_type = typename Cell::Mapped;
-
- using LookupResult = typename Base::LookupResult;
+ using mapped_type = void*;
+ using value_type = void*;
+ size_t hash(const Key& x) const { return Hash()(x); }
static uint32_t calc_bucket_size(size_t num_elem) {
size_t expect_bucket_size = num_elem + (num_elem - 1) / 7;
@@ -226,17 +54,17 @@ public:
first.resize(bucket_size + 1);
next.resize(num_elem);
- if constexpr (JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
- JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN ||
- JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
- JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
+ if constexpr (JoinOpType == TJoinOp::FULL_OUTER_JOIN ||
+ JoinOpType == TJoinOp::RIGHT_OUTER_JOIN ||
+ JoinOpType == TJoinOp::RIGHT_ANTI_JOIN ||
+ JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) {
visited.resize(num_elem);
}
}
uint32_t get_bucket_size() const { return bucket_size; }
- size_t size() const { return Base::size() == 0 ? next.size() :
Base::size(); }
+ size_t size() const { return next.size(); }
std::vector<uint8_t>& get_visited() { return visited; }
@@ -255,9 +83,8 @@ public:
auto find_batch(const Key* __restrict keys, const uint32_t* __restrict
build_idx_map,
int probe_idx, uint32_t build_idx, int probe_rows,
uint32_t* __restrict probe_idxs, bool& probe_visited,
- uint32_t* __restrict build_idxs,
- doris::vectorized::ColumnFilterHelper* mark_column) {
- if constexpr (JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN)
{
+ uint32_t* __restrict build_idxs,
vectorized::ColumnFilterHelper* mark_column) {
+ if constexpr (JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
if (_empty_build_side) {
return _process_null_aware_left_anti_join_for_empty_build_side<
JoinOpType, with_other_conjuncts, is_mark_join>(
@@ -275,22 +102,21 @@ public:
probe_idxs, build_idxs,
mark_column);
}
- if constexpr (JoinOpType == doris::TJoinOp::INNER_JOIN ||
- JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
- JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
- JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN) {
+ if constexpr (JoinOpType == TJoinOp::INNER_JOIN || JoinOpType ==
TJoinOp::FULL_OUTER_JOIN ||
+ JoinOpType == TJoinOp::LEFT_OUTER_JOIN ||
+ JoinOpType == TJoinOp::RIGHT_OUTER_JOIN) {
return _find_batch_inner_outer_join<JoinOpType>(keys,
build_idx_map, probe_idx,
build_idx,
probe_rows, probe_idxs,
probe_visited,
build_idxs);
}
- if constexpr (JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN ||
- JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ||
- JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN)
{
+ if constexpr (JoinOpType == TJoinOp::LEFT_ANTI_JOIN ||
+ JoinOpType == TJoinOp::LEFT_SEMI_JOIN ||
+ JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
return _find_batch_left_semi_anti<JoinOpType, need_judge_null>(
keys, build_idx_map, probe_idx, probe_rows, probe_idxs);
}
- if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
- JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
+ if constexpr (JoinOpType == TJoinOp::RIGHT_ANTI_JOIN ||
+ JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) {
return _find_batch_right_semi_anti(keys, build_idx_map, probe_idx,
probe_rows);
}
return std::tuple {0, 0U, 0};
@@ -306,7 +132,7 @@ public:
while (count < batch_size && iter_idx < elem_num) {
const auto matched = visited[iter_idx];
build_idxs[count] = iter_idx;
- if constexpr (JoinOpType != doris::TJoinOp::RIGHT_SEMI_JOIN) {
+ if constexpr (JoinOpType != TJoinOp::RIGHT_SEMI_JOIN) {
count += !matched;
} else {
count += matched;
@@ -336,7 +162,7 @@ private:
auto _find_batch_mark(const Key* __restrict keys, const uint32_t*
__restrict build_idx_map,
int probe_idx, int probe_rows, uint32_t* __restrict
probe_idxs,
uint32_t* __restrict build_idxs,
- doris::vectorized::ColumnFilterHelper* mark_column) {
+ vectorized::ColumnFilterHelper* mark_column) {
auto matched_cnt = 0;
const auto batch_size = max_batch_size;
@@ -351,8 +177,8 @@ private:
// mark result as null when probe row is null
mark_column->insert_null();
} else {
- bool matched = JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ?
build_idx != 0
- :
build_idx == 0;
+ bool matched =
+ JoinOpType == TJoinOp::LEFT_SEMI_JOIN ? build_idx != 0
: build_idx == 0;
if (!matched && _has_null_key) {
mark_column->insert_null();
} else {
@@ -370,8 +196,8 @@ private:
template <int JoinOpType, bool with_other_conjuncts, bool is_mark_join>
auto _process_null_aware_left_anti_join_for_empty_build_side(
int probe_idx, int probe_rows, uint32_t* __restrict probe_idxs,
- uint32_t* __restrict build_idxs,
doris::vectorized::ColumnFilterHelper* mark_column) {
- static_assert(JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN);
+ uint32_t* __restrict build_idxs, vectorized::ColumnFilterHelper*
mark_column) {
+ static_assert(JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN);
auto matched_cnt = 0;
const auto batch_size = max_batch_size;
@@ -427,8 +253,7 @@ private:
while (build_idx && keys[probe_idx] != build_keys[build_idx]) {
build_idx = next[build_idx];
}
- bool matched =
- JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ? build_idx
!= 0 : build_idx == 0;
+ bool matched = JoinOpType == TJoinOp::LEFT_SEMI_JOIN ? build_idx
!= 0 : build_idx == 0;
probe_idxs[matched_cnt] = probe_idx++;
matched_cnt += matched;
}
@@ -444,8 +269,8 @@ private:
auto do_the_probe = [&]() {
while (build_idx && matched_cnt < batch_size) {
- if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
- JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
+ if constexpr (JoinOpType == TJoinOp::RIGHT_ANTI_JOIN ||
+ JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) {
if (!visited[build_idx] && keys[probe_idx] ==
build_keys[build_idx]) {
probe_idxs[matched_cnt] = probe_idx;
build_idxs[matched_cnt] = build_idx;
@@ -459,11 +284,11 @@ private:
build_idx = next[build_idx];
}
- if constexpr (JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
- JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
- JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ||
- JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN ||
- JoinOpType ==
doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
+ if constexpr (JoinOpType == TJoinOp::LEFT_OUTER_JOIN ||
+ JoinOpType == TJoinOp::FULL_OUTER_JOIN ||
+ JoinOpType == TJoinOp::LEFT_SEMI_JOIN ||
+ JoinOpType == TJoinOp::LEFT_ANTI_JOIN ||
+ JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
// may over batch_size when emplace 0 into build_idxs
if (!build_idx) {
probe_idxs[matched_cnt] = probe_idx;
@@ -503,8 +328,8 @@ private:
probe_idxs[matched_cnt] = probe_idx;
build_idxs[matched_cnt] = build_idx;
matched_cnt++;
- if constexpr (JoinOpType ==
doris::TJoinOp::RIGHT_OUTER_JOIN ||
- JoinOpType ==
doris::TJoinOp::FULL_OUTER_JOIN) {
+ if constexpr (JoinOpType == TJoinOp::RIGHT_OUTER_JOIN ||
+ JoinOpType == TJoinOp::FULL_OUTER_JOIN) {
if (!visited[build_idx]) {
visited[build_idx] = 1;
}
@@ -513,8 +338,8 @@ private:
build_idx = next[build_idx];
}
- if constexpr (JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
- JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN) {
+ if constexpr (JoinOpType == TJoinOp::LEFT_OUTER_JOIN ||
+ JoinOpType == TJoinOp::FULL_OUTER_JOIN) {
// `(!matched_cnt || probe_idxs[matched_cnt - 1] !=
probe_idx)` means not match one build side
probe_visited |= (matched_cnt && probe_idxs[matched_cnt - 1]
== probe_idx);
if (!build_idx) {
@@ -553,27 +378,8 @@ private:
// use in iter hash map
mutable uint32_t iter_idx = 1;
- Cell cell;
- doris::vectorized::Arena* pool;
+ vectorized::Arena* pool;
bool _has_null_key = false;
bool _empty_build_side = true;
};
-
-template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,
- typename Grower = HashTableGrower<>, typename Allocator =
HashTableAllocator>
-using HashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash,
Grower, Allocator>;
-
-template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>>
-using JoinFixedHashMap = JoinHashMapTable<Key, HashMapCell<Key, Mapped, Hash>,
Hash>;
-
-template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,
- typename Grower = HashTableGrower<>, typename Allocator =
HashTableAllocator>
-using HashMapWithSavedHash =
- HashMapTable<Key, HashMapCellWithSavedHash<Key, Mapped, Hash>, Hash,
Grower, Allocator>;
-
-template <typename Key, typename Mapped, typename Hash, size_t
initial_size_degree>
-using HashMapWithStackMemory = HashMapTable<
- Key, HashMapCellWithSavedHash<Key, Mapped, Hash>, Hash,
- HashTableGrower<initial_size_degree>,
- HashTableAllocatorWithStackMemory<(1ULL << initial_size_degree) *
- sizeof(HashMapCellWithSavedHash<Key,
Mapped, Hash>)>>;
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/common/hash_table/partitioned_hash_map.h
b/be/src/vec/common/hash_table/partitioned_hash_map.h
index f23b0a347de..a2db6fece35 100644
--- a/be/src/vec/common/hash_table/partitioned_hash_map.h
+++ b/be/src/vec/common/hash_table/partitioned_hash_map.h
@@ -22,7 +22,7 @@
#include "vec/common/hash_table/hash_map.h"
#include "vec/common/hash_table/partitioned_hash_table.h"
#include "vec/common/hash_table/ph_hash_map.h"
-
+namespace doris {
template <typename ImplTable>
class PartitionedHashMapTable : public PartitionedHashTable<ImplTable> {
public:
@@ -57,3 +57,4 @@ using PartitionedHashMap =
template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>>
using PHNormalHashMap = PHHashMap<Key, Mapped, Hash, false>;
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/common/hash_table/string_hash_map.h
b/be/src/vec/common/hash_table/string_hash_map.h
index f1efd0fab12..61d304cf7d8 100644
--- a/be/src/vec/common/hash_table/string_hash_map.h
+++ b/be/src/vec/common/hash_table/string_hash_map.h
@@ -23,6 +23,7 @@
#include "vec/common/hash_table/hash_map.h"
#include "vec/common/hash_table/string_hash_table.h"
+namespace doris {
template <typename Key, typename TMapped>
struct StringHashMapCell : public HashMapCell<Key, TMapped,
StringHashTableHash, HashTableNoState> {
using Base = HashMapCell<Key, TMapped, StringHashTableHash,
HashTableNoState>;
@@ -152,3 +153,4 @@ public:
}
bool has_null_key_data() const { return false; }
};
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp
b/be/src/vec/exec/join/vhash_join_node.cpp
index 39e05936397..e6c00d94a2f 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -1049,7 +1049,7 @@ void HashJoinNode::_hash_table_init(RuntimeState* state) {
return;
}
- if (!try_get_hash_map_context_fixed<JoinFixedHashMap,
HashCRC32, RowRefListType>(
+ if (!try_get_hash_map_context_fixed<JoinHashMap, HashCRC32,
RowRefListType>(
*_hash_table_variants, _build_expr_ctxs)) {
_hash_table_variants->emplace<SerializedHashTableContext<RowRefListType>>();
}
diff --git a/be/src/vec/exec/vset_operation_node.cpp
b/be/src/vec/exec/vset_operation_node.cpp
index 3c47638ef42..75317b4c933 100644
--- a/be/src/vec/exec/vset_operation_node.cpp
+++ b/be/src/vec/exec/vset_operation_node.cpp
@@ -183,16 +183,16 @@ void VSetOperationNode<is_intersect>::hash_table_init() {
switch (_child_expr_lists[0][0]->root()->result_type()) {
case TYPE_BOOLEAN:
case TYPE_TINYINT:
-
_hash_table_variants->emplace<I8HashTableContext<RowRefListWithFlags>>();
+
_hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt8>>();
break;
case TYPE_SMALLINT:
-
_hash_table_variants->emplace<I16HashTableContext<RowRefListWithFlags>>();
+
_hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt16>>();
break;
case TYPE_INT:
case TYPE_FLOAT:
case TYPE_DATEV2:
case TYPE_DECIMAL32:
-
_hash_table_variants->emplace<I32HashTableContext<RowRefListWithFlags>>();
+
_hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt32>>();
break;
case TYPE_BIGINT:
case TYPE_DOUBLE:
@@ -200,21 +200,21 @@ void VSetOperationNode<is_intersect>::hash_table_init() {
case TYPE_DATE:
case TYPE_DECIMAL64:
case TYPE_DATETIMEV2:
-
_hash_table_variants->emplace<I64HashTableContext<RowRefListWithFlags>>();
+
_hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt64>>();
break;
case TYPE_LARGEINT:
case TYPE_DECIMALV2:
case TYPE_DECIMAL128I:
-
_hash_table_variants->emplace<I128HashTableContext<RowRefListWithFlags>>();
+
_hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt128>>();
break;
default:
-
_hash_table_variants->emplace<SerializedHashTableContext<RowRefListWithFlags>>();
+ _hash_table_variants->emplace<SetSerializedHashTableContext>();
}
return;
}
- if (!try_get_hash_map_context_fixed<JoinFixedHashMap, HashCRC32,
RowRefListWithFlags>(
+ if (!try_get_hash_map_context_fixed<NormalHashMap, HashCRC32,
RowRefListWithFlags>(
*_hash_table_variants, _child_expr_lists[0])) {
-
_hash_table_variants->emplace<SerializedHashTableContext<RowRefListWithFlags>>();
+ _hash_table_variants->emplace<SetSerializedHashTableContext>();
}
}
diff --git a/be/src/vec/exec/vset_operation_node.h
b/be/src/vec/exec/vset_operation_node.h
index b1ab9c47650..ce5a8eb1dbc 100644
--- a/be/src/vec/exec/vset_operation_node.h
+++ b/be/src/vec/exec/vset_operation_node.h
@@ -31,6 +31,7 @@
#include "vec/columns/column.h"
#include "vec/common/arena.h"
#include "vec/core/block.h"
+#include "vec/core/types.h"
#include "vec/exec/join/process_hash_table_probe.h"
#include "vec/exec/join/vhash_join_node.h"
@@ -45,18 +46,14 @@ class VExprContext;
struct RowRefListWithFlags;
using SetHashTableVariants = std::variant<
- std::monostate, SerializedHashTableContext<RowRefListWithFlags>,
- I8HashTableContext<RowRefListWithFlags>,
I16HashTableContext<RowRefListWithFlags>,
- I32HashTableContext<RowRefListWithFlags>,
I64HashTableContext<RowRefListWithFlags>,
- I128HashTableContext<RowRefListWithFlags>,
I256HashTableContext<RowRefListWithFlags>,
- I64FixedKeyHashTableContext<true, RowRefListWithFlags>,
- I64FixedKeyHashTableContext<false, RowRefListWithFlags>,
- I128FixedKeyHashTableContext<true, RowRefListWithFlags>,
- I128FixedKeyHashTableContext<false, RowRefListWithFlags>,
- I256FixedKeyHashTableContext<true, RowRefListWithFlags>,
- I256FixedKeyHashTableContext<false, RowRefListWithFlags>,
- I136FixedKeyHashTableContext<true, RowRefListWithFlags>,
- I136FixedKeyHashTableContext<false, RowRefListWithFlags>>;
+ std::monostate, MethodSerialized<HashMap<StringRef,
RowRefListWithFlags>>,
+ SetPrimaryTypeHashTableContext<UInt8>,
SetPrimaryTypeHashTableContext<UInt16>,
+ SetPrimaryTypeHashTableContext<UInt32>,
SetPrimaryTypeHashTableContext<UInt64>,
+ SetPrimaryTypeHashTableContext<UInt128>,
SetPrimaryTypeHashTableContext<UInt256>,
+ SetFixedKeyHashTableContext<UInt64, true>,
SetFixedKeyHashTableContext<UInt64, false>,
+ SetFixedKeyHashTableContext<UInt128, true>,
SetFixedKeyHashTableContext<UInt128, false>,
+ SetFixedKeyHashTableContext<UInt256, true>,
SetFixedKeyHashTableContext<UInt256, false>,
+ SetFixedKeyHashTableContext<UInt136, true>,
SetFixedKeyHashTableContext<UInt136, false>>;
template <bool is_intersect>
class VSetOperationNode final : public ExecNode {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org