This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 17c8748c310 branch-4.0: [Improvement](hash) add int96 int104 to hash method #58770 (#58902)
17c8748c310 is described below
commit 17c8748c310203735a790beea6c556a08205acc9
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Dec 11 19:22:43 2025 +0800
branch-4.0: [Improvement](hash) add int96 int104 to hash method #58770 (#58902)
Cherry-picked from #58770
Co-authored-by: Pxl <[email protected]>
---
be/src/pipeline/common/agg_utils.h | 10 ++++++++++
be/src/pipeline/common/distinct_agg_utils.h | 10 ++++++++++
be/src/pipeline/common/join_utils.h | 9 +++++++++
be/src/pipeline/common/partition_sort_utils.h | 10 ++++++++++
be/src/pipeline/common/set_utils.h | 10 ++++++++++
.../exec/join/process_hash_table_probe_impl.h | 2 ++
be/src/vec/common/hash_table/hash.h | 21 +++++++++++++++++++++
be/src/vec/common/hash_table/hash_key_type.h | 6 ++++++
be/src/vec/common/uint128.h | 19 +++++++++++++++++++
be/src/vec/functions/complex_dict_hash_map.h | 8 ++++++++
be/test/pipeline/common/distinct_agg_utils_test.cpp | 10 ++++++++++
be/test/pipeline/common/set_utils_test.cpp | 8 ++++++++
12 files changed, 123 insertions(+)
diff --git a/be/src/pipeline/common/agg_utils.h b/be/src/pipeline/common/agg_utils.h
index f0cf0a17f2a..f676013c27f 100644
--- a/be/src/pipeline/common/agg_utils.h
+++ b/be/src/pipeline/common/agg_utils.h
@@ -82,6 +82,8 @@ using AggregatedMethodVariants = std::variant<
vectorized::MethodStringNoCache<AggregatedDataWithNullableShortStringKey>>,
vectorized::MethodKeysFixed<AggData<vectorized::UInt64>>,
vectorized::MethodKeysFixed<AggData<vectorized::UInt72>>,
+ vectorized::MethodKeysFixed<AggData<vectorized::UInt96>>,
+ vectorized::MethodKeysFixed<AggData<vectorized::UInt104>>,
vectorized::MethodKeysFixed<AggData<vectorized::UInt128>>,
vectorized::MethodKeysFixed<AggData<vectorized::UInt136>>,
vectorized::MethodKeysFixed<AggData<vectorized::UInt256>>>;
@@ -142,6 +144,14 @@ struct AggregatedDataVariants
method_variant.emplace<vectorized::MethodKeysFixed<AggData<vectorized::UInt72>>>(
get_key_sizes(data_types));
break;
+ case HashKeyType::fixed96:
+ method_variant.emplace<vectorized::MethodKeysFixed<AggData<vectorized::UInt96>>>(
+ get_key_sizes(data_types));
+ break;
+ case HashKeyType::fixed104:
+ method_variant.emplace<vectorized::MethodKeysFixed<AggData<vectorized::UInt104>>>(
+ get_key_sizes(data_types));
+ break;
case HashKeyType::fixed128:
method_variant.emplace<vectorized::MethodKeysFixed<AggData<vectorized::UInt128>>>(
get_key_sizes(data_types));
diff --git a/be/src/pipeline/common/distinct_agg_utils.h b/be/src/pipeline/common/distinct_agg_utils.h
index 3c95a2793fc..17ec246be16 100644
--- a/be/src/pipeline/common/distinct_agg_utils.h
+++ b/be/src/pipeline/common/distinct_agg_utils.h
@@ -106,6 +106,8 @@ using DistinctMethodVariants = std::variant<
vectorized::DataWithNullKey<DistinctDataWithShortStringKey>>>,
vectorized::MethodKeysFixed<DistinctData<vectorized::UInt64>>,
vectorized::MethodKeysFixed<DistinctData<vectorized::UInt72>>,
+ vectorized::MethodKeysFixed<DistinctData<vectorized::UInt96>>,
+ vectorized::MethodKeysFixed<DistinctData<vectorized::UInt104>>,
vectorized::MethodKeysFixed<DistinctData<vectorized::UInt128>>,
vectorized::MethodKeysFixed<DistinctData<vectorized::UInt136>>,
vectorized::MethodKeysFixed<DistinctData<vectorized::UInt256>>>;
@@ -161,6 +163,14 @@ struct DistinctDataVariants
method_variant.emplace<vectorized::MethodKeysFixed<DistinctData<vectorized::UInt72>>>(
get_key_sizes(data_types));
break;
+ case HashKeyType::fixed96:
+ method_variant.emplace<vectorized::MethodKeysFixed<DistinctData<vectorized::UInt96>>>(
+ get_key_sizes(data_types));
+ break;
+ case HashKeyType::fixed104:
+ method_variant.emplace<vectorized::MethodKeysFixed<DistinctData<vectorized::UInt104>>>(
+ get_key_sizes(data_types));
+ break;
case HashKeyType::fixed128:
method_variant.emplace<vectorized::MethodKeysFixed<DistinctData<vectorized::UInt128>>>(
get_key_sizes(data_types));
diff --git a/be/src/pipeline/common/join_utils.h b/be/src/pipeline/common/join_utils.h
index 08708f037ba..8d46c317fb5 100644
--- a/be/src/pipeline/common/join_utils.h
+++ b/be/src/pipeline/common/join_utils.h
@@ -67,6 +67,7 @@ using HashTableVariants = std::variant<
DirectPrimaryTypeHashTableContext<vectorized::UInt64>,
DirectPrimaryTypeHashTableContext<vectorized::UInt128>,
FixedKeyHashTableContext<vectorized::UInt64>,
FixedKeyHashTableContext<vectorized::UInt72>,
+ FixedKeyHashTableContext<vectorized::UInt96>,
FixedKeyHashTableContext<vectorized::UInt104>,
FixedKeyHashTableContext<vectorized::UInt128>,
FixedKeyHashTableContext<vectorized::UInt136>,
FixedKeyHashTableContext<vectorized::UInt256>, MethodOneString>;
@@ -108,6 +109,14 @@ struct JoinDataVariants {
method_variant.emplace<FixedKeyHashTableContext<vectorized::UInt72>>(
get_key_sizes(data_types));
break;
+ case HashKeyType::fixed96:
+ method_variant.emplace<FixedKeyHashTableContext<vectorized::UInt96>>(
+ get_key_sizes(data_types));
+ break;
+ case HashKeyType::fixed104:
+ method_variant.emplace<FixedKeyHashTableContext<vectorized::UInt104>>(
+ get_key_sizes(data_types));
+ break;
case HashKeyType::fixed128:
method_variant.emplace<FixedKeyHashTableContext<vectorized::UInt128>>(
get_key_sizes(data_types));
diff --git a/be/src/pipeline/common/partition_sort_utils.h b/be/src/pipeline/common/partition_sort_utils.h
index ccd1b6a144d..e1eebddf1c9 100644
--- a/be/src/pipeline/common/partition_sort_utils.h
+++ b/be/src/pipeline/common/partition_sort_utils.h
@@ -141,6 +141,8 @@ using PartitionedMethodVariants = std::variant<
PartitionDataSingleNullable<vectorized::UInt256>,
vectorized::MethodKeysFixed<PartitionData<vectorized::UInt64>>,
vectorized::MethodKeysFixed<PartitionData<vectorized::UInt72>>,
+ vectorized::MethodKeysFixed<PartitionData<vectorized::UInt96>>,
+ vectorized::MethodKeysFixed<PartitionData<vectorized::UInt104>>,
vectorized::MethodKeysFixed<PartitionData<vectorized::UInt128>>,
vectorized::MethodKeysFixed<PartitionData<vectorized::UInt136>>,
vectorized::MethodKeysFixed<PartitionData<vectorized::UInt256>>,
@@ -204,6 +206,14 @@ struct PartitionedHashMapVariants
method_variant.emplace<vectorized::MethodKeysFixed<PartitionData<vectorized::UInt72>>>(
get_key_sizes(data_types));
break;
+ case HashKeyType::fixed96:
+ method_variant.emplace<vectorized::MethodKeysFixed<PartitionData<vectorized::UInt96>>>(
+ get_key_sizes(data_types));
+ break;
+ case HashKeyType::fixed104:
+ method_variant.emplace<vectorized::MethodKeysFixed<PartitionData<vectorized::UInt104>>>(
+ get_key_sizes(data_types));
+ break;
case HashKeyType::fixed128:
method_variant.emplace<vectorized::MethodKeysFixed<PartitionData<vectorized::UInt128>>>(
get_key_sizes(data_types));
diff --git a/be/src/pipeline/common/set_utils.h b/be/src/pipeline/common/set_utils.h
index 665a7710fa8..d08ad883b83 100644
--- a/be/src/pipeline/common/set_utils.h
+++ b/be/src/pipeline/common/set_utils.h
@@ -68,6 +68,8 @@ using SetHashTableVariants =
SetPrimaryTypeHashTableContext<vectorized::UInt256>,
SetFixedKeyHashTableContext<vectorized::UInt64>,
SetFixedKeyHashTableContext<vectorized::UInt72>,
+ SetFixedKeyHashTableContext<vectorized::UInt96>,
+ SetFixedKeyHashTableContext<vectorized::UInt104>,
SetFixedKeyHashTableContext<vectorized::UInt128>,
SetFixedKeyHashTableContext<vectorized::UInt256>,
SetFixedKeyHashTableContext<vectorized::UInt136>>;
@@ -110,6 +112,14 @@ struct SetDataVariants
method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt72>>(
get_key_sizes(data_types));
break;
+ case HashKeyType::fixed96:
+ method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt96>>(
+ get_key_sizes(data_types));
+ break;
+ case HashKeyType::fixed104:
+ method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt104>>(
+ get_key_sizes(data_types));
+ break;
case HashKeyType::fixed128:
method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt128>>(
get_key_sizes(data_types));
diff --git a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
index 1253afabdbd..60261223cdf 100644
--- a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
@@ -813,6 +813,8 @@ struct ExtractType<T(U)> {
INSTANTIATION(JoinOpType, (PrimaryTypeHashTableContext<vectorized::UInt256>)); \
INSTANTIATION(JoinOpType, (FixedKeyHashTableContext<vectorized::UInt64>)); \
INSTANTIATION(JoinOpType, (FixedKeyHashTableContext<vectorized::UInt72>)); \
+ INSTANTIATION(JoinOpType, (FixedKeyHashTableContext<vectorized::UInt96>)); \
+ INSTANTIATION(JoinOpType, (FixedKeyHashTableContext<vectorized::UInt104>)); \
INSTANTIATION(JoinOpType, (FixedKeyHashTableContext<vectorized::UInt128>)); \
INSTANTIATION(JoinOpType, (FixedKeyHashTableContext<vectorized::UInt136>)); \
INSTANTIATION(JoinOpType, (FixedKeyHashTableContext<vectorized::UInt256>)); \
diff --git a/be/src/vec/common/hash_table/hash.h b/be/src/vec/common/hash_table/hash.h
index 6817d7e091d..348cb5d4555 100644
--- a/be/src/vec/common/hash_table/hash.h
+++ b/be/src/vec/common/hash_table/hash.h
@@ -201,6 +201,27 @@ struct HashCRC32<doris::vectorized::UInt72> {
}
};
+template <>
+struct HashCRC32<doris::vectorized::UInt96> {
+ size_t operator()(const doris::vectorized::UInt96& x) const {
+ doris::vectorized::UInt64 crc = -1ULL;
+ crc = _mm_crc32_u32(crc, x.a);
+ crc = _mm_crc32_u64(crc, x.b);
+ return crc;
+ }
+};
+
+template <>
+struct HashCRC32<doris::vectorized::UInt104> {
+ size_t operator()(const doris::vectorized::UInt104& x) const {
+ doris::vectorized::UInt64 crc = -1ULL;
+ crc = _mm_crc32_u8(crc, x.a);
+ crc = _mm_crc32_u32(crc, x.b);
+ crc = _mm_crc32_u64(crc, x.c);
+ return crc;
+ }
+};
+
template <>
struct HashCRC32<doris::vectorized::UInt136> {
size_t operator()(const doris::vectorized::UInt136& x) const {
diff --git a/be/src/vec/common/hash_table/hash_key_type.h b/be/src/vec/common/hash_table/hash_key_type.h
index 7a04137324e..025af1bdc40 100644
--- a/be/src/vec/common/hash_table/hash_key_type.h
+++ b/be/src/vec/common/hash_table/hash_key_type.h
@@ -38,6 +38,8 @@ enum class HashKeyType {
string_key,
fixed64,
fixed72,
+ fixed96,
+ fixed104,
fixed128,
fixed136,
fixed256
@@ -62,6 +64,10 @@ inline HashKeyType get_hash_key_type_with_fixed(size_t size) {
return HashKeyType::fixed64;
} else if (size <= sizeof(UInt72)) {
return HashKeyType::fixed72;
+ } else if (size <= sizeof(UInt96)) {
+ return HashKeyType::fixed96;
+ } else if (size <= sizeof(UInt104)) {
+ return HashKeyType::fixed104;
} else if (size <= sizeof(UInt128)) {
return HashKeyType::fixed128;
} else if (size <= sizeof(UInt136)) {
diff --git a/be/src/vec/common/uint128.h b/be/src/vec/common/uint128.h
index 2a6bb70177d..58db42868a9 100644
--- a/be/src/vec/common/uint128.h
+++ b/be/src/vec/common/uint128.h
@@ -70,6 +70,25 @@ struct UInt72 {
};
#pragma pack()
+#pragma pack(1)
+struct UInt96 {
+ UInt32 a;
+ UInt64 b;
+
+ bool operator==(const UInt96& rhs) const { return a == rhs.a && b == rhs.b; }
+};
+#pragma pack()
+
+#pragma pack(1)
+struct UInt104 {
+ UInt8 a;
+ UInt32 b;
+ UInt64 c;
+
+ bool operator==(const UInt104& rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; }
+};
+#pragma pack()
+
#pragma pack(1)
struct UInt136 {
UInt8 a;
diff --git a/be/src/vec/functions/complex_dict_hash_map.h b/be/src/vec/functions/complex_dict_hash_map.h
index d815cbb0904..15db68240cf 100644
--- a/be/src/vec/functions/complex_dict_hash_map.h
+++ b/be/src/vec/functions/complex_dict_hash_map.h
@@ -48,6 +48,7 @@ using DictHashMapVariants = std::variant<
MethodOneNumber<UInt256, DictHashMap<UInt256>>,
MethodKeysFixed<DictHashMap<UInt64>>,
MethodKeysFixed<DictHashMap<UInt72>>,
+ MethodKeysFixed<DictHashMap<UInt96>>,
MethodKeysFixed<DictHashMap<UInt104>>,
MethodKeysFixed<DictHashMap<UInt128>>,
MethodKeysFixed<DictHashMap<UInt136>>,
MethodKeysFixed<DictHashMap<UInt256>>>;
@@ -87,6 +88,13 @@ struct DictionaryHashMapMethod
case HashKeyType::fixed72:
method_variant.emplace<MethodKeysFixed<DictHashMap<UInt72>>>(get_key_sizes(data_types));
break;
+ case HashKeyType::fixed96:
+ method_variant.emplace<MethodKeysFixed<DictHashMap<UInt96>>>(get_key_sizes(data_types));
+ break;
+ case HashKeyType::fixed104:
+ method_variant.emplace<MethodKeysFixed<DictHashMap<UInt104>>>(
+ get_key_sizes(data_types));
+ break;
case HashKeyType::fixed128:
method_variant.emplace<MethodKeysFixed<DictHashMap<UInt128>>>(
get_key_sizes(data_types));
diff --git a/be/test/pipeline/common/distinct_agg_utils_test.cpp b/be/test/pipeline/common/distinct_agg_utils_test.cpp
index 33a572455e2..788c69e3e53 100644
--- a/be/test/pipeline/common/distinct_agg_utils_test.cpp
+++ b/be/test/pipeline/common/distinct_agg_utils_test.cpp
@@ -164,6 +164,16 @@ TEST_F(DistinctAggUtilsTest, TestDistinctDataVariantsInitFixedKeys) {
vectorized::MethodKeysFixed<DistinctData<vectorized::UInt72>>>(
variants.method_variant));
break;
+ case HashKeyType::fixed96:
+ ASSERT_TRUE(std::holds_alternative<
+ vectorized::MethodKeysFixed<DistinctData<vectorized::UInt96>>>(
+ variants.method_variant));
+ break;
+ case HashKeyType::fixed104:
+ ASSERT_TRUE(std::holds_alternative<
+ vectorized::MethodKeysFixed<DistinctData<vectorized::UInt104>>>(
+ variants.method_variant));
+ break;
case HashKeyType::fixed128:
ASSERT_TRUE(std::holds_alternative<
vectorized::MethodKeysFixed<DistinctData<vectorized::UInt128>>>(
diff --git a/be/test/pipeline/common/set_utils_test.cpp b/be/test/pipeline/common/set_utils_test.cpp
index bb12a8edb6f..8766c1dd5b1 100644
--- a/be/test/pipeline/common/set_utils_test.cpp
+++ b/be/test/pipeline/common/set_utils_test.cpp
@@ -97,6 +97,14 @@ TEST_F(SetUtilsTest, TestSetDataVariantsInitFixedKeys) {
ASSERT_TRUE(std::holds_alternative<SetFixedKeyHashTableContext<vectorized::UInt72>>(
variants.method_variant));
break;
+ case HashKeyType::fixed96:
+ ASSERT_TRUE(std::holds_alternative<SetFixedKeyHashTableContext<vectorized::UInt96>>(
+ variants.method_variant));
+ break;
+ case HashKeyType::fixed104:
+ ASSERT_TRUE(std::holds_alternative<SetFixedKeyHashTableContext<vectorized::UInt104>>(
+ variants.method_variant));
+ break;
case HashKeyType::fixed128:
ASSERT_TRUE(std::holds_alternative<SetFixedKeyHashTableContext<vectorized::UInt128>>(
variants.method_variant));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]