This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 17c8748c310 branch-4.0: [Improvement](hash) add int96 int104 to hash method #58770 (#58902)
17c8748c310 is described below

commit 17c8748c310203735a790beea6c556a08205acc9
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Dec 11 19:22:43 2025 +0800

    branch-4.0: [Improvement](hash) add int96 int104 to hash method #58770 (#58902)
    
    Cherry-picked from #58770
    
    Co-authored-by: Pxl <[email protected]>
---
 be/src/pipeline/common/agg_utils.h                  | 10 ++++++++++
 be/src/pipeline/common/distinct_agg_utils.h         | 10 ++++++++++
 be/src/pipeline/common/join_utils.h                 |  9 +++++++++
 be/src/pipeline/common/partition_sort_utils.h       | 10 ++++++++++
 be/src/pipeline/common/set_utils.h                  | 10 ++++++++++
 .../exec/join/process_hash_table_probe_impl.h       |  2 ++
 be/src/vec/common/hash_table/hash.h                 | 21 +++++++++++++++++++++
 be/src/vec/common/hash_table/hash_key_type.h        |  6 ++++++
 be/src/vec/common/uint128.h                         | 19 +++++++++++++++++++
 be/src/vec/functions/complex_dict_hash_map.h        |  8 ++++++++
 be/test/pipeline/common/distinct_agg_utils_test.cpp | 10 ++++++++++
 be/test/pipeline/common/set_utils_test.cpp          |  8 ++++++++
 12 files changed, 123 insertions(+)

diff --git a/be/src/pipeline/common/agg_utils.h b/be/src/pipeline/common/agg_utils.h
index f0cf0a17f2a..f676013c27f 100644
--- a/be/src/pipeline/common/agg_utils.h
+++ b/be/src/pipeline/common/agg_utils.h
@@ -82,6 +82,8 @@ using AggregatedMethodVariants = std::variant<
                 vectorized::MethodStringNoCache<AggregatedDataWithNullableShortStringKey>>,
         vectorized::MethodKeysFixed<AggData<vectorized::UInt64>>,
         vectorized::MethodKeysFixed<AggData<vectorized::UInt72>>,
+        vectorized::MethodKeysFixed<AggData<vectorized::UInt96>>,
+        vectorized::MethodKeysFixed<AggData<vectorized::UInt104>>,
         vectorized::MethodKeysFixed<AggData<vectorized::UInt128>>,
         vectorized::MethodKeysFixed<AggData<vectorized::UInt136>>,
         vectorized::MethodKeysFixed<AggData<vectorized::UInt256>>>;
@@ -142,6 +144,14 @@ struct AggregatedDataVariants
             method_variant.emplace<vectorized::MethodKeysFixed<AggData<vectorized::UInt72>>>(
                     get_key_sizes(data_types));
             break;
+        case HashKeyType::fixed96:
+            method_variant.emplace<vectorized::MethodKeysFixed<AggData<vectorized::UInt96>>>(
+                    get_key_sizes(data_types));
+            break;
+        case HashKeyType::fixed104:
+            method_variant.emplace<vectorized::MethodKeysFixed<AggData<vectorized::UInt104>>>(
+                    get_key_sizes(data_types));
+            break;
         case HashKeyType::fixed128:
             method_variant.emplace<vectorized::MethodKeysFixed<AggData<vectorized::UInt128>>>(
                     get_key_sizes(data_types));
diff --git a/be/src/pipeline/common/distinct_agg_utils.h b/be/src/pipeline/common/distinct_agg_utils.h
index 3c95a2793fc..17ec246be16 100644
--- a/be/src/pipeline/common/distinct_agg_utils.h
+++ b/be/src/pipeline/common/distinct_agg_utils.h
@@ -106,6 +106,8 @@ using DistinctMethodVariants = std::variant<
                 vectorized::DataWithNullKey<DistinctDataWithShortStringKey>>>,
         vectorized::MethodKeysFixed<DistinctData<vectorized::UInt64>>,
         vectorized::MethodKeysFixed<DistinctData<vectorized::UInt72>>,
+        vectorized::MethodKeysFixed<DistinctData<vectorized::UInt96>>,
+        vectorized::MethodKeysFixed<DistinctData<vectorized::UInt104>>,
         vectorized::MethodKeysFixed<DistinctData<vectorized::UInt128>>,
         vectorized::MethodKeysFixed<DistinctData<vectorized::UInt136>>,
         vectorized::MethodKeysFixed<DistinctData<vectorized::UInt256>>>;
@@ -161,6 +163,14 @@ struct DistinctDataVariants
             method_variant.emplace<vectorized::MethodKeysFixed<DistinctData<vectorized::UInt72>>>(
                     get_key_sizes(data_types));
             break;
+        case HashKeyType::fixed96:
+            method_variant.emplace<vectorized::MethodKeysFixed<DistinctData<vectorized::UInt96>>>(
+                    get_key_sizes(data_types));
+            break;
+        case HashKeyType::fixed104:
+            method_variant.emplace<vectorized::MethodKeysFixed<DistinctData<vectorized::UInt104>>>(
+                    get_key_sizes(data_types));
+            break;
         case HashKeyType::fixed128:
             method_variant.emplace<vectorized::MethodKeysFixed<DistinctData<vectorized::UInt128>>>(
                     get_key_sizes(data_types));
diff --git a/be/src/pipeline/common/join_utils.h b/be/src/pipeline/common/join_utils.h
index 08708f037ba..8d46c317fb5 100644
--- a/be/src/pipeline/common/join_utils.h
+++ b/be/src/pipeline/common/join_utils.h
@@ -67,6 +67,7 @@ using HashTableVariants = std::variant<
         DirectPrimaryTypeHashTableContext<vectorized::UInt64>,
         DirectPrimaryTypeHashTableContext<vectorized::UInt128>,
         FixedKeyHashTableContext<vectorized::UInt64>, FixedKeyHashTableContext<vectorized::UInt72>,
+        FixedKeyHashTableContext<vectorized::UInt96>, FixedKeyHashTableContext<vectorized::UInt104>,
         FixedKeyHashTableContext<vectorized::UInt128>,
         FixedKeyHashTableContext<vectorized::UInt136>,
         FixedKeyHashTableContext<vectorized::UInt256>, MethodOneString>;
@@ -108,6 +109,14 @@ struct JoinDataVariants {
             method_variant.emplace<FixedKeyHashTableContext<vectorized::UInt72>>(
                     get_key_sizes(data_types));
             break;
+        case HashKeyType::fixed96:
+            method_variant.emplace<FixedKeyHashTableContext<vectorized::UInt96>>(
+                    get_key_sizes(data_types));
+            break;
+        case HashKeyType::fixed104:
+            method_variant.emplace<FixedKeyHashTableContext<vectorized::UInt104>>(
+                    get_key_sizes(data_types));
+            break;
         case HashKeyType::fixed128:
             method_variant.emplace<FixedKeyHashTableContext<vectorized::UInt128>>(
                     get_key_sizes(data_types));
diff --git a/be/src/pipeline/common/partition_sort_utils.h b/be/src/pipeline/common/partition_sort_utils.h
index ccd1b6a144d..e1eebddf1c9 100644
--- a/be/src/pipeline/common/partition_sort_utils.h
+++ b/be/src/pipeline/common/partition_sort_utils.h
@@ -141,6 +141,8 @@ using PartitionedMethodVariants = std::variant<
         PartitionDataSingleNullable<vectorized::UInt256>,
         vectorized::MethodKeysFixed<PartitionData<vectorized::UInt64>>,
         vectorized::MethodKeysFixed<PartitionData<vectorized::UInt72>>,
+        vectorized::MethodKeysFixed<PartitionData<vectorized::UInt96>>,
+        vectorized::MethodKeysFixed<PartitionData<vectorized::UInt104>>,
         vectorized::MethodKeysFixed<PartitionData<vectorized::UInt128>>,
         vectorized::MethodKeysFixed<PartitionData<vectorized::UInt136>>,
         vectorized::MethodKeysFixed<PartitionData<vectorized::UInt256>>,
@@ -204,6 +206,14 @@ struct PartitionedHashMapVariants
             method_variant.emplace<vectorized::MethodKeysFixed<PartitionData<vectorized::UInt72>>>(
                     get_key_sizes(data_types));
             break;
+        case HashKeyType::fixed96:
+            method_variant.emplace<vectorized::MethodKeysFixed<PartitionData<vectorized::UInt96>>>(
+                    get_key_sizes(data_types));
+            break;
+        case HashKeyType::fixed104:
+            method_variant.emplace<vectorized::MethodKeysFixed<PartitionData<vectorized::UInt104>>>(
+                    get_key_sizes(data_types));
+            break;
         case HashKeyType::fixed128:
             method_variant.emplace<vectorized::MethodKeysFixed<PartitionData<vectorized::UInt128>>>(
                     get_key_sizes(data_types));
diff --git a/be/src/pipeline/common/set_utils.h b/be/src/pipeline/common/set_utils.h
index 665a7710fa8..d08ad883b83 100644
--- a/be/src/pipeline/common/set_utils.h
+++ b/be/src/pipeline/common/set_utils.h
@@ -68,6 +68,8 @@ using SetHashTableVariants =
                      SetPrimaryTypeHashTableContext<vectorized::UInt256>,
                      SetFixedKeyHashTableContext<vectorized::UInt64>,
                      SetFixedKeyHashTableContext<vectorized::UInt72>,
+                     SetFixedKeyHashTableContext<vectorized::UInt96>,
+                     SetFixedKeyHashTableContext<vectorized::UInt104>,
                      SetFixedKeyHashTableContext<vectorized::UInt128>,
                      SetFixedKeyHashTableContext<vectorized::UInt256>,
                      SetFixedKeyHashTableContext<vectorized::UInt136>>;
@@ -110,6 +112,14 @@ struct SetDataVariants
             method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt72>>(
                     get_key_sizes(data_types));
             break;
+        case HashKeyType::fixed96:
+            method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt96>>(
+                    get_key_sizes(data_types));
+            break;
+        case HashKeyType::fixed104:
+            method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt104>>(
+                    get_key_sizes(data_types));
+            break;
         case HashKeyType::fixed128:
             method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt128>>(
                     get_key_sizes(data_types));
diff --git a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
index 1253afabdbd..60261223cdf 100644
--- a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
@@ -813,6 +813,8 @@ struct ExtractType<T(U)> {
     INSTANTIATION(JoinOpType, (PrimaryTypeHashTableContext<vectorized::UInt256>));       \
     INSTANTIATION(JoinOpType, (FixedKeyHashTableContext<vectorized::UInt64>));           \
     INSTANTIATION(JoinOpType, (FixedKeyHashTableContext<vectorized::UInt72>));           \
+    INSTANTIATION(JoinOpType, (FixedKeyHashTableContext<vectorized::UInt96>));           \
+    INSTANTIATION(JoinOpType, (FixedKeyHashTableContext<vectorized::UInt104>));          \
     INSTANTIATION(JoinOpType, (FixedKeyHashTableContext<vectorized::UInt128>));          \
     INSTANTIATION(JoinOpType, (FixedKeyHashTableContext<vectorized::UInt136>));          \
     INSTANTIATION(JoinOpType, (FixedKeyHashTableContext<vectorized::UInt256>));          \
diff --git a/be/src/vec/common/hash_table/hash.h b/be/src/vec/common/hash_table/hash.h
index 6817d7e091d..348cb5d4555 100644
--- a/be/src/vec/common/hash_table/hash.h
+++ b/be/src/vec/common/hash_table/hash.h
@@ -201,6 +201,27 @@ struct HashCRC32<doris::vectorized::UInt72> {
     }
 };
 
+template <>
+struct HashCRC32<doris::vectorized::UInt96> {
+    size_t operator()(const doris::vectorized::UInt96& x) const {
+        doris::vectorized::UInt64 crc = -1ULL;
+        crc = _mm_crc32_u32(crc, x.a);
+        crc = _mm_crc32_u64(crc, x.b);
+        return crc;
+    }
+};
+
+template <>
+struct HashCRC32<doris::vectorized::UInt104> {
+    size_t operator()(const doris::vectorized::UInt104& x) const {
+        doris::vectorized::UInt64 crc = -1ULL;
+        crc = _mm_crc32_u8(crc, x.a);
+        crc = _mm_crc32_u32(crc, x.b);
+        crc = _mm_crc32_u64(crc, x.c);
+        return crc;
+    }
+};
+
 template <>
 struct HashCRC32<doris::vectorized::UInt136> {
     size_t operator()(const doris::vectorized::UInt136& x) const {
diff --git a/be/src/vec/common/hash_table/hash_key_type.h b/be/src/vec/common/hash_table/hash_key_type.h
index 7a04137324e..025af1bdc40 100644
--- a/be/src/vec/common/hash_table/hash_key_type.h
+++ b/be/src/vec/common/hash_table/hash_key_type.h
@@ -38,6 +38,8 @@ enum class HashKeyType {
     string_key,
     fixed64,
     fixed72,
+    fixed96,
+    fixed104,
     fixed128,
     fixed136,
     fixed256
@@ -62,6 +64,10 @@ inline HashKeyType get_hash_key_type_with_fixed(size_t size) {
         return HashKeyType::fixed64;
     } else if (size <= sizeof(UInt72)) {
         return HashKeyType::fixed72;
+    } else if (size <= sizeof(UInt96)) {
+        return HashKeyType::fixed96;
+    } else if (size <= sizeof(UInt104)) {
+        return HashKeyType::fixed104;
     } else if (size <= sizeof(UInt128)) {
         return HashKeyType::fixed128;
     } else if (size <= sizeof(UInt136)) {
diff --git a/be/src/vec/common/uint128.h b/be/src/vec/common/uint128.h
index 2a6bb70177d..58db42868a9 100644
--- a/be/src/vec/common/uint128.h
+++ b/be/src/vec/common/uint128.h
@@ -70,6 +70,25 @@ struct UInt72 {
 };
 #pragma pack()
 
+#pragma pack(1)
+struct UInt96 {
+    UInt32 a;
+    UInt64 b;
+
+    bool operator==(const UInt96& rhs) const { return a == rhs.a && b == rhs.b; }
+};
+#pragma pack()
+
+#pragma pack(1)
+struct UInt104 {
+    UInt8 a;
+    UInt32 b;
+    UInt64 c;
+
+    bool operator==(const UInt104& rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; }
+};
+#pragma pack()
+
 #pragma pack(1)
 struct UInt136 {
     UInt8 a;
diff --git a/be/src/vec/functions/complex_dict_hash_map.h b/be/src/vec/functions/complex_dict_hash_map.h
index d815cbb0904..15db68240cf 100644
--- a/be/src/vec/functions/complex_dict_hash_map.h
+++ b/be/src/vec/functions/complex_dict_hash_map.h
@@ -48,6 +48,7 @@ using DictHashMapVariants = std::variant<
         MethodOneNumber<UInt256, DictHashMap<UInt256>>,
 
         MethodKeysFixed<DictHashMap<UInt64>>, MethodKeysFixed<DictHashMap<UInt72>>,
+        MethodKeysFixed<DictHashMap<UInt96>>, MethodKeysFixed<DictHashMap<UInt104>>,
         MethodKeysFixed<DictHashMap<UInt128>>, MethodKeysFixed<DictHashMap<UInt136>>,
         MethodKeysFixed<DictHashMap<UInt256>>>;
 
@@ -87,6 +88,13 @@ struct DictionaryHashMapMethod
         case HashKeyType::fixed72:
             method_variant.emplace<MethodKeysFixed<DictHashMap<UInt72>>>(get_key_sizes(data_types));
             break;
+        case HashKeyType::fixed96:
+            method_variant.emplace<MethodKeysFixed<DictHashMap<UInt96>>>(get_key_sizes(data_types));
+            break;
+        case HashKeyType::fixed104:
+            method_variant.emplace<MethodKeysFixed<DictHashMap<UInt104>>>(
+                    get_key_sizes(data_types));
+            break;
         case HashKeyType::fixed128:
             method_variant.emplace<MethodKeysFixed<DictHashMap<UInt128>>>(
                     get_key_sizes(data_types));
diff --git a/be/test/pipeline/common/distinct_agg_utils_test.cpp b/be/test/pipeline/common/distinct_agg_utils_test.cpp
index 33a572455e2..788c69e3e53 100644
--- a/be/test/pipeline/common/distinct_agg_utils_test.cpp
+++ b/be/test/pipeline/common/distinct_agg_utils_test.cpp
@@ -164,6 +164,16 @@ TEST_F(DistinctAggUtilsTest, TestDistinctDataVariantsInitFixedKeys) {
                         vectorized::MethodKeysFixed<DistinctData<vectorized::UInt72>>>(
                     variants.method_variant));
             break;
+        case HashKeyType::fixed96:
+            ASSERT_TRUE(std::holds_alternative<
+                        vectorized::MethodKeysFixed<DistinctData<vectorized::UInt96>>>(
+                    variants.method_variant));
+            break;
+        case HashKeyType::fixed104:
+            ASSERT_TRUE(std::holds_alternative<
+                        vectorized::MethodKeysFixed<DistinctData<vectorized::UInt104>>>(
+                    variants.method_variant));
+            break;
         case HashKeyType::fixed128:
             ASSERT_TRUE(std::holds_alternative<
                         vectorized::MethodKeysFixed<DistinctData<vectorized::UInt128>>>(
diff --git a/be/test/pipeline/common/set_utils_test.cpp b/be/test/pipeline/common/set_utils_test.cpp
index bb12a8edb6f..8766c1dd5b1 100644
--- a/be/test/pipeline/common/set_utils_test.cpp
+++ b/be/test/pipeline/common/set_utils_test.cpp
@@ -97,6 +97,14 @@ TEST_F(SetUtilsTest, TestSetDataVariantsInitFixedKeys) {
             ASSERT_TRUE(std::holds_alternative<SetFixedKeyHashTableContext<vectorized::UInt72>>(
                     variants.method_variant));
             break;
+        case HashKeyType::fixed96:
+            ASSERT_TRUE(std::holds_alternative<SetFixedKeyHashTableContext<vectorized::UInt96>>(
+                    variants.method_variant));
+            break;
+        case HashKeyType::fixed104:
+            ASSERT_TRUE(std::holds_alternative<SetFixedKeyHashTableContext<vectorized::UInt104>>(
+                    variants.method_variant));
+            break;
         case HashKeyType::fixed128:
             ASSERT_TRUE(std::holds_alternative<SetFixedKeyHashTableContext<vectorized::UInt128>>(
                     variants.method_variant));


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to