This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 0077b941806 [Performance](exec) replace SipHash in function by XXHash (#32919)
0077b941806 is described below

commit 0077b94180681c5ac9d213546da5397891d9801a
Author: HappenLee <[email protected]>
AuthorDate: Thu Mar 28 11:00:52 2024 +0800

    [Performance](exec) replace SipHash in function by XXHash (#32919)
---
 .../aggregate_functions/aggregate_function_uniq.h    | 10 +++-------
 be/src/vec/functions/uuid_numeric.cpp                | 20 +++++++++-----------
 2 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq.h b/be/src/vec/aggregate_functions/aggregate_function_uniq.h
index 72be9e01833..2e8855134eb 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_uniq.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_uniq.h
@@ -36,7 +36,6 @@
 #include "vec/common/assert_cast.h"
 #include "vec/common/hash_table/hash.h"
 #include "vec/common/hash_table/phmap_fwd_decl.h"
-#include "vec/common/sip_hash.h"
 #include "vec/common/string_ref.h"
 #include "vec/common/uint128.h"
 #include "vec/core/types.h"
@@ -64,17 +63,14 @@ template <typename T>
 struct AggregateFunctionUniqExactData {
     static constexpr bool is_string_key = std::is_same_v<T, String>;
     using Key = std::conditional_t<is_string_key, UInt128, T>;
-    using Hash = std::conditional_t<is_string_key, UInt128TrivialHash, HashCRC32<Key>>;
+    using Hash = HashCRC32<Key>;
 
     using Set = flat_hash_set<Key, Hash>;
 
     // TODO: replace SipHash with xxhash to speed up
     static UInt128 ALWAYS_INLINE get_key(const StringRef& value) {
-        UInt128 key;
-        SipHash hash;
-        hash.update(value.data, value.size);
-        hash.get128(key.low, key.high);
-        return key;
+        auto hash_value = XXH_INLINE_XXH128(value.data, value.size, 0);
+        return UInt128 {hash_value.high64, hash_value.low64};
     }
 
     Set set;
diff --git a/be/src/vec/functions/uuid_numeric.cpp b/be/src/vec/functions/uuid_numeric.cpp
index 012d0d488a1..221cbd7d34c 100644
--- a/be/src/vec/functions/uuid_numeric.cpp
+++ b/be/src/vec/functions/uuid_numeric.cpp
@@ -22,10 +22,7 @@
 #include "common/status.h"
 #include "runtime/large_int_value.h"
 #include "vec/columns/column_vector.h"
-#include "vec/columns/columns_number.h"
 #include "vec/common/hash_table/hash.h"
-#include "vec/common/sip_hash.h"
-#include "vec/common/uint128.h"
 #include "vec/core/block.h"
 #include "vec/core/types.h"
 #include "vec/data_types/data_type_number.h"
@@ -100,7 +97,6 @@ public:
     }
 
     // TODO(zhiqiang): May be override open function?
-
     Status execute_impl(FunctionContext* /*context*/, Block& block,
                         const ColumnNumbers& /*arguments*/, size_t result,
                         size_t input_rows_count) const override {
@@ -140,14 +136,16 @@ private:
     UInt64 randomSeed() const {
         struct timespec times {};
 
+        clock_gettime(CLOCK_MONOTONIC, &times);
         /// Not cryptographically secure as time, pid and stack address can be predictable.
-
-        SipHash hash;
-        hash.update(times.tv_nsec);
-        hash.update(times.tv_sec);
-        hash.update((uintptr_t)pthread_self());
-
-        return hash.get64();
+        auto ret = HashUtil::xxHash64WithSeed(reinterpret_cast<const char*>(&times.tv_nsec),
+                                              sizeof(times.tv_nsec), 0);
+        ret = HashUtil::xxHash64WithSeed(reinterpret_cast<const char*>(&times.tv_sec),
+                                         sizeof(times.tv_sec), ret);
+        ret = HashUtil::xxHash64WithSeed(reinterpret_cast<const char*>((uintptr_t)pthread_self()),
+                                         sizeof(pthread_t), ret);
+
+        return ret;
     }
 };
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to