This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 0077b941806 [Performance](exec) replace SipHash in function by XXHash
(#32919)
0077b941806 is described below
commit 0077b94180681c5ac9d213546da5397891d9801a
Author: HappenLee <[email protected]>
AuthorDate: Thu Mar 28 11:00:52 2024 +0800
[Performance](exec) replace SipHash in function by XXHash (#32919)
---
.../aggregate_functions/aggregate_function_uniq.h | 10 +++-------
be/src/vec/functions/uuid_numeric.cpp | 20 +++++++++-----------
2 files changed, 12 insertions(+), 18 deletions(-)
diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq.h
b/be/src/vec/aggregate_functions/aggregate_function_uniq.h
index 72be9e01833..2e8855134eb 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_uniq.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_uniq.h
@@ -36,7 +36,6 @@
#include "vec/common/assert_cast.h"
#include "vec/common/hash_table/hash.h"
#include "vec/common/hash_table/phmap_fwd_decl.h"
-#include "vec/common/sip_hash.h"
#include "vec/common/string_ref.h"
#include "vec/common/uint128.h"
#include "vec/core/types.h"
@@ -64,17 +63,14 @@ template <typename T>
struct AggregateFunctionUniqExactData {
static constexpr bool is_string_key = std::is_same_v<T, String>;
using Key = std::conditional_t<is_string_key, UInt128, T>;
- using Hash = std::conditional_t<is_string_key, UInt128TrivialHash,
HashCRC32<Key>>;
+ using Hash = HashCRC32<Key>;
using Set = flat_hash_set<Key, Hash>;
// TODO: replace SipHash with xxhash to speed up
static UInt128 ALWAYS_INLINE get_key(const StringRef& value) {
- UInt128 key;
- SipHash hash;
- hash.update(value.data, value.size);
- hash.get128(key.low, key.high);
- return key;
+ auto hash_value = XXH_INLINE_XXH128(value.data, value.size, 0);
+ return UInt128 {hash_value.high64, hash_value.low64};
}
Set set;
diff --git a/be/src/vec/functions/uuid_numeric.cpp
b/be/src/vec/functions/uuid_numeric.cpp
index 012d0d488a1..221cbd7d34c 100644
--- a/be/src/vec/functions/uuid_numeric.cpp
+++ b/be/src/vec/functions/uuid_numeric.cpp
@@ -22,10 +22,7 @@
#include "common/status.h"
#include "runtime/large_int_value.h"
#include "vec/columns/column_vector.h"
-#include "vec/columns/columns_number.h"
#include "vec/common/hash_table/hash.h"
-#include "vec/common/sip_hash.h"
-#include "vec/common/uint128.h"
#include "vec/core/block.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_number.h"
@@ -100,7 +97,6 @@ public:
}
// TODO(zhiqiang): May be override open function?
-
Status execute_impl(FunctionContext* /*context*/, Block& block,
const ColumnNumbers& /*arguments*/, size_t result,
size_t input_rows_count) const override {
@@ -140,14 +136,16 @@ private:
UInt64 randomSeed() const {
struct timespec times {};
+ clock_gettime(CLOCK_MONOTONIC, &times);
/// Not cryptographically secure as time, pid and stack address can be
predictable.
-
- SipHash hash;
- hash.update(times.tv_nsec);
- hash.update(times.tv_sec);
- hash.update((uintptr_t)pthread_self());
-
- return hash.get64();
+ auto ret = HashUtil::xxHash64WithSeed(reinterpret_cast<const
char*>(&times.tv_nsec),
+ sizeof(times.tv_nsec), 0);
+ ret = HashUtil::xxHash64WithSeed(reinterpret_cast<const
char*>(&times.tv_sec),
+ sizeof(times.tv_sec), ret);
+ ret = HashUtil::xxHash64WithSeed(reinterpret_cast<const
char*>((uintptr_t)pthread_self()),
+ sizeof(pthread_t), ret);
+
+ return ret;
}
};
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]