This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d7ae57000ee [Improvement](memcpy) use assume_aligned to hint aligned memcpy (#60695)
d7ae57000ee is described below

commit d7ae57000ee4e6f5ff57204cb0f33df5f9ade00c
Author: Pxl <[email protected]>
AuthorDate: Thu Feb 26 15:26:09 2026 +0800

    [Improvement](memcpy) use assume_aligned to hint aligned memcpy (#60695)
    
    
    This pull request improves the performance and correctness of fixed-size
    memory copy operations in hash map key handling by adding
    alignment-aware logic. The main changes introduce runtime alignment
    checks and use compiler hints to optimize memory copying, which can help
    leverage SIMD instructions and avoid undefined behavior due to
    misaligned accesses.
---
 be/src/vec/common/hash_table/hash_map_context.h | 41 ++++++++++++++++++++++---
 be/src/vec/common/memcpy_small.h                |  8 +++--
 2 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/be/src/vec/common/hash_table/hash_map_context.h b/be/src/vec/common/hash_table/hash_map_context.h
index e7a5af1706e..95bc05a5f09 100644
--- a/be/src/vec/common/hash_table/hash_map_context.h
+++ b/be/src/vec/common/hash_table/hash_map_context.h
@@ -596,7 +596,7 @@ struct MethodKeysFixed : public MethodBase<TData> {
         for (size_t j = 0; j < key_columns.size(); ++j) {
             const char* __restrict data = key_columns[j]->get_raw_data().data;
 
-            auto foo = [&]<typename Fixed>(Fixed zero) {
+            auto goo = [&]<typename Fixed, bool aligned>(Fixed zero) {
                 CHECK_EQ(sizeof(Fixed), key_sizes[j]);
                 if (has_null_column.size() && has_null_column[j]) {
                     const auto* nullmap =
@@ -606,11 +606,24 @@ struct MethodKeysFixed : public MethodBase<TData> {
                 }
                 auto* __restrict current = result_data + offset;
                 for (size_t i = 0; i < row_numbers; ++i) {
-                    memcpy_fixed<Fixed, true>(current, data);
+                    memcpy_fixed<Fixed, aligned>(current, data);
                     current += sizeof(T);
                     data += sizeof(Fixed);
                 }
             };
+            auto foo = [&]<typename Fixed>(Fixed zero) {
+                // Check alignment of both destination and source pointers.
+                // Also verify that the stride sizeof(T) is a multiple of alignof(Fixed),
+                // otherwise alignment will be lost on subsequent loop iterations
+                // (e.g. UInt96 has sizeof=12, stride 12 is not a multiple of alignof(uint64_t)=8).
+                if (sizeof(T) % alignof(Fixed) == 0 &&
+                    reinterpret_cast<uintptr_t>(result_data + offset) % 
alignof(Fixed) == 0 &&
+                    reinterpret_cast<uintptr_t>(data) % alignof(Fixed) == 0) {
+                    goo.template operator()<Fixed, true>(zero);
+                } else {
+                    goo.template operator()<Fixed, false>(zero);
+                }
+            };
 
             if (key_sizes[j] == sizeof(uint8_t)) {
                 foo(uint8_t());
@@ -688,6 +701,9 @@ struct MethodKeysFixed : public MethodBase<TData> {
 
     void insert_keys_into_columns(std::vector<typename Base::Key>& input_keys,
                                   MutableColumns& key_columns, const uint32_t 
num_rows) override {
+        if (num_rows == 0) {
+            return;
+        }
         size_t pos = std::ranges::any_of(key_columns,
                                          [](const auto& col) { return 
col->is_nullable(); });
 
@@ -717,11 +733,26 @@ struct MethodKeysFixed : public MethodBase<TData> {
                 data = const_cast<char*>(key_columns[i]->get_raw_data().data);
             }
 
-            auto foo = [&]<typename Fixed>(Fixed zero) {
+            auto goo = [&]<typename Fixed, bool aligned>(Fixed zero) {
                 CHECK_EQ(sizeof(Fixed), size);
                 for (size_t j = 0; j < num_rows; j++) {
-                    memcpy_fixed<Fixed, true>(data + j * sizeof(Fixed),
-                                              (char*)(&input_keys[j]) + pos);
+                    memcpy_fixed<Fixed, aligned>(data + j * sizeof(Fixed),
+                                                 (char*)(&input_keys[j]) + 
pos);
+                }
+            };
+            auto foo = [&]<typename Fixed>(Fixed zero) {
+                // Check alignment of both source and destination pointers.
+                // The source steps by sizeof(Key) between iterations, so sizeof(Key)
+                // must be a multiple of alignof(Fixed) to maintain alignment across
+                // all iterations (e.g. UInt96 has sizeof=12, not a multiple of 8).
+                if (sizeof(typename Base::Key) % alignof(Fixed) == 0 &&
+                    reinterpret_cast<uintptr_t>((char*)(input_keys.data()) + 
pos) %
+                                    alignof(Fixed) ==
+                            0 &&
+                    reinterpret_cast<uintptr_t>(data) % alignof(Fixed) == 0) {
+                    goo.template operator()<Fixed, true>(zero);
+                } else {
+                    goo.template operator()<Fixed, false>(zero);
                 }
             };
 
diff --git a/be/src/vec/common/memcpy_small.h b/be/src/vec/common/memcpy_small.h
index 62a093e8b62..62e7059a61e 100644
--- a/be/src/vec/common/memcpy_small.h
+++ b/be/src/vec/common/memcpy_small.h
@@ -24,6 +24,7 @@
 #include <string.h>
 
 #include <cstdint>
+#include <memory>
 
 #if defined(__SSE2__) || defined(__aarch64__)
 #include "util/sse_util.hpp"
@@ -86,10 +87,13 @@ inline void memcpy_small_allow_read_write_overflow15(void* __restrict dst,
 #endif
 
 // assume input address not aligned by default
+// hint to compiler that we are copying fixed size data, so it can optimize the copy using SIMD instructions if possible.
 template <typename T, bool aligned = false>
 void memcpy_fixed(char* lhs, const char* rhs) {
-    if constexpr (aligned || sizeof(T) <= 8) {
-        *(T*)lhs = *(T*)rhs;
+    if constexpr (aligned) {
+        // hint aligned address to compiler
+        memcpy(std::assume_aligned<alignof(T)>(lhs), 
std::assume_aligned<alignof(T)>(rhs),
+               sizeof(T));
     } else {
         memcpy(lhs, rhs, sizeof(T));
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to