Copilot commented on code in PR #60695:
URL: https://github.com/apache/doris/pull/60695#discussion_r2796424981
##########
be/src/vec/common/hash_table/hash_map_context.h:
##########
@@ -717,11 +727,18 @@ struct MethodKeysFixed : public MethodBase<TData> {
data = const_cast<char*>(key_columns[i]->get_raw_data().data);
}
- auto foo = [&]<typename Fixed>(Fixed zero) {
+ auto goo = [&]<typename Fixed, bool aligned>(Fixed zero) {
CHECK_EQ(sizeof(Fixed), size);
for (size_t j = 0; j < num_rows; j++) {
- memcpy_fixed<Fixed, true>(data + j * sizeof(Fixed),
- (char*)(&input_keys[j]) + pos);
+ memcpy_fixed<Fixed, aligned>(data + j * sizeof(Fixed),
+ (char*)(&input_keys[j]) +
pos);
+ }
+ };
+ auto foo = [&]<typename Fixed>(Fixed zero) {
+ if (reinterpret_cast<uintptr_t>(input_keys.data() + pos) %
sizeof(Fixed) == 0) {
Review Comment:
The alignment check incorrectly performs pointer arithmetic. The expression
input_keys.data() + pos adds pos to a Key* pointer, which moves pos *
sizeof(Key) bytes, not pos bytes. However, on line 734, the code correctly uses
(char*)(&input_keys[j]) + pos to add pos bytes. The alignment check should also
cast to char* before adding pos. The check should be:
reinterpret_cast<uintptr_t>((char*)(input_keys.data()) + pos) % sizeof(Fixed)
== 0
```suggestion
if
(reinterpret_cast<uintptr_t>(reinterpret_cast<char*>(input_keys.data()) + pos)
% sizeof(Fixed) == 0) {
```
##########
be/src/vec/common/hash_table/hash_map_context.h:
##########
@@ -717,11 +727,18 @@ struct MethodKeysFixed : public MethodBase<TData> {
data = const_cast<char*>(key_columns[i]->get_raw_data().data);
}
- auto foo = [&]<typename Fixed>(Fixed zero) {
+ auto goo = [&]<typename Fixed, bool aligned>(Fixed zero) {
CHECK_EQ(sizeof(Fixed), size);
for (size_t j = 0; j < num_rows; j++) {
- memcpy_fixed<Fixed, true>(data + j * sizeof(Fixed),
- (char*)(&input_keys[j]) + pos);
+ memcpy_fixed<Fixed, aligned>(data + j * sizeof(Fixed),
+ (char*)(&input_keys[j]) +
pos);
+ }
+ };
+ auto foo = [&]<typename Fixed>(Fixed zero) {
+ if (reinterpret_cast<uintptr_t>(input_keys.data() + pos) %
sizeof(Fixed) == 0) {
+ goo.template operator()<Fixed, true>(zero);
+ } else {
+ goo.template operator()<Fixed, false>(zero);
}
Review Comment:
When aligned=true, memcpy_fixed uses std::assume_aligned on both source and
destination pointers. However, the alignment check only verifies the source
pointer ((char*)(input_keys.data()) + pos), not the destination pointer (data +
j * sizeof(Fixed)). If the destination is not aligned to alignof(Fixed), this
creates undefined behavior. The code should check both pointers for alignment
or use unaligned memcpy when either pointer is misaligned.
##########
be/src/vec/common/hash_table/hash_map_context.h:
##########
@@ -606,11 +606,18 @@ struct MethodKeysFixed : public MethodBase<TData> {
}
auto* __restrict current = result_data + offset;
for (size_t i = 0; i < row_numbers; ++i) {
- memcpy_fixed<Fixed, true>(current, data);
+ memcpy_fixed<Fixed, aligned>(current, data);
current += sizeof(T);
data += sizeof(Fixed);
}
};
+ auto foo = [&]<typename Fixed>(Fixed zero) {
+ if (reinterpret_cast<uintptr_t>(result_data + offset) %
sizeof(T) == 0) {
+ goo.template operator()<Fixed, true>(zero);
+ } else {
+ goo.template operator()<Fixed, false>(zero);
+ }
Review Comment:
When aligned=true, memcpy_fixed uses std::assume_aligned on both source and
destination pointers, requiring both to be aligned to alignof(Fixed). However,
the alignment check only verifies the destination pointer (result_data +
offset), not the source pointer (data). If data is not aligned to
alignof(Fixed), this creates undefined behavior. The code should either check
both pointers for alignment or only use unaligned memcpy when either pointer is
misaligned.
##########
be/src/vec/common/hash_table/hash_map_context.h:
##########
@@ -606,11 +606,18 @@ struct MethodKeysFixed : public MethodBase<TData> {
}
auto* __restrict current = result_data + offset;
for (size_t i = 0; i < row_numbers; ++i) {
- memcpy_fixed<Fixed, true>(current, data);
+ memcpy_fixed<Fixed, aligned>(current, data);
current += sizeof(T);
data += sizeof(Fixed);
}
};
+ auto foo = [&]<typename Fixed>(Fixed zero) {
+ if (reinterpret_cast<uintptr_t>(result_data + offset) %
sizeof(T) == 0) {
Review Comment:
The alignment check should verify alignment against sizeof(Fixed) or
alignof(Fixed), not sizeof(T). The template parameter T represents the packed
key type (e.g., UInt128), while Fixed is the type of the individual field being
copied (e.g., uint32_t, uint64_t). When memcpy_fixed<Fixed, aligned> is called
with aligned=true, it uses std::assume_aligned<alignof(Fixed)>, which expects
the address to be aligned to alignof(Fixed). However, this check verifies
alignment to sizeof(T), which may be different from alignof(Fixed). This can
lead to undefined behavior if the assumption passed to the compiler is
incorrect.
```suggestion
if (reinterpret_cast<uintptr_t>(result_data + offset) %
alignof(Fixed) == 0) {
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]