jonkeane commented on code in PR #46124:
URL: https://github.com/apache/arrow/pull/46124#discussion_r2048861485


##########
cpp/src/arrow/compute/row/compare_internal.cc:
##########
@@ -276,12 +276,13 @@ void KeyCompare::CompareVarBinaryColumnToRowHelper(
       int32_t tail_length = length - j * 8;
       uint64_t tail_mask = ~0ULL >> (64 - 8 * tail_length);
       uint64_t key_left = 0;
-      std::memcpy(&key_left, key_left_ptr + j, tail_length);
+      const uint8_t* src_bytes = reinterpret_cast<const uint8_t*>(key_left_ptr 
+ j);
+      std::memcpy(&key_left, src_bytes, tail_length);
       uint64_t key_right = key_right_ptr[j];
       result_or |= tail_mask & (key_left ^ key_right);
     }
     int result = result_or == 0 ? 0xff : 0;
-    result *= (length_left == length_right ? 1 : 0);

Review Comment:
   > How does this LOC issue a misaligned load? Compiler reordering? I assume 
the actual code in question is the std::memcpy above right?
   
   Ah yes, you're right I've reverted the change on line 284/285 — it's not 
needed to resolve this sanitizer issue.
   
   > I'm also curious about that, if the problem is in std::memcpy, then why 
does the pointer type (uint64_t * vs uint8_t *) matter given that std::memcpy 
accepts void *.
   
   I was seeing complaints about misalingnment:
   
   ```
   
/Users/runner/work/crossbow/crossbow/arrow/cpp/src/arrow/compute/row/compare_internal.cc:279:30:
 runtime error: load of misaligned address 0x000134060481 for type 'const 
uint64_t *' (aka 'const unsigned long long *'), which requires 8 byte alignment
   0x000134060481: note: pointer points here
    00 00 00  6a 69 68 67 66 65 64 00  00 00 00 00 00 00 00 00  04 00 00 00 05 
00 00 00  06 00 00 00 07
                 ^ 
       #0 0x000120826784 in void 
arrow::compute::KeyCompare::CompareVarBinaryColumnToRowHelper<true, 
true>(unsigned int, unsigned int, unsigned int, unsigned short const*, unsigned 
int const*, arrow::compute::LightContext*, arrow::compute::KeyColumnArray 
const&, arrow::compute::RowTableImpl const&, unsigned char*)+0xa44 
(arrow.so:arm64+0x55e6784)
       #1 0x0001208179c0 in 
arrow::compute::KeyCompare::CompareColumnsToRows(unsigned int, unsigned short 
const*, unsigned int const*, arrow::compute::LightContext*, unsigned int*, 
unsigned short*, std::__1::vector<arrow::compute::KeyColumnArray, 
std::__1::allocator<arrow::compute::KeyColumnArray>> const&, 
arrow::compute::RowTableImpl const&, bool, unsigned char*)+0x780 
(arrow.so:arm64+0x55d79c0)
       #2 0x000120846c0c in 
std::__1::__function::__func<arrow::compute::(anonymous 
namespace)::GrouperFastImpl::Make(std::__1::vector<arrow::TypeHolder, 
std::__1::allocator<arrow::TypeHolder>> const&, 
arrow::compute::ExecContext*)::'lambda'(int, unsigned short const*, unsigned 
int const*, unsigned int*, unsigned short*, void*), 
std::__1::allocator<arrow::compute::(anonymous 
namespace)::GrouperFastImpl::Make(std::__1::vector<arrow::TypeHolder, 
std::__1::allocator<arrow::TypeHolder>> const&, 
arrow::compute::ExecContext*)::'lambda'(int, unsigned short const*, unsigned 
int const*, unsigned int*, unsigned short*, void*)>, void (int, unsigned short 
const*, unsigned int const*, unsigned int*, unsigned short*, 
void*)>::operator()(int&&, unsigned short const*&&, unsigned int const*&&, 
unsigned int*&&, unsigned short*&&, void*&&)+0x1cc (arrow.so:arm64+0x5606c0c)
       #3 0x0001207c79fc in arrow::compute::SwissTable::run_comparisons(int, 
unsigned short const*, unsigned char const*, unsigned int const*, int*, 
unsigned short*, std::__1::function<void (int, unsigned short const*, unsigned 
int const*, unsigned int*, unsigned short*, void*)> const&, void*) const+0x2bc 
(arrow.so:arm64+0x55879fc)
       #4 0x0001207c8014 in arrow::compute::SwissTable::find(int, unsigned int 
const*, unsigned char*, unsigned char const*, unsigned int*, 
arrow::util::TempVectorStack*, std::__1::function<void (int, unsigned short 
const*, unsigned int const*, unsigned int*, unsigned short*, void*)> const&, 
void*) const+0x294 (arrow.so:arm64+0x5588014)
       #5 0x000120843110 in arrow::compute::(anonymous 
namespace)::GrouperFastImpl::ConsumeImpl(arrow::compute::ExecSpan const&, 
arrow::compute::(anonymous namespace)::GrouperMode)+0x1d10 
(arrow.so:arm64+0x5603110)
       #6 0x000120840548 in arrow::compute::(anonymous 
namespace)::GrouperFastImpl::ConsumeImpl(arrow::compute::ExecSpan const&, long 
long, long long, arrow::compute::(anonymous namespace)::GrouperMode)+0x548 
(arrow.so:arm64+0x5600548)
       #7 0x00011c7f4b90 in arrow::acero::aggregate::GroupByNode::Merge()+0x4d0 
(arrow.so:arm64+0x15b4b90)
       #8 0x00011c7f8188 in 
arrow::acero::aggregate::GroupByNode::OutputResult(bool)+0x4c8 
(arrow.so:arm64+0x15b8188)
       #9 0x00011c7f99cc in 
arrow::acero::aggregate::GroupByNode::InputReceived(arrow::acero::ExecNode*, 
arrow::compute::ExecBatch)+0xa8c (arrow.so:arm64+0x15b99cc)
       #10 0x00011ca36488 in 
arrow::acero::MapNode::InputReceived(arrow::acero::ExecNode*, 
arrow::compute::ExecBatch)+0x348 (arrow.so:arm64+0x17f6488)
       #11 0x00011ca36488 in 
arrow::acero::MapNode::InputReceived(arrow::acero::ExecNode*, 
arrow::compute::ExecBatch)+0x348 (arrow.so:arm64+0x17f6488)
       #12 0x00011cac78b8 in 
std::__1::__function::__func<arrow::acero::(anonymous 
namespace)::SourceNode::SliceAndDeliverMorsel(arrow::compute::ExecBatch 
const&)::'lambda'(), std::__1::allocator<arrow::acero::(anonymous 
namespace)::SourceNode::SliceAndDeliverMorsel(arrow::compute::ExecBatch 
const&)::'lambda'()>, arrow::Status ()>::operator()()+0xa78 
(arrow.so:arm64+0x18878b8)
       #13 0x00011ca62db0 in 
std::__1::__bind_return<arrow::detail::ContinueFuture, 
std::__1::tuple<arrow::Future<arrow::internal::Empty>, 
std::__1::function<arrow::Status ()>>, std::__1::tuple<>, 
__is_valid_bind_return<arrow::detail::ContinueFuture, 
std::__1::tuple<arrow::Future<arrow::internal::Empty>, 
std::__1::function<arrow::Status ()>>, std::__1::tuple<>>::value>::type 
std::__1::__bind<arrow::detail::ContinueFuture, 
arrow::Future<arrow::internal::Empty>&, std::__1::function<arrow::Status 
()>>::operator()[abi:ne190102]<>()+0xf0 (arrow.so:arm64+0x1822db0)
       #14 0x00012152b7cc in arrow::internal::FnOnce<void ()>::operator()() 
&&+0x14c (arrow.so:arm64+0x62eb7cc)
       #15 0x00012153f688 in void* 
std::__1::__thread_proxy[abi:ne190102]<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct,
 std::__1::default_delete<std::__1::__thread_struct>>, 
arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::$_0>>(void*)+0x3c8 
(arrow.so:arm64+0x62ff688)
       #16 0x00010299a4a4 in asan_thread_start(void*)+0x4c 
(libclang_rt.asan_osx_dynamic.dylib:arm64e+0x3a4a4)
       #17 0x00018c89dc08 in _pthread_start+0x84 
(libsystem_pthread.dylib:arm64e+0x6c08)
       #18 0x00018c898b7c in thread_start+0x4 
(libsystem_pthread.dylib:arm64e+0x1b7c)
   
   SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior 
/Users/runner/work/crossbow/crossbow/arrow/cpp/src/arrow/compute/row/compare_internal.cc:279:30
 
   ```
   
   But the fix I have up on lines 279–278 might not be totally right? Is there 
something different I should try? 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to