zanmato1984 commented on code in PR #42188: URL: https://github.com/apache/arrow/pull/42188#discussion_r1643204564
########## cpp/src/arrow/compute/row/compare_test.cc: ########## @@ -164,5 +166,126 @@ TEST(KeyCompare, CompareColumnsToRowsTempStackUsage) { } } +// Compare columns to rows at offsets over 2GB within a row table. +// Certain AVX2 instructions may behave unexpectedly causing troubles like GH-41813. +TEST(KeyCompare, CompareColumnsToRowsLarge) { + if constexpr (sizeof(void*) == 4) { + GTEST_SKIP() << "Test only works on 64-bit platforms"; + } + + // The idea of this case is to create a row table using several fixed length columns and + // one var length column (so the row is hence var length and has offset buffer), with + // the overall data size exceeding 2GB. Then compare each row with itself. + constexpr int64_t two_gb = 2ll * 1024ll * 1024ll * 1024ll; + // The compare function requires the row id of the left column to be uint16_t, hence the + // number of rows. + constexpr int64_t num_rows = std::numeric_limits<uint16_t>::max() + 1; + // TODO: This test will fail if we switch the order between uint64 and uint32. Review Comment: This was discovered by accident, and I'm positive that it is unrelated to this fix - it exists in the non-AVX2 code path too. So I intend to do this fix first and revisit this TODO in the near future. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org