zanmato1984 commented on code in PR #45108: URL: https://github.com/apache/arrow/pull/45108#discussion_r1913028296
########## cpp/src/arrow/compute/key_map_internal_avx2.cc: ########## @@ -392,16 +387,30 @@ int SwissTable::extract_group_ids_avx2(const int num_keys, const uint32_t* hashe } else { for (int i = 0; i < num_keys / unroll; ++i) { __m256i hash = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(hashes) + i); + // Extend hash and local_slot to 64-bit to compute 64-bit group id offsets to + // gather from. This is to prevent index overflow issues in GH-44513. + // NB: Use zero-extend conversion for unsigned hash. + __m256i hash_lo = _mm256_cvtepu32_epi64(_mm256_castsi256_si128(hash)); + __m256i hash_hi = _mm256_cvtepu32_epi64(_mm256_extracti128_si256(hash, 1)); __m256i local_slot = _mm256_set1_epi64x(reinterpret_cast<const uint64_t*>(local_slots)[i]); - local_slot = _mm256_shuffle_epi8( - local_slot, _mm256_setr_epi32(0x80808000, 0x80808001, 0x80808002, 0x80808003, - 0x80808004, 0x80808005, 0x80808006, 0x80808007)); - local_slot = _mm256_mullo_epi32(local_slot, _mm256_set1_epi32(byte_size)); - __m256i pos = _mm256_srlv_epi32(hash, _mm256_set1_epi32(bits_hash_ - log_blocks_)); - pos = _mm256_mullo_epi32(pos, _mm256_set1_epi32(byte_multiplier)); - pos = _mm256_add_epi32(pos, local_slot); - __m256i group_id = _mm256_i32gather_epi32(elements, pos, 1); + __m256i local_slot_lo = _mm256_shuffle_epi8( + local_slot, _mm256_setr_epi32(0x80808000, 0x80808080, 0x80808001, 0x80808080, + 0x80808002, 0x80808080, 0x80808003, 0x80808080)); + __m256i local_slot_hi = _mm256_shuffle_epi8( + local_slot, _mm256_setr_epi32(0x80808004, 0x80808080, 0x80808005, 0x80808080, + 0x80808006, 0x80808080, 0x80808007, 0x80808080)); + local_slot_lo = _mm256_mul_epu32(local_slot_lo, _mm256_set1_epi32(byte_size)); + local_slot_lo = _mm256_mul_epu32(local_slot_hi, _mm256_set1_epi32(byte_size)); Review Comment: Thank you for spotting this! Let me check how things go wrong (or not go wrong). -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org. For queries about this service, please contact Infrastructure at: users@infra.apache.org