zanmato1984 commented on code in PR #45336:
URL: https://github.com/apache/arrow/pull/45336#discussion_r1927370144


##########
cpp/src/arrow/compute/row/compare_internal_avx2.cc:
##########
@@ -64,14 +66,21 @@ uint32_t KeyCompare::NullUpdateColumnToRowImp_avx2(
         irow_right =
             _mm256_loadu_si256(reinterpret_cast<const 
__m256i*>(left_to_right_map) + i);
       }
-      __m256i bitid =
-          _mm256_mullo_epi32(irow_right, _mm256_set1_epi32(null_mask_num_bytes 
* 8));
-      bitid = _mm256_add_epi32(bitid, _mm256_set1_epi32(null_bit_id));
-      __m256i right =
-          _mm256_i32gather_epi32((const int*)null_masks, 
_mm256_srli_epi32(bitid, 3), 1);
-      right = _mm256_and_si256(
-          _mm256_set1_epi32(1),
-          _mm256_srlv_epi32(right, _mm256_and_si256(bitid, 
_mm256_set1_epi32(7))));
+      __m256i irow_right_lo = 
_mm256_cvtepi32_epi64(_mm256_castsi256_si128(irow_right));
+      __m256i irow_right_hi =
+          _mm256_cvtepi32_epi64(_mm256_extracti128_si256(irow_right, 1));
+      __m256i bit_id_lo =
+          _mm256_mul_epi32(irow_right_lo, 
_mm256_set1_epi64x(null_mask_num_bytes * 8));
+      __m256i bit_id_hi =
+          _mm256_mul_epi32(irow_right_hi, 
_mm256_set1_epi64x(null_mask_num_bytes * 8));
+      bit_id_lo = _mm256_add_epi64(bit_id_lo, pos_after_encoding);
+      bit_id_hi = _mm256_add_epi64(bit_id_hi, pos_after_encoding);
+      __m128i right_lo = _mm256_i64gather_epi32(reinterpret_cast<const 
int*>(null_masks),
+                                                _mm256_srli_epi64(bit_id_lo, 
3), 1);
+      __m128i right_hi = _mm256_i64gather_epi32(reinterpret_cast<const 
int*>(null_masks),
+                                                _mm256_srli_epi64(bit_id_hi, 
3), 1);
+      __m256i right = _mm256_set_m128i(right_hi, right_lo);
+      right = _mm256_and_si256(right, bit_in_right);

Review Comment:
   Done. I made the helper you suggested and put it in a common header. Besides 
this file, there is one more piece of code, in swiss_join_avx2.cc, that can 
reuse it. Pretty nice.
   
   I also made two helper functions, `Cmp32/64To8`, local to 
compare_internal_avx2.cc, which save some more LOC.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to