Re: [PR] GH-45334: [C++][Acero] Fix swiss join overflow issues in row offset calculation for fixed length and null masks [arrow]

via GitHub Mon, 27 Jan 2025 06:23:38 -0800


zanmato1984 commented on code in PR #45336:
URL: https://github.com/apache/arrow/pull/45336#discussion_r1930611737



##########
cpp/src/arrow/acero/swiss_join_avx2.cc:
##########
@@ -237,31 +238,16 @@ int RowArrayAccessor::VisitNulls_avx2(const RowTableImpl& 
rows, int column_id,
   //
   constexpr int kUnroll = 8;
 
-  const uint8_t* null_masks = rows.null_masks();
-  __m256i null_bits_per_row =
-      _mm256_set1_epi32(8 * rows.metadata().null_masks_bytes_per_row);
-  __m256i pos_after_encoding =
-      _mm256_set1_epi32(rows.metadata().pos_after_encoding(column_id));
+  uint32_t pos_after_encoding = rows.metadata().pos_after_encoding(column_id);
   for (int i = 0; i < num_rows / kUnroll; ++i) {
     __m256i row_id = _mm256_loadu_si256(reinterpret_cast<const 
__m256i*>(row_ids) + i);
-    __m256i bit_id = _mm256_mullo_epi32(row_id, null_bits_per_row);
-    bit_id = _mm256_add_epi32(bit_id, pos_after_encoding);
-    __m256i bytes = _mm256_i32gather_epi32(reinterpret_cast<const 
int*>(null_masks),
-                                           _mm256_srli_epi32(bit_id, 3), 1);
-    __m256i bit_in_word = _mm256_sllv_epi32(
-        _mm256_set1_epi32(1), _mm256_and_si256(bit_id, _mm256_set1_epi32(7)));
-    // `result` will contain one 32-bit word per tested null bit, either 
0xffffffff if the
-    // null bit was set or 0 if it was unset.
-    __m256i result =
-        _mm256_cmpeq_epi32(_mm256_and_si256(bytes, bit_in_word), bit_in_word);
-    // NB: Be careful about sign-extension when casting the return value of
-    // _mm256_movemask_epi8 (signed 32-bit) to unsigned 64-bit, which will 
pollute the
-    // higher bits of the following OR.
-    uint32_t null_bytes_lo = static_cast<uint32_t>(
-        
_mm256_movemask_epi8(_mm256_cvtepi32_epi64(_mm256_castsi256_si128(result))));
-    uint64_t null_bytes_hi =
-        
_mm256_movemask_epi8(_mm256_cvtepi32_epi64(_mm256_extracti128_si256(result, 
1)));
-    uint64_t null_bytes = null_bytes_lo | (null_bytes_hi << 32);
+    __m256i null32 = GetNullBitInt32(rows, pos_after_encoding, row_id);
+    null32 = _mm256_cmpeq_epi32(null32, _mm256_set1_epi32(1));
+    uint32_t null32_lo =
+        
_mm256_movemask_epi8(_mm256_cvtepi32_epi64(_mm256_castsi256_si128(null32)));
+    uint32_t null32_hi =
+        
_mm256_movemask_epi8(_mm256_cvtepi32_epi64(_mm256_extracti128_si256(null32, 
1)));

Review Comment:
   Indeed. Moved `Cmp32/64To8` to a common header and used it here. Thank you.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] GH-45334: [C++][Acero] Fix swiss join overflow issues in row offset calculation for fixed length and null masks [arrow]

Reply via email to