guyuqi commented on a change in pull request #9424:
URL: https://github.com/apache/arrow/pull/9424#discussion_r631588738
##########
File path: cpp/src/arrow/util/byte_stream_split.h
##########
@@ -63,30 +71,34 @@ void ByteStreamSplitDecodeSse2(const uint8_t* data, int64_t
num_values, int64_t
// Stage 1: AAAA BBBB CCCC DDDD
// Stage 2: ACAC ACAC BDBD BDBD
// Stage 3: ABCD ABCD ABCD ABCD
- __m128i stage[kNumStreamsLog2 + 1U][kNumStreams];
+ simd_batch stage[kNumStreamsLog2 + 1U][kNumStreams];
constexpr size_t kNumStreamsHalf = kNumStreams / 2U;
for (int64_t i = 0; i < num_blocks; ++i) {
for (size_t j = 0; j < kNumStreams; ++j) {
- stage[0][j] = _mm_loadu_si128(
- reinterpret_cast<const __m128i*>(&data[i * sizeof(__m128i) + j *
stride]));
+ stage[0][j] = simd_batch(
+ reinterpret_cast<const int8_t*>(&data[i * dTypeSize128bits + j *
stride]),
+ xsimd::unaligned_mode());
}
for (size_t step = 0; step < kNumStreamsLog2; ++step) {
for (size_t j = 0; j < kNumStreamsHalf; ++j) {
stage[step + 1U][j * 2] =
- _mm_unpacklo_epi8(stage[step][j], stage[step][kNumStreamsHalf +
j]);
+ xsimd::zip_lo(stage[step][j], stage[step][kNumStreamsHalf + j]);
stage[step + 1U][j * 2 + 1U] =
- _mm_unpackhi_epi8(stage[step][j], stage[step][kNumStreamsHalf +
j]);
+ xsimd::zip_hi(stage[step][j], stage[step][kNumStreamsHalf + j]);
}
}
for (size_t j = 0; j < kNumStreams; ++j) {
- _mm_storeu_si128(reinterpret_cast<__m128i*>(
- &output_data[(i * kNumStreams + j) *
sizeof(__m128i)]),
- stage[kNumStreamsLog2][j]);
+ xsimd::store_simd<int8_t, int8_t>(
Review comment:
It makes sense, thanks.
##########
File path: cpp/src/arrow/util/byte_stream_split.h
##########
@@ -63,30 +71,34 @@ void ByteStreamSplitDecodeSse2(const uint8_t* data, int64_t
num_values, int64_t
// Stage 1: AAAA BBBB CCCC DDDD
// Stage 2: ACAC ACAC BDBD BDBD
// Stage 3: ABCD ABCD ABCD ABCD
- __m128i stage[kNumStreamsLog2 + 1U][kNumStreams];
+ simd_batch stage[kNumStreamsLog2 + 1U][kNumStreams];
constexpr size_t kNumStreamsHalf = kNumStreams / 2U;
for (int64_t i = 0; i < num_blocks; ++i) {
for (size_t j = 0; j < kNumStreams; ++j) {
- stage[0][j] = _mm_loadu_si128(
- reinterpret_cast<const __m128i*>(&data[i * sizeof(__m128i) + j *
stride]));
+ stage[0][j] = simd_batch(
+ reinterpret_cast<const int8_t*>(&data[i * dTypeSize128bits + j *
stride]),
+ xsimd::unaligned_mode());
}
for (size_t step = 0; step < kNumStreamsLog2; ++step) {
for (size_t j = 0; j < kNumStreamsHalf; ++j) {
stage[step + 1U][j * 2] =
- _mm_unpacklo_epi8(stage[step][j], stage[step][kNumStreamsHalf +
j]);
+ xsimd::zip_lo(stage[step][j], stage[step][kNumStreamsHalf + j]);
stage[step + 1U][j * 2 + 1U] =
- _mm_unpackhi_epi8(stage[step][j], stage[step][kNumStreamsHalf +
j]);
+ xsimd::zip_hi(stage[step][j], stage[step][kNumStreamsHalf + j]);
}
}
for (size_t j = 0; j < kNumStreams; ++j) {
- _mm_storeu_si128(reinterpret_cast<__m128i*>(
- &output_data[(i * kNumStreams + j) *
sizeof(__m128i)]),
- stage[kNumStreamsLog2][j]);
+ xsimd::store_simd<int8_t, int8_t>(
+ reinterpret_cast<int8_t*>(
+ &output_data[(i * kNumStreams + j) * dTypeSize128bits]),
+ stage[kNumStreamsLog2][j], xsimd::unaligned_mode());
}
}
}
+#endif // ARROW_HAVE_SIMD_DECODE_SPLIT
+#if defined(ARROW_HAVE_SSE4_2)
Review comment:
Updated.
##########
File path: cpp/src/arrow/util/byte_stream_split.h
##########
@@ -545,21 +557,23 @@ void ByteStreamSplitEncodeAvx512(const uint8_t*
raw_values, const size_t num_val
}
#endif // ARROW_HAVE_AVX512
-#if defined(ARROW_HAVE_SIMD_SPLIT)
+#if defined(ARROW_HAVE_SIMD_DECODE_SPLIT)
template <typename T>
void inline ByteStreamSplitDecodeSimd(const uint8_t* data, int64_t num_values,
int64_t stride, T* out) {
#if defined(ARROW_HAVE_AVX512)
return ByteStreamSplitDecodeAvx512(data, num_values, stride, out);
#elif defined(ARROW_HAVE_AVX2)
return ByteStreamSplitDecodeAvx2(data, num_values, stride, out);
-#elif defined(ARROW_HAVE_SSE4_2)
- return ByteStreamSplitDecodeSse2(data, num_values, stride, out);
+#elif defined(ARROW_HAVE_SSE4_2) || defined(ARROW_HAVE_NEON)
Review comment:
This branch is included in `#if defined(ARROW_HAVE_SIMD_DECODE_SPLIT)`.
IMO, it seems we shoud separated them by ARCH Macro.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]