This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new fbf8a970c3 GH-36641: [C++] Remove reference to acero from non-acero 
file (#36650)
fbf8a970c3 is described below

commit fbf8a970c3989cc4b196a724764879d9343817f6
Author: Weston Pace <[email protected]>
AuthorDate: Thu Jul 13 06:01:59 2023 -0700

    GH-36641: [C++] Remove reference to acero from non-acero file (#36650)
    
    ### Rationale for this change
    
    Files in modules which do not depend on the acero module should not 
reference files inside the acero module.
    
    ### What changes are included in this PR?
    
    There were no changes to the body of any functions.  I simply moved 
functions around so that the acero include was no longer needed.  There were 
some conflicts that arose between the class `bit_util` and the namespace 
`bit_util` and so I got rid of the class in favor of the namespace as that is 
more similar to how we handle `bit_util` elsewhere.
    
    ### Are these changes tested?
    
    Sort of.  I would like to add an AVX2 CI system as well.  I'm not confident 
any of the CI builds are building with AVX2 enabled.  Also, even if we have an 
AVX2 CI system it would not have caught this issue since the code was only 
needed definitions from the acero header and was not relying on any actual 
compiled symbols.  However, I think setting up tests to catch this sort of 
invalid include are beyond the scope of this PR.
    
    ### Are there any user-facing changes?
    
    No.
    * Closes: #36641
    
    Lead-authored-by: Weston Pace <[email protected]>
    Co-authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/compute/util.cc      | 70 +++++++++++++++----------------
 cpp/src/arrow/compute/util.h       | 86 ++++++++++++++++----------------------
 cpp/src/arrow/compute/util_avx2.cc | 62 +++++++++++++--------------
 3 files changed, 99 insertions(+), 119 deletions(-)

diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc
index 78f90ea37f..f69f60a5af 100644
--- a/cpp/src/arrow/compute/util.cc
+++ b/cpp/src/arrow/compute/util.cc
@@ -56,7 +56,9 @@ void TempVectorStack::release(int id, uint32_t num_bytes) {
   --num_vectors_;
 }
 
-inline uint64_t bit_util::SafeLoadUpTo8Bytes(const uint8_t* bytes, int 
num_bytes) {
+namespace bit_util {
+
+inline uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) {
   // This will not be correct on big-endian architectures.
 #if !ARROW_LITTLE_ENDIAN
   ARROW_DCHECK(false);
@@ -73,7 +75,7 @@ inline uint64_t bit_util::SafeLoadUpTo8Bytes(const uint8_t* 
bytes, int num_bytes
   }
 }
 
-inline void bit_util::SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, 
uint64_t value) {
+inline void SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_t value) 
{
   // This will not be correct on big-endian architectures.
 #if !ARROW_LITTLE_ENDIAN
   ARROW_DCHECK(false);
@@ -88,8 +90,8 @@ inline void bit_util::SafeStoreUpTo8Bytes(uint8_t* bytes, int 
num_bytes, uint64_
   }
 }
 
-inline void bit_util::bits_to_indexes_helper(uint64_t word, uint16_t 
base_index,
-                                             int* num_indexes, uint16_t* 
indexes) {
+inline void bits_to_indexes_helper(uint64_t word, uint16_t base_index, int* 
num_indexes,
+                                   uint16_t* indexes) {
   int n = *num_indexes;
   while (word) {
     indexes[n++] = base_index + 
static_cast<uint16_t>(CountTrailingZeros(word));
@@ -98,9 +100,8 @@ inline void bit_util::bits_to_indexes_helper(uint64_t word, 
uint16_t base_index,
   *num_indexes = n;
 }
 
-inline void bit_util::bits_filter_indexes_helper(uint64_t word,
-                                                 const uint16_t* input_indexes,
-                                                 int* num_indexes, uint16_t* 
indexes) {
+inline void bits_filter_indexes_helper(uint64_t word, const uint16_t* 
input_indexes,
+                                       int* num_indexes, uint16_t* indexes) {
   int n = *num_indexes;
   while (word) {
     indexes[n++] = input_indexes[CountTrailingZeros(word)];
@@ -110,21 +111,21 @@ inline void bit_util::bits_filter_indexes_helper(uint64_t 
word,
 }
 
 template <int bit_to_search, bool filter_input_indexes>
-void bit_util::bits_to_indexes_internal(int64_t hardware_flags, const int 
num_bits,
-                                        const uint8_t* bits,
-                                        const uint16_t* input_indexes, int* 
num_indexes,
-                                        uint16_t* indexes, uint16_t 
base_index) {
+void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits,
+                              const uint8_t* bits, const uint16_t* 
input_indexes,
+                              int* num_indexes, uint16_t* indexes,
+                              uint16_t base_index = 0) {
   // 64 bits at a time
   constexpr int unroll = 64;
   int tail = num_bits % unroll;
 #if defined(ARROW_HAVE_AVX2)
   if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
     if (filter_input_indexes) {
-      bits_filter_indexes_avx2(bit_to_search, num_bits - tail, bits, 
input_indexes,
-                               num_indexes, indexes);
+      avx2::bits_filter_indexes_avx2(bit_to_search, num_bits - tail, bits, 
input_indexes,
+                                     num_indexes, indexes);
     } else {
-      bits_to_indexes_avx2(bit_to_search, num_bits - tail, bits, num_indexes, 
indexes,
-                           base_index);
+      avx2::bits_to_indexes_avx2(bit_to_search, num_bits - tail, bits, 
num_indexes,
+                                 indexes, base_index);
     }
   } else {
 #endif
@@ -160,9 +161,9 @@ void bit_util::bits_to_indexes_internal(int64_t 
hardware_flags, const int num_bi
   }
 }
 
-void bit_util::bits_to_indexes(int bit_to_search, int64_t hardware_flags, int 
num_bits,
-                               const uint8_t* bits, int* num_indexes, 
uint16_t* indexes,
-                               int bit_offset) {
+void bits_to_indexes(int bit_to_search, int64_t hardware_flags, int num_bits,
+                     const uint8_t* bits, int* num_indexes, uint16_t* indexes,
+                     int bit_offset) {
   bits += bit_offset / 8;
   bit_offset %= 8;
   *num_indexes = 0;
@@ -193,10 +194,9 @@ void bit_util::bits_to_indexes(int bit_to_search, int64_t 
hardware_flags, int nu
   *num_indexes += num_indexes_new;
 }
 
-void bit_util::bits_filter_indexes(int bit_to_search, int64_t hardware_flags,
-                                   const int num_bits, const uint8_t* bits,
-                                   const uint16_t* input_indexes, int* 
num_indexes,
-                                   uint16_t* indexes, int bit_offset) {
+void bits_filter_indexes(int bit_to_search, int64_t hardware_flags, const int 
num_bits,
+                         const uint8_t* bits, const uint16_t* input_indexes,
+                         int* num_indexes, uint16_t* indexes, int bit_offset) {
   bits += bit_offset / 8;
   bit_offset %= 8;
   if (bit_offset != 0) {
@@ -226,10 +226,9 @@ void bit_util::bits_filter_indexes(int bit_to_search, 
int64_t hardware_flags,
   }
 }
 
-void bit_util::bits_split_indexes(int64_t hardware_flags, const int num_bits,
-                                  const uint8_t* bits, int* num_indexes_bit0,
-                                  uint16_t* indexes_bit0, uint16_t* 
indexes_bit1,
-                                  int bit_offset) {
+void bits_split_indexes(int64_t hardware_flags, const int num_bits, const 
uint8_t* bits,
+                        int* num_indexes_bit0, uint16_t* indexes_bit0,
+                        uint16_t* indexes_bit1, int bit_offset) {
   bits_to_indexes(0, hardware_flags, num_bits, bits, num_indexes_bit0, 
indexes_bit0,
                   bit_offset);
   int num_indexes_bit1;
@@ -237,8 +236,8 @@ void bit_util::bits_split_indexes(int64_t hardware_flags, 
const int num_bits,
                   bit_offset);
 }
 
-void bit_util::bits_to_bytes(int64_t hardware_flags, const int num_bits,
-                             const uint8_t* bits, uint8_t* bytes, int 
bit_offset) {
+void bits_to_bytes(int64_t hardware_flags, const int num_bits, const uint8_t* 
bits,
+                   uint8_t* bytes, int bit_offset) {
   bits += bit_offset / 8;
   bit_offset %= 8;
   if (bit_offset != 0) {
@@ -258,7 +257,7 @@ void bit_util::bits_to_bytes(int64_t hardware_flags, const 
int num_bits,
   if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
     // The function call below processes whole 32 bit chunks together.
     num_processed = num_bits - (num_bits % 32);
-    bits_to_bytes_avx2(num_processed, bits, bytes);
+    avx2::bits_to_bytes_avx2(num_processed, bits, bytes);
   }
 #endif
   // Processing 8 bits at a time
@@ -290,8 +289,8 @@ void bit_util::bits_to_bytes(int64_t hardware_flags, const 
int num_bits,
   }
 }
 
-void bit_util::bytes_to_bits(int64_t hardware_flags, const int num_bits,
-                             const uint8_t* bytes, uint8_t* bits, int 
bit_offset) {
+void bytes_to_bits(int64_t hardware_flags, const int num_bits, const uint8_t* 
bytes,
+                   uint8_t* bits, int bit_offset) {
   bits += bit_offset / 8;
   bit_offset %= 8;
   if (bit_offset != 0) {
@@ -314,7 +313,7 @@ void bit_util::bytes_to_bits(int64_t hardware_flags, const 
int num_bits,
   if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
     // The function call below processes whole 32 bit chunks together.
     num_processed = num_bits - (num_bits % 32);
-    bytes_to_bits_avx2(num_processed, bytes, bits);
+    avx2::bytes_to_bits_avx2(num_processed, bytes, bits);
   }
 #endif
   // Process 8 bits at a time
@@ -338,11 +337,11 @@ void bit_util::bytes_to_bits(int64_t hardware_flags, 
const int num_bits,
   }
 }
 
-bool bit_util::are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
-                                  uint32_t num_bytes) {
+bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
+                        uint32_t num_bytes) {
 #if defined(ARROW_HAVE_AVX2)
   if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
-    return are_all_bytes_zero_avx2(bytes, num_bytes);
+    return avx2::are_all_bytes_zero_avx2(bytes, num_bytes);
   }
 #endif
   uint64_t result_or = 0;
@@ -358,6 +357,7 @@ bool bit_util::are_all_bytes_zero(int64_t hardware_flags, 
const uint8_t* bytes,
   return result_or == 0;
 }
 
+}  // namespace bit_util
 }  // namespace util
 
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/util.h b/cpp/src/arrow/compute/util.h
index 6e1bb79674..489139eab8 100644
--- a/cpp/src/arrow/compute/util.h
+++ b/cpp/src/arrow/compute/util.h
@@ -139,69 +139,55 @@ class TempVectorHolder {
   uint32_t num_elements_;
 };
 
-class ARROW_EXPORT bit_util {
- public:
-  static void bits_to_indexes(int bit_to_search, int64_t hardware_flags,
-                              const int num_bits, const uint8_t* bits, int* 
num_indexes,
-                              uint16_t* indexes, int bit_offset = 0);
+namespace bit_util {
 
-  static void bits_filter_indexes(int bit_to_search, int64_t hardware_flags,
+ARROW_EXPORT void bits_to_indexes(int bit_to_search, int64_t hardware_flags,
                                   const int num_bits, const uint8_t* bits,
-                                  const uint16_t* input_indexes, int* 
num_indexes,
-                                  uint16_t* indexes, int bit_offset = 0);
+                                  int* num_indexes, uint16_t* indexes,
+                                  int bit_offset = 0);
 
-  // Input and output indexes may be pointing to the same data (in-place 
filtering).
-  static void bits_split_indexes(int64_t hardware_flags, const int num_bits,
-                                 const uint8_t* bits, int* num_indexes_bit0,
-                                 uint16_t* indexes_bit0, uint16_t* 
indexes_bit1,
-                                 int bit_offset = 0);
+ARROW_EXPORT void bits_filter_indexes(int bit_to_search, int64_t 
hardware_flags,
+                                      const int num_bits, const uint8_t* bits,
+                                      const uint16_t* input_indexes, int* 
num_indexes,
+                                      uint16_t* indexes, int bit_offset = 0);
 
-  // Bit 1 is replaced with byte 0xFF.
-  static void bits_to_bytes(int64_t hardware_flags, const int num_bits,
-                            const uint8_t* bits, uint8_t* bytes, int 
bit_offset = 0);
+// Input and output indexes may be pointing to the same data (in-place 
filtering).
+ARROW_EXPORT void bits_split_indexes(int64_t hardware_flags, const int 
num_bits,
+                                     const uint8_t* bits, int* 
num_indexes_bit0,
+                                     uint16_t* indexes_bit0, uint16_t* 
indexes_bit1,
+                                     int bit_offset = 0);
 
-  // Return highest bit of each byte.
-  static void bytes_to_bits(int64_t hardware_flags, const int num_bits,
-                            const uint8_t* bytes, uint8_t* bits, int 
bit_offset = 0);
+// Bit 1 is replaced with byte 0xFF.
+ARROW_EXPORT void bits_to_bytes(int64_t hardware_flags, const int num_bits,
+                                const uint8_t* bits, uint8_t* bytes, int 
bit_offset = 0);
 
-  static bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
-                                 uint32_t num_bytes);
+// Return highest bit of each byte.
+ARROW_EXPORT void bytes_to_bits(int64_t hardware_flags, const int num_bits,
+                                const uint8_t* bytes, uint8_t* bits, int 
bit_offset = 0);
 
- private:
-  inline static uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int 
num_bytes);
-  inline static void SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, 
uint64_t value);
-  inline static void bits_to_indexes_helper(uint64_t word, uint16_t base_index,
-                                            int* num_indexes, uint16_t* 
indexes);
-  inline static void bits_filter_indexes_helper(uint64_t word,
-                                                const uint16_t* input_indexes,
-                                                int* num_indexes, uint16_t* 
indexes);
-  template <int bit_to_search, bool filter_input_indexes>
-  static void bits_to_indexes_internal(int64_t hardware_flags, const int 
num_bits,
-                                       const uint8_t* bits, const uint16_t* 
input_indexes,
-                                       int* num_indexes, uint16_t* indexes,
-                                       uint16_t base_index = 0);
+ARROW_EXPORT bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* 
bytes,
+                                     uint32_t num_bytes);
 
 #if defined(ARROW_HAVE_AVX2)
-  static void bits_to_indexes_avx2(int bit_to_search, const int num_bits,
-                                   const uint8_t* bits, int* num_indexes,
-                                   uint16_t* indexes, uint16_t base_index = 0);
-  static void bits_filter_indexes_avx2(int bit_to_search, const int num_bits,
-                                       const uint8_t* bits, const uint16_t* 
input_indexes,
-                                       int* num_indexes, uint16_t* indexes);
-  template <int bit_to_search>
-  static void bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
-                                       int* num_indexes, uint16_t* indexes,
-                                       uint16_t base_index = 0);
-  template <int bit_to_search>
-  static void bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* 
bits,
+
+namespace avx2 {
+ARROW_EXPORT void bits_filter_indexes_avx2(int bit_to_search, const int 
num_bits,
+                                           const uint8_t* bits,
                                            const uint16_t* input_indexes,
                                            int* num_indexes, uint16_t* 
indexes);
-  static void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, 
uint8_t* bytes);
-  static void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, 
uint8_t* bits);
-  static bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t 
num_bytes);
+ARROW_EXPORT void bits_to_indexes_avx2(int bit_to_search, const int num_bits,
+                                       const uint8_t* bits, int* num_indexes,
+                                       uint16_t* indexes, uint16_t base_index 
= 0);
+ARROW_EXPORT void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits,
+                                     uint8_t* bytes);
+ARROW_EXPORT void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes,
+                                     uint8_t* bits);
+ARROW_EXPORT bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t 
num_bytes);
+}  // namespace avx2
+
 #endif
-};
 
+}  // namespace bit_util
 }  // namespace util
 
 namespace compute {
diff --git a/cpp/src/arrow/compute/util_avx2.cc 
b/cpp/src/arrow/compute/util_avx2.cc
index 7c2a378254..89ec6aa97a 100644
--- a/cpp/src/arrow/compute/util_avx2.cc
+++ b/cpp/src/arrow/compute/util_avx2.cc
@@ -16,30 +16,18 @@
 // under the License.
 
 #include <immintrin.h>
+#include <cstring>
 
-#include "arrow/acero/util.h"
 #include "arrow/util/bit_util.h"
-
-namespace arrow {
-namespace util {
+#include "arrow/util/logging.h"
 
 #if defined(ARROW_HAVE_AVX2)
 
-void bit_util::bits_to_indexes_avx2(int bit_to_search, const int num_bits,
-                                    const uint8_t* bits, int* num_indexes,
-                                    uint16_t* indexes, uint16_t base_index) {
-  if (bit_to_search == 0) {
-    bits_to_indexes_imp_avx2<0>(num_bits, bits, num_indexes, indexes, 
base_index);
-  } else {
-    ARROW_DCHECK(bit_to_search == 1);
-    bits_to_indexes_imp_avx2<1>(num_bits, bits, num_indexes, indexes, 
base_index);
-  }
-}
+namespace arrow::util::avx2 {
 
 template <int bit_to_search>
-void bit_util::bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* 
bits,
-                                        int* num_indexes, uint16_t* indexes,
-                                        uint16_t base_index) {
+void bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits, int* 
num_indexes,
+                              uint16_t* indexes, uint16_t base_index = 0) {
   // 64 bits at a time
   constexpr int unroll = 64;
 
@@ -82,21 +70,20 @@ void bit_util::bits_to_indexes_imp_avx2(const int num_bits, 
const uint8_t* bits,
   }
 }
 
-void bit_util::bits_filter_indexes_avx2(int bit_to_search, const int num_bits,
-                                        const uint8_t* bits,
-                                        const uint16_t* input_indexes, int* 
num_indexes,
-                                        uint16_t* indexes) {
+void bits_to_indexes_avx2(int bit_to_search, const int num_bits, const 
uint8_t* bits,
+                          int* num_indexes, uint16_t* indexes, uint16_t 
base_index) {
   if (bit_to_search == 0) {
-    bits_filter_indexes_imp_avx2<0>(num_bits, bits, input_indexes, 
num_indexes, indexes);
+    bits_to_indexes_imp_avx2<0>(num_bits, bits, num_indexes, indexes, 
base_index);
   } else {
-    bits_filter_indexes_imp_avx2<1>(num_bits, bits, input_indexes, 
num_indexes, indexes);
+    ARROW_DCHECK(bit_to_search == 1);
+    bits_to_indexes_imp_avx2<1>(num_bits, bits, num_indexes, indexes, 
base_index);
   }
 }
 
 template <int bit_to_search>
-void bit_util::bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* 
bits,
-                                            const uint16_t* input_indexes,
-                                            int* out_num_indexes, uint16_t* 
indexes) {
+void bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
+                                  const uint16_t* input_indexes, int* 
out_num_indexes,
+                                  uint16_t* indexes) {
   // 64 bits at a time
   constexpr int unroll = 64;
 
@@ -167,8 +154,17 @@ void bit_util::bits_filter_indexes_imp_avx2(const int 
num_bits, const uint8_t* b
   *out_num_indexes = num_indexes;
 }
 
-void bit_util::bits_to_bytes_avx2(const int num_bits, const uint8_t* bits,
-                                  uint8_t* bytes) {
+void bits_filter_indexes_avx2(int bit_to_search, const int num_bits, const 
uint8_t* bits,
+                              const uint16_t* input_indexes, int* num_indexes,
+                              uint16_t* indexes) {
+  if (bit_to_search == 0) {
+    bits_filter_indexes_imp_avx2<0>(num_bits, bits, input_indexes, 
num_indexes, indexes);
+  } else {
+    bits_filter_indexes_imp_avx2<1>(num_bits, bits, input_indexes, 
num_indexes, indexes);
+  }
+}
+
+void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, uint8_t* 
bytes) {
   constexpr int unroll = 32;
 
   constexpr uint64_t kEachByteIs1 = 0x0101010101010101ULL;
@@ -188,8 +184,7 @@ void bit_util::bits_to_bytes_avx2(const int num_bits, const 
uint8_t* bits,
   }
 }
 
-void bit_util::bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes,
-                                  uint8_t* bits) {
+void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, uint8_t* 
bits) {
   constexpr int unroll = 32;
   // Processing 32 bits at a time
   for (int i = 0; i < num_bits / unroll; ++i) {
@@ -198,7 +193,7 @@ void bit_util::bytes_to_bits_avx2(const int num_bits, const 
uint8_t* bytes,
   }
 }
 
-bool bit_util::are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t 
num_bytes) {
+bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes) {
   __m256i result_or = _mm256_setzero_si256();
   uint32_t i;
   for (i = 0; i < num_bytes / 32; ++i) {
@@ -216,7 +211,6 @@ bool bit_util::are_all_bytes_zero_avx2(const uint8_t* 
bytes, uint32_t num_bytes)
   return result_or32 == 0;
 }
 
-#endif  // ARROW_HAVE_AVX2
+}  // namespace arrow::util::avx2
 
-}  // namespace util
-}  // namespace arrow
+#endif  // ARROW_HAVE_AVX2

Reply via email to