bkietz commented on a change in pull request #10487:
URL: https://github.com/apache/arrow/pull/10487#discussion_r660081336



##########
File path: cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
##########
@@ -56,8 +57,12 @@ TYPED_TEST(TestIfElsePrimitive, IfElseFixedSizeRand) {
 
   random::RandomArrayGenerator rand(/*seed=*/0);
   int64_t len = 1000;
-  auto cond = std::static_pointer_cast<BooleanArray>(
-      rand.ArrayOf(boolean(), len, /*null_probability=*/0.01));
+  ASSERT_OK_AND_ASSIGN(auto temp1, MakeArrayFromScalar(BooleanScalar(true), 
64));

Review comment:
       Please add a comment describing what's useful about this construction

##########
File path: cpp/src/arrow/util/bitmap.h
##########
@@ -225,6 +243,132 @@ class ARROW_EXPORT Bitmap : public 
util::ToStringOstreamable<Bitmap>,
     return min_offset;
   }
 
+  template <size_t N, size_t M, typename ReaderT, typename WriterT, typename 
Visitor,
+            typename Word = typename std::decay<
+                internal::call_traits::argument_type<0, 
Visitor&&>>::type::value_type>
+  static void RunVisitWordsAndWriteLoop(int64_t bit_length,
+                                        std::array<ReaderT, N>& readers,
+                                        std::array<WriterT, M>& writers,
+                                        Visitor&& visitor) {
+    constexpr int64_t kBitWidth = sizeof(Word) * 8;
+
+    std::array<Word, N> visited_words;
+    std::array<Word, M> output_words;
+
+    // every reader will have same number of words, since they are same 
length'ed
+    // TODO($JIRA) this will be inefficient in some cases. When there are 
offsets beyond
+    //  Word boundary, every Word would have to be created from 2 adjoining 
Words
+    auto n_words = readers[0].words();
+    bit_length -= n_words * kBitWidth;
+    while (n_words--) {
+      // first collect all words to visited_words array
+      for (size_t i = 0; i < N; i++) {
+        visited_words[i] = readers[i].NextWord();
+      }
+      visitor(visited_words, &output_words);
+      for (size_t i = 0; i < M; i++) {
+        writers[i].PutNextWord(output_words[i]);
+      }
+    }
+
+    // every reader will have same number of trailing bytes, because of the 
above reason
+    // tailing portion could be more than one word! (ref: BitmapWordReader 
constructor)
+    // remaining full/ partial words to write
+
+    if (bit_length) {
+      // convert the word visitor lambda to a byte_visitor
+      auto byte_visitor = [&](const std::array<uint8_t, N>& in,
+                              std::array<uint8_t, M>* out) {
+        std::array<Word, N> in_words;
+        std::array<Word, M> out_words;
+        std::copy(in.begin(), in.end(), in_words.begin());
+        visitor(in_words, &out_words);
+        for (size_t i = 0; i < M; i++) {
+          out->at(i) = static_cast<uint8_t>(out_words[i]);
+        }
+      };
+
+      std::array<uint8_t, N> visited_bytes;
+      std::array<uint8_t, M> output_bytes;
+      int n_bytes = readers[0].trailing_bytes();
+      while (n_bytes--) {
+        visited_bytes.fill(0);
+        output_bytes.fill(0);
+        int valid_bits;
+        for (size_t i = 0; i < N; i++) {
+          visited_bytes[i] = readers[i].NextTrailingByte(valid_bits);
+        }
+        byte_visitor(visited_bytes, &output_bytes);
+        for (size_t i = 0; i < M; i++) {
+          writers[i].PutNextTrailingByte(output_bytes[i], valid_bits);
+        }
+      }
+    }
+  }
+
+  /// \brief Visit words of bits from each input bitmap as array<Word, N> and 
collects
+  /// outputs to an array<Word, M>, to be written into the output bitmaps 
accordingly.
+  ///
+  /// All bitmaps must have identical length. The first bit in a visited bitmap
+  /// may be offset within the first visited word, but words will otherwise 
contain
+  /// densely packed bits loaded from the bitmap. That offset within the first 
word is
+  /// returned.
+  /// Visitor is expected to have the following signature
+  ///     [](const std::array<Word, N>& in_words, std::array<Word, M>* 
out_words){...}
+  ///
+  // NOTE: this function is efficient on 3+ sufficiently large bitmaps.
+  // It also has a large prolog / epilog overhead and should be used
+  // carefully in other cases.
+  // For 2 bitmaps or less, and/or smaller bitmaps, see also 
VisitTwoBitBlocksVoid
+  // and BitmapUInt64Reader.
+  template <size_t N, size_t M, typename Visitor,
+            typename Word = typename std::decay<
+                internal::call_traits::argument_type<0, 
Visitor&&>>::type::value_type>
+  static void VisitWordsAndWrite(const std::array<Bitmap, N>& bitmaps_arg,
+                                 std::array<Bitmap, M>* out_bitmaps_arg,
+                                 Visitor&& visitor) {
+    int64_t bit_length = BitLength(bitmaps_arg);
+    assert(bit_length == BitLength(*out_bitmaps_arg));
+
+    // if both input and output bitmaps have no byte offset, then use special 
template
+    if (std::all_of(bitmaps_arg.begin(), bitmaps_arg.end(),
+                    [](const Bitmap& b) { return b.offset_ % 8 == 0; }) &&
+        std::all_of(out_bitmaps_arg->begin(), out_bitmaps_arg->end(),
+                    [](const Bitmap& b) { return b.offset_ % 8 == 0; })) {
+      std::array<BitmapWordReader<Word, /*may_have_byte_offset=*/false>, N> 
readers;
+      for (size_t i = 0; i < N; ++i) {
+        const Bitmap& in_bitmap = bitmaps_arg[i];
+        readers[i] = BitmapWordReader<Word, /*may_have_byte_offset=*/false>(
+            in_bitmap.buffer_->data(), in_bitmap.offset_, in_bitmap.length_);
+      }
+
+      std::array<BitmapWordWriter<Word, /*may_have_byte_offset=*/false>, M> 
writers;
+      for (size_t i = 0; i < M; ++i) {
+        const Bitmap& out_bitmap = out_bitmaps_arg->at(i);
+        writers[i] = BitmapWordWriter<Word, /*may_have_byte_offset=*/false>(
+            out_bitmap.buffer_->mutable_data(), out_bitmap.offset_, 
out_bitmap.length_);
+      }
+
+      RunVisitWordsAndWriteLoop(bit_length, readers, writers, 
std::move(visitor));
+    } else {
+      std::array<BitmapWordReader<Word>, N> readers;
+      for (size_t i = 0; i < N; ++i) {
+        const Bitmap& in_bitmap = bitmaps_arg[i];
+        readers[i] = BitmapWordReader<Word>(in_bitmap.buffer_->data(), 
in_bitmap.offset_,
+                                            in_bitmap.length_);
+      }
+
+      std::array<BitmapWordWriter<Word>, M> writers;
+      for (size_t i = 0; i < M; ++i) {
+        const Bitmap& out_bitmap = out_bitmaps_arg->at(i);
+        writers[i] = BitmapWordWriter<Word>(out_bitmap.buffer_->mutable_data(),
+                                            out_bitmap.offset_, 
out_bitmap.length_);
+      }
+
+      RunVisitWordsAndWriteLoop(bit_length, readers, writers, 
std::move(visitor));

Review comment:
       ```suggestion
         RunVisitWordsAndWriteLoop(bit_length, readers, writers, visitor);
   ```

##########
File path: cpp/src/arrow/util/bit_util.h
##########
@@ -316,5 +316,37 @@ static inline void SetBitTo(uint8_t* bits, int64_t i, bool 
bit_is_set) {
 ARROW_EXPORT
 void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool 
bits_are_set);
 
+/// \brief Sets all bits in the bitmap to true
+ARROW_EXPORT
+void SetBitmap(uint8_t* data, int64_t offset, int64_t length);
+
+/// \brief Clears all bits in the bitmap (set to false)
+ARROW_EXPORT
+void ClearBitmap(uint8_t* data, int64_t offset, int64_t length);
+
+/// Returns a mask with lower i bits set to 1. If i >= sizeof(Word)*8, 
all-ones will be
+/// returned
+/// ex:
+/// PrecedingWordBitmask<uint_8>(0)= 0x00
+/// PrecedingWordBitmask<uint_8>(4)= 0x0f
+/// PrecedingWordBitmask<uint_8>(8)= 0xff
+/// PrecedingWordBitmask<uint_32>(8)= 0x00ff
+/// ref: https://stackoverflow.com/a/59523400
+template <typename Word>
+constexpr Word PrecedingWordBitmask(unsigned int const i) {
+  return (static_cast<Word>(i < sizeof(Word) * 8) << (i & (sizeof(Word) * 8 - 
1))) - 1;
+}

Review comment:
       Since this is constexpr, the examples can be written as static assertions
   ```suggestion
   /// ref: https://stackoverflow.com/a/59523400
   template <typename Word>
   constexpr Word PrecedingWordBitmask(unsigned int const i) {
     return (static_cast<Word>(i < sizeof(Word) * 8) << (i & (sizeof(Word) * 8 
- 1))) - 1;
   }
   static_assert(PrecedingWordBitmask<uint8_t>(0) == 0x00, "");
   static_assert(PrecedingWordBitmask<uint8_t>(4) == 0x0f, "");
   static_assert(PrecedingWordBitmask<uint8_t>(8) == 0xff, "");
   static_assert(PrecedingWordBitmask<uint16_t>(8) == 0x00ff, "");
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to