bkietz commented on a change in pull request #10487:
URL: https://github.com/apache/arrow/pull/10487#discussion_r652054092



##########
File path: cpp/src/arrow/util/bitmap.h
##########
@@ -225,6 +248,99 @@ class ARROW_EXPORT Bitmap : public 
util::ToStringOstreamable<Bitmap>,
     return min_offset;
   }
 
+  /// \brief Visit words of bits from each input bitmap as array<Word, N> and 
collects
+  /// outputs to an array<Word, M>, to be written into the output bitmaps 
accordingly.
+  ///
+  /// All bitmaps must have identical length. The first bit in a visited bitmap
+  /// may be offset within the first visited word, but words will otherwise 
contain
+  /// densely packed bits loaded from the bitmap. That offset within the first 
word is
+  /// returned.
+  /// Visitor is expected to have the following signature
+  ///     [](const std::array<Word, N>& in_words, std::array<Word, M>* 
out_words){...}
+  ///
+  // NOTE: this function is efficient on 3+ sufficiently large bitmaps.
+  // It also has a large prolog / epilog overhead and should be used
+  // carefully in other cases.
+  // For 2 bitmaps or less, and/or smaller bitmaps, see also 
VisitTwoBitBlocksVoid
+  // and BitmapUInt64Reader.
+  template <size_t N, size_t M, typename Visitor,
+            typename Word = typename std::decay<
+                internal::call_traits::argument_type<0, 
Visitor&&>>::type::value_type>
+  static void VisitWordsAndWrite(const std::array<Bitmap, N>& bitmaps_arg,
+                                 std::array<Bitmap, M>* out_bitmaps_arg,
+                                 Visitor&& visitor) {
+    constexpr int64_t kBitWidth = sizeof(Word) * 8;
+
+    int64_t bit_length = BitLength(bitmaps_arg);
+    assert(bit_length == BitLength(*out_bitmaps_arg));
+
+    std::array<BitmapWordReader<Word>, N> readers;
+    for (size_t i = 0; i < N; ++i) {
+      readers[i] = BitmapWordReader<Word>(bitmaps_arg[i].buffer_->data(),
+                                          bitmaps_arg[i].offset_, 
bitmaps_arg[i].length_);
+    }
+
+    std::array<BitmapWordWriter<Word>, M> writers;
+    for (size_t i = 0; i < M; ++i) {
+      const Bitmap& out_bitmap = out_bitmaps_arg->at(i);
+      writers[i] = BitmapWordWriter<Word>(out_bitmap.buffer_->mutable_data(),
+                                          out_bitmap.offset_, 
out_bitmap.length_);
+    }
+
+    std::array<Word, N> visited_words;
+    visited_words.fill(0);
+    std::array<Word, M> output_words;
+    output_words.fill(0);
+
+    // every reader will have same number of words, since they are same 
length'ed
+    // todo this will be inefficient in some cases. When there are offsets 
beyond Word
+    //  boundary, every Word would have to be created from 2 adjoining Words
+    auto n_words = readers[0].words();
+    bit_length -= n_words * kBitWidth;
+    while (n_words--) {
+      // first collect all words to visited_words array
+      for (size_t i = 0; i < N; i++) {
+        visited_words[i] = readers[i].NextWord();
+      }
+      visitor(visited_words, &output_words);
+      for (size_t i = 0; i < M; i++) {
+        writers[i].PutNextWord(output_words[i]);
+      }
+    }
+
+    // every reader will have same number of trailing bytes, because of the 
above reason
+    // tailing portion could be more than one word! (ref: BitmapWordReader 
constructor)
+    // remaining full/ partial words to write
+
+    if (bit_length) {
+      // convert the word visitor lambda to a byte_visitor
+      auto byte_visitor = [&](const std::array<uint8_t, N>& in,
+                              std::array<uint8_t, M>* out) {
+        std::array<Word, N> in_words;
+        std::array<Word, M> out_words;
+        std::copy(in.begin(), in.end(), in_words.begin());
+        visitor(in_words, &out_words);
+        std::move(out_words.begin(), out_words.end(), out->begin());

Review comment:
       Looks like this prompts a conversion warning on MSVC
   
https://github.com/apache/arrow/pull/10487/checks?check_run_id=2822703161#step:7:774
   You'll have to write the loop out with explicit casts




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to