bkietz commented on a change in pull request #10487:
URL: https://github.com/apache/arrow/pull/10487#discussion_r660081336
##########
File path: cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
##########
@@ -56,8 +57,12 @@ TYPED_TEST(TestIfElsePrimitive, IfElseFixedSizeRand) {
random::RandomArrayGenerator rand(/*seed=*/0);
int64_t len = 1000;
- auto cond = std::static_pointer_cast<BooleanArray>(
- rand.ArrayOf(boolean(), len, /*null_probability=*/0.01));
+ ASSERT_OK_AND_ASSIGN(auto temp1, MakeArrayFromScalar(BooleanScalar(true),
64));
Review comment:
Please add a comment describing what's useful about this construction
##########
File path: cpp/src/arrow/util/bitmap.h
##########
@@ -225,6 +243,132 @@ class ARROW_EXPORT Bitmap : public
util::ToStringOstreamable<Bitmap>,
return min_offset;
}
+ template <size_t N, size_t M, typename ReaderT, typename WriterT, typename
Visitor,
+ typename Word = typename std::decay<
+ internal::call_traits::argument_type<0,
Visitor&&>>::type::value_type>
+ static void RunVisitWordsAndWriteLoop(int64_t bit_length,
+ std::array<ReaderT, N>& readers,
+ std::array<WriterT, M>& writers,
+ Visitor&& visitor) {
+ constexpr int64_t kBitWidth = sizeof(Word) * 8;
+
+ std::array<Word, N> visited_words;
+ std::array<Word, M> output_words;
+
+ // every reader will have same number of words, since they are same
length'ed
+ // TODO($JIRA) this will be inefficient in some cases. When there are
offsets beyond
+ // Word boundary, every Word would have to be created from 2 adjoining
Words
+ auto n_words = readers[0].words();
+ bit_length -= n_words * kBitWidth;
+ while (n_words--) {
+ // first collect all words to visited_words array
+ for (size_t i = 0; i < N; i++) {
+ visited_words[i] = readers[i].NextWord();
+ }
+ visitor(visited_words, &output_words);
+ for (size_t i = 0; i < M; i++) {
+ writers[i].PutNextWord(output_words[i]);
+ }
+ }
+
+ // every reader will have same number of trailing bytes, because of the
above reason
+ // tailing portion could be more than one word! (ref: BitmapWordReader
constructor)
+ // remaining full/ partial words to write
+
+ if (bit_length) {
+ // convert the word visitor lambda to a byte_visitor
+ auto byte_visitor = [&](const std::array<uint8_t, N>& in,
+ std::array<uint8_t, M>* out) {
+ std::array<Word, N> in_words;
+ std::array<Word, M> out_words;
+ std::copy(in.begin(), in.end(), in_words.begin());
+ visitor(in_words, &out_words);
+ for (size_t i = 0; i < M; i++) {
+ out->at(i) = static_cast<uint8_t>(out_words[i]);
+ }
+ };
+
+ std::array<uint8_t, N> visited_bytes;
+ std::array<uint8_t, M> output_bytes;
+ int n_bytes = readers[0].trailing_bytes();
+ while (n_bytes--) {
+ visited_bytes.fill(0);
+ output_bytes.fill(0);
+ int valid_bits;
+ for (size_t i = 0; i < N; i++) {
+ visited_bytes[i] = readers[i].NextTrailingByte(valid_bits);
+ }
+ byte_visitor(visited_bytes, &output_bytes);
+ for (size_t i = 0; i < M; i++) {
+ writers[i].PutNextTrailingByte(output_bytes[i], valid_bits);
+ }
+ }
+ }
+ }
+
+ /// \brief Visit words of bits from each input bitmap as array<Word, N> and
collects
+ /// outputs to an array<Word, M>, to be written into the output bitmaps
accordingly.
+ ///
+ /// All bitmaps must have identical length. The first bit in a visited bitmap
+ /// may be offset within the first visited word, but words will otherwise
contain
+ /// densely packed bits loaded from the bitmap. That offset within the first
word is
+ /// returned.
+ /// Visitor is expected to have the following signature
+ /// [](const std::array<Word, N>& in_words, std::array<Word, M>*
out_words){...}
+ ///
+ // NOTE: this function is efficient on 3+ sufficiently large bitmaps.
+ // It also has a large prolog / epilog overhead and should be used
+ // carefully in other cases.
+ // For 2 bitmaps or less, and/or smaller bitmaps, see also
VisitTwoBitBlocksVoid
+ // and BitmapUInt64Reader.
+ template <size_t N, size_t M, typename Visitor,
+ typename Word = typename std::decay<
+ internal::call_traits::argument_type<0,
Visitor&&>>::type::value_type>
+ static void VisitWordsAndWrite(const std::array<Bitmap, N>& bitmaps_arg,
+ std::array<Bitmap, M>* out_bitmaps_arg,
+ Visitor&& visitor) {
+ int64_t bit_length = BitLength(bitmaps_arg);
+ assert(bit_length == BitLength(*out_bitmaps_arg));
+
+ // if both input and output bitmaps have no byte offset, then use special
template
+ if (std::all_of(bitmaps_arg.begin(), bitmaps_arg.end(),
+ [](const Bitmap& b) { return b.offset_ % 8 == 0; }) &&
+ std::all_of(out_bitmaps_arg->begin(), out_bitmaps_arg->end(),
+ [](const Bitmap& b) { return b.offset_ % 8 == 0; })) {
+ std::array<BitmapWordReader<Word, /*may_have_byte_offset=*/false>, N>
readers;
+ for (size_t i = 0; i < N; ++i) {
+ const Bitmap& in_bitmap = bitmaps_arg[i];
+ readers[i] = BitmapWordReader<Word, /*may_have_byte_offset=*/false>(
+ in_bitmap.buffer_->data(), in_bitmap.offset_, in_bitmap.length_);
+ }
+
+ std::array<BitmapWordWriter<Word, /*may_have_byte_offset=*/false>, M>
writers;
+ for (size_t i = 0; i < M; ++i) {
+ const Bitmap& out_bitmap = out_bitmaps_arg->at(i);
+ writers[i] = BitmapWordWriter<Word, /*may_have_byte_offset=*/false>(
+ out_bitmap.buffer_->mutable_data(), out_bitmap.offset_,
out_bitmap.length_);
+ }
+
+ RunVisitWordsAndWriteLoop(bit_length, readers, writers,
std::move(visitor));
+ } else {
+ std::array<BitmapWordReader<Word>, N> readers;
+ for (size_t i = 0; i < N; ++i) {
+ const Bitmap& in_bitmap = bitmaps_arg[i];
+ readers[i] = BitmapWordReader<Word>(in_bitmap.buffer_->data(),
in_bitmap.offset_,
+ in_bitmap.length_);
+ }
+
+ std::array<BitmapWordWriter<Word>, M> writers;
+ for (size_t i = 0; i < M; ++i) {
+ const Bitmap& out_bitmap = out_bitmaps_arg->at(i);
+ writers[i] = BitmapWordWriter<Word>(out_bitmap.buffer_->mutable_data(),
+ out_bitmap.offset_,
out_bitmap.length_);
+ }
+
+ RunVisitWordsAndWriteLoop(bit_length, readers, writers,
std::move(visitor));
Review comment:
```suggestion
RunVisitWordsAndWriteLoop(bit_length, readers, writers, visitor);
```
##########
File path: cpp/src/arrow/util/bit_util.h
##########
@@ -316,5 +316,37 @@ static inline void SetBitTo(uint8_t* bits, int64_t i, bool
bit_is_set) {
ARROW_EXPORT
void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool
bits_are_set);
+/// \brief Sets all bits in the bitmap to true
+ARROW_EXPORT
+void SetBitmap(uint8_t* data, int64_t offset, int64_t length);
+
+/// \brief Clears all bits in the bitmap (set to false)
+ARROW_EXPORT
+void ClearBitmap(uint8_t* data, int64_t offset, int64_t length);
+
+/// Returns a mask with lower i bits set to 1. If i >= sizeof(Word)*8,
all-ones will be
+/// returned
+/// ex:
+/// PrecedingWordBitmask<uint_8>(0)= 0x00
+/// PrecedingWordBitmask<uint_8>(4)= 0x0f
+/// PrecedingWordBitmask<uint_8>(8)= 0xff
+/// PrecedingWordBitmask<uint_32>(8)= 0x00ff
+/// ref: https://stackoverflow.com/a/59523400
+template <typename Word>
+constexpr Word PrecedingWordBitmask(unsigned int const i) {
+ return (static_cast<Word>(i < sizeof(Word) * 8) << (i & (sizeof(Word) * 8 -
1))) - 1;
+}
Review comment:
Since this is constexpr, the examples can be written as static assertions
```suggestion
/// ref: https://stackoverflow.com/a/59523400
template <typename Word>
constexpr Word PrecedingWordBitmask(unsigned int const i) {
return (static_cast<Word>(i < sizeof(Word) * 8) << (i & (sizeof(Word) * 8
- 1))) - 1;
}
static_assert(PrecedingWordBitmask<uint8_t>(0) == 0x00, "");
static_assert(PrecedingWordBitmask<uint8_t>(4) == 0x0f, "");
static_assert(PrecedingWordBitmask<uint8_t>(8) == 0xff, "");
static_assert(PrecedingWordBitmask<uint16_t>(8) == 0x00ff, "");
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]