This is an automated email from the ASF dual-hosted git repository.
pitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 9abd6d3285 GH-45819: [C++] Add OptionalBitmapAnd utility (#49848)
9abd6d3285 is described below
commit 9abd6d328598080df628e4892c53daaf090d3d4f
Author: Adrián Feito Blázquez <[email protected]>
AuthorDate: Thu Apr 30 17:51:11 2026 +0200
GH-45819: [C++] Add OptionalBitmapAnd utility (#49848)
### Rationale for this change
In Arrow, null bitmaps are optional and represented by `nullptr` when a
column contains no nulls. Previously, conjoining two optional bitmaps required
downstream code to manually handle the `nullptr` checks and memory allocations.
This change centralizes that logic into a single, highly optimized utility
function.
### What changes are included in this PR?
* Added `OptionalBitmapAnd` to `cpp/src/arrow/util/bitmap_ops.h` and
`bitmap_ops.cc`.
* The implementation safely handles the four possible memory permutations
with minimal allocations:
1. Both buffers `nullptr`: Returns `nullptr`.
2. Left is `nullptr`: Returns a copy of the right buffer.
3. Right is `nullptr`: Returns a copy of the left buffer.
4. Both valid: Defers to the low-level `BitmapAnd` function.
### Are these changes tested?
Yes. I added a new test case (`OptionalBitmapAnd`) to
`cpp/src/arrow/util/bitmap_test.cc` that explicitly verifies the correct buffer
allocation and bitwise output for all four memory states.
### Are there any user-facing changes?
No. This simply exposes a new C++ utility for internal development.
Closes #45819
* GitHub Issue: #45819
Authored-by: shockp <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/compute/kernels/hash_aggregate.cc | 11 ++---
cpp/src/arrow/util/bitmap_ops.cc | 30 ++++++++++++
cpp/src/arrow/util/bitmap_ops.h | 24 ++++++++++
cpp/src/arrow/util/bitmap_test.cc | 64 +++++++++++++++++++++++++
4 files changed, 122 insertions(+), 7 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
index 73ae31f7ea..8359945319 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -1339,13 +1339,10 @@ struct GroupedBooleanAggregator : public
GroupedAggregator {
null_count = kUnknownNullCount;
ARROW_ASSIGN_OR_RAISE(auto no_nulls, no_nulls_.Finish());
Impl::AdjustForMinCount(no_nulls->mutable_data(), reduced->data(),
num_groups_);
- if (null_bitmap) {
- arrow::internal::BitmapAnd(null_bitmap->data(), /*left_offset=*/0,
- no_nulls->data(), /*right_offset=*/0,
num_groups_,
- /*out_offset=*/0,
null_bitmap->mutable_data());
- } else {
- null_bitmap = std::move(no_nulls);
- }
+
+ ARROW_ASSIGN_OR_RAISE(null_bitmap, arrow::internal::OptionalBitmapAnd(
+ pool_, null_bitmap,
/*left_offset=*/0,
+ no_nulls, /*right_offset=*/0,
num_groups_));
}
return ArrayData::Make(out_type(), num_groups_,
diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc
index cc24146ae9..33a95150d5 100644
--- a/cpp/src/arrow/util/bitmap_ops.cc
+++ b/cpp/src/arrow/util/bitmap_ops.cc
@@ -329,6 +329,36 @@ bool OptionalBitmapEquals(const std::shared_ptr<Buffer>&
left, int64_t left_offs
right ? right->data() : nullptr, right_offset,
length);
}
+Result<std::shared_ptr<Buffer>> OptionalBitmapAnd(MemoryPool* pool,
+ const
std::shared_ptr<Buffer>& left,
+ int64_t left_offset,
+ const
std::shared_ptr<Buffer>& right,
+ int64_t right_offset,
int64_t length,
+ int64_t out_offset) {
+ if (left == nullptr && right == nullptr) {
+ return nullptr;
+ }
+ if (left == nullptr) {
+ if (right_offset >= out_offset && (right_offset - out_offset) % 8 == 0) {
+ int64_t byte_shift = (right_offset - out_offset) / 8;
+ int64_t byte_length = bit_util::BytesForBits(out_offset + length);
+ return SliceBuffer(right, byte_shift, byte_length);
+ }
+ return CopyBitmap(pool, right->data(), right_offset, length, out_offset);
+ }
+ if (right == nullptr) {
+ if (left_offset >= out_offset && (left_offset - out_offset) % 8 == 0) {
+ int64_t byte_shift = (left_offset - out_offset) / 8;
+ int64_t byte_length = bit_util::BytesForBits(out_offset + length);
+ return SliceBuffer(left, byte_shift, byte_length);
+ }
+ return CopyBitmap(pool, left->data(), left_offset, length, out_offset);
+ }
+
+ return BitmapAnd(pool, left->data(), left_offset, right->data(),
right_offset, length,
+ out_offset);
+}
+
namespace {
template <template <typename> class BitOp>
diff --git a/cpp/src/arrow/util/bitmap_ops.h b/cpp/src/arrow/util/bitmap_ops.h
index ac05bc87b3..a503b9ec03 100644
--- a/cpp/src/arrow/util/bitmap_ops.h
+++ b/cpp/src/arrow/util/bitmap_ops.h
@@ -166,6 +166,30 @@ ARROW_EXPORT
void BitmapAnd(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length, int64_t out_offset,
uint8_t* out);
+/// \brief Do a "bitmap and" on two buffers, handling null buffers safely.
+///
+/// If both buffers are null, returns nullptr.
+/// If one buffer is null, returns a potentially sliced view of the valid
buffer
+/// (if bit-alignment allows) or a new copy of it.
+/// If both buffers are valid, allocates a new buffer and performs a bitwise
AND.
+///
+/// \param[in] pool memory pool to allocate memory from
+/// \param[in] left left source buffer (may be null)
+/// \param[in] left_offset bit offset into the left buffer
+/// \param[in] right right source buffer (may be null)
+/// \param[in] right_offset bit offset into the right buffer
+/// \param[in] length number of bits to compute
+/// \param[in] out_offset bit offset into the output buffer
+///
+/// \return Resulting buffer (may be null)
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> OptionalBitmapAnd(MemoryPool* pool,
+ const
std::shared_ptr<Buffer>& left,
+ int64_t left_offset,
+ const
std::shared_ptr<Buffer>& right,
+ int64_t right_offset,
int64_t length,
+ int64_t out_offset = 0);
+
/// \brief Do a "bitmap or" for the given bit length on right and left buffers
/// starting at their respective bit-offsets and put the results in out_buffer
/// starting at the given bit-offset.
diff --git a/cpp/src/arrow/util/bitmap_test.cc
b/cpp/src/arrow/util/bitmap_test.cc
index 603008420a..0a71f4e603 100644
--- a/cpp/src/arrow/util/bitmap_test.cc
+++ b/cpp/src/arrow/util/bitmap_test.cc
@@ -17,6 +17,7 @@
#include <gmock/gmock.h>
#include <gtest/gtest.h>
+#include <cstring>
#include "arrow/array/array_base.h"
#include "arrow/array/data.h"
@@ -1236,6 +1237,69 @@ TEST(BitmapOpTest, PartialLeadingByteSafety) {
}
}
+TEST(BitmapOpTest, OptionalBitmapAnd) {
+ MemoryPool* pool = default_memory_pool();
+
+ // Define non-trivial, messy bit patterns
+ std::vector<int> left_bits = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1};
+ std::vector<int> right_bits = {0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0};
+ std::vector<int> expected_bits = {0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0};
+
+ std::shared_ptr<Buffer> left, right, expected;
+ int64_t length;
+
+ // Convert our vectors into actual memory-aligned Arrow buffers
+ BitmapFromVector(left_bits, /*offset=*/0, &left, &length);
+ BitmapFromVector(right_bits, /*offset=*/0, &right, &length);
+ BitmapFromVector(expected_bits, /*offset=*/0, &expected, &length);
+
+ // State 1: Both nullptr. Should return nullptr.
+ ASSERT_OK_AND_ASSIGN(auto result1,
+ OptionalBitmapAnd(pool, nullptr, 0, nullptr, 0, length,
0));
+ ASSERT_EQ(result1, nullptr);
+
+ // State 2: Left nullptr, Right valid. Zero offset (triggers SliceBuffer).
+ ASSERT_OK_AND_ASSIGN(auto result2,
+ OptionalBitmapAnd(pool, nullptr, 0, right, 0, length,
0));
+ ASSERT_NE(result2, nullptr);
+ EXPECT_TRUE(BitmapEquals(right->data(), 0, result2->data(), 0, length));
+
+ // State 3: Left valid, Right nullptr. Zero offset (triggers SliceBuffer).
+ ASSERT_OK_AND_ASSIGN(auto result3,
+ OptionalBitmapAnd(pool, left, 0, nullptr, 0, length,
0));
+ ASSERT_NE(result3, nullptr);
+ EXPECT_TRUE(BitmapEquals(left->data(), 0, result3->data(), 0, length));
+
+ // State 4: Both valid (Non-trivial bitwise AND)
+ ASSERT_OK_AND_ASSIGN(auto result4,
+ OptionalBitmapAnd(pool, left, 0, right, 0, length, 0));
+ ASSERT_NE(result4, nullptr);
+
+ // Verify the bitwise AND matches our expected messy pattern
+ EXPECT_TRUE(BitmapEquals(expected->data(), 0, result4->data(), 0, length));
+
+ // State 5: Left nullptr, Right valid. Matching offset modulo 8 (triggers
SliceBuffer).
+ int64_t offset1 = 2;
+ int64_t length1 = length - offset1;
+ ASSERT_OK_AND_ASSIGN(auto result5, OptionalBitmapAnd(pool, nullptr, 0,
right, offset1,
+ length1, offset1));
+ ASSERT_NE(result5, nullptr);
+ EXPECT_TRUE(BitmapEquals(right->data(), offset1, result5->data(), offset1,
length1));
+
+ // State 6: Left nullptr, Right valid. Differing offset modulo 8 (triggers
CopyBitmap).
+ int64_t offset2 = 3;
+ ASSERT_OK_AND_ASSIGN(auto result6, OptionalBitmapAnd(pool, nullptr, 0,
right, offset1,
+ length1, offset2));
+ ASSERT_NE(result6, nullptr);
+ EXPECT_TRUE(BitmapEquals(right->data(), offset1, result6->data(), offset2,
length1));
+
+ // State 7: Left valid, Right nullptr. Differing offset modulo 8 (triggers
CopyBitmap).
+ ASSERT_OK_AND_ASSIGN(
+ auto result7, OptionalBitmapAnd(pool, left, offset1, nullptr, 0,
length1, offset2));
+ ASSERT_NE(result7, nullptr);
+ EXPECT_TRUE(BitmapEquals(left->data(), offset1, result7->data(), offset2,
length1));
+}
+
// Tests for Bitmap visiting.
// test the basic assumption of word level Bitmap::Visit