This is an automated email from the ASF dual-hosted git repository.

pitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 9abd6d3285 GH-45819: [C++] Add OptionalBitmapAnd utility (#49848)
9abd6d3285 is described below

commit 9abd6d328598080df628e4892c53daaf090d3d4f
Author: Adrián Feito Blázquez <[email protected]>
AuthorDate: Thu Apr 30 17:51:11 2026 +0200

    GH-45819: [C++] Add OptionalBitmapAnd utility (#49848)
    
    ### Rationale for this change
    
    In Arrow, null bitmaps are optional and represented by `nullptr` when a 
column contains no nulls. Previously, conjoining two optional bitmaps required 
downstream code to manually handle the `nullptr` checks and memory allocations. 
This change centralizes that logic into a single, highly optimized utility 
function.
    
    ### What changes are included in this PR?
    
    * Added `OptionalBitmapAnd` to `cpp/src/arrow/util/bitmap_ops.h` and 
`bitmap_ops.cc`.
    * The implementation safely handles the four possible memory permutations 
with minimal allocations:
      1. Both buffers `nullptr`: Returns `nullptr`.
      2. Left is `nullptr`: Returns a copy of the right buffer.
      3. Right is `nullptr`: Returns a copy of the left buffer.
      4. Both valid: Defers to the low-level `BitmapAnd` function.
    
    ### Are these changes tested?
    
    Yes. I added a new test case (`OptionalBitmapAnd`) to 
`cpp/src/arrow/util/bitmap_test.cc` that explicitly verifies the correct buffer 
allocation and bitwise output for all four memory states.
    
    ### Are there any user-facing changes?
    
    No. This simply exposes a new C++ utility for internal development.
    
    Closes #45819
    * GitHub Issue: #45819
    
    Authored-by: shockp <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/compute/kernels/hash_aggregate.cc | 11 ++---
 cpp/src/arrow/util/bitmap_ops.cc                | 30 ++++++++++++
 cpp/src/arrow/util/bitmap_ops.h                 | 24 ++++++++++
 cpp/src/arrow/util/bitmap_test.cc               | 64 +++++++++++++++++++++++++
 4 files changed, 122 insertions(+), 7 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc 
b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
index 73ae31f7ea..8359945319 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -1339,13 +1339,10 @@ struct GroupedBooleanAggregator : public 
GroupedAggregator {
       null_count = kUnknownNullCount;
       ARROW_ASSIGN_OR_RAISE(auto no_nulls, no_nulls_.Finish());
       Impl::AdjustForMinCount(no_nulls->mutable_data(), reduced->data(), 
num_groups_);
-      if (null_bitmap) {
-        arrow::internal::BitmapAnd(null_bitmap->data(), /*left_offset=*/0,
-                                   no_nulls->data(), /*right_offset=*/0, 
num_groups_,
-                                   /*out_offset=*/0, 
null_bitmap->mutable_data());
-      } else {
-        null_bitmap = std::move(no_nulls);
-      }
+
+      ARROW_ASSIGN_OR_RAISE(null_bitmap, arrow::internal::OptionalBitmapAnd(
+                                             pool_, null_bitmap, 
/*left_offset=*/0,
+                                             no_nulls, /*right_offset=*/0, 
num_groups_));
     }
 
     return ArrayData::Make(out_type(), num_groups_,
diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc
index cc24146ae9..33a95150d5 100644
--- a/cpp/src/arrow/util/bitmap_ops.cc
+++ b/cpp/src/arrow/util/bitmap_ops.cc
@@ -329,6 +329,36 @@ bool OptionalBitmapEquals(const std::shared_ptr<Buffer>& 
left, int64_t left_offs
                               right ? right->data() : nullptr, right_offset, 
length);
 }
 
+Result<std::shared_ptr<Buffer>> OptionalBitmapAnd(MemoryPool* pool,
+                                                  const 
std::shared_ptr<Buffer>& left,
+                                                  int64_t left_offset,
+                                                  const 
std::shared_ptr<Buffer>& right,
+                                                  int64_t right_offset, 
int64_t length,
+                                                  int64_t out_offset) {
+  if (left == nullptr && right == nullptr) {
+    return nullptr;
+  }
+  if (left == nullptr) {
+    if (right_offset >= out_offset && (right_offset - out_offset) % 8 == 0) {
+      int64_t byte_shift = (right_offset - out_offset) / 8;
+      int64_t byte_length = bit_util::BytesForBits(out_offset + length);
+      return SliceBuffer(right, byte_shift, byte_length);
+    }
+    return CopyBitmap(pool, right->data(), right_offset, length, out_offset);
+  }
+  if (right == nullptr) {
+    if (left_offset >= out_offset && (left_offset - out_offset) % 8 == 0) {
+      int64_t byte_shift = (left_offset - out_offset) / 8;
+      int64_t byte_length = bit_util::BytesForBits(out_offset + length);
+      return SliceBuffer(left, byte_shift, byte_length);
+    }
+    return CopyBitmap(pool, left->data(), left_offset, length, out_offset);
+  }
+
+  return BitmapAnd(pool, left->data(), left_offset, right->data(), 
right_offset, length,
+                   out_offset);
+}
+
 namespace {
 
 template <template <typename> class BitOp>
diff --git a/cpp/src/arrow/util/bitmap_ops.h b/cpp/src/arrow/util/bitmap_ops.h
index ac05bc87b3..a503b9ec03 100644
--- a/cpp/src/arrow/util/bitmap_ops.h
+++ b/cpp/src/arrow/util/bitmap_ops.h
@@ -166,6 +166,30 @@ ARROW_EXPORT
 void BitmapAnd(const uint8_t* left, int64_t left_offset, const uint8_t* right,
                int64_t right_offset, int64_t length, int64_t out_offset, 
uint8_t* out);
 
+/// \brief Do a "bitmap and" on two buffers, handling null buffers safely.
+///
+/// If both buffers are null, returns nullptr.
+/// If one buffer is null, returns a potentially sliced view of the valid 
buffer
+/// (if bit-alignment allows) or a new copy of it.
+/// If both buffers are valid, allocates a new buffer and performs a bitwise 
AND.
+///
+/// \param[in] pool memory pool to allocate memory from
+/// \param[in] left left source buffer (may be null)
+/// \param[in] left_offset bit offset into the left buffer
+/// \param[in] right right source buffer (may be null)
+/// \param[in] right_offset bit offset into the right buffer
+/// \param[in] length number of bits to compute
+/// \param[in] out_offset bit offset into the output buffer
+///
+/// \return Resulting buffer (may be null)
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> OptionalBitmapAnd(MemoryPool* pool,
+                                                  const 
std::shared_ptr<Buffer>& left,
+                                                  int64_t left_offset,
+                                                  const 
std::shared_ptr<Buffer>& right,
+                                                  int64_t right_offset, 
int64_t length,
+                                                  int64_t out_offset = 0);
+
 /// \brief Do a "bitmap or" for the given bit length on right and left buffers
 /// starting at their respective bit-offsets and put the results in out_buffer
 /// starting at the given bit-offset.
diff --git a/cpp/src/arrow/util/bitmap_test.cc 
b/cpp/src/arrow/util/bitmap_test.cc
index 603008420a..0a71f4e603 100644
--- a/cpp/src/arrow/util/bitmap_test.cc
+++ b/cpp/src/arrow/util/bitmap_test.cc
@@ -17,6 +17,7 @@
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
+#include <cstring>
 
 #include "arrow/array/array_base.h"
 #include "arrow/array/data.h"
@@ -1236,6 +1237,69 @@ TEST(BitmapOpTest, PartialLeadingByteSafety) {
   }
 }
 
+TEST(BitmapOpTest, OptionalBitmapAnd) {
+  MemoryPool* pool = default_memory_pool();
+
+  // Define non-trivial, messy bit patterns
+  std::vector<int> left_bits = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1};
+  std::vector<int> right_bits = {0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0};
+  std::vector<int> expected_bits = {0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0};
+
+  std::shared_ptr<Buffer> left, right, expected;
+  int64_t length;
+
+  // Convert our vectors into actual memory-aligned Arrow buffers
+  BitmapFromVector(left_bits, /*offset=*/0, &left, &length);
+  BitmapFromVector(right_bits, /*offset=*/0, &right, &length);
+  BitmapFromVector(expected_bits, /*offset=*/0, &expected, &length);
+
+  // State 1: Both nullptr. Should return nullptr.
+  ASSERT_OK_AND_ASSIGN(auto result1,
+                       OptionalBitmapAnd(pool, nullptr, 0, nullptr, 0, length, 
0));
+  ASSERT_EQ(result1, nullptr);
+
+  // State 2: Left nullptr, Right valid. Zero offset (triggers SliceBuffer).
+  ASSERT_OK_AND_ASSIGN(auto result2,
+                       OptionalBitmapAnd(pool, nullptr, 0, right, 0, length, 
0));
+  ASSERT_NE(result2, nullptr);
+  EXPECT_TRUE(BitmapEquals(right->data(), 0, result2->data(), 0, length));
+
+  // State 3: Left valid, Right nullptr. Zero offset (triggers SliceBuffer).
+  ASSERT_OK_AND_ASSIGN(auto result3,
+                       OptionalBitmapAnd(pool, left, 0, nullptr, 0, length, 
0));
+  ASSERT_NE(result3, nullptr);
+  EXPECT_TRUE(BitmapEquals(left->data(), 0, result3->data(), 0, length));
+
+  // State 4: Both valid (Non-trivial bitwise AND)
+  ASSERT_OK_AND_ASSIGN(auto result4,
+                       OptionalBitmapAnd(pool, left, 0, right, 0, length, 0));
+  ASSERT_NE(result4, nullptr);
+
+  // Verify the bitwise AND matches our expected messy pattern
+  EXPECT_TRUE(BitmapEquals(expected->data(), 0, result4->data(), 0, length));
+
+  // State 5: Left nullptr, Right valid. Matching offset modulo 8 (triggers 
SliceBuffer).
+  int64_t offset1 = 2;
+  int64_t length1 = length - offset1;
+  ASSERT_OK_AND_ASSIGN(auto result5, OptionalBitmapAnd(pool, nullptr, 0, 
right, offset1,
+                                                       length1, offset1));
+  ASSERT_NE(result5, nullptr);
+  EXPECT_TRUE(BitmapEquals(right->data(), offset1, result5->data(), offset1, 
length1));
+
+  // State 6: Left nullptr, Right valid. Differing offset modulo 8 (triggers 
CopyBitmap).
+  int64_t offset2 = 3;
+  ASSERT_OK_AND_ASSIGN(auto result6, OptionalBitmapAnd(pool, nullptr, 0, 
right, offset1,
+                                                       length1, offset2));
+  ASSERT_NE(result6, nullptr);
+  EXPECT_TRUE(BitmapEquals(right->data(), offset1, result6->data(), offset2, 
length1));
+
+  // State 7: Left valid, Right nullptr. Differing offset modulo 8 (triggers 
CopyBitmap).
+  ASSERT_OK_AND_ASSIGN(
+      auto result7, OptionalBitmapAnd(pool, left, offset1, nullptr, 0, 
length1, offset2));
+  ASSERT_NE(result7, nullptr);
+  EXPECT_TRUE(BitmapEquals(left->data(), offset1, result7->data(), offset2, 
length1));
+}
+
 // Tests for Bitmap visiting.
 
 // test the basic assumption of word level Bitmap::Visit

Reply via email to