pitrou commented on code in PR #49848:
URL: https://github.com/apache/arrow/pull/49848#discussion_r3147508483


##########
cpp/src/arrow/util/bitmap_ops.h:
##########
@@ -166,6 +166,29 @@ ARROW_EXPORT
 void BitmapAnd(const uint8_t* left, int64_t left_offset, const uint8_t* right,
                int64_t right_offset, int64_t length, int64_t out_offset, 
uint8_t* out);
 
+/// \brief Do a "bitmap and" on two buffers, handling null buffers safely.
+///
+/// If both buffers are null, returns nullptr.
+/// If one buffer is null, allocates a new buffer and copies the valid buffer 
into it.
+/// If both buffers are valid, allocates a new buffer and performs a bitwise 
AND.
+///
+/// \param[in] pool memory pool to allocate memory from
+/// \param[in] left left source buffer (may be null)
+/// \param[in] left_offset bit offset into the left buffer
+/// \param[in] right right source buffer (may be null)
+/// \param[in] right_offset bit offset into the right buffer
+/// \param[in] length number of bits to compute
+/// \param[in] out_offset bit offset into the output buffer
+///
+/// \return Resulting buffer (may be null)
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> OptionalBitmapAnd(MemoryPool* pool,
+                                                  const 
std::shared_ptr<Buffer>& left,
+                                                  int64_t left_offset,
+                                                  const 
std::shared_ptr<Buffer>& right,
+                                                  int64_t right_offset, 
int64_t length,
+                                                  int64_t out_offset);

Review Comment:
   Can probably make `out_offset` optional? In many cases it'll simply be zero.
   
   ```suggestion
                                                     int64_t out_offset = 0);
   ```



##########
cpp/src/arrow/util/bitmap_test.cc:
##########
@@ -1236,6 +1237,45 @@ TEST(BitmapOpTest, PartialLeadingByteSafety) {
   }
 }
 
+TEST(BitmapOpTest, OptionalBitmapAnd) {
+  MemoryPool* pool = default_memory_pool();
+  int64_t length = 16;
+  int64_t phys_bits = length;
+
+  // Create two distinct test buffers
+  ASSERT_OK_AND_ASSIGN(auto buf1, AllocateEmptyBitmap(phys_bits, pool));
+  ASSERT_OK_AND_ASSIGN(auto buf2, AllocateEmptyBitmap(phys_bits, pool));
+
+  // Fill buf1 with 1s (true)
+  std::memset(buf1->mutable_data(), 0xFF, buf1->size());
+  // Fill buf2 with alternating bits 01010101 (0xAA)
+  std::memset(buf2->mutable_data(), 0xAA, buf2->size());
+
+  // State 1: Both nullptr
+  ASSERT_OK_AND_ASSIGN(auto result1,
+                       OptionalBitmapAnd(pool, nullptr, 0, nullptr, 0, length, 
0));
+  ASSERT_EQ(result1, nullptr);
+
+  // State 2: Left nullptr, Right valid (Should copy Right)
+  ASSERT_OK_AND_ASSIGN(auto result2,
+                       OptionalBitmapAnd(pool, nullptr, 0, buf2, 0, length, 
0));
+  ASSERT_NE(result2, nullptr);
+  EXPECT_TRUE(BitmapEquals(buf2->data(), 0, result2->data(), 0, length));
+
+  // State 3: Left valid, Right nullptr (Should copy Left)
+  ASSERT_OK_AND_ASSIGN(auto result3,
+                       OptionalBitmapAnd(pool, buf1, 0, nullptr, 0, length, 
0));
+  ASSERT_NE(result3, nullptr);
+  EXPECT_TRUE(BitmapEquals(buf1->data(), 0, result3->data(), 0, length));
+
+  // State 4: Both valid (Should perform Bitwise AND)
+  ASSERT_OK_AND_ASSIGN(auto result4,
+                       OptionalBitmapAnd(pool, buf1, 0, buf2, 0, length, 0));
+  ASSERT_NE(result4, nullptr);
+  // Since buf1 is all 1s, (buf1 AND buf2) should perfectly equal buf2

Review Comment:
   This is an easy case to test, but can we test with inputs where the output 
is not trivially equal to one of the inputs?



##########
cpp/src/arrow/util/bitmap_ops.cc:
##########
@@ -329,6 +329,25 @@ bool OptionalBitmapEquals(const std::shared_ptr<Buffer>& 
left, int64_t left_offs
                               right ? right->data() : nullptr, right_offset, 
length);
 }
 
+Result<std::shared_ptr<Buffer>> OptionalBitmapAnd(MemoryPool* pool,
+                                                  const 
std::shared_ptr<Buffer>& left,
+                                                  int64_t left_offset,
+                                                  const 
std::shared_ptr<Buffer>& right,
+                                                  int64_t right_offset, 
int64_t length,
+                                                  int64_t out_offset) {
+  if (left == nullptr && right == nullptr) {
+    return nullptr;
+  }
+  if (left == nullptr) {
+    return CopyBitmap(pool, right->data(), right_offset, length, out_offset);
+  }

Review Comment:
   I think we can go one step further: if `(out_offset - right_offset) % 8 == 
0`, then we don't need to copy the bitmap at all, we can just do a simple slice 
of the input.



##########
testing:
##########


Review Comment:
   Hmm, it seems you need to revert the `testing` submodule to its git main 
state, can you do that?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to