This is an automated email from the ASF dual-hosted git repository.

felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new fe1f7c0de4 GH-41955: [C++] Follow up of adding null_bitmap to 
MapArray::FromArrays (#41956)
fe1f7c0de4 is described below

commit fe1f7c0de4557784b20d3936f22d2efaed01a9d0
Author: Alenka Frim <[email protected]>
AuthorDate: Thu Jun 13 19:06:28 2024 +0200

    GH-41955: [C++] Follow up of adding null_bitmap to MapArray::FromArrays 
(#41956)
    
    ### Rationale for this change
    
    There have been some new comments regarding the work done in 
https://github.com/apache/arrow/pull/41757.
    
    ### What changes are included in this PR?
    
    This PR addresses the comments from 
https://github.com/apache/arrow/pull/41757#pullrequestreview-2094287563
    
    ### Are these changes tested?
    
    Yes. Existing tests should pass.
    
    ### Are there any user-facing changes?
    
    No.
    * GitHub Issue: #41955
    
    Lead-authored-by: AlenkaF <[email protected]>
    Co-authored-by: Alenka Frim <[email protected]>
    Co-authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
---
 cpp/src/arrow/array/array_list_test.cc | 20 +++++++++++++----
 cpp/src/arrow/array/array_nested.cc    | 41 ++++++++++++++++++----------------
 cpp/src/arrow/array/array_nested.h     |  6 ++---
 python/pyarrow/tests/test_array.py     | 14 ++++++++++++
 4 files changed, 55 insertions(+), 26 deletions(-)

diff --git a/cpp/src/arrow/array/array_list_test.cc 
b/cpp/src/arrow/array/array_list_test.cc
index 063b68706b..3d18d5f967 100644
--- a/cpp/src/arrow/array/array_list_test.cc
+++ b/cpp/src/arrow/array/array_list_test.cc
@@ -1369,14 +1369,26 @@ TEST_F(TestMapArray, FromArrays) {
   ASSERT_RAISES(Invalid,
                 MapArray::FromArrays(offsets1, keys_with_null, tmp_items, 
pool_));
 
-  // With null_bitmap
-  ASSERT_OK_AND_ASSIGN(auto map7, MapArray::FromArrays(offsets1, keys, items, 
pool_,
-                                                       
offsets3->data()->buffers[0]));
+  // With null_bitmap and null_count=1
+  auto null_bitmap_1 = ArrayFromJSON(boolean(), "[1, 0, 
1]")->data()->buffers[1];
+  ASSERT_OK_AND_ASSIGN(auto map7,
+                       MapArray::FromArrays(offsets1, keys, items, pool_, 
null_bitmap_1));
   ASSERT_OK(map7->Validate());
   MapArray expected7(map_type, length, offsets1->data()->buffers[1], keys, 
items,
-                     offsets3->data()->buffers[0], 1);
+                     null_bitmap_1, 1);
+  ASSERT_EQ(map7->null_count(), 1);
   AssertArraysEqual(expected7, *map7);
 
+  // With null_bitmap and null_count=2
+  auto null_bitmap_2 = ArrayFromJSON(boolean(), "[0, 1, 
0]")->data()->buffers[1];
+  ASSERT_OK_AND_ASSIGN(auto map8,
+                       MapArray::FromArrays(offsets1, keys, items, pool_, 
null_bitmap_2));
+  ASSERT_OK(map8->Validate());
+  MapArray expected8(map_type, length, offsets1->data()->buffers[1], keys, 
items,
+                     null_bitmap_2, 2);
+  ASSERT_EQ(map8->null_count(), 2);
+  AssertArraysEqual(expected8, *map8);
+
   // Null bitmap and offset with null
   ASSERT_RAISES(Invalid, MapArray::FromArrays(offsets3, keys, items, pool_,
                                               offsets3->data()->buffers[0]));
diff --git a/cpp/src/arrow/array/array_nested.cc 
b/cpp/src/arrow/array/array_nested.cc
index 2f6bca3d57..47c0fd3582 100644
--- a/cpp/src/arrow/array/array_nested.cc
+++ b/cpp/src/arrow/array/array_nested.cc
@@ -807,7 +807,7 @@ MapArray::MapArray(const std::shared_ptr<DataType>& type, 
int64_t length,
 Result<std::shared_ptr<Array>> MapArray::FromArraysInternal(
     std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
     const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
-    MemoryPool* pool, const std::shared_ptr<Buffer>& null_bitmap) {
+    MemoryPool* pool, std::shared_ptr<Buffer> null_bitmap) {
   using offset_type = typename MapType::offset_type;
   using OffsetArrowType = typename CTypeTraits<offset_type>::ArrowType;
 
@@ -836,7 +836,7 @@ Result<std::shared_ptr<Array>> MapArray::FromArraysInternal(
     return Status::NotImplemented("Null bitmap with offsets slice not 
supported.");
   }
 
-  if (offsets->null_count() > 0) {
+  if (offsets->data()->MayHaveNulls()) {
     ARROW_ASSIGN_OR_RAISE(auto buffers,
                           CleanListOffsets<MapType>(NULLPTR, *offsets, pool));
     return std::make_shared<MapArray>(type, offsets->length() - 1, 
std::move(buffers),
@@ -847,30 +847,32 @@ Result<std::shared_ptr<Array>> 
MapArray::FromArraysInternal(
   const auto& typed_offsets = checked_cast<const OffsetArrayType&>(*offsets);
 
   BufferVector buffers;
-  int64_t null_count;
-  if (null_bitmap != nullptr) {
-    buffers = BufferVector({std::move(null_bitmap), typed_offsets.values()});
-    null_count = null_bitmap->size();
-  } else {
-    buffers = BufferVector({null_bitmap, typed_offsets.values()});
-    null_count = 0;
+  buffers.resize(2);
+  int64_t null_count = 0;
+  if (null_bitmap) {
+    buffers[0] = std::move(null_bitmap);
+    null_count = kUnknownNullCount;
   }
+  buffers[1] = typed_offsets.values();
   return std::make_shared<MapArray>(type, offsets->length() - 1, 
std::move(buffers), keys,
                                     items, /*null_count=*/null_count, 
offsets->offset());
 }
 
-Result<std::shared_ptr<Array>> MapArray::FromArrays(
-    const std::shared_ptr<Array>& offsets, const std::shared_ptr<Array>& keys,
-    const std::shared_ptr<Array>& items, MemoryPool* pool,
-    const std::shared_ptr<Buffer>& null_bitmap) {
+Result<std::shared_ptr<Array>> MapArray::FromArrays(const 
std::shared_ptr<Array>& offsets,
+                                                    const 
std::shared_ptr<Array>& keys,
+                                                    const 
std::shared_ptr<Array>& items,
+                                                    MemoryPool* pool,
+                                                    std::shared_ptr<Buffer> 
null_bitmap) {
   return FromArraysInternal(std::make_shared<MapType>(keys->type(), 
items->type()),
-                            offsets, keys, items, pool, null_bitmap);
+                            offsets, keys, items, pool, 
std::move(null_bitmap));
 }
 
-Result<std::shared_ptr<Array>> MapArray::FromArrays(
-    std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
-    const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
-    MemoryPool* pool, const std::shared_ptr<Buffer>& null_bitmap) {
+Result<std::shared_ptr<Array>> MapArray::FromArrays(std::shared_ptr<DataType> 
type,
+                                                    const 
std::shared_ptr<Array>& offsets,
+                                                    const 
std::shared_ptr<Array>& keys,
+                                                    const 
std::shared_ptr<Array>& items,
+                                                    MemoryPool* pool,
+                                                    std::shared_ptr<Buffer> 
null_bitmap) {
   if (type->id() != Type::MAP) {
     return Status::TypeError("Expected map type, got ", type->ToString());
   }
@@ -881,7 +883,8 @@ Result<std::shared_ptr<Array>> MapArray::FromArrays(
   if (!map_type.item_type()->Equals(items->type())) {
     return Status::TypeError("Mismatching map items type");
   }
-  return FromArraysInternal(std::move(type), offsets, keys, items, pool, 
null_bitmap);
+  return FromArraysInternal(std::move(type), offsets, keys, items, pool,
+                            std::move(null_bitmap));
 }
 
 Status MapArray::ValidateChildData(
diff --git a/cpp/src/arrow/array/array_nested.h 
b/cpp/src/arrow/array/array_nested.h
index f96b6bd3b1..a6d4977839 100644
--- a/cpp/src/arrow/array/array_nested.h
+++ b/cpp/src/arrow/array/array_nested.h
@@ -537,13 +537,13 @@ class ARROW_EXPORT MapArray : public ListArray {
   static Result<std::shared_ptr<Array>> FromArrays(
       const std::shared_ptr<Array>& offsets, const std::shared_ptr<Array>& 
keys,
       const std::shared_ptr<Array>& items, MemoryPool* pool = 
default_memory_pool(),
-      const std::shared_ptr<Buffer>& null_bitmap = NULLPTR);
+      std::shared_ptr<Buffer> null_bitmap = NULLPTR);
 
   static Result<std::shared_ptr<Array>> FromArrays(
       std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
       const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
       MemoryPool* pool = default_memory_pool(),
-      const std::shared_ptr<Buffer>& null_bitmap = NULLPTR);
+      std::shared_ptr<Buffer> null_bitmap = NULLPTR);
 
   const MapType* map_type() const { return map_type_; }
 
@@ -563,7 +563,7 @@ class ARROW_EXPORT MapArray : public ListArray {
   static Result<std::shared_ptr<Array>> FromArraysInternal(
       std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
       const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
-      MemoryPool* pool, const std::shared_ptr<Buffer>& null_bitmap = NULLPTR);
+      MemoryPool* pool, std::shared_ptr<Buffer> null_bitmap = NULLPTR);
 
  private:
   const MapType* map_type_;
diff --git a/python/pyarrow/tests/test_array.py 
b/python/pyarrow/tests/test_array.py
index 1032ab9add..becf00ead8 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1097,6 +1097,7 @@ def test_map_from_arrays():
         items.type),
         mask=pa.array([False, True, False], type=pa.bool_())
     )
+    assert result.null_count == 1
     assert result.equals(expected)
 
     # pass in null bitmap without the type
@@ -1106,6 +1107,19 @@ def test_map_from_arrays():
                                      )
     assert result.equals(expected)
 
+    # pass in null bitmap with two nulls
+    offsets = [0, None, None, 6]
+    pyentries = [None, None, pypairs[2:]]
+
+    result = pa.MapArray.from_arrays([0, 2, 2, 6], keys, items, pa.map_(
+        keys.type,
+        items.type),
+        mask=pa.array([True, True, False], type=pa.bool_())
+    )
+    expected = pa.array(pyentries, type=pa.map_(pa.binary(), pa.int32()))
+    assert result.null_count == 2
+    assert result.equals(expected)
+
     # error if null bitmap and offsets with nulls passed
     msg1 = 'Ambiguous to specify both validity map and offsets with nulls'
     with pytest.raises(pa.ArrowInvalid, match=msg1):

Reply via email to