This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 255dbf990c GH-41684: [C++][Python] Add optional null_bitmap to 
MapArray::FromArrays (#41757)
255dbf990c is described below

commit 255dbf990c3d3e5fb1270a2a11efe0af2be195ab
Author: Alenka Frim <[email protected]>
AuthorDate: Fri May 31 10:09:54 2024 +0200

    GH-41684: [C++][Python] Add optional null_bitmap to MapArray::FromArrays 
(#41757)
    
    ### Rationale for this change
    
    When constructing a `MapArray` with `FromArrays` one can not supply a 
`null_bitmap`.
    
    ### What changes are included in this PR?
    
    Optional `null_bitmap` argument is added to `MapArray::FromArrays`.
    
    ### Are these changes tested?
    
    TODO (have them locally, need to clean them up and commit.
    
    ### Are there any user-facing changes?
    
    No.
    * GitHub Issue: #41684
    
    Authored-by: AlenkaF <[email protected]>
    Signed-off-by: Joris Van den Bossche <[email protected]>
---
 cpp/src/arrow/array/array_list_test.cc | 17 +++++++++++++
 cpp/src/arrow/array/array_nested.cc    | 45 +++++++++++++++++++++++-----------
 cpp/src/arrow/array/array_nested.h     |  9 ++++---
 python/pyarrow/array.pxi               | 11 ++++++---
 python/pyarrow/includes/libarrow.pxd   |  8 ++++--
 python/pyarrow/tests/test_array.py     | 34 +++++++++++++++++++++++++
 6 files changed, 102 insertions(+), 22 deletions(-)

diff --git a/cpp/src/arrow/array/array_list_test.cc 
b/cpp/src/arrow/array/array_list_test.cc
index e79ce6fe17..55f91dc341 100644
--- a/cpp/src/arrow/array/array_list_test.cc
+++ b/cpp/src/arrow/array/array_list_test.cc
@@ -1368,6 +1368,23 @@ TEST_F(TestMapArray, FromArrays) {
   ASSERT_EQ(keys_with_null->length(), tmp_items->length());
   ASSERT_RAISES(Invalid,
                 MapArray::FromArrays(offsets1, keys_with_null, tmp_items, 
pool_));
+
+  // With null_bitmap
+  ASSERT_OK_AND_ASSIGN(auto map7, MapArray::FromArrays(offsets1, keys, items, 
pool_,
+                                                       
offsets3->data()->buffers[0]));
+  ASSERT_OK(map7->Validate());
+  MapArray expected7(map_type, length, offsets1->data()->buffers[1], keys, 
items,
+                     offsets3->data()->buffers[0], 1);
+  AssertArraysEqual(expected7, *map7);
+
+  // Null bitmap and offset with null
+  ASSERT_RAISES(Invalid, MapArray::FromArrays(offsets3, keys, items, pool_,
+                                              offsets3->data()->buffers[0]));
+
+  // Null bitmap and offset with offset
+  ASSERT_RAISES(NotImplemented,
+                MapArray::FromArrays(offsets3->Slice(2), keys, items, pool_,
+                                     offsets3->data()->buffers[0]));
 }
 
 TEST_F(TestMapArray, FromArraysEquality) {
diff --git a/cpp/src/arrow/array/array_nested.cc 
b/cpp/src/arrow/array/array_nested.cc
index 67a499c2b8..bb5c6bf018 100644
--- a/cpp/src/arrow/array/array_nested.cc
+++ b/cpp/src/arrow/array/array_nested.cc
@@ -807,7 +807,7 @@ MapArray::MapArray(const std::shared_ptr<DataType>& type, 
int64_t length,
 Result<std::shared_ptr<Array>> MapArray::FromArraysInternal(
     std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
     const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
-    MemoryPool* pool) {
+    MemoryPool* pool, const std::shared_ptr<Buffer>& null_bitmap) {
   using offset_type = typename MapType::offset_type;
   using OffsetArrowType = typename CTypeTraits<offset_type>::ArrowType;
 
@@ -827,6 +827,15 @@ Result<std::shared_ptr<Array>> 
MapArray::FromArraysInternal(
     return Status::Invalid("Map key and item arrays must be equal length");
   }
 
+  if (null_bitmap != nullptr && offsets->null_count() > 0) {
+    return Status::Invalid(
+        "Ambiguous to specify both validity map and offsets with nulls");
+  }
+
+  if (null_bitmap != nullptr && offsets->offset() != 0) {
+    return Status::NotImplemented("Null bitmap with offsets slice not 
supported.");
+  }
+
   if (offsets->null_count() > 0) {
     ARROW_ASSIGN_OR_RAISE(auto buffers,
                           CleanListOffsets<MapType>(NULLPTR, *offsets, pool));
@@ -836,24 +845,32 @@ Result<std::shared_ptr<Array>> 
MapArray::FromArraysInternal(
 
   using OffsetArrayType = typename TypeTraits<OffsetArrowType>::ArrayType;
   const auto& typed_offsets = checked_cast<const OffsetArrayType&>(*offsets);
-  auto buffers = BufferVector({nullptr, typed_offsets.values()});
+
+  BufferVector buffers;
+  int64_t null_count;
+  if (null_bitmap != nullptr) {
+    buffers = BufferVector({std::move(null_bitmap), typed_offsets.values()});
+    null_count = null_bitmap->size();
+  } else {
+    buffers = BufferVector({null_bitmap, typed_offsets.values()});
+    null_count = 0;
+  }
   return std::make_shared<MapArray>(type, offsets->length() - 1, 
std::move(buffers), keys,
-                                    items, /*null_count=*/0, 
offsets->offset());
+                                    items, /*null_count=*/null_count, 
offsets->offset());
 }
 
-Result<std::shared_ptr<Array>> MapArray::FromArrays(const 
std::shared_ptr<Array>& offsets,
-                                                    const 
std::shared_ptr<Array>& keys,
-                                                    const 
std::shared_ptr<Array>& items,
-                                                    MemoryPool* pool) {
+Result<std::shared_ptr<Array>> MapArray::FromArrays(
+    const std::shared_ptr<Array>& offsets, const std::shared_ptr<Array>& keys,
+    const std::shared_ptr<Array>& items, MemoryPool* pool,
+    const std::shared_ptr<Buffer>& null_bitmap) {
   return FromArraysInternal(std::make_shared<MapType>(keys->type(), 
items->type()),
-                            offsets, keys, items, pool);
+                            offsets, keys, items, pool, null_bitmap);
 }
 
-Result<std::shared_ptr<Array>> MapArray::FromArrays(std::shared_ptr<DataType> 
type,
-                                                    const 
std::shared_ptr<Array>& offsets,
-                                                    const 
std::shared_ptr<Array>& keys,
-                                                    const 
std::shared_ptr<Array>& items,
-                                                    MemoryPool* pool) {
+Result<std::shared_ptr<Array>> MapArray::FromArrays(
+    std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
+    const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
+    MemoryPool* pool, const std::shared_ptr<Buffer>& null_bitmap) {
   if (type->id() != Type::MAP) {
     return Status::TypeError("Expected map type, got ", type->ToString());
   }
@@ -864,7 +881,7 @@ Result<std::shared_ptr<Array>> 
MapArray::FromArrays(std::shared_ptr<DataType> ty
   if (!map_type.item_type()->Equals(items->type())) {
     return Status::TypeError("Mismatching map items type");
   }
-  return FromArraysInternal(std::move(type), offsets, keys, items, pool);
+  return FromArraysInternal(std::move(type), offsets, keys, items, pool, 
null_bitmap);
 }
 
 Status MapArray::ValidateChildData(
diff --git a/cpp/src/arrow/array/array_nested.h 
b/cpp/src/arrow/array/array_nested.h
index 5744f5fcad..f96b6bd3b1 100644
--- a/cpp/src/arrow/array/array_nested.h
+++ b/cpp/src/arrow/array/array_nested.h
@@ -532,15 +532,18 @@ class ARROW_EXPORT MapArray : public ListArray {
   /// \param[in] keys Array containing key values
   /// \param[in] items Array containing item values
   /// \param[in] pool MemoryPool in case new offsets array needs to be
+  /// \param[in] null_bitmap Optional validity bitmap
   /// allocated because of null values
   static Result<std::shared_ptr<Array>> FromArrays(
       const std::shared_ptr<Array>& offsets, const std::shared_ptr<Array>& 
keys,
-      const std::shared_ptr<Array>& items, MemoryPool* pool = 
default_memory_pool());
+      const std::shared_ptr<Array>& items, MemoryPool* pool = 
default_memory_pool(),
+      const std::shared_ptr<Buffer>& null_bitmap = NULLPTR);
 
   static Result<std::shared_ptr<Array>> FromArrays(
       std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
       const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
-      MemoryPool* pool = default_memory_pool());
+      MemoryPool* pool = default_memory_pool(),
+      const std::shared_ptr<Buffer>& null_bitmap = NULLPTR);
 
   const MapType* map_type() const { return map_type_; }
 
@@ -560,7 +563,7 @@ class ARROW_EXPORT MapArray : public ListArray {
   static Result<std::shared_ptr<Array>> FromArraysInternal(
       std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
       const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
-      MemoryPool* pool);
+      MemoryPool* pool, const std::shared_ptr<Buffer>& null_bitmap = NULLPTR);
 
  private:
   const MapType* map_type_;
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 406830ad4d..3c26e85887 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -3060,7 +3060,7 @@ cdef class MapArray(ListArray):
     """
 
     @staticmethod
-    def from_arrays(offsets, keys, items, DataType type=None, MemoryPool 
pool=None):
+    def from_arrays(offsets, keys, items, DataType type=None, MemoryPool 
pool=None, mask=None):
         """
         Construct MapArray from arrays of int32 offsets and key, item arrays.
 
@@ -3072,6 +3072,8 @@ cdef class MapArray(ListArray):
         type : DataType, optional
             If not specified, a default MapArray with the keys' and items' 
type is used.
         pool : MemoryPool
+        mask : Array (boolean type), optional
+            Indicate which values are null (True) or not null (False).
 
         Returns
         -------
@@ -3153,24 +3155,27 @@ cdef class MapArray(ListArray):
         cdef:
             Array _offsets, _keys, _items
             shared_ptr[CArray] out
+            shared_ptr[CBuffer] c_mask
         cdef CMemoryPool* cpool = maybe_unbox_memory_pool(pool)
 
         _offsets = asarray(offsets, type='int32')
         _keys = asarray(keys)
         _items = asarray(items)
 
+        c_mask = c_mask_inverted_from_obj(mask, pool)
+
         if type is not None:
             with nogil:
                 out = GetResultValue(
                     CMapArray.FromArraysAndType(
                         type.sp_type, _offsets.sp_array,
-                        _keys.sp_array, _items.sp_array, cpool))
+                        _keys.sp_array, _items.sp_array, cpool, c_mask))
         else:
             with nogil:
                 out = GetResultValue(
                     CMapArray.FromArrays(_offsets.sp_array,
                                          _keys.sp_array,
-                                         _items.sp_array, cpool))
+                                         _items.sp_array, cpool, c_mask))
         cdef Array result = pyarrow_wrap_array(out)
         result.validate()
         return result
diff --git a/python/pyarrow/includes/libarrow.pxd 
b/python/pyarrow/includes/libarrow.pxd
index a66f584b83..0d63ec6be3 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -823,7 +823,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
             const shared_ptr[CArray]& offsets,
             const shared_ptr[CArray]& keys,
             const shared_ptr[CArray]& items,
-            CMemoryPool* pool)
+            CMemoryPool* pool,
+            const shared_ptr[CBuffer] null_bitmap,
+        )
 
         @staticmethod
         CResult[shared_ptr[CArray]] FromArraysAndType" FromArrays"(
@@ -831,7 +833,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
             const shared_ptr[CArray]& offsets,
             const shared_ptr[CArray]& keys,
             const shared_ptr[CArray]& items,
-            CMemoryPool* pool)
+            CMemoryPool* pool,
+            const shared_ptr[CBuffer] null_bitmap,
+        )
 
         shared_ptr[CArray] keys()
         shared_ptr[CArray] items()
diff --git a/python/pyarrow/tests/test_array.py 
b/python/pyarrow/tests/test_array.py
index b89e0ace15..49a00517fc 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1079,6 +1079,40 @@ def test_map_from_arrays():
             pa.int64()
         ))
 
+    # pass in null bitmap with type
+    result = pa.MapArray.from_arrays([0, 2, 2, 6], keys, items, pa.map_(
+        keys.type,
+        items.type),
+        mask=pa.array([False, True, False], type=pa.bool_())
+    )
+    assert result.equals(expected)
+
+    # pass in null bitmap without the type
+    result = pa.MapArray.from_arrays([0, 2, 2, 6], keys, items,
+                                     mask=pa.array([False, True, False],
+                                                   type=pa.bool_())
+                                     )
+    assert result.equals(expected)
+
+    # error if null bitmap and offsets with nulls passed
+    msg1 = 'Ambiguous to specify both validity map and offsets with nulls'
+    with pytest.raises(pa.ArrowInvalid, match=msg1):
+        pa.MapArray.from_arrays(offsets, keys, items, pa.map_(
+            keys.type,
+            items.type),
+            mask=pa.array([False, True, False], type=pa.bool_())
+        )
+
+    # error if null bitmap passed to sliced offset
+    msg2 = 'Null bitmap with offsets slice not supported.'
+    offsets = pa.array(offsets, pa.int32())
+    with pytest.raises(pa.ArrowNotImplementedError, match=msg2):
+        pa.MapArray.from_arrays(offsets.slice(2), keys, items, pa.map_(
+            keys.type,
+            items.type),
+            mask=pa.array([False, True, False], type=pa.bool_())
+        )
+
     # check invalid usage
     offsets = [0, 1, 3, 5]
     keys = np.arange(5)

Reply via email to