This is an automated email from the ASF dual-hosted git repository.

wjones127 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 0e597ab1ac GH-34316: [Python] FixedSizeListArray.from_arrays supports 
mask parameter (#39396)
0e597ab1ac is described below

commit 0e597ab1ac62f12a4cf020994b2097643fdb9657
Author: LucasG0 <[email protected]>
AuthorDate: Thu Jan 4 00:12:24 2024 +0100

    GH-34316: [Python] FixedSizeListArray.from_arrays supports mask parameter 
(#39396)
    
    
    
    ### What changes are included in this PR?
    
    Add `mask` / `null_bitmap` parameters in corresponding Cython / C++ 
`FixedSizeListArray` methods, and propagate this bitmap instead of using the 
current dummy `validity_buf`.
    
    ### Are these changes tested?
    
    Yes
    
    ### Are there any user-facing changes?
    
    Yes, `mask` parameter has been added to `FixedSizeListArray.from_arrays`
    * Closes: #34316
    
    Authored-by: LucasG0 <[email protected]>
    Signed-off-by: Will Jones <[email protected]>
---
 cpp/src/arrow/array/array_nested.cc  | 16 ++++++++--------
 cpp/src/arrow/array/array_nested.h   | 16 ++++++++++++----
 python/pyarrow/array.pxi             | 13 +++++++++----
 python/pyarrow/includes/libarrow.pxd |  8 ++++++--
 python/pyarrow/tests/test_array.py   | 10 ++++++++++
 5 files changed, 45 insertions(+), 18 deletions(-)

diff --git a/cpp/src/arrow/array/array_nested.cc 
b/cpp/src/arrow/array/array_nested.cc
index acdd0a0742..0b0e340a67 100644
--- a/cpp/src/arrow/array/array_nested.cc
+++ b/cpp/src/arrow/array/array_nested.cc
@@ -894,7 +894,8 @@ const std::shared_ptr<DataType>& 
FixedSizeListArray::value_type() const {
 const std::shared_ptr<Array>& FixedSizeListArray::values() const { return 
values_; }
 
 Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
-    const std::shared_ptr<Array>& values, int32_t list_size) {
+    const std::shared_ptr<Array>& values, int32_t list_size,
+    std::shared_ptr<Buffer> null_bitmap, int64_t null_count) {
   if (list_size <= 0) {
     return Status::Invalid("list_size needs to be a strict positive integer");
   }
@@ -905,14 +906,14 @@ Result<std::shared_ptr<Array>> 
FixedSizeListArray::FromArrays(
   }
   int64_t length = values->length() / list_size;
   auto list_type = std::make_shared<FixedSizeListType>(values->type(), 
list_size);
-  std::shared_ptr<Buffer> validity_buf;
 
-  return std::make_shared<FixedSizeListArray>(list_type, length, values, 
validity_buf,
-                                              /*null_count=*/0, /*offset=*/0);
+  return std::make_shared<FixedSizeListArray>(list_type, length, values, 
null_bitmap,
+                                              null_count);
 }
 
 Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
-    const std::shared_ptr<Array>& values, std::shared_ptr<DataType> type) {
+    const std::shared_ptr<Array>& values, std::shared_ptr<DataType> type,
+    std::shared_ptr<Buffer> null_bitmap, int64_t null_count) {
   if (type->id() != Type::FIXED_SIZE_LIST) {
     return Status::TypeError("Expected fixed size list type, got ", 
type->ToString());
   }
@@ -926,10 +927,9 @@ Result<std::shared_ptr<Array>> 
FixedSizeListArray::FromArrays(
         "The length of the values Array needs to be a multiple of the list 
size");
   }
   int64_t length = values->length() / list_type.list_size();
-  std::shared_ptr<Buffer> validity_buf;
 
-  return std::make_shared<FixedSizeListArray>(type, length, values, 
validity_buf,
-                                              /*null_count=*/0, /*offset=*/0);
+  return std::make_shared<FixedSizeListArray>(type, length, values, 
null_bitmap,
+                                              null_count);
 }
 
 Result<std::shared_ptr<Array>> FixedSizeListArray::Flatten(
diff --git a/cpp/src/arrow/array/array_nested.h 
b/cpp/src/arrow/array/array_nested.h
index 61606e1592..768a630e0a 100644
--- a/cpp/src/arrow/array/array_nested.h
+++ b/cpp/src/arrow/array/array_nested.h
@@ -599,17 +599,25 @@ class ARROW_EXPORT FixedSizeListArray : public Array {
   ///
   /// \param[in] values Array containing list values
   /// \param[in] list_size The fixed length of each list
+  /// \param[in] null_bitmap Optional validity bitmap
+  /// \param[in] null_count Optional null count in null_bitmap
   /// \return Will have length equal to values.length() / list_size
-  static Result<std::shared_ptr<Array>> FromArrays(const 
std::shared_ptr<Array>& values,
-                                                   int32_t list_size);
+  static Result<std::shared_ptr<Array>> FromArrays(
+      const std::shared_ptr<Array>& values, int32_t list_size,
+      std::shared_ptr<Buffer> null_bitmap = NULLPTR,
+      int64_t null_count = kUnknownNullCount);
 
   /// \brief Construct FixedSizeListArray from child value array and type
   ///
   /// \param[in] values Array containing list values
   /// \param[in] type The fixed sized list type
+  /// \param[in] null_bitmap Optional validity bitmap
+  /// \param[in] null_count Optional null count in null_bitmap
   /// \return Will have length equal to values.length() / type.list_size()
-  static Result<std::shared_ptr<Array>> FromArrays(const 
std::shared_ptr<Array>& values,
-                                                   std::shared_ptr<DataType> 
type);
+  static Result<std::shared_ptr<Array>> FromArrays(
+      const std::shared_ptr<Array>& values, std::shared_ptr<DataType> type,
+      std::shared_ptr<Buffer> null_bitmap = NULLPTR,
+      int64_t null_count = kUnknownNullCount);
 
  protected:
   void SetData(const std::shared_ptr<ArrayData>& data);
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 74a196002b..751dfbcce4 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -2484,7 +2484,7 @@ cdef class MapArray(ListArray):
 
         Examples
         --------
-        First, let's understand the structure of our dataset when viewed in a 
rectangular data model. 
+        First, let's understand the structure of our dataset when viewed in a 
rectangular data model.
         The total of 5 respondents answered the question "How much did you 
like the movie x?".
         The value -1 in the integer array means that the value is missing. The 
boolean array
         represents the null bitmask corresponding to the missing values in the 
integer array.
@@ -2590,7 +2590,7 @@ cdef class FixedSizeListArray(BaseListArray):
     """
 
     @staticmethod
-    def from_arrays(values, list_size=None, DataType type=None):
+    def from_arrays(values, list_size=None, DataType type=None, mask=None):
         """
         Construct FixedSizeListArray from array of values and a list length.
 
@@ -2602,6 +2602,9 @@ cdef class FixedSizeListArray(BaseListArray):
         type : DataType, optional
             If not specified, a default ListType with the values' type and
             `list_size` length is used.
+        mask : Array (boolean type), optional
+            Indicate which values are null (True) or not null (False).
+
 
         Returns
         -------
@@ -2652,19 +2655,21 @@ cdef class FixedSizeListArray(BaseListArray):
 
         _values = asarray(values)
 
+        c_mask = c_mask_inverted_from_obj(mask, None)
+
         if type is not None:
             if list_size is not None:
                 raise ValueError("Cannot specify both list_size and type")
             with nogil:
                 c_result = CFixedSizeListArray.FromArraysAndType(
-                    _values.sp_array, type.sp_type)
+                    _values.sp_array, type.sp_type, c_mask)
         else:
             if list_size is None:
                 raise ValueError("Should specify one of list_size and type")
             _list_size = <int32_t>list_size
             with nogil:
                 c_result = CFixedSizeListArray.FromArrays(
-                    _values.sp_array, _list_size)
+                    _values.sp_array, _list_size, c_mask)
         cdef Array result = pyarrow_wrap_array(GetResultValue(c_result))
         result.validate()
         return result
diff --git a/python/pyarrow/includes/libarrow.pxd 
b/python/pyarrow/includes/libarrow.pxd
index bad5ec606c..82b888f584 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -673,11 +673,15 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
     cdef cppclass CFixedSizeListArray" arrow::FixedSizeListArray"(CArray):
         @staticmethod
         CResult[shared_ptr[CArray]] FromArrays(
-            const shared_ptr[CArray]& values, int32_t list_size)
+            const shared_ptr[CArray]& values,
+            int32_t list_size,
+            shared_ptr[CBuffer] null_bitmap)
 
         @staticmethod
         CResult[shared_ptr[CArray]] FromArraysAndType" FromArrays"(
-            const shared_ptr[CArray]& values, shared_ptr[CDataType])
+            const shared_ptr[CArray]& values,
+            shared_ptr[CDataType],
+            shared_ptr[CBuffer] null_bitmap)
 
         int64_t value_offset(int i)
         int64_t value_length(int i)
diff --git a/python/pyarrow/tests/test_array.py 
b/python/pyarrow/tests/test_array.py
index 599d15d023..d598630dc2 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1091,6 +1091,16 @@ def test_fixed_size_list_from_arrays():
     assert result.type.equals(typ)
     assert result.type.value_field.name == "name"
 
+    result = pa.FixedSizeListArray.from_arrays(values,
+                                               type=typ,
+                                               mask=pa.array([False, True, 
False]))
+    assert result.to_pylist() == [[0, 1, 2, 3], None, [8, 9, 10, 11]]
+
+    result = pa.FixedSizeListArray.from_arrays(values,
+                                               list_size=4,
+                                               mask=pa.array([False, True, 
False]))
+    assert result.to_pylist() == [[0, 1, 2, 3], None, [8, 9, 10, 11]]
+
     # raise on invalid values / list_size
     with pytest.raises(ValueError):
         pa.FixedSizeListArray.from_arrays(values, -4)

Reply via email to