This is an automated email from the ASF dual-hosted git repository.
wjones127 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 0e597ab1ac GH-34316: [Python] FixedSizeListArray.from_arrays supports
mask parameter (#39396)
0e597ab1ac is described below
commit 0e597ab1ac62f12a4cf020994b2097643fdb9657
Author: LucasG0 <[email protected]>
AuthorDate: Thu Jan 4 00:12:24 2024 +0100
GH-34316: [Python] FixedSizeListArray.from_arrays supports mask parameter
(#39396)
### What changes are included in this PR?
Add `mask` / `null_bitmap` parameters to the corresponding Cython / C++
`FixedSizeListArray` methods, and propagate this bitmap instead of the
dummy `validity_buf` that was used previously.
### Are these changes tested?
Yes
### Are there any user-facing changes?
Yes, a `mask` parameter has been added to `FixedSizeListArray.from_arrays`.
* Closes: #34316
Authored-by: LucasG0 <[email protected]>
Signed-off-by: Will Jones <[email protected]>
---
cpp/src/arrow/array/array_nested.cc | 16 ++++++++--------
cpp/src/arrow/array/array_nested.h | 16 ++++++++++++----
python/pyarrow/array.pxi | 13 +++++++++----
python/pyarrow/includes/libarrow.pxd | 8 ++++++--
python/pyarrow/tests/test_array.py | 10 ++++++++++
5 files changed, 45 insertions(+), 18 deletions(-)
diff --git a/cpp/src/arrow/array/array_nested.cc
b/cpp/src/arrow/array/array_nested.cc
index acdd0a0742..0b0e340a67 100644
--- a/cpp/src/arrow/array/array_nested.cc
+++ b/cpp/src/arrow/array/array_nested.cc
@@ -894,7 +894,8 @@ const std::shared_ptr<DataType>&
FixedSizeListArray::value_type() const {
const std::shared_ptr<Array>& FixedSizeListArray::values() const { return
values_; }
Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
- const std::shared_ptr<Array>& values, int32_t list_size) {
+ const std::shared_ptr<Array>& values, int32_t list_size,
+ std::shared_ptr<Buffer> null_bitmap, int64_t null_count) {
if (list_size <= 0) {
return Status::Invalid("list_size needs to be a strict positive integer");
}
@@ -905,14 +906,14 @@ Result<std::shared_ptr<Array>>
FixedSizeListArray::FromArrays(
}
int64_t length = values->length() / list_size;
auto list_type = std::make_shared<FixedSizeListType>(values->type(),
list_size);
- std::shared_ptr<Buffer> validity_buf;
- return std::make_shared<FixedSizeListArray>(list_type, length, values,
validity_buf,
- /*null_count=*/0, /*offset=*/0);
+ return std::make_shared<FixedSizeListArray>(list_type, length, values,
null_bitmap,
+ null_count);
}
Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
- const std::shared_ptr<Array>& values, std::shared_ptr<DataType> type) {
+ const std::shared_ptr<Array>& values, std::shared_ptr<DataType> type,
+ std::shared_ptr<Buffer> null_bitmap, int64_t null_count) {
if (type->id() != Type::FIXED_SIZE_LIST) {
return Status::TypeError("Expected fixed size list type, got ",
type->ToString());
}
@@ -926,10 +927,9 @@ Result<std::shared_ptr<Array>>
FixedSizeListArray::FromArrays(
"The length of the values Array needs to be a multiple of the list
size");
}
int64_t length = values->length() / list_type.list_size();
- std::shared_ptr<Buffer> validity_buf;
- return std::make_shared<FixedSizeListArray>(type, length, values,
validity_buf,
- /*null_count=*/0, /*offset=*/0);
+ return std::make_shared<FixedSizeListArray>(type, length, values,
null_bitmap,
+ null_count);
}
Result<std::shared_ptr<Array>> FixedSizeListArray::Flatten(
diff --git a/cpp/src/arrow/array/array_nested.h
b/cpp/src/arrow/array/array_nested.h
index 61606e1592..768a630e0a 100644
--- a/cpp/src/arrow/array/array_nested.h
+++ b/cpp/src/arrow/array/array_nested.h
@@ -599,17 +599,25 @@ class ARROW_EXPORT FixedSizeListArray : public Array {
///
/// \param[in] values Array containing list values
/// \param[in] list_size The fixed length of each list
+ /// \param[in] null_bitmap Optional validity bitmap
+ /// \param[in] null_count Optional null count in null_bitmap
/// \return Will have length equal to values.length() / list_size
- static Result<std::shared_ptr<Array>> FromArrays(const
std::shared_ptr<Array>& values,
- int32_t list_size);
+ static Result<std::shared_ptr<Array>> FromArrays(
+ const std::shared_ptr<Array>& values, int32_t list_size,
+ std::shared_ptr<Buffer> null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount);
/// \brief Construct FixedSizeListArray from child value array and type
///
/// \param[in] values Array containing list values
/// \param[in] type The fixed sized list type
+ /// \param[in] null_bitmap Optional validity bitmap
+ /// \param[in] null_count Optional null count in null_bitmap
/// \return Will have length equal to values.length() / type.list_size()
- static Result<std::shared_ptr<Array>> FromArrays(const
std::shared_ptr<Array>& values,
- std::shared_ptr<DataType>
type);
+ static Result<std::shared_ptr<Array>> FromArrays(
+ const std::shared_ptr<Array>& values, std::shared_ptr<DataType> type,
+ std::shared_ptr<Buffer> null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount);
protected:
void SetData(const std::shared_ptr<ArrayData>& data);
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 74a196002b..751dfbcce4 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -2484,7 +2484,7 @@ cdef class MapArray(ListArray):
Examples
--------
- First, let's understand the structure of our dataset when viewed in a
rectangular data model.
+ First, let's understand the structure of our dataset when viewed in a
rectangular data model.
The total of 5 respondents answered the question "How much did you
like the movie x?".
The value -1 in the integer array means that the value is missing. The
boolean array
represents the null bitmask corresponding to the missing values in the
integer array.
@@ -2590,7 +2590,7 @@ cdef class FixedSizeListArray(BaseListArray):
"""
@staticmethod
- def from_arrays(values, list_size=None, DataType type=None):
+ def from_arrays(values, list_size=None, DataType type=None, mask=None):
"""
Construct FixedSizeListArray from array of values and a list length.
@@ -2602,6 +2602,9 @@ cdef class FixedSizeListArray(BaseListArray):
type : DataType, optional
If not specified, a default ListType with the values' type and
`list_size` length is used.
+ mask : Array (boolean type), optional
+ Indicate which values are null (True) or not null (False).
+
Returns
-------
@@ -2652,19 +2655,21 @@ cdef class FixedSizeListArray(BaseListArray):
_values = asarray(values)
+ c_mask = c_mask_inverted_from_obj(mask, None)
+
if type is not None:
if list_size is not None:
raise ValueError("Cannot specify both list_size and type")
with nogil:
c_result = CFixedSizeListArray.FromArraysAndType(
- _values.sp_array, type.sp_type)
+ _values.sp_array, type.sp_type, c_mask)
else:
if list_size is None:
raise ValueError("Should specify one of list_size and type")
_list_size = <int32_t>list_size
with nogil:
c_result = CFixedSizeListArray.FromArrays(
- _values.sp_array, _list_size)
+ _values.sp_array, _list_size, c_mask)
cdef Array result = pyarrow_wrap_array(GetResultValue(c_result))
result.validate()
return result
diff --git a/python/pyarrow/includes/libarrow.pxd
b/python/pyarrow/includes/libarrow.pxd
index bad5ec606c..82b888f584 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -673,11 +673,15 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CFixedSizeListArray" arrow::FixedSizeListArray"(CArray):
@staticmethod
CResult[shared_ptr[CArray]] FromArrays(
- const shared_ptr[CArray]& values, int32_t list_size)
+ const shared_ptr[CArray]& values,
+ int32_t list_size,
+ shared_ptr[CBuffer] null_bitmap)
@staticmethod
CResult[shared_ptr[CArray]] FromArraysAndType" FromArrays"(
- const shared_ptr[CArray]& values, shared_ptr[CDataType])
+ const shared_ptr[CArray]& values,
+ shared_ptr[CDataType],
+ shared_ptr[CBuffer] null_bitmap)
int64_t value_offset(int i)
int64_t value_length(int i)
diff --git a/python/pyarrow/tests/test_array.py
b/python/pyarrow/tests/test_array.py
index 599d15d023..d598630dc2 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1091,6 +1091,16 @@ def test_fixed_size_list_from_arrays():
assert result.type.equals(typ)
assert result.type.value_field.name == "name"
+ result = pa.FixedSizeListArray.from_arrays(values,
+ type=typ,
+ mask=pa.array([False, True,
False]))
+ assert result.to_pylist() == [[0, 1, 2, 3], None, [8, 9, 10, 11]]
+
+ result = pa.FixedSizeListArray.from_arrays(values,
+ list_size=4,
+ mask=pa.array([False, True,
False]))
+ assert result.to_pylist() == [[0, 1, 2, 3], None, [8, 9, 10, 11]]
+
# raise on invalid values / list_size
with pytest.raises(ValueError):
pa.FixedSizeListArray.from_arrays(values, -4)