pitrou commented on code in PR #13894:
URL: https://github.com/apache/arrow/pull/13894#discussion_r952721077
##########
python/pyarrow/array.pxi:
##########
@@ -1943,21 +1944,24 @@ cdef class ListArray(BaseListArray):
cdef:
Array _offsets, _values
shared_ptr[CArray] out
+ shared_ptr[CBuffer] c_mask
cdef CMemoryPool* cpool = maybe_unbox_memory_pool(pool)
_offsets = asarray(offsets, type='int32')
_values = asarray(values)
+ c_mask = c_mask_inverted_from_obj(mask, pool)
+
if type is not None:
with nogil:
out = GetResultValue(
CListArray.FromArraysAndType(
- type.sp_type, _offsets.ap[0], _values.ap[0], cpool))
+ type.sp_type, _offsets.ap[0], _values.ap[0], cpool,
c_mask, -1))
Review Comment:
IMHO it would be less kludgy if we didn't pass -1 explicitly.
##########
cpp/src/arrow/array/array_nested.h:
##########
@@ -125,13 +125,19 @@ class ARROW_EXPORT ListArray : public
BaseListArray<ListType> {
/// \param[in] values Array containing list values
/// \param[in] pool MemoryPool in case new offsets array needs to be
Review Comment:
Please mention above that either a offsets Array's null bitmap can be
present or an explicit null_bitmap, but not both.
##########
cpp/src/arrow/array/array_nested.h:
##########
@@ -125,13 +125,19 @@ class ARROW_EXPORT ListArray : public
BaseListArray<ListType> {
/// \param[in] values Array containing list values
/// \param[in] pool MemoryPool in case new offsets array needs to be
/// allocated because of null values
+ /// \param[in] null_bitmap Array of valid and null values,
+ /// where 0 represents null and 1 represents valid.
+ /// \param[in] null_count Count of null values
static Result<std::shared_ptr<ListArray>> FromArrays(
- const Array& offsets, const Array& values,
- MemoryPool* pool = default_memory_pool());
+ const Array& offsets, const Array& values, MemoryPool* pool =
default_memory_pool(),
+ std::shared_ptr<Buffer> null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount);
static Result<std::shared_ptr<ListArray>> FromArrays(
std::shared_ptr<DataType> type, const Array& offsets, const Array&
values,
- MemoryPool* pool = default_memory_pool());
+ MemoryPool* pool = default_memory_pool(),
+ std::shared_ptr<Buffer> null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount);
Review Comment:
Also, should similar changes be done for `LargeListArray`?
##########
python/pyarrow/array.pxi:
##########
@@ -2812,6 +2806,25 @@ cdef dict _array_classes = {
}
+cdef inline shared_ptr[CBuffer] c_mask_inverted_from_obj(object mask,
MemoryPool pool) except *:
+ """
+ Convert mask array obj to c_mask while also inverting to signify 1 for
valid and 0 for null
+ """
+ cdef shared_ptr[CBuffer] c_mask
+ if mask is None:
+ c_mask = shared_ptr[CBuffer]()
+ elif isinstance(mask, Array):
+ if mask.type.id != Type_BOOL:
+ raise ValueError('Mask must be a pyarrow.Array of type bool')
Review Comment:
I'd rather use "boolean" as "bool" can be confused with the Python `bool`
type.
##########
python/pyarrow/array.pxi:
##########
@@ -2812,6 +2806,25 @@ cdef dict _array_classes = {
}
+cdef inline shared_ptr[CBuffer] c_mask_inverted_from_obj(object mask,
MemoryPool pool) except *:
+ """
+ Convert mask array obj to c_mask while also inverting to signify 1 for
valid and 0 for null
+ """
+ cdef shared_ptr[CBuffer] c_mask
+ if mask is None:
+ c_mask = shared_ptr[CBuffer]()
+ elif isinstance(mask, Array):
+ if mask.type.id != Type_BOOL:
+ raise ValueError('Mask must be a pyarrow.Array of type bool')
Review Comment:
Also let's use `TypeError` here.
##########
python/pyarrow/array.pxi:
##########
@@ -2812,6 +2806,25 @@ cdef dict _array_classes = {
}
+cdef inline shared_ptr[CBuffer] c_mask_inverted_from_obj(object mask,
MemoryPool pool) except *:
+ """
+ Convert mask array obj to c_mask while also inverting to signify 1 for
valid and 0 for null
+ """
+ cdef shared_ptr[CBuffer] c_mask
+ if mask is None:
+ c_mask = shared_ptr[CBuffer]()
+ elif isinstance(mask, Array):
+ if mask.type.id != Type_BOOL:
+ raise ValueError('Mask must be a pyarrow.Array of type bool')
+ if mask.null_count != 0:
+ raise ValueError('Mask must not contain nulls')
+ inverted_mask = _pc().invert(mask, memory_pool=pool)
+ c_mask = pyarrow_unwrap_buffer(inverted_mask.buffers()[1])
+ else:
+ raise ValueError('Mask must be a pyarrow.Array of type bool')
Review Comment:
Definitely should be `TypeError` here.
##########
cpp/src/arrow/array/array_nested.h:
##########
@@ -125,13 +125,19 @@ class ARROW_EXPORT ListArray : public
BaseListArray<ListType> {
/// \param[in] values Array containing list values
/// \param[in] pool MemoryPool in case new offsets array needs to be
/// allocated because of null values
+ /// \param[in] null_bitmap Array of valid and null values,
+ /// where 0 represents null and 1 represents valid.
+ /// \param[in] null_count Count of null values
Review Comment:
```suggestion
/// \param[in] null_bitmap Optional validity bitmap
/// \param[in] null_count Optional null count in null_bitmap
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]