jorisvandenbossche commented on code in PR #39813:
URL: https://github.com/apache/arrow/pull/39813#discussion_r1474552385
##########
python/pyarrow/array.pxi:
##########
@@ -2460,6 +2460,403 @@ cdef class LargeListArray(BaseListArray):
return pyarrow_wrap_array((<CLargeListArray*> self.ap).offsets())
+cdef class ListViewArray(Array):
+ """
+ Concrete class for Arrow arrays of a list view data type.
+ """
+
+ @staticmethod
+ def from_arrays(offsets, sizes, values, DataType type=None, MemoryPool
pool=None, mask=None):
+ """
+ Construct ListViewArray from arrays of int32 offsets, sizes, and
values.
+
+ Parameters
+ ----------
+ offsets : Array (int32 type)
+ sizes : Array (int32 type)
+ values : Array (any type)
+ type : DataType, optional
+ If not specified, a default ListType with the values' type is
+ used.
+ pool : MemoryPool, optional
+ mask : Array (boolean type), optional
+ Indicate which values are null (True) or not null (False).
+
+ Returns
+ -------
+ list_view_array : ListViewArray
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> values = pa.array([1, 2, 3, 4])
+ >>> offsets = pa.array([0, 1, 2])
+ >>> sizes = pa.array([2, 2, 2])
+ >>> pa.ListViewArray.from_arrays(offsets, sizes, values)
+ <pyarrow.lib.ListViewArray object at ...>
+ [
+ [
+ 1,
+ 2
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ]
+ ]
+ >>> # nulls in the offsets array become null lists
+ >>> offsets = pa.array([0, None, 2])
+ >>> sizes = pa.array([2, 0, 2])
+ >>> pa.ListViewArray.from_arrays(offsets, sizes, values)
+ <pyarrow.lib.ListViewArray object at ...>
+ [
+ [
+ 1,
+ 2
+ ],
+ null,
+ [
+ 3,
+ 4
+ ]
+ ]
+ """
+ cdef:
+ Array _offsets, _sizes, _values
+ shared_ptr[CArray] out
+ shared_ptr[CBuffer] c_mask
+ cdef CMemoryPool* cpool = maybe_unbox_memory_pool(pool)
+
+ _offsets = asarray(offsets, type='int32')
+ _sizes = asarray(sizes, type='int32')
+ _values = asarray(values)
+
+ c_mask = c_mask_inverted_from_obj(mask, pool)
+
+ if type is not None:
+ with nogil:
+ out = GetResultValue(
+ CListViewArray.FromArraysAndType(
+ type.sp_type, _offsets.ap[0], _sizes.ap[0],
_values.ap[0], cpool, c_mask))
+ else:
+ with nogil:
+ out = GetResultValue(
+ CListViewArray.FromArrays(
+ _offsets.ap[0], _sizes.ap[0], _values.ap[0], cpool,
c_mask))
+ cdef Array result = pyarrow_wrap_array(out)
+ result.validate()
+ return result
+
+ @property
+ def values(self):
+ """
+ Return the underlying array of values which backs the ListViewArray
+ ignoring the array's offset and sizes.
+
+ If any of the list elements are null, but are backed by a
+ non-empty sub-list, those elements will be included in the
+ output.
+
+ Returns
+ -------
+ values : Array
+
+ Examples
+ --------
+ The values include null elements from sub-lists:
+
+ >>> import pyarrow as pa
+ >>> values = [[1, 2], [3, 4, None, 6]]
+ >>> offsets = [0, None, 2]
+ >>> sizes = [2, 0, 4]
+ >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values)
+ >>> array.values
+ <pyarrow.lib.ListArray object at ...>
+ [
+ [
+ 1,
+ 2
+ ],
+ [
+ 3,
+ 4,
+ null,
+ 6
+ ]
+ ]
+ """
+ cdef CListViewArray* arr = <CListViewArray*> self.ap
+ return pyarrow_wrap_array(arr.values())
+
+ @property
+ def offsets(self):
+ """
+ Return the list offsets as an int32 array.
+
+ The returned array will not have a validity bitmap, so you cannot
+ expect to pass it to `ListViewArray.from_arrays` and get back the same
+ list array if the original one has nulls.
+
+ Returns
+ -------
+ offsets : Int32Array
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> values = [[1, 2], [3, 4, None, 6]]
+ >>> offsets = [0, None, 2]
+ >>> sizes = [2, 0, 4]
+ >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values)
+ >>> array.offsets
+ <pyarrow.lib.Int32Array object at ...>
+ [
+ 0,
+ 0,
+ 2
+ ]
+ """
+ return pyarrow_wrap_array((<CListViewArray*> self.ap).offsets())
+
+ @property
+ def sizes(self):
+ """
+ Return the sizes offsets as an int32 array.
Review Comment:
```suggestion
Return the list sizes as an int32 array.
```
##########
python/pyarrow/array.pxi:
##########
@@ -2460,6 +2460,403 @@ cdef class LargeListArray(BaseListArray):
return pyarrow_wrap_array((<CLargeListArray*> self.ap).offsets())
+cdef class ListViewArray(Array):
+ """
+ Concrete class for Arrow arrays of a list view data type.
+ """
+
+ @staticmethod
+ def from_arrays(offsets, sizes, values, DataType type=None, MemoryPool
pool=None, mask=None):
+ """
+ Construct ListViewArray from arrays of int32 offsets, sizes, and
values.
+
+ Parameters
+ ----------
+ offsets : Array (int32 type)
+ sizes : Array (int32 type)
+ values : Array (any type)
+ type : DataType, optional
+ If not specified, a default ListType with the values' type is
+ used.
+ pool : MemoryPool, optional
+ mask : Array (boolean type), optional
+ Indicate which values are null (True) or not null (False).
+
+ Returns
+ -------
+ list_view_array : ListViewArray
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> values = pa.array([1, 2, 3, 4])
+ >>> offsets = pa.array([0, 1, 2])
+ >>> sizes = pa.array([2, 2, 2])
+ >>> pa.ListViewArray.from_arrays(offsets, sizes, values)
+ <pyarrow.lib.ListViewArray object at ...>
+ [
+ [
+ 1,
+ 2
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ]
+ ]
+ >>> # nulls in the offsets array become null lists
+ >>> offsets = pa.array([0, None, 2])
+ >>> sizes = pa.array([2, 0, 2])
+ >>> pa.ListViewArray.from_arrays(offsets, sizes, values)
+ <pyarrow.lib.ListViewArray object at ...>
+ [
+ [
+ 1,
+ 2
+ ],
+ null,
+ [
+ 3,
+ 4
+ ]
+ ]
+ """
+ cdef:
+ Array _offsets, _sizes, _values
+ shared_ptr[CArray] out
+ shared_ptr[CBuffer] c_mask
+ cdef CMemoryPool* cpool = maybe_unbox_memory_pool(pool)
+
+ _offsets = asarray(offsets, type='int32')
+ _sizes = asarray(sizes, type='int32')
+ _values = asarray(values)
+
+ c_mask = c_mask_inverted_from_obj(mask, pool)
+
+ if type is not None:
+ with nogil:
+ out = GetResultValue(
+ CListViewArray.FromArraysAndType(
+ type.sp_type, _offsets.ap[0], _sizes.ap[0],
_values.ap[0], cpool, c_mask))
+ else:
+ with nogil:
+ out = GetResultValue(
+ CListViewArray.FromArrays(
+ _offsets.ap[0], _sizes.ap[0], _values.ap[0], cpool,
c_mask))
+ cdef Array result = pyarrow_wrap_array(out)
+ result.validate()
+ return result
+
+ @property
+ def values(self):
+ """
+ Return the underlying array of values which backs the ListViewArray
+ ignoring the array's offset and sizes.
+
+ If any of the list elements are null, but are backed by a
+ non-empty sub-list, those elements will be included in the
+ output.
Review Comment:
I think an additional disclaimer could be added here: this can also
return more values, or values out of order, if certain parts of the `values` array
are not pointed to by the offsets.
In general, given the layout of the "views" list type, there is no guarantee
about the content of the `values` here.
The ListArray.values version has a pointer to `flatten()`, which would be
useful here as well, to explain the difference. But I assume Flatten is not yet
implemented for ListView?
##########
python/pyarrow/array.pxi:
##########
@@ -2460,6 +2460,403 @@ cdef class LargeListArray(BaseListArray):
return pyarrow_wrap_array((<CLargeListArray*> self.ap).offsets())
+cdef class ListViewArray(Array):
+ """
+ Concrete class for Arrow arrays of a list view data type.
+ """
+
+ @staticmethod
+ def from_arrays(offsets, sizes, values, DataType type=None, MemoryPool
pool=None, mask=None):
+ """
+ Construct ListViewArray from arrays of int32 offsets, sizes, and
values.
+
+ Parameters
+ ----------
+ offsets : Array (int32 type)
+ sizes : Array (int32 type)
+ values : Array (any type)
+ type : DataType, optional
+ If not specified, a default ListType with the values' type is
+ used.
+ pool : MemoryPool, optional
+ mask : Array (boolean type), optional
+ Indicate which values are null (True) or not null (False).
+
+ Returns
+ -------
+ list_view_array : ListViewArray
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> values = pa.array([1, 2, 3, 4])
+ >>> offsets = pa.array([0, 1, 2])
+ >>> sizes = pa.array([2, 2, 2])
+ >>> pa.ListViewArray.from_arrays(offsets, sizes, values)
+ <pyarrow.lib.ListViewArray object at ...>
+ [
+ [
+ 1,
+ 2
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ]
+ ]
+ >>> # nulls in the offsets array become null lists
+ >>> offsets = pa.array([0, None, 2])
Review Comment:
What happens if this is passed together with a mask? EDIT: I see that is
checked and raises an error in the C++ code
##########
python/pyarrow/tests/test_array.py:
##########
@@ -3573,3 +3573,28 @@ def test_run_end_encoded_from_buffers():
with pytest.raises(ValueError):
pa.RunEndEncodedArray.from_buffers(ree_type, length, buffers,
1, offset, children)
+
+
+@pytest.mark.parametrize(('list_array_type'),
+ [pa.ListViewArray, pa.LargeListViewArray])
+def test_list_view_from_arrays(list_array_type):
+ values = [[1, 2], [3, 4, 5], [6, None, 7], [8]]
+ offsets = [0, 0, 1, 2, 3]
+ sizes = [2, 0, 3, 3, 1]
+ array = list_array_type.from_arrays(offsets, sizes, values)
+
+ assert array.values.to_pylist() == values
Review Comment:
I would expect that you check `array.to_pylist()` here as well?
##########
python/pyarrow/array.pxi:
##########
@@ -2460,6 +2460,403 @@ cdef class LargeListArray(BaseListArray):
return pyarrow_wrap_array((<CLargeListArray*> self.ap).offsets())
+cdef class ListViewArray(Array):
+ """
+ Concrete class for Arrow arrays of a list view data type.
+ """
+
+ @staticmethod
+ def from_arrays(offsets, sizes, values, DataType type=None, MemoryPool
pool=None, mask=None):
+ """
+ Construct ListViewArray from arrays of int32 offsets, sizes, and
values.
+
+ Parameters
+ ----------
+ offsets : Array (int32 type)
+ sizes : Array (int32 type)
+ values : Array (any type)
+ type : DataType, optional
+ If not specified, a default ListType with the values' type is
+ used.
+ pool : MemoryPool, optional
+ mask : Array (boolean type), optional
+ Indicate which values are null (True) or not null (False).
+
+ Returns
+ -------
+ list_view_array : ListViewArray
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> values = pa.array([1, 2, 3, 4])
+ >>> offsets = pa.array([0, 1, 2])
+ >>> sizes = pa.array([2, 2, 2])
+ >>> pa.ListViewArray.from_arrays(offsets, sizes, values)
+ <pyarrow.lib.ListViewArray object at ...>
+ [
+ [
+ 1,
+ 2
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ]
+ ]
+ >>> # nulls in the offsets array become null lists
+ >>> offsets = pa.array([0, None, 2])
+ >>> sizes = pa.array([2, 0, 2])
+ >>> pa.ListViewArray.from_arrays(offsets, sizes, values)
+ <pyarrow.lib.ListViewArray object at ...>
+ [
+ [
+ 1,
+ 2
+ ],
+ null,
+ [
+ 3,
+ 4
+ ]
+ ]
+ """
+ cdef:
+ Array _offsets, _sizes, _values
+ shared_ptr[CArray] out
+ shared_ptr[CBuffer] c_mask
+ cdef CMemoryPool* cpool = maybe_unbox_memory_pool(pool)
+
+ _offsets = asarray(offsets, type='int32')
+ _sizes = asarray(sizes, type='int32')
+ _values = asarray(values)
+
+ c_mask = c_mask_inverted_from_obj(mask, pool)
+
+ if type is not None:
+ with nogil:
+ out = GetResultValue(
+ CListViewArray.FromArraysAndType(
+ type.sp_type, _offsets.ap[0], _sizes.ap[0],
_values.ap[0], cpool, c_mask))
+ else:
+ with nogil:
+ out = GetResultValue(
+ CListViewArray.FromArrays(
+ _offsets.ap[0], _sizes.ap[0], _values.ap[0], cpool,
c_mask))
+ cdef Array result = pyarrow_wrap_array(out)
+ result.validate()
+ return result
+
+ @property
+ def values(self):
+ """
+ Return the underlying array of values which backs the ListViewArray
+ ignoring the array's offset and sizes.
+
+ If any of the list elements are null, but are backed by a
+ non-empty sub-list, those elements will be included in the
+ output.
+
+ Returns
+ -------
+ values : Array
+
+ Examples
+ --------
+ The values include null elements from sub-lists:
+
+ >>> import pyarrow as pa
+ >>> values = [[1, 2], [3, 4, None, 6]]
Review Comment:
Shouldn't this be a flat array?
##########
python/pyarrow/types.pxi:
##########
@@ -4498,6 +4593,80 @@ cpdef LargeListType large_list(value_type):
return out
+cpdef ListViewType list_view(value_type):
+ """
+ Create ListViewType instance from child data type or field.
+
+ Parameters
+ ----------
+ value_type : DataType or Field
+
+ Returns
+ -------
+ list_view_type : DataType
+
+ Examples
+ --------
+ Create an instance of ListViewType:
+
+ >>> import pyarrow as pa
+ >>> pa.list_view(pa.string())
+ ListViewType(list_view<item: string>)
+ """
+ cdef:
+ Field _field
+ shared_ptr[CDataType] list_view_type
+
+ if isinstance(value_type, DataType):
+ _field = field('item', value_type)
+ elif isinstance(value_type, Field):
+ _field = value_type
+ else:
+ raise TypeError('ListView requires DataType or Field')
+
+ list_view_type = CMakeListViewType(_field.sp_field)
+ return pyarrow_wrap_data_type(list_view_type)
+
+
+cpdef LargeListViewType large_list_view(value_type):
+ """
+ Create LargeListViewType instance from child data type or field.
+
+ This data type may not be supported by all Arrow implementations.
+ Unless you need to represent data larger than 2**31 elements, you should
+ prefer list_view().
Review Comment:
This comment can probably be generalized, mentioning that the "view" type in
general might not be supported by all Arrow implementations (regardless of the
size?)
##########
python/pyarrow/array.pxi:
##########
@@ -2460,6 +2460,403 @@ cdef class LargeListArray(BaseListArray):
return pyarrow_wrap_array((<CLargeListArray*> self.ap).offsets())
+cdef class ListViewArray(Array):
+ """
+ Concrete class for Arrow arrays of a list view data type.
+ """
+
+ @staticmethod
+ def from_arrays(offsets, sizes, values, DataType type=None, MemoryPool
pool=None, mask=None):
+ """
+ Construct ListViewArray from arrays of int32 offsets, sizes, and
values.
+
+ Parameters
+ ----------
+ offsets : Array (int32 type)
+ sizes : Array (int32 type)
+ values : Array (any type)
+ type : DataType, optional
+ If not specified, a default ListType with the values' type is
+ used.
+ pool : MemoryPool, optional
+ mask : Array (boolean type), optional
+ Indicate which values are null (True) or not null (False).
+
+ Returns
+ -------
+ list_view_array : ListViewArray
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> values = pa.array([1, 2, 3, 4])
+ >>> offsets = pa.array([0, 1, 2])
+ >>> sizes = pa.array([2, 2, 2])
+ >>> pa.ListViewArray.from_arrays(offsets, sizes, values)
+ <pyarrow.lib.ListViewArray object at ...>
+ [
+ [
+ 1,
+ 2
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ]
+ ]
+ >>> # nulls in the offsets array become null lists
+ >>> offsets = pa.array([0, None, 2])
+ >>> sizes = pa.array([2, 0, 2])
+ >>> pa.ListViewArray.from_arrays(offsets, sizes, values)
+ <pyarrow.lib.ListViewArray object at ...>
+ [
+ [
+ 1,
+ 2
+ ],
+ null,
+ [
+ 3,
+ 4
+ ]
+ ]
+ """
+ cdef:
+ Array _offsets, _sizes, _values
+ shared_ptr[CArray] out
+ shared_ptr[CBuffer] c_mask
+ cdef CMemoryPool* cpool = maybe_unbox_memory_pool(pool)
+
+ _offsets = asarray(offsets, type='int32')
+ _sizes = asarray(sizes, type='int32')
+ _values = asarray(values)
+
+ c_mask = c_mask_inverted_from_obj(mask, pool)
+
+ if type is not None:
+ with nogil:
+ out = GetResultValue(
+ CListViewArray.FromArraysAndType(
+ type.sp_type, _offsets.ap[0], _sizes.ap[0],
_values.ap[0], cpool, c_mask))
+ else:
+ with nogil:
+ out = GetResultValue(
+ CListViewArray.FromArrays(
+ _offsets.ap[0], _sizes.ap[0], _values.ap[0], cpool,
c_mask))
+ cdef Array result = pyarrow_wrap_array(out)
+ result.validate()
+ return result
+
+ @property
+ def values(self):
+ """
+ Return the underlying array of values which backs the ListViewArray
+ ignoring the array's offset and sizes.
+
+ If any of the list elements are null, but are backed by a
+ non-empty sub-list, those elements will be included in the
+ output.
+
+ Returns
+ -------
+ values : Array
+
+ Examples
+ --------
+ The values include null elements from sub-lists:
+
+ >>> import pyarrow as pa
+ >>> values = [[1, 2], [3, 4, None, 6]]
+ >>> offsets = [0, None, 2]
+ >>> sizes = [2, 0, 4]
+ >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values)
+ >>> array.values
+ <pyarrow.lib.ListArray object at ...>
+ [
+ [
+ 1,
+ 2
+ ],
+ [
+ 3,
+ 4,
+ null,
+ 6
+ ]
+ ]
+ """
+ cdef CListViewArray* arr = <CListViewArray*> self.ap
+ return pyarrow_wrap_array(arr.values())
+
+ @property
+ def offsets(self):
+ """
+ Return the list offsets as an int32 array.
+
+ The returned array will not have a validity bitmap, so you cannot
+ expect to pass it to `ListViewArray.from_arrays` and get back the same
+ list array if the original one has nulls.
+
+ Returns
+ -------
+ offsets : Int32Array
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> values = [[1, 2], [3, 4, None, 6]]
+ >>> offsets = [0, None, 2]
+ >>> sizes = [2, 0, 4]
+ >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values)
+ >>> array.offsets
+ <pyarrow.lib.Int32Array object at ...>
+ [
+ 0,
+ 0,
+ 2
+ ]
+ """
+ return pyarrow_wrap_array((<CListViewArray*> self.ap).offsets())
+
+ @property
+ def sizes(self):
+ """
+ Return the sizes offsets as an int32 array.
+
+ The returned array will not have a validity bitmap, so you cannot
+ expect to pass it to `ListViewArray.from_arrays` and get back the same
+ list array if the original one has nulls.
Review Comment:
This might be a left-over from copy pasting this from `offsets`. Or can you
also pass a null for the sizes to FromArrays?
##########
python/pyarrow/array.pxi:
##########
@@ -2460,6 +2460,403 @@ cdef class LargeListArray(BaseListArray):
return pyarrow_wrap_array((<CLargeListArray*> self.ap).offsets())
+cdef class ListViewArray(Array):
+ """
+ Concrete class for Arrow arrays of a list view data type.
+ """
+
+ @staticmethod
+ def from_arrays(offsets, sizes, values, DataType type=None, MemoryPool
pool=None, mask=None):
+ """
+ Construct ListViewArray from arrays of int32 offsets, sizes, and
values.
+
+ Parameters
+ ----------
+ offsets : Array (int32 type)
+ sizes : Array (int32 type)
+ values : Array (any type)
+ type : DataType, optional
+ If not specified, a default ListType with the values' type is
+ used.
+ pool : MemoryPool, optional
+ mask : Array (boolean type), optional
+ Indicate which values are null (True) or not null (False).
+
+ Returns
+ -------
+ list_view_array : ListViewArray
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> values = pa.array([1, 2, 3, 4])
+ >>> offsets = pa.array([0, 1, 2])
+ >>> sizes = pa.array([2, 2, 2])
+ >>> pa.ListViewArray.from_arrays(offsets, sizes, values)
+ <pyarrow.lib.ListViewArray object at ...>
+ [
+ [
+ 1,
+ 2
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ]
+ ]
+ >>> # nulls in the offsets array become null lists
+ >>> offsets = pa.array([0, None, 2])
+ >>> sizes = pa.array([2, 0, 2])
+ >>> pa.ListViewArray.from_arrays(offsets, sizes, values)
+ <pyarrow.lib.ListViewArray object at ...>
+ [
+ [
+ 1,
+ 2
+ ],
+ null,
+ [
+ 3,
+ 4
+ ]
+ ]
+ """
+ cdef:
+ Array _offsets, _sizes, _values
+ shared_ptr[CArray] out
+ shared_ptr[CBuffer] c_mask
+ cdef CMemoryPool* cpool = maybe_unbox_memory_pool(pool)
+
+ _offsets = asarray(offsets, type='int32')
+ _sizes = asarray(sizes, type='int32')
+ _values = asarray(values)
+
+ c_mask = c_mask_inverted_from_obj(mask, pool)
+
+ if type is not None:
+ with nogil:
+ out = GetResultValue(
+ CListViewArray.FromArraysAndType(
+ type.sp_type, _offsets.ap[0], _sizes.ap[0],
_values.ap[0], cpool, c_mask))
+ else:
+ with nogil:
+ out = GetResultValue(
+ CListViewArray.FromArrays(
+ _offsets.ap[0], _sizes.ap[0], _values.ap[0], cpool,
c_mask))
+ cdef Array result = pyarrow_wrap_array(out)
+ result.validate()
+ return result
+
+ @property
+ def values(self):
+ """
+ Return the underlying array of values which backs the ListViewArray
+ ignoring the array's offset and sizes.
+
+ If any of the list elements are null, but are backed by a
+ non-empty sub-list, those elements will be included in the
+ output.
+
+ Returns
+ -------
+ values : Array
+
+ Examples
+ --------
+ The values include null elements from sub-lists:
+
+ >>> import pyarrow as pa
+ >>> values = [[1, 2], [3, 4, None, 6]]
+ >>> offsets = [0, None, 2]
+ >>> sizes = [2, 0, 4]
+ >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values)
+ >>> array.values
+ <pyarrow.lib.ListArray object at ...>
+ [
+ [
+ 1,
+ 2
+ ],
+ [
+ 3,
+ 4,
+ null,
+ 6
+ ]
+ ]
+ """
+ cdef CListViewArray* arr = <CListViewArray*> self.ap
+ return pyarrow_wrap_array(arr.values())
+
+ @property
+ def offsets(self):
+ """
+ Return the list offsets as an int32 array.
+
+ The returned array will not have a validity bitmap, so you cannot
+ expect to pass it to `ListViewArray.from_arrays` and get back the same
+ list array if the original one has nulls.
+
+ Returns
+ -------
+ offsets : Int32Array
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> values = [[1, 2], [3, 4, None, 6]]
+ >>> offsets = [0, None, 2]
+ >>> sizes = [2, 0, 4]
+ >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values)
+ >>> array.offsets
+ <pyarrow.lib.Int32Array object at ...>
+ [
+ 0,
+ 0,
+ 2
+ ]
+ """
+ return pyarrow_wrap_array((<CListViewArray*> self.ap).offsets())
+
+ @property
+ def sizes(self):
+ """
+ Return the sizes offsets as an int32 array.
+
+ The returned array will not have a validity bitmap, so you cannot
+ expect to pass it to `ListViewArray.from_arrays` and get back the same
+ list array if the original one has nulls.
+
+ Returns
+ -------
+ sizes : Int32Array
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> values = [[1, 2], [3, 4, None, 6]]
+ >>> offsets = [0, None, 2]
+ >>> sizes = [2, 0, 4]
+ >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values)
+ >>> array.offsets
Review Comment:
```suggestion
>>> array.sizes
```
Will need to update the below output as well.
##########
python/pyarrow/array.pxi:
##########
@@ -2460,6 +2460,403 @@ cdef class LargeListArray(BaseListArray):
return pyarrow_wrap_array((<CLargeListArray*> self.ap).offsets())
+cdef class ListViewArray(Array):
+ """
+ Concrete class for Arrow arrays of a list view data type.
+ """
+
+ @staticmethod
+ def from_arrays(offsets, sizes, values, DataType type=None, MemoryPool
pool=None, mask=None):
+ """
+ Construct ListViewArray from arrays of int32 offsets, sizes, and
values.
+
+ Parameters
+ ----------
+ offsets : Array (int32 type)
+ sizes : Array (int32 type)
+ values : Array (any type)
+ type : DataType, optional
+ If not specified, a default ListType with the values' type is
+ used.
+ pool : MemoryPool, optional
+ mask : Array (boolean type), optional
+ Indicate which values are null (True) or not null (False).
+
+ Returns
+ -------
+ list_view_array : ListViewArray
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> values = pa.array([1, 2, 3, 4])
+ >>> offsets = pa.array([0, 1, 2])
+ >>> sizes = pa.array([2, 2, 2])
+ >>> pa.ListViewArray.from_arrays(offsets, sizes, values)
+ <pyarrow.lib.ListViewArray object at ...>
+ [
+ [
+ 1,
+ 2
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 3,
+ 4
+ ]
+ ]
+ >>> # nulls in the offsets array become null lists
+ >>> offsets = pa.array([0, None, 2])
Review Comment:
Ah, I hadn't expected this to work. So those get filled by
`ListViewArray::FromArrays`? In which case this is not zero-copy?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]