This is an automated email from the ASF dual-hosted git repository.
kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 2b9e1e4 ARROW-45: [Python] Add unnest/flatten function for List types
2b9e1e4 is described below
commit 2b9e1e45c45ee411032212affaafb6e32a1bffd8
Author: Krisztián Szűcs <[email protected]>
AuthorDate: Wed Oct 17 13:56:11 2018 +0200
ARROW-45: [Python] Add unnest/flatten function for List types
Author: Krisztián Szűcs <[email protected]>
Closes #2757 from kszucs/ARROW-45 and squashes the following commits:
0420020a <Krisztián Szűcs> remove Flatten from cpp API
3aabaf72 <Krisztián Szűcs> lint
c2f71f1b <Krisztián Szűcs> small docstring
bbd42472 <Krisztián Szűcs> ListArray::Flatten
---
python/pyarrow/array.pxi | 11 ++++++++
python/pyarrow/tests/test_array.py | 51 ++++++++++++++++++++++++++++++++++++++
2 files changed, 62 insertions(+)
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 320852a..2d0f56d 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -937,6 +937,17 @@ cdef class ListArray(Array):
cpool, &out))
return pyarrow_wrap_array(out)
+ def flatten(self):
+ """
+ Unnest this ListArray by one level
+
+ Wraps the result of values() on the underlying CListArray and
+ returns it.
+
+ NOTE(review): values() is returned as-is; presumably it is not
+ trimmed to this array's offset/length, so a sliced ListArray may
+ yield extra elements -- confirm against the C++ ListArray API.
+
+ Returns
+ -------
+ result : Array
+ """
+ # View self.ap as a CListArray pointer so values() can be called on it
+ cdef CListArray* arr = <CListArray*> self.ap
+ return pyarrow_wrap_array(arr.values())
+
cdef class UnionArray(Array):
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 65deddc..c340228 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1090,6 +1090,57 @@ def test_invalid_tensor_construction():
pa.Tensor()
+def test_list_array_flatten():
+ """Check that flatten() removes exactly one level of list nesting."""
+ # Two levels of nesting: list<list<int64>>
+ typ2 = pa.list_(
+ pa.list_(
+ pa.int64()
+ )
+ )
+ # Input mixes null lists, empty lists and null scalars at both levels
+ arr2 = pa.array([
+ None,
+ [
+ [1, None, 2],
+ None,
+ [3, 4]
+ ],
+ [],
+ [
+ [],
+ [5, 6],
+ None
+ ],
+ [
+ [7, 8]
+ ]
+ ])
+ assert arr2.type.equals(typ2)
+
+ # Expected result of flattening arr2 once: its inner lists, in order
+ typ1 = pa.list_(pa.int64())
+ arr1 = pa.array([
+ [1, None, 2],
+ None,
+ [3, 4],
+ [],
+ [5, 6],
+ None,
+ [7, 8]
+ ])
+ assert arr1.type.equals(typ1)
+
+ # Expected result of flattening arr1 once: the scalar values, in order
+ typ0 = pa.int64()
+ arr0 = pa.array([
+ 1, None, 2,
+ 3, 4,
+ 5, 6,
+ 7, 8
+ ])
+ assert arr0.type.equals(typ0)
+
+ # Each flatten() call strips one level; chaining two reaches the scalars
+ assert arr2.flatten().equals(arr1)
+ assert arr1.flatten().equals(arr0)
+ assert arr2.flatten().flatten().equals(arr0)
+
+
def test_struct_array_flatten():
ty = pa.struct([pa.field('x', pa.int16()),
pa.field('y', pa.float32())])