Repository: arrow Updated Branches: refs/heads/master a2ead2f64 -> c369709c4
ARROW-426: Python: Conversion from pyarrow.Array to a Python list Author: Uwe L. Korn <uw...@xhochy.com> Closes #242 from xhochy/ARROW-426 and squashes the following commits: 10739ac [Uwe L. Korn] ARROW-426: Python: Conversion from pyarrow.Array to a Python list Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/c369709c Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/c369709c Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/c369709c Branch: refs/heads/master Commit: c369709c4f8157cb5e6c8121e1e613b104305aed Parents: a2ead2f Author: Uwe L. Korn <uw...@xhochy.com> Authored: Mon Dec 19 11:47:32 2016 -0500 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Mon Dec 19 11:47:32 2016 -0500 ---------------------------------------------------------------------- python/pyarrow/array.pyx | 6 ++++++ python/pyarrow/scalar.pyx | 4 +++- python/pyarrow/table.pyx | 15 +++++++++++++++ python/pyarrow/tests/test_column.py | 1 + python/pyarrow/tests/test_convert_builtin.py | 13 +++++++++++-- 5 files changed, 36 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/c369709c/python/pyarrow/array.pyx ---------------------------------------------------------------------- diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx index 6c86275..d44212f 100644 --- a/python/pyarrow/array.pyx +++ b/python/pyarrow/array.pyx @@ -167,6 +167,12 @@ cdef class Array: return PyObject_to_object(np_arr) + def to_pylist(self): + """ + Convert to a list of native Python objects. 
+ """ + return [x.as_py() for x in self] + cdef class NullArray(Array): pass http://git-wip-us.apache.org/repos/asf/arrow/blob/c369709c/python/pyarrow/scalar.pyx ---------------------------------------------------------------------- diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx index 0d391e5..c2d20e4 100644 --- a/python/pyarrow/scalar.pyx +++ b/python/pyarrow/scalar.pyx @@ -194,7 +194,9 @@ cdef object box_arrow_scalar(DataType type, const shared_ptr[CArray]& sp_array, int index): cdef ArrayValue val - if sp_array.get().IsNull(index): + if type.type.type == Type_NA: + return NA + elif sp_array.get().IsNull(index): return NA else: val = _scalar_classes[type.type.type]() http://git-wip-us.apache.org/repos/asf/arrow/blob/c369709c/python/pyarrow/table.pyx ---------------------------------------------------------------------- diff --git a/python/pyarrow/table.pyx b/python/pyarrow/table.pyx index 333686f..2f7d430 100644 --- a/python/pyarrow/table.pyx +++ b/python/pyarrow/table.pyx @@ -108,6 +108,15 @@ cdef class ChunkedArray: for i in range(self.num_chunks): yield self.chunk(i) + def to_pylist(self): + """ + Convert to a list of native Python objects. + """ + result = [] + for i in range(self.num_chunks): + result += self.chunk(i).to_pylist() + return result + cdef class Column: """ @@ -143,6 +152,12 @@ cdef class Column: return pd.Series(PyObject_to_object(arr), name=self.name) + def to_pylist(self): + """ + Convert to a list of native Python objects. + """ + return self.data.to_pylist() + cdef _check_nullptr(self): if self.column == NULL: raise ReferenceError("Column object references a NULL pointer." 
http://git-wip-us.apache.org/repos/asf/arrow/blob/c369709c/python/pyarrow/tests/test_column.py ---------------------------------------------------------------------- diff --git a/python/pyarrow/tests/test_column.py b/python/pyarrow/tests/test_column.py index b62f582..32202cb 100644 --- a/python/pyarrow/tests/test_column.py +++ b/python/pyarrow/tests/test_column.py @@ -35,6 +35,7 @@ class TestColumn(unittest.TestCase): assert column.length() == 5 assert len(column) == 5 assert column.shape == (5,) + assert column.to_pylist() == [-10, -5, 0, 5, 10] def test_pandas(self): data = [ http://git-wip-us.apache.org/repos/asf/arrow/blob/c369709c/python/pyarrow/tests/test_convert_builtin.py ---------------------------------------------------------------------- diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index 8937f8d..34371b0 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -22,28 +22,34 @@ import pyarrow class TestConvertList(unittest.TestCase): def test_boolean(self): - arr = pyarrow.from_pylist([True, None, False, None]) + expected = [True, None, False, None] + arr = pyarrow.from_pylist(expected) assert len(arr) == 4 assert arr.null_count == 2 assert arr.type == pyarrow.bool_() + assert arr.to_pylist() == expected def test_empty_list(self): arr = pyarrow.from_pylist([]) assert len(arr) == 0 assert arr.null_count == 0 assert arr.type == pyarrow.null() + assert arr.to_pylist() == [] def test_all_none(self): arr = pyarrow.from_pylist([None, None]) assert len(arr) == 2 assert arr.null_count == 2 assert arr.type == pyarrow.null() + assert arr.to_pylist() == [None, None] def test_integer(self): - arr = pyarrow.from_pylist([1, None, 3, None]) + expected = [1, None, 3, None] + arr = pyarrow.from_pylist(expected) assert len(arr) == 4 assert arr.null_count == 2 assert arr.type == pyarrow.int64() + assert arr.to_pylist() == expected def 
test_garbage_collection(self): import gc @@ -62,6 +68,7 @@ class TestConvertList(unittest.TestCase): assert len(arr) == 6 assert arr.null_count == 3 assert arr.type == pyarrow.double() + assert arr.to_pylist() == data def test_string(self): data = ['foo', b'bar', None, 'arrow'] @@ -69,6 +76,7 @@ class TestConvertList(unittest.TestCase): assert len(arr) == 4 assert arr.null_count == 1 assert arr.type == pyarrow.string() + assert arr.to_pylist() == ['foo', 'bar', None, 'arrow'] def test_mixed_nesting_levels(self): pyarrow.from_pylist([1, 2, None]) @@ -90,3 +98,4 @@ class TestConvertList(unittest.TestCase): assert len(arr) == 4 assert arr.null_count == 1 assert arr.type == pyarrow.list_(pyarrow.int64()) + assert arr.to_pylist() == data