This is an automated email from the ASF dual-hosted git repository. uwe pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new 0a3bdba ARROW-2287: [Python] chunked array not iterable, not indexable 0a3bdba is described below commit 0a3bdbaf456b4e19489b1bcebec92e90b41bbb1a Author: Krisztián Szűcs <szucs.kriszt...@gmail.com> AuthorDate: Thu Jun 7 14:11:35 2018 +0200 ARROW-2287: [Python] chunked array not iterable, not indexable Author: Krisztián Szűcs <szucs.kriszt...@gmail.com> Closes #2111 from kszucs/ARROW-2287 and squashes the following commits: 36c80ed3 <Krisztián Szűcs> iterable chunked array --- python/pyarrow/array.pxi | 1 - python/pyarrow/lib.pxd | 1 + python/pyarrow/table.pxi | 26 ++++++++++++++++---------- python/pyarrow/tests/test_array.py | 10 ++++++++++ python/pyarrow/tests/test_table.py | 17 ++++++++++++++++- 5 files changed, 43 insertions(+), 12 deletions(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index d98bebc..f7fd24d 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -502,7 +502,6 @@ cdef class Array: def __iter__(self): for i in range(len(self)): yield self.getitem(i) - raise StopIteration def __repr__(self): from pyarrow.formatting import array_format diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd index 653cecf..29e3e3a 100644 --- a/python/pyarrow/lib.pxd +++ b/python/pyarrow/lib.pxd @@ -297,6 +297,7 @@ cdef class ChunkedArray: CChunkedArray* chunked_array cdef void init(self, const shared_ptr[CChunkedArray]& chunked_array) + cdef getitem(self, int64_t i) cdef class Column: diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 8a58ff5..06325a6 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -72,23 +72,29 @@ cdef class ChunkedArray: """ return self.chunked_array.null_count() - def __getitem__(self, key): - cdef: - int64_t item - int i + def __iter__(self): + for chunk in self.iterchunks(): + for item in chunk: + yield item + def __getitem__(self, key): if isinstance(key, slice): return _normalize_slice(self, key) elif isinstance(key, six.integer_types): - index = _normalize_index(key, self.chunked_array.length()) - for i in range(self.num_chunks): - if index < self.chunked_array.chunk(i).get().length(): - return self.chunk(i)[index] - else: - index -= self.chunked_array.chunk(i).get().length() + return self.getitem(key) else: raise TypeError("key must either be a slice or integer") + cdef getitem(self, int64_t i): + cdef int j + + index = _normalize_index(i, self.chunked_array.length()) + for j in range(self.num_chunks): + if index < self.chunked_array.chunk(j).get().length(): + return self.chunk(j)[index] + else: + index -= self.chunked_array.chunk(j).get().length() + def slice(self, offset=0, length=None): """ Compute zero-copy slice of this ChunkedArray diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 90679bf..f30203c 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +import collections import datetime import pytest import struct @@ -162,6 +163,15 @@ def test_array_slice(): assert arr[start:stop].to_pylist() == arr.to_pylist()[start:stop] +def test_array_iter(): + arr = pa.array(range(10)) + + for i, j in zip(range(10), arr): + assert i == j + + assert isinstance(arr, collections.Iterable) + + def test_struct_array_slice(): # ARROW-2311: slicing nested arrays needs special care ty = pa.struct([pa.field('a', pa.int8()), diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index a3f0711..634a179 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -from collections import OrderedDict +from collections import OrderedDict, Iterable import numpy as np from pandas.util.testing import assert_frame_equal import pandas as pd @@ -49,6 +49,21 @@ def test_chunked_array_getitem(): assert data_slice.to_pylist() == [] +def test_chunked_array_iter(): + data = [ + pa.array([0]), + pa.array([1, 2, 3]), + pa.array([4, 5, 6]), + pa.array([7, 8, 9]) + ] + arr = pa.chunked_array(data) + + for i, j in zip(range(10), arr): + assert i == j + + assert isinstance(arr, Iterable) + + def test_column_basics(): data = [ pa.array([-10, -5, 0, 5, 10]) -- To stop receiving notification emails like this one, please contact u...@apache.org.