This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 0a3bdba  ARROW-2287: [Python] chunked array not iterable, not indexable
0a3bdba is described below

commit 0a3bdbaf456b4e19489b1bcebec92e90b41bbb1a
Author: Krisztián Szűcs <szucs.kriszt...@gmail.com>
AuthorDate: Thu Jun 7 14:11:35 2018 +0200

    ARROW-2287: [Python] chunked array not iterable, not indexable
    
    Author: Krisztián Szűcs <szucs.kriszt...@gmail.com>
    
    Closes #2111 from kszucs/ARROW-2287 and squashes the following commits:
    
    36c80ed3 <Krisztián Szűcs> iterable chunked array
---
 python/pyarrow/array.pxi           |  1 -
 python/pyarrow/lib.pxd             |  1 +
 python/pyarrow/table.pxi           | 26 ++++++++++++++++----------
 python/pyarrow/tests/test_array.py | 10 ++++++++++
 python/pyarrow/tests/test_table.py | 17 ++++++++++++++++-
 5 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index d98bebc..f7fd24d 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -502,7 +502,6 @@ cdef class Array:
     def __iter__(self):
         for i in range(len(self)):
             yield self.getitem(i)
-        raise StopIteration
 
     def __repr__(self):
         from pyarrow.formatting import array_format
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index 653cecf..29e3e3a 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -297,6 +297,7 @@ cdef class ChunkedArray:
         CChunkedArray* chunked_array
 
     cdef void init(self, const shared_ptr[CChunkedArray]& chunked_array)
+    cdef getitem(self, int64_t i)
 
 
 cdef class Column:
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 8a58ff5..06325a6 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -72,23 +72,29 @@ cdef class ChunkedArray:
         """
         return self.chunked_array.null_count()
 
-    def __getitem__(self, key):
-        cdef:
-            int64_t item
-            int i
+    def __iter__(self):
+        for chunk in self.iterchunks():
+            for item in chunk:
+                yield item
 
+    def __getitem__(self, key):
         if isinstance(key, slice):
             return _normalize_slice(self, key)
         elif isinstance(key, six.integer_types):
-            index = _normalize_index(key, self.chunked_array.length())
-            for i in range(self.num_chunks):
-                if index < self.chunked_array.chunk(i).get().length():
-                    return self.chunk(i)[index]
-                else:
-                    index -= self.chunked_array.chunk(i).get().length()
+            return self.getitem(key)
         else:
             raise TypeError("key must either be a slice or integer")
 
+    cdef getitem(self, int64_t i):
+        cdef int j
+
+        index = _normalize_index(i, self.chunked_array.length())
+        for j in range(self.num_chunks):
+            if index < self.chunked_array.chunk(j).get().length():
+                return self.chunk(j)[index]
+            else:
+                index -= self.chunked_array.chunk(j).get().length()
+
     def slice(self, offset=0, length=None):
         """
         Compute zero-copy slice of this ChunkedArray
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 90679bf..f30203c 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import collections
 import datetime
 import pytest
 import struct
@@ -162,6 +163,15 @@ def test_array_slice():
             assert arr[start:stop].to_pylist() == arr.to_pylist()[start:stop]
 
 
+def test_array_iter():
+    arr = pa.array(range(10))
+
+    for i, j in zip(range(10), arr):
+        assert i == j
+
+    assert isinstance(arr, collections.Iterable)
+
+
 def test_struct_array_slice():
     # ARROW-2311: slicing nested arrays needs special care
     ty = pa.struct([pa.field('a', pa.int8()),
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index a3f0711..634a179 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from collections import OrderedDict
+from collections import OrderedDict, Iterable
 import numpy as np
 from pandas.util.testing import assert_frame_equal
 import pandas as pd
@@ -49,6 +49,21 @@ def test_chunked_array_getitem():
     assert data_slice.to_pylist() == []
 
 
+def test_chunked_array_iter():
+    data = [
+        pa.array([0]),
+        pa.array([1, 2, 3]),
+        pa.array([4, 5, 6]),
+        pa.array([7, 8, 9])
+    ]
+    arr = pa.chunked_array(data)
+
+    for i, j in zip(range(10), arr):
+        assert i == j
+
+    assert isinstance(arr, Iterable)
+
+
 def test_column_basics():
     data = [
         pa.array([-10, -5, 0, 5, 10])

-- 
To stop receiving notification emails like this one, please contact
u...@apache.org.

Reply via email to