This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 2f718d7 ARROW-2262: [Python] Support slicing on pyarrow.ChunkedArray
2f718d7 is described below
commit 2f718d755f8b20add501597048bf1945ee9dca4b
Author: Uwe L. Korn <[email protected]>
AuthorDate: Fri Mar 9 15:27:17 2018 -0500
ARROW-2262: [Python] Support slicing on pyarrow.ChunkedArray
Author: Uwe L. Korn <[email protected]>
Closes #1702 from xhochy/ARROW-2262 and squashes the following commits:
7446414d <Uwe L. Korn> ARROW-2262: Support slicing on pyarrow.ChunkedArray
---
python/pyarrow/includes/libarrow.pxd | 2 ++
python/pyarrow/table.pxi | 46 ++++++++++++++++++++++++++++++++++++
python/pyarrow/tests/test_table.py | 15 ++++++++++++
3 files changed, 63 insertions(+)
diff --git a/python/pyarrow/includes/libarrow.pxd
b/python/pyarrow/includes/libarrow.pxd
index 22c39a8..2622300 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -388,6 +388,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
int num_chunks()
shared_ptr[CArray] chunk(int i)
shared_ptr[CDataType] type()
+ shared_ptr[CChunkedArray] Slice(int64_t offset, int64_t length) const
+ shared_ptr[CChunkedArray] Slice(int64_t offset) const
cdef cppclass CColumn" arrow::Column":
CColumn(const shared_ptr[CField]& field,
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index c27c0ed..94041e4 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -77,6 +77,52 @@ cdef class ChunkedArray:
self._check_nullptr()
return self.chunked_array.null_count()
+ def __getitem__(self, key):
+     """
+     Select elements by slice or by integer position
+
+     Parameters
+     ----------
+     key : slice or int
+         A slice is handed to _normalize_slice together with this
+         object; an integer is located by walking the chunks in order.
+
+     Returns
+     -------
+     The result of _normalize_slice for a slice key, otherwise the
+     element at the requested position, taken from the chunk holding it.
+
+     Raises
+     ------
+     IndexError
+         If an integer key is negative or not smaller than the total
+         length
+     TypeError
+         If key is neither a slice nor an integer
+     """
+     cdef int64_t item
+     cdef int i
+     self._check_nullptr()
+     if isinstance(key, slice):
+         return _normalize_slice(self, key)
+     elif isinstance(key, six.integer_types):
+         item = key
+         if item >= self.chunked_array.length() or item < 0:
+             # Raise instead of return: handing the exception object back
+             # would look like a successful lookup to the caller.
+             raise IndexError("ChunkedArray selection out of bounds")
+         # Walk the chunks, turning the global position into one that is
+         # relative to the chunk currently being inspected.
+         for i in range(self.num_chunks):
+             if item < self.chunked_array.chunk(i).get().length():
+                 return self.chunk(i)[item]
+             else:
+                 item -= self.chunked_array.chunk(i).get().length()
+     else:
+         raise TypeError("key must either be a slice or integer")
+
+ def slice(self, offset=0, length=None):
+     """
+     Compute zero-copy slice of this ChunkedArray
+
+     Parameters
+     ----------
+     offset : int, default 0
+         Offset from start of array to slice
+     length : int, default None
+         Length of slice (default is until end of array starting from
+         offset)
+
+     Returns
+     -------
+     sliced : ChunkedArray
+
+     Raises
+     ------
+     IndexError
+         If offset is negative
+     """
+     cdef shared_ptr[CChunkedArray] result
+
+     # Same guard that __getitem__ runs before touching
+     # self.chunked_array.
+     self._check_nullptr()
+
+     if offset < 0:
+         raise IndexError('Offset must be non-negative')
+
+     # Pick the C++ overload: without a length only the offset is passed.
+     if length is None:
+         result = self.chunked_array.Slice(offset)
+     else:
+         result = self.chunked_array.Slice(offset, length)
+
+     return pyarrow_wrap_chunked_array(result)
+
@property
def num_chunks(self):
"""
diff --git a/python/pyarrow/tests/test_table.py
b/python/pyarrow/tests/test_table.py
index e72761d..356ecb7 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -24,6 +24,21 @@ import pytest
import pyarrow as pa
+def test_chunked_array_getitem():
+ data = [
+ pa.array([1, 2, 3]),
+ pa.array([4, 5, 6])
+ ]
+ data = pa.chunked_array(data)
+ assert data[1].as_py() == 2
+
+ data_slice = data[2:4]
+ assert data_slice.to_pylist() == [3, 4]
+
+ data_slice = data[4:-1]
+ assert data_slice.to_pylist() == [5]
+
+
def test_column_basics():
data = [
pa.array([-10, -5, 0, 5, 10])
--
To stop receiving notification emails like this one, please contact
[email protected].