[
https://issues.apache.org/jira/browse/ARROW-2262?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16393497#comment-16393497
]
ASF GitHub Bot commented on ARROW-2262:
---------------------------------------
wesm closed pull request #1702: ARROW-2262: [Python] Support slicing on
pyarrow.ChunkedArray
URL: https://github.com/apache/arrow/pull/1702
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/python/pyarrow/includes/libarrow.pxd
b/python/pyarrow/includes/libarrow.pxd
index d95f01661..776b96531 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -387,6 +387,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
int num_chunks()
shared_ptr[CArray] chunk(int i)
shared_ptr[CDataType] type()
+ shared_ptr[CChunkedArray] Slice(int64_t offset, int64_t length) const
+ shared_ptr[CChunkedArray] Slice(int64_t offset) const
cdef cppclass CColumn" arrow::Column":
CColumn(const shared_ptr[CField]& field,
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index c27c0edd9..94041e465 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -77,6 +77,52 @@ cdef class ChunkedArray:
self._check_nullptr()
return self.chunked_array.null_count()
+    def __getitem__(self, key):
+        """
+        Select a single element or a slice of this ChunkedArray
+
+        Parameters
+        ----------
+        key : int or slice
+            A slice is handed to _normalize_slice; an integer is treated
+            as a position counted across all chunks.
+
+        Returns
+        -------
+        The result of _normalize_slice for a slice key, otherwise the
+        element found at that position inside the chunk that holds it.
+
+        Raises
+        ------
+        IndexError
+            If an integer key is negative or not smaller than the total
+            length.
+        TypeError
+            If key is neither a slice nor an integer.
+        """
+        cdef int64_t item
+        cdef int i
+        self._check_nullptr()
+        if isinstance(key, slice):
+            return _normalize_slice(self, key)
+        elif isinstance(key, six.integer_types):
+            item = key
+            if item >= self.chunked_array.length() or item < 0:
+                # Must be raised, not returned: returning the exception
+                # object would hand it to the caller as if it were the
+                # selected element.
+                raise IndexError("ChunkedArray selection out of bounds")
+            # Walk the chunks in order, turning the global position into
+            # a chunk-local one by subtracting the length of every chunk
+            # that is skipped.
+            for i in range(self.num_chunks):
+                if item < self.chunked_array.chunk(i).get().length():
+                    return self.chunk(i)[item]
+                else:
+                    item -= self.chunked_array.chunk(i).get().length()
+        else:
+            raise TypeError("key must either be a slice or integer")
+
+    def slice(self, offset=0, length=None):
+        """
+        Compute zero-copy slice of this ChunkedArray
+
+        Parameters
+        ----------
+        offset : int, default 0
+            Offset from start of array to slice
+        length : int, default None
+            Length of slice (default is until end of array starting from
+            offset)
+
+        Returns
+        -------
+        sliced : ChunkedArray
+
+        Raises
+        ------
+        IndexError
+            If offset is negative.
+        """
+        cdef shared_ptr[CChunkedArray] result
+
+        # Same guard the other accessors of this class call before they
+        # touch self.chunked_array.
+        self._check_nullptr()
+
+        if offset < 0:
+            raise IndexError('Offset must be non-negative')
+
+        # Pick the C++ overload: without a length only the offset is
+        # passed on.
+        if length is None:
+            result = self.chunked_array.Slice(offset)
+        else:
+            result = self.chunked_array.Slice(offset, length)
+
+        return pyarrow_wrap_chunked_array(result)
+
@property
def num_chunks(self):
"""
diff --git a/python/pyarrow/tests/test_table.py
b/python/pyarrow/tests/test_table.py
index e72761d32..356ecb7e0 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -24,6 +24,21 @@
import pyarrow as pa
+def test_chunked_array_getitem():
+ data = [
+ pa.array([1, 2, 3]),
+ pa.array([4, 5, 6])
+ ]
+ data = pa.chunked_array(data)
+ assert data[1].as_py() == 2
+
+ data_slice = data[2:4]
+ assert data_slice.to_pylist() == [3, 4]
+
+ data_slice = data[4:-1]
+ assert data_slice.to_pylist() == [5]
+
+
def test_column_basics():
data = [
pa.array([-10, -5, 0, 5, 10])
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> [Python] Support slicing on pyarrow.ChunkedArray
> ------------------------------------------------
>
> Key: ARROW-2262
> URL: https://issues.apache.org/jira/browse/ARROW-2262
> Project: Apache Arrow
> Issue Type: New Feature
> Components: Python
> Reporter: Uwe L. Korn
> Assignee: Uwe L. Korn
> Priority: Major
> Labels: pull-request-available
> Fix For: 0.9.0
>
>
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)