This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new c372dfb ARROW-2280: [Python] Return the offset for the buffers in
pyarrow.Array
c372dfb is described below
commit c372dfbdae95fe445c036188aca174c6ff18ecde
Author: Uwe L. Korn <[email protected]>
AuthorDate: Wed Mar 7 14:29:38 2018 -0500
ARROW-2280: [Python] Return the offset for the buffers in pyarrow.Array
Author: Uwe L. Korn <[email protected]>
Closes #1719 from xhochy/ARROW-2280 and squashes the following commits:
82b50a76 <Uwe L. Korn> ARROW-2280: Return the offset for the buffers in
pyarrow.Array
---
python/pyarrow/array.pxi | 13 +++++++++++++
python/pyarrow/includes/libarrow.pxd | 1 +
python/pyarrow/tests/test_array.py | 9 +++++++++
3 files changed, 23 insertions(+)
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 7899d9d..e785c0e 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -483,10 +483,23 @@ cdef class Array:
with nogil:
check_status(ValidateArray(deref(self.ap)))
+ property offset:
+
+ def __get__(self):
+ """
+ Logical start position of this array within its physical storage
+ buffers, enabling zero-copy slicing. Defaults to zero and must be
+ applied on every operation that reads those buffers directly.
+ """
+ return self.sp_array.get().offset()
+
def buffers(self):
"""
Return a list of Buffer objects pointing to this array's physical
storage.
+
+ To interpret these buffers correctly, you also need to apply the offset
+ multiplied by the size of the stored data type.
"""
res = []
_append_array_buffers(self.sp_array.get().data().get(), res)
diff --git a/python/pyarrow/includes/libarrow.pxd
b/python/pyarrow/includes/libarrow.pxd
index d95f016..456fcca 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -103,6 +103,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
int64_t length()
int64_t null_count()
+ int64_t offset()
Type type_id()
int num_fields()
diff --git a/python/pyarrow/tests/test_array.py
b/python/pyarrow/tests/test_array.py
index c1131a0..f034d78 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -600,6 +600,15 @@ def test_buffers_primitive():
assert 1 <= len(null_bitmap) <= 64 # XXX this is varying
assert bytearray(null_bitmap)[0] == 0b00001011
+ # Slicing does not affect the buffers but the offset
+ a_sliced = a[1:]
+ buffers = a_sliced.buffers()
+ a_sliced.offset == 1
+ assert len(buffers) == 2
+ null_bitmap = buffers[0].to_pybytes()
+ assert 1 <= len(null_bitmap) <= 64 # XXX this is varying
+ assert bytearray(null_bitmap)[0] == 0b00001011
+
assert struct.unpack('hhxxh', buffers[1].to_pybytes()) == (1, 2, 4)
a = pa.array(np.int8([4, 5, 6]))
--
To stop receiving notification emails like this one, please contact
[email protected].