This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new c077986 ARROW-2068: [Python] Expose array's buffers
c077986 is described below
commit c0779861417f44535196d5c383d35f07ff144627
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Feb 15 18:58:34 2018 +0100
ARROW-2068: [Python] Expose array's buffers
This recurses into child data if present (for nested types).
Author: Antoine Pitrou <[email protected]>
Closes #1613 from pitrou/ARROW-2068-expose-array-buffers and squashes the
following commits:
0634aaf [Antoine Pitrou] ARROW-2068: [Python] Expose array's buffers
---
python/pyarrow/array.pxi | 25 +++++++++++++++
python/pyarrow/tests/test_array.py | 64 ++++++++++++++++++++++++++++++++++++++
2 files changed, 89 insertions(+)
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index f85363c..a43bfb9 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -244,6 +244,22 @@ cdef wrap_datum(const CDatum& datum):
raise ValueError("Unable to wrap Datum in a Python object")
+cdef _append_array_buffers(const CArrayData* ad, list res):
+ """
+ Recursively append Buffer wrappers from *ad* and its children.
+
+ The buffers of *ad* itself are appended to *res* first, in order,
+ followed by the buffers of each entry of ``ad.child_data`` in turn
+ (depth-first). A buffer slot holding a null pointer is appended
+ as None rather than as a Buffer wrapper.
+ """
+ cdef size_t i, n
+ assert ad != NULL
+ n = ad.buffers.size()
+ for i in range(n):
+ buf = ad.buffers[i]
+ # Keep one list entry per buffer slot: None stands in for a null buffer
+ res.append(pyarrow_wrap_buffer(buf)
+ if buf.get() != NULL else None)
+ n = ad.child_data.size()
+ for i in range(n):
+ # Children share the same result list, so their buffers follow ours
+ _append_array_buffers(ad.child_data[i].get(), res)
+
+
cdef class Array:
cdef void init(self, const shared_ptr[CArray]& sp_array):
@@ -463,6 +479,15 @@ cdef class Array:
with nogil:
check_status(ValidateArray(deref(self.ap)))
+ def buffers(self):
+ """
+ Return a list of Buffer objects pointing to this array's physical
+ storage.
+
+ The list holds this array's own buffers first, followed by the
+ buffers of its child data, recursively (for nested types). An entry
+ is None where the corresponding buffer is absent.
+ """
+ res = []
+ _append_array_buffers(self.sp_array.get().data().get(), res)
+ return res
+
cdef class Tensor:
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index efbcef5..197dac0 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -17,6 +17,7 @@
import datetime
import pytest
+import struct
import sys
import numpy as np
@@ -589,3 +590,66 @@ def test_array_from_numpy_unicode():
arrow_arr = pa.array(arr)
expected = pa.array(['', '', ''], type='utf8')
assert arrow_arr.equals(expected)
+
+
+def test_buffers_primitive():
+ # Primitive arrays are expected to expose two buffers: null bitmap, values
+ a = pa.array([1, 2, None, 4], type=pa.int16())
+ buffers = a.buffers()
+ assert len(buffers) == 2
+ null_bitmap = buffers[0].to_pybytes()
+ assert 1 <= len(null_bitmap) <= 64 # XXX bitmap length varies, so only a range is checked
+ # Bits 0, 1 and 3 are set, matching the non-null positions of the input
+ assert bytearray(null_bitmap)[0] == 0b00001011
+
+ # 'xx' skips the 2-byte slot of the null entry at index 2
+ assert struct.unpack('hhxxh', buffers[1].to_pybytes()) == (1, 2, 4)
+
+ a = pa.array(np.int8([4, 5, 6]))
+ buffers = a.buffers()
+ assert len(buffers) == 2
+ # No null bitmap from NumPy int array
+ assert buffers[0] is None
+ assert struct.unpack('3b', buffers[1].to_pybytes()) == (4, 5, 6)
+
+ # Binary arrays are expected to expose three buffers:
+ # null bitmap, offsets, values
+ a = pa.array([b'foo!', None, b'bar!!'])
+ buffers = a.buffers()
+ assert len(buffers) == 3
+ null_bitmap = buffers[0].to_pybytes()
+ # Bits 0 and 2 are set: only the middle entry is null
+ assert bytearray(null_bitmap)[0] == 0b00000101
+ offsets = buffers[1].to_pybytes()
+ # Four 32-bit offsets for three entries; the null entry has zero length
+ assert struct.unpack('4i', offsets) == (0, 4, 4, 9)
+ values = buffers[2].to_pybytes()
+ # The values buffer is the concatenation of the non-null byte strings
+ assert values == b'foo!bar!!'
+
+
+def test_buffers_nested():
+ # List array: two parent buffers (null bitmap, offsets) followed by the
+ # two buffers of the int64 child (null bitmap, values)
+ a = pa.array([[1, 2], None, [3, None, 4, 5]], type=pa.list_(pa.int64()))
+ buffers = a.buffers()
+ assert len(buffers) == 4
+ # The parent buffers
+ null_bitmap = buffers[0].to_pybytes()
+ # Bits 0 and 2 are set: only the middle list is null
+ assert bytearray(null_bitmap)[0] == 0b00000101
+ offsets = buffers[1].to_pybytes()
+ # The null list spans no child elements (offsets 2 -> 2)
+ assert struct.unpack('4i', offsets) == (0, 2, 2, 6)
+ # The child buffers
+ null_bitmap = buffers[2].to_pybytes()
+ # Six child elements; bit 3 is clear for the None inside the last list
+ assert bytearray(null_bitmap)[0] == 0b00110111
+ values = buffers[3].to_pybytes()
+ # '8x' skips the 8-byte slot of the null child element
+ assert struct.unpack('qqq8xqq', values) == (1, 2, 3, 4, 5)
+
+ # Struct array: one parent buffer (null bitmap) followed by two buffers
+ # (null bitmap, values) for each of the fields 'a' and 'b'
+ a = pa.array([(42, None), None, (None, 43)],
+ type=pa.struct([pa.field('a', pa.int8()),
+ pa.field('b', pa.int16())]))
+ buffers = a.buffers()
+ assert len(buffers) == 5
+ # The parent buffer
+ null_bitmap = buffers[0].to_pybytes()
+ # Bits 0 and 2 are set: only the middle struct is null
+ assert bytearray(null_bitmap)[0] == 0b00000101
+ # The child buffers: 'a'
+ null_bitmap = buffers[1].to_pybytes()
+ # Only the first 'a' value (42) is non-null
+ assert bytearray(null_bitmap)[0] == 0b00000001
+ values = buffers[2].to_pybytes()
+ # One int8 value, then two 1-byte slots skipped for the null entries
+ assert struct.unpack('bxx', values) == (42,)
+ # The child buffers: 'b'
+ null_bitmap = buffers[3].to_pybytes()
+ # Only the last 'b' value (43) is non-null
+ assert bytearray(null_bitmap)[0] == 0b00000100
+ values = buffers[4].to_pybytes()
+ # Two 2-byte slots skipped for the null entries, then one int16 value
+ assert struct.unpack('4xh', values) == (43,)
--
To stop receiving notification emails like this one, please contact
[email protected].