This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new c077986  ARROW-2068: [Python] Expose array's buffers
c077986 is described below

commit c0779861417f44535196d5c383d35f07ff144627
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Feb 15 18:58:34 2018 +0100

    ARROW-2068: [Python] Expose array's buffers
    
    This recurses into child data if present (for nested types).
    
    Author: Antoine Pitrou <[email protected]>
    
    Closes #1613 from pitrou/ARROW-2068-expose-array-buffers and squashes the following commits:
    
    0634aaf [Antoine Pitrou] ARROW-2068: [Python] Expose array's buffers
---
 python/pyarrow/array.pxi           | 25 +++++++++++++++
 python/pyarrow/tests/test_array.py | 64 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index f85363c..a43bfb9 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -244,6 +244,22 @@ cdef wrap_datum(const CDatum& datum):
         raise ValueError("Unable to wrap Datum in a Python object")
 
 
+cdef _append_array_buffers(const CArrayData* ad, list res):
+    """
+    Recursively append Buffer wrappers from *ad* and its children.
+    """
+    cdef size_t i, n
+    assert ad != NULL
+    n = ad.buffers.size()
+    for i in range(n):
+        buf = ad.buffers[i]
+        res.append(pyarrow_wrap_buffer(buf)
+                   if buf.get() != NULL else None)
+    n = ad.child_data.size()
+    for i in range(n):
+        _append_array_buffers(ad.child_data[i].get(), res)
+
+
 cdef class Array:
 
     cdef void init(self, const shared_ptr[CArray]& sp_array):
@@ -463,6 +479,15 @@ cdef class Array:
         with nogil:
             check_status(ValidateArray(deref(self.ap)))
 
+    def buffers(self):
+        """
+        Return a list of Buffer objects pointing to this array's physical
+        storage.
+        """
+        res = []
+        _append_array_buffers(self.sp_array.get().data().get(), res)
+        return res
+
 
 cdef class Tensor:
 
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index efbcef5..197dac0 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -17,6 +17,7 @@
 
 import datetime
 import pytest
+import struct
 import sys
 
 import numpy as np
@@ -589,3 +590,66 @@ def test_array_from_numpy_unicode():
     arrow_arr = pa.array(arr)
     expected = pa.array(['', '', ''], type='utf8')
     assert arrow_arr.equals(expected)
+
+
+def test_buffers_primitive():
+    a = pa.array([1, 2, None, 4], type=pa.int16())
+    buffers = a.buffers()
+    assert len(buffers) == 2
+    null_bitmap = buffers[0].to_pybytes()
+    assert 1 <= len(null_bitmap) <= 64  # XXX this is varying
+    assert bytearray(null_bitmap)[0] == 0b00001011
+
+    assert struct.unpack('hhxxh', buffers[1].to_pybytes()) == (1, 2, 4)
+
+    a = pa.array(np.int8([4, 5, 6]))
+    buffers = a.buffers()
+    assert len(buffers) == 2
+    # No null bitmap from Numpy int array
+    assert buffers[0] is None
+    assert struct.unpack('3b', buffers[1].to_pybytes()) == (4, 5, 6)
+
+    a = pa.array([b'foo!', None, b'bar!!'])
+    buffers = a.buffers()
+    assert len(buffers) == 3
+    null_bitmap = buffers[0].to_pybytes()
+    assert bytearray(null_bitmap)[0] == 0b00000101
+    offsets = buffers[1].to_pybytes()
+    assert struct.unpack('4i', offsets) == (0, 4, 4, 9)
+    values = buffers[2].to_pybytes()
+    assert values == b'foo!bar!!'
+
+
+def test_buffers_nested():
+    a = pa.array([[1, 2], None, [3, None, 4, 5]], type=pa.list_(pa.int64()))
+    buffers = a.buffers()
+    assert len(buffers) == 4
+    # The parent buffers
+    null_bitmap = buffers[0].to_pybytes()
+    assert bytearray(null_bitmap)[0] == 0b00000101
+    offsets = buffers[1].to_pybytes()
+    assert struct.unpack('4i', offsets) == (0, 2, 2, 6)
+    # The child buffers
+    null_bitmap = buffers[2].to_pybytes()
+    assert bytearray(null_bitmap)[0] == 0b00110111
+    values = buffers[3].to_pybytes()
+    assert struct.unpack('qqq8xqq', values) == (1, 2, 3, 4, 5)
+
+    a = pa.array([(42, None), None, (None, 43)],
+                 type=pa.struct([pa.field('a', pa.int8()),
+                                 pa.field('b', pa.int16())]))
+    buffers = a.buffers()
+    assert len(buffers) == 5
+    # The parent buffer
+    null_bitmap = buffers[0].to_pybytes()
+    assert bytearray(null_bitmap)[0] == 0b00000101
+    # The child buffers: 'a'
+    null_bitmap = buffers[1].to_pybytes()
+    assert bytearray(null_bitmap)[0] == 0b00000001
+    values = buffers[2].to_pybytes()
+    assert struct.unpack('bxx', values) == (42,)
+    # The child buffers: 'b'
+    null_bitmap = buffers[3].to_pybytes()
+    assert bytearray(null_bitmap)[0] == 0b00000100
+    values = buffers[4].to_pybytes()
+    assert struct.unpack('4xh', values) == (43,)

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to