This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 58fa873  ARROW-2282: [Python] Create StringArray from buffers
58fa873 is described below

commit 58fa873f70907a44ed8c40ae3481f6a0bbf2a669
Author: Uwe L. Korn <uw...@xhochy.com>
AuthorDate: Mon Mar 12 14:20:30 2018 -0400

    ARROW-2282: [Python] Create StringArray from buffers
    
    Author: Uwe L. Korn <uw...@xhochy.com>
    
    Closes #1720 from xhochy/ARROW-2282 and squashes the following commits:
    
    36dc6b86 <Uwe L. Korn> Check computed null_count
    5bb257de <Uwe L. Korn> ARROW-2282:  Create StringArray from buffers
---
 python/pyarrow/array.pxi             | 35 ++++++++++++++++++++++++++++++++++-
 python/pyarrow/includes/libarrow.pxd |  6 ++++++
 python/pyarrow/tests/test_array.py   | 30 ++++++++++++++++++++++++++++++
 3 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 2ea131b..8dac57d 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -788,8 +788,41 @@ cdef class UnionArray(Array):
         return pyarrow_wrap_array(out)
 
 cdef class StringArray(Array):
-    pass
 
+    @staticmethod
+    def from_buffers(int length, Buffer value_offsets, Buffer data,
+                     Buffer null_bitmap=None, int null_count=-1,
+                     int offset=0):
+        """
+        Construct a StringArray from value_offsets and data buffers.
+        If there are nulls in the data, a null_bitmap must also be passed;
+        null_count is computed from the bitmap when left at -1.
+
+        Parameters
+        ----------
+        length : int
+        value_offsets : Buffer
+        data : Buffer
+        null_bitmap : Buffer, optional
+        null_count : int, default -1 (computed from null_bitmap)
+        offset : int, default 0
+
+        Returns
+        -------
+        string_array : StringArray
+        """
+        cdef shared_ptr[CBuffer] c_null_bitmap
+        cdef shared_ptr[CArray] out
+
+        if null_bitmap is not None:
+            c_null_bitmap = null_bitmap.buffer
+        else:
+            null_count = 0
+
+        out.reset(new CStringArray(
+            length, value_offsets.buffer, data.buffer, c_null_bitmap,
+            null_count, offset))
+        return pyarrow_wrap_array(out)
 
 cdef class BinaryArray(Array):
     pass
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 503ee88..3d0c02b 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -372,6 +372,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         const uint8_t* GetValue(int i, int32_t* length)
 
     cdef cppclass CStringArray" arrow::StringArray"(CBinaryArray):
+        CStringArray(int64_t length, shared_ptr[CBuffer] value_offsets,
+                     shared_ptr[CBuffer] data,
+                     shared_ptr[CBuffer] null_bitmap,
+                     int64_t null_count,
+                     int64_t offset)
+
         c_string GetString(int i)
 
     cdef cppclass CStructArray" arrow::StructArray"(CArray):
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 69d6a93..fea5686 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -290,6 +290,36 @@ def test_union_from_sparse():
     assert result.to_pylist() == [b'a', 1, b'b', b'c', 2, 3, b'd']
 
 
+def test_string_from_buffers():
+    array = pa.array(["a", None, "b", "c"])
+
+    buffers = array.buffers()
+    copied = pa.StringArray.from_buffers(
+        len(array), buffers[1], buffers[2], buffers[0], array.null_count,
+        array.offset)
+    assert copied.to_pylist() == ["a", None, "b", "c"]
+
+    copied = pa.StringArray.from_buffers(
+        len(array), buffers[1], buffers[2], buffers[0])
+    assert copied.to_pylist() == ["a", None, "b", "c"]
+
+    sliced = array[1:]
+    buffers = sliced.buffers()
+    copied = pa.StringArray.from_buffers(
+        len(sliced), buffers[1], buffers[2], buffers[0], -1, sliced.offset)
+    assert copied.to_pylist() == [None, "b", "c"]
+    assert copied.null_count == 1
+
+    # Slice but exclude all null entries so that we don't need to pass
+    # the null bitmap.
+    sliced = array[2:]
+    buffers = sliced.buffers()
+    copied = pa.StringArray.from_buffers(
+        len(sliced), buffers[1], buffers[2], None, -1, sliced.offset)
+    assert copied.to_pylist() == ["b", "c"]
+    assert copied.null_count == 0
+
+
 def _check_cast_case(case, safe=True):
     in_data, in_type, out_data, out_type = case
 

-- 
To stop receiving notification emails like this one, please contact
w...@apache.org.

Reply via email to