This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new eecb1bc  ARROW-2281: [Python] Add Array.from_buffers()
eecb1bc is described below

commit eecb1bc023d2a62536bf1fe2a3edb11673fa06c3
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Mar 22 13:56:33 2018 -0400

    ARROW-2281: [Python] Add Array.from_buffers()
    
    Note this is shadowed by the specialized StringArray.from_buffers().
    
    Author: Antoine Pitrou <[email protected]>
    
    Closes #1772 from pitrou/ARROW-2281-python-array-from-buffers and squashes the following commits:
    
    c6bf3730 <Antoine Pitrou> Try to fix crashes
    a7f658e2 <Antoine Pitrou> ARROW-2281:  Add Array.from_buffers()
---
 cpp/src/arrow/array.cc               |  7 +++++++
 cpp/src/arrow/array.h                |  5 +++++
 python/pyarrow/array.pxi             | 39 +++++++++++++++++++++++++++++++++++-
 python/pyarrow/includes/libarrow.pxd |  7 +++++++
 python/pyarrow/tests/test_array.py   | 20 ++++++++++++++++++
 5 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index 7e66999..80d64c8 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -47,6 +47,13 @@ std::shared_ptr<ArrayData> ArrayData::Make(const std::shared_ptr<DataType>& type
                                      offset);
 }
 
+std::shared_ptr<ArrayData> ArrayData::Make(
+    const std::shared_ptr<DataType>& type, int64_t length,
+    const std::vector<std::shared_ptr<Buffer>>& buffers, int64_t null_count,
+    int64_t offset) {
+  return std::make_shared<ArrayData>(type, length, buffers, null_count, offset);
+}
+
 std::shared_ptr<ArrayData> ArrayData::Make(const std::shared_ptr<DataType>& type,
                                            int64_t length, int64_t null_count,
                                            int64_t offset) {
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index 0a155af..660d0c3 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -111,6 +111,11 @@ struct ARROW_EXPORT ArrayData {
                                          int64_t null_count = kUnknownNullCount,
                                          int64_t offset = 0);
 
+  static std::shared_ptr<ArrayData> Make(
+      const std::shared_ptr<DataType>& type, int64_t length,
+      const std::vector<std::shared_ptr<Buffer>>& buffers,
+      int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
   static std::shared_ptr<ArrayData> Make(const std::shared_ptr<DataType>& type,
                                          int64_t length,
                                          int64_t null_count = kUnknownNullCount,
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 8dac57d..afb68a2 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -344,7 +344,7 @@ cdef class Array:
         """
         Convert pandas.Series to an Arrow Array, using pandas's semantics about
         what values indicate nulls. See pyarrow.array for more general
-        conversion from arrays or sequences to Arrow arrays
+        conversion from arrays or sequences to Arrow arrays.
 
         Parameters
         ----------
@@ -372,6 +372,41 @@ cdef class Array:
         return array(obj, mask=mask, type=type, memory_pool=memory_pool,
                      from_pandas=True)
 
+    @staticmethod
+    def from_buffers(DataType type, length, buffers, null_count=-1, offset=0):
+        """
+        Construct an Array from a sequence of buffers.  The concrete type
+        returned depends on the datatype.
+
+        Parameters
+        ----------
+        type : DataType
+            The value type of the array
+        length : int
+            The number of values in the array
+        buffers: List[Buffer]
+            The buffers backing this array
+        null_count : int, default -1
+        offset : int, default 0
+            The array's logical offset (in values, not in bytes) from the
+            start of each buffer
+
+        Returns
+        -------
+        array : Array
+        """
+        cdef:
+            Buffer buf
+            vector[shared_ptr[CBuffer]] c_buffers
+            shared_ptr[CArrayData] ad
+
+        for buf in buffers:
+            # None will produce a null buffer pointer
+            c_buffers.push_back(pyarrow_unwrap_buffer(buf))
+        ad = CArrayData.Make(type.sp_type, length, c_buffers,
+                             null_count, offset)
+        return pyarrow_wrap_array(MakeArray(ad))
+
     property null_count:
 
         def __get__(self):
@@ -787,6 +822,7 @@ cdef class UnionArray(Array):
             check_status(CUnionArray.MakeSparse(deref(types.ap), c, &out))
         return pyarrow_wrap_array(out)
 
+
 cdef class StringArray(Array):
 
     @staticmethod
@@ -824,6 +860,7 @@ cdef class StringArray(Array):
             null_count, offset))
         return pyarrow_wrap_array(out)
 
+
 cdef class BinaryArray(Array):
     pass
 
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 5891036..f103249 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -98,6 +98,13 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         vector[shared_ptr[CBuffer]] buffers
         vector[shared_ptr[CArrayData]] child_data
 
+        @staticmethod
+        shared_ptr[CArrayData] Make(const shared_ptr[CDataType]& type,
+                                    int64_t length,
+                                    vector[shared_ptr[CBuffer]]& buffers,
+                                    int64_t null_count,
+                                    int64_t offset)
+
     cdef cppclass CArray" arrow::Array":
         shared_ptr[CDataType] type()
 
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 6faf456..d126db3 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -178,6 +178,26 @@ def test_array_eq_raises():
         arr1 == arr2
 
 
+def test_array_from_buffers():
+    values_buf = pa.py_buffer(np.int16([4, 5, 6, 7]))
+    nulls_buf = pa.py_buffer(np.uint8([0b00001101]))
+    arr = pa.Array.from_buffers(pa.int16(), 4, [nulls_buf, values_buf])
+    assert arr.type == pa.int16()
+    assert arr.to_pylist() == [4, None, 6, 7]
+
+    arr = pa.Array.from_buffers(pa.int16(), 4, [None, values_buf])
+    assert arr.type == pa.int16()
+    assert arr.to_pylist() == [4, 5, 6, 7]
+
+    arr = pa.Array.from_buffers(pa.int16(), 3, [nulls_buf, values_buf],
+                                offset=1)
+    assert arr.type == pa.int16()
+    assert arr.to_pylist() == [None, 6, 7]
+
+    with pytest.raises(TypeError):
+        pa.Array.from_buffers(pa.int16(), 3, [u'', u''], offset=1)
+
+
 def test_dictionary_from_numpy():
     indices = np.repeat([0, 1, 2], 2)
     dictionary = np.array(['foo', 'bar', 'baz'], dtype=object)

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to