[ 
https://issues.apache.org/jira/browse/ARROW-2281?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16409988#comment-16409988
 ] 

ASF GitHub Bot commented on ARROW-2281:
---------------------------------------

wesm closed pull request #1772: ARROW-2281: [Python] Add Array.from_buffers()
URL: https://github.com/apache/arrow/pull/1772
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index 7e66999a3..80d64c871 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -47,6 +47,13 @@ std::shared_ptr<ArrayData> ArrayData::Make(const std::shared_ptr<DataType>& type
                                      offset);
 }
 
+std::shared_ptr<ArrayData> ArrayData::Make(
+    const std::shared_ptr<DataType>& type, int64_t length,
+    const std::vector<std::shared_ptr<Buffer>>& buffers, int64_t null_count,
+    int64_t offset) {
+  return std::make_shared<ArrayData>(type, length, buffers, null_count, offset);
+}
+
 std::shared_ptr<ArrayData> ArrayData::Make(const std::shared_ptr<DataType>& type,
                                            int64_t length, int64_t null_count,
                                            int64_t offset) {
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index 0a155af7e..660d0c3e5 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -111,6 +111,11 @@ struct ARROW_EXPORT ArrayData {
                                          int64_t null_count = kUnknownNullCount,
                                          int64_t offset = 0);
 
+  static std::shared_ptr<ArrayData> Make(
+      const std::shared_ptr<DataType>& type, int64_t length,
+      const std::vector<std::shared_ptr<Buffer>>& buffers,
+      int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
   static std::shared_ptr<ArrayData> Make(const std::shared_ptr<DataType>& type,
                                          int64_t length,
                                          int64_t null_count = kUnknownNullCount,
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 8dac57d18..afb68a2fb 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -344,7 +344,7 @@ cdef class Array:
         """
         Convert pandas.Series to an Arrow Array, using pandas's semantics about
         what values indicate nulls. See pyarrow.array for more general
-        conversion from arrays or sequences to Arrow arrays
+        conversion from arrays or sequences to Arrow arrays.
 
         Parameters
         ----------
@@ -372,6 +372,41 @@ cdef class Array:
         return array(obj, mask=mask, type=type, memory_pool=memory_pool,
                      from_pandas=True)
 
+    @staticmethod
+    def from_buffers(DataType type, length, buffers, null_count=-1, offset=0):
+        """
+        Construct an Array from a sequence of buffers.  The concrete type
+        returned depends on the datatype.
+
+        Parameters
+        ----------
+        type : DataType
+            The value type of the array
+        length : int
+            The number of values in the array
+        buffers: List[Buffer]
+            The buffers backing this array
+        null_count : int, default -1
+        offset : int, default 0
+            The array's logical offset (in values, not in bytes) from the
+            start of each buffer
+
+        Returns
+        -------
+        array : Array
+        """
+        cdef:
+            Buffer buf
+            vector[shared_ptr[CBuffer]] c_buffers
+            shared_ptr[CArrayData] ad
+
+        for buf in buffers:
+            # None will produce a null buffer pointer
+            c_buffers.push_back(pyarrow_unwrap_buffer(buf))
+        ad = CArrayData.Make(type.sp_type, length, c_buffers,
+                             null_count, offset)
+        return pyarrow_wrap_array(MakeArray(ad))
+
     property null_count:
 
         def __get__(self):
@@ -787,6 +822,7 @@ cdef class UnionArray(Array):
             check_status(CUnionArray.MakeSparse(deref(types.ap), c, &out))
         return pyarrow_wrap_array(out)
 
+
 cdef class StringArray(Array):
 
     @staticmethod
@@ -824,6 +860,7 @@ cdef class StringArray(Array):
             null_count, offset))
         return pyarrow_wrap_array(out)
 
+
 cdef class BinaryArray(Array):
     pass
 
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 589103635..f1032495e 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -98,6 +98,13 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         vector[shared_ptr[CBuffer]] buffers
         vector[shared_ptr[CArrayData]] child_data
 
+        @staticmethod
+        shared_ptr[CArrayData] Make(const shared_ptr[CDataType]& type,
+                                    int64_t length,
+                                    vector[shared_ptr[CBuffer]]& buffers,
+                                    int64_t null_count,
+                                    int64_t offset)
+
     cdef cppclass CArray" arrow::Array":
         shared_ptr[CDataType] type()
 
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 6faf4566b..d126db373 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -178,6 +178,26 @@ def test_array_eq_raises():
         arr1 == arr2
 
 
+def test_array_from_buffers():
+    values_buf = pa.py_buffer(np.int16([4, 5, 6, 7]))
+    nulls_buf = pa.py_buffer(np.uint8([0b00001101]))
+    arr = pa.Array.from_buffers(pa.int16(), 4, [nulls_buf, values_buf])
+    assert arr.type == pa.int16()
+    assert arr.to_pylist() == [4, None, 6, 7]
+
+    arr = pa.Array.from_buffers(pa.int16(), 4, [None, values_buf])
+    assert arr.type == pa.int16()
+    assert arr.to_pylist() == [4, 5, 6, 7]
+
+    arr = pa.Array.from_buffers(pa.int16(), 3, [nulls_buf, values_buf],
+                                offset=1)
+    assert arr.type == pa.int16()
+    assert arr.to_pylist() == [None, 6, 7]
+
+    with pytest.raises(TypeError):
+        pa.Array.from_buffers(pa.int16(), 3, [u'', u''], offset=1)
+
+
 def test_dictionary_from_numpy():
     indices = np.repeat([0, 1, 2], 2)
     dictionary = np.array(['foo', 'bar', 'baz'], dtype=object)


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


> [Python] Expose MakeArray to construct arrays from buffers
> ----------------------------------------------------------
>
>                 Key: ARROW-2281
>                 URL: https://issues.apache.org/jira/browse/ARROW-2281
>             Project: Apache Arrow
>          Issue Type: Improvement
>          Components: Python
>            Reporter: Uwe L. Korn
>            Assignee: Antoine Pitrou
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: 0.10.0
>
>
> To create new arrays from existing buffers in Python, we would need to call 
> into the C++ {{MakeArray}} function. This would construct the Array in C++, and
> we would only need to wrap it in Python to get construction support for all
> Array types.
> This would also mean that we need to have a Python representation of 
> {{ArrayData}}.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to