This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new eecb1bc ARROW-2281: [Python] Add Array.from_buffers()
eecb1bc is described below
commit eecb1bc023d2a62536bf1fe2a3edb11673fa06c3
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Mar 22 13:56:33 2018 -0400
ARROW-2281: [Python] Add Array.from_buffers()
Note this is shadowed by the specialized StringArray.from_buffers().
Author: Antoine Pitrou <[email protected]>
Closes #1772 from pitrou/ARROW-2281-python-array-from-buffers and squashes
the following commits:
c6bf3730 <Antoine Pitrou> Try to fix crashes
a7f658e2 <Antoine Pitrou> ARROW-2281: Add Array.from_buffers()
---
cpp/src/arrow/array.cc | 7 +++++++
cpp/src/arrow/array.h | 5 +++++
python/pyarrow/array.pxi | 39 +++++++++++++++++++++++++++++++++++-
python/pyarrow/includes/libarrow.pxd | 7 +++++++
python/pyarrow/tests/test_array.py | 20 ++++++++++++++++++
5 files changed, 77 insertions(+), 1 deletion(-)
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index 7e66999..80d64c8 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -47,6 +47,13 @@ std::shared_ptr<ArrayData> ArrayData::Make(const std::shared_ptr<DataType>& type
offset);
}
+std::shared_ptr<ArrayData> ArrayData::Make(
+ const std::shared_ptr<DataType>& type, int64_t length,
+ const std::vector<std::shared_ptr<Buffer>>& buffers, int64_t null_count,
+ int64_t offset) {
+ return std::make_shared<ArrayData>(type, length, buffers, null_count, offset);
+}
+
std::shared_ptr<ArrayData> ArrayData::Make(const std::shared_ptr<DataType>& type,
int64_t length, int64_t null_count,
int64_t offset) {
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index 0a155af..660d0c3 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -111,6 +111,11 @@ struct ARROW_EXPORT ArrayData {
int64_t null_count = kUnknownNullCount,
int64_t offset = 0);
+ static std::shared_ptr<ArrayData> Make(
+ const std::shared_ptr<DataType>& type, int64_t length,
+ const std::vector<std::shared_ptr<Buffer>>& buffers,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
static std::shared_ptr<ArrayData> Make(const std::shared_ptr<DataType>& type,
int64_t length,
int64_t null_count = kUnknownNullCount,
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 8dac57d..afb68a2 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -344,7 +344,7 @@ cdef class Array:
"""
Convert pandas.Series to an Arrow Array, using pandas's semantics about
what values indicate nulls. See pyarrow.array for more general
- conversion from arrays or sequences to Arrow arrays
+ conversion from arrays or sequences to Arrow arrays.
Parameters
----------
@@ -372,6 +372,41 @@ cdef class Array:
return array(obj, mask=mask, type=type, memory_pool=memory_pool,
from_pandas=True)
+ @staticmethod
+ def from_buffers(DataType type, length, buffers, null_count=-1, offset=0):
+ """
+ Construct an Array from a sequence of buffers. The concrete type
+ returned depends on the datatype.
+
+ Parameters
+ ----------
+ type : DataType
+ The value type of the array
+ length : int
+ The number of values in the array
+ buffers: List[Buffer]
+ The buffers backing this array
+ null_count : int, default -1
+ offset : int, default 0
+ The array's logical offset (in values, not in bytes) from the
+ start of each buffer
+
+ Returns
+ -------
+ array : Array
+ """
+ cdef:
+ Buffer buf
+ vector[shared_ptr[CBuffer]] c_buffers
+ shared_ptr[CArrayData] ad
+
+ for buf in buffers:
+ # None will produce a null buffer pointer
+ c_buffers.push_back(pyarrow_unwrap_buffer(buf))
+ ad = CArrayData.Make(type.sp_type, length, c_buffers,
+ null_count, offset)
+ return pyarrow_wrap_array(MakeArray(ad))
+
property null_count:
def __get__(self):
@@ -787,6 +822,7 @@ cdef class UnionArray(Array):
check_status(CUnionArray.MakeSparse(deref(types.ap), c, &out))
return pyarrow_wrap_array(out)
+
cdef class StringArray(Array):
@staticmethod
@@ -824,6 +860,7 @@ cdef class StringArray(Array):
null_count, offset))
return pyarrow_wrap_array(out)
+
cdef class BinaryArray(Array):
pass
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 5891036..f103249 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -98,6 +98,13 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
vector[shared_ptr[CBuffer]] buffers
vector[shared_ptr[CArrayData]] child_data
+ @staticmethod
+ shared_ptr[CArrayData] Make(const shared_ptr[CDataType]& type,
+ int64_t length,
+ vector[shared_ptr[CBuffer]]& buffers,
+ int64_t null_count,
+ int64_t offset)
+
cdef cppclass CArray" arrow::Array":
shared_ptr[CDataType] type()
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 6faf456..d126db3 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -178,6 +178,26 @@ def test_array_eq_raises():
arr1 == arr2
+def test_array_from_buffers():
+ values_buf = pa.py_buffer(np.int16([4, 5, 6, 7]))
+ nulls_buf = pa.py_buffer(np.uint8([0b00001101]))
+ arr = pa.Array.from_buffers(pa.int16(), 4, [nulls_buf, values_buf])
+ assert arr.type == pa.int16()
+ assert arr.to_pylist() == [4, None, 6, 7]
+
+ arr = pa.Array.from_buffers(pa.int16(), 4, [None, values_buf])
+ assert arr.type == pa.int16()
+ assert arr.to_pylist() == [4, 5, 6, 7]
+
+ arr = pa.Array.from_buffers(pa.int16(), 3, [nulls_buf, values_buf],
+ offset=1)
+ assert arr.type == pa.int16()
+ assert arr.to_pylist() == [None, 6, 7]
+
+ with pytest.raises(TypeError):
+ pa.Array.from_buffers(pa.int16(), 3, [u'', u''], offset=1)
+
+
def test_dictionary_from_numpy():
indices = np.repeat([0, 1, 2], 2)
dictionary = np.array(['foo', 'bar', 'baz'], dtype=object)
--
To stop receiving notification emails like this one, please contact
[email protected].