This is an automated email from the ASF dual-hosted git repository.
cpcloud pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 3d5880a ARROW-2040: [Python] Deserialized Numpy array must keep ref to underlying tensor
3d5880a is described below
commit 3d5880aa0fd2325f1c0a4d5557a71f57838def83
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Feb 28 18:18:47 2018 -0500
ARROW-2040: [Python] Deserialized Numpy array must keep ref to underlying tensor
Author: Antoine Pitrou <[email protected]>
Closes #1680 from pitrou/ARROW-2040-ndarray-from-tensor-base and squashes
the following commits:
15534c41 [Antoine Pitrou] ARROW-2040: [Python] Deserialized Numpy array
must keep ref to underlying tensor
---
cpp/src/arrow/python/arrow_to_python.cc | 2 +-
cpp/src/arrow/python/numpy_convert.cc | 35 +++++++++++++++++-------------
cpp/src/arrow/python/numpy_convert.h | 3 ++-
python/pyarrow/array.pxi | 2 +-
python/pyarrow/includes/common.pxd | 1 +
python/pyarrow/includes/libarrow.pxd | 2 +-
python/pyarrow/tests/test_serialization.py | 17 +++++++++++++++
7 files changed, 43 insertions(+), 19 deletions(-)
diff --git a/cpp/src/arrow/python/arrow_to_python.cc b/cpp/src/arrow/python/arrow_to_python.cc
index 54a71d5..5515d24 100644
--- a/cpp/src/arrow/python/arrow_to_python.cc
+++ b/cpp/src/arrow/python/arrow_to_python.cc
@@ -94,7 +94,7 @@ Status DeserializeDict(PyObject* context, const Array& array, int64_t start_idx,
Status DeserializeArray(const Array& array, int64_t offset, PyObject* base,
const SerializedPyObject& blobs, PyObject** out) {
int32_t index = static_cast<const Int32Array&>(array).Value(offset);
- RETURN_NOT_OK(py::TensorToNdarray(*blobs.tensors[index], base, out));
+ RETURN_NOT_OK(py::TensorToNdarray(blobs.tensors[index], base, out));
// Mark the array as immutable
OwnedRef flags(PyObject_GetAttrString(*out, "flags"));
DCHECK(flags.obj() != NULL) << "Could not mark Numpy array immutable";
diff --git a/cpp/src/arrow/python/numpy_convert.cc b/cpp/src/arrow/python/numpy_convert.cc
index 7ba1387..0cd616a 100644
--- a/cpp/src/arrow/python/numpy_convert.cc
+++ b/cpp/src/arrow/python/numpy_convert.cc
@@ -30,6 +30,7 @@
#include "arrow/type.h"
#include "arrow/python/common.h"
+#include "arrow/python/pyarrow.h"
#include "arrow/python/type_traits.h"
namespace arrow {
@@ -251,50 +252,54 @@ Status NdarrayToTensor(MemoryPool* pool, PyObject* ao, std::shared_ptr<Tensor>*
return Status::OK();
}
-Status TensorToNdarray(const Tensor& tensor, PyObject* base, PyObject** out) {
+Status TensorToNdarray(const std::shared_ptr<Tensor>& tensor, PyObject* base,
+ PyObject** out) {
PyAcquireGIL lock;
int type_num;
- RETURN_NOT_OK(GetNumPyType(*tensor.type(), &type_num));
+ RETURN_NOT_OK(GetNumPyType(*tensor->type(), &type_num));
PyArray_Descr* dtype = PyArray_DescrNewFromType(type_num);
RETURN_IF_PYERROR();
- std::vector<npy_intp> npy_shape(tensor.ndim());
- std::vector<npy_intp> npy_strides(tensor.ndim());
+ const int ndim = tensor->ndim();
+ std::vector<npy_intp> npy_shape(ndim);
+ std::vector<npy_intp> npy_strides(ndim);
- for (int i = 0; i < tensor.ndim(); ++i) {
- npy_shape[i] = tensor.shape()[i];
- npy_strides[i] = tensor.strides()[i];
+ for (int i = 0; i < ndim; ++i) {
+ npy_shape[i] = tensor->shape()[i];
+ npy_strides[i] = tensor->strides()[i];
}
const void* immutable_data = nullptr;
- if (tensor.data()) {
- immutable_data = tensor.data()->data();
+ if (tensor->data()) {
+ immutable_data = tensor->data()->data();
}
// Remove const =(
void* mutable_data = const_cast<void*>(immutable_data);
int array_flags = 0;
- if (tensor.is_row_major()) {
+ if (tensor->is_row_major()) {
array_flags |= NPY_ARRAY_C_CONTIGUOUS;
}
- if (tensor.is_column_major()) {
+ if (tensor->is_column_major()) {
array_flags |= NPY_ARRAY_F_CONTIGUOUS;
}
- if (tensor.is_mutable()) {
+ if (tensor->is_mutable()) {
array_flags |= NPY_ARRAY_WRITEABLE;
}
PyObject* result =
- PyArray_NewFromDescr(&PyArray_Type, dtype, tensor.ndim(), npy_shape.data(),
+ PyArray_NewFromDescr(&PyArray_Type, dtype, ndim, npy_shape.data(),
npy_strides.data(), mutable_data, array_flags,
nullptr);
RETURN_IF_PYERROR()
- if (base != Py_None) {
- PyArray_SetBaseObject(reinterpret_cast<PyArrayObject*>(result), base);
+ if (base == Py_None || base == nullptr) {
+ base = py::wrap_tensor(tensor);
+ } else {
Py_XINCREF(base);
}
+ PyArray_SetBaseObject(reinterpret_cast<PyArrayObject*>(result), base);
*out = result;
return Status::OK();
}
diff --git a/cpp/src/arrow/python/numpy_convert.h b/cpp/src/arrow/python/numpy_convert.h
index 220e38f..dfdb1ac 100644
--- a/cpp/src/arrow/python/numpy_convert.h
+++ b/cpp/src/arrow/python/numpy_convert.h
@@ -65,7 +65,8 @@ Status GetNumPyType(const DataType& type, int* type_num);
ARROW_EXPORT Status NdarrayToTensor(MemoryPool* pool, PyObject* ao,
std::shared_ptr<Tensor>* out);
-ARROW_EXPORT Status TensorToNdarray(const Tensor& tensor, PyObject* base, PyObject** out);
+ARROW_EXPORT Status TensorToNdarray(const std::shared_ptr<Tensor>& tensor, PyObject* base,
+ PyObject** out);
} // namespace py
} // namespace arrow
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index a43bfb9..5b8621f 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -518,7 +518,7 @@ strides: {2}""".format(self.type, self.shape, self.strides)
PyObject* out
with nogil:
- check_status(TensorToNdarray(deref(self.tp), self, &out))
+ check_status(TensorToNdarray(self.sp_tensor, self, &out))
return PyObject_to_object(out)
def equals(self, Tensor other):
diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd
index f323fea..4d799ec 100644
--- a/python/pyarrow/includes/common.pxd
+++ b/python/pyarrow/includes/common.pxd
@@ -32,6 +32,7 @@ cdef extern from "arrow/python/platform.h":
cdef extern from "<Python.h>":
void Py_XDECREF(PyObject* o)
+ Py_ssize_t Py_REFCNT(PyObject* o)
cdef extern from "arrow/api.h" namespace "arrow" nogil:
# We can later add more of the common status factory methods as needed
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 8da126a..900c3a5 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -871,7 +871,7 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
CStatus NdarrayToTensor(CMemoryPool* pool, object ao,
shared_ptr[CTensor]* out)
- CStatus TensorToNdarray(const CTensor& tensor, object base,
+ CStatus TensorToNdarray(const shared_ptr[CTensor]& tensor, object base,
PyObject** out)
CStatus ConvertArrayToPandas(PandasOptions options,
diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py
index 3ee02cb..72315d2 100644
--- a/python/pyarrow/tests/test_serialization.py
+++ b/python/pyarrow/tests/test_serialization.py
@@ -372,6 +372,23 @@ def test_numpy_immutable(large_buffer):
result[0] = 1.0
+def test_numpy_base_object(tmpdir):
+ # ARROW-2040: deserialized Numpy array should keep a reference to the
+ # owner of its memory
+ path = os.path.join(str(tmpdir), 'zzz.bin')
+ data = np.arange(12, dtype=np.int32)
+
+ with open(path, 'wb') as f:
+ f.write(pa.serialize(data).to_buffer())
+
+ serialized = pa.read_serialized(pa.OSFile(path))
+ result = serialized.deserialize()
+ assert_equal(result, data)
+ serialized = None
+ assert_equal(result, data)
+ assert result.base is not None
+
+
# see https://issues.apache.org/jira/browse/ARROW-1695
def test_serialization_callback_numpy():
--
To stop receiving notification emails like this one, please contact
[email protected].