This is an automated email from the ASF dual-hosted git repository.

cpcloud pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 3d5880a  ARROW-2040: [Python] Deserialized Numpy array must keep ref to underlying tensor
3d5880a is described below

commit 3d5880aa0fd2325f1c0a4d5557a71f57838def83
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Feb 28 18:18:47 2018 -0500

    ARROW-2040: [Python] Deserialized Numpy array must keep ref to underlying tensor
    
    Author: Antoine Pitrou <[email protected]>
    
    Closes #1680 from pitrou/ARROW-2040-ndarray-from-tensor-base and squashes the following commits:
    
    15534c41 [Antoine Pitrou] ARROW-2040: [Python] Deserialized Numpy array must keep ref to underlying tensor
---
 cpp/src/arrow/python/arrow_to_python.cc    |  2 +-
 cpp/src/arrow/python/numpy_convert.cc      | 35 +++++++++++++++++-------------
 cpp/src/arrow/python/numpy_convert.h       |  3 ++-
 python/pyarrow/array.pxi                   |  2 +-
 python/pyarrow/includes/common.pxd         |  1 +
 python/pyarrow/includes/libarrow.pxd       |  2 +-
 python/pyarrow/tests/test_serialization.py | 17 +++++++++++++++
 7 files changed, 43 insertions(+), 19 deletions(-)

diff --git a/cpp/src/arrow/python/arrow_to_python.cc b/cpp/src/arrow/python/arrow_to_python.cc
index 54a71d5..5515d24 100644
--- a/cpp/src/arrow/python/arrow_to_python.cc
+++ b/cpp/src/arrow/python/arrow_to_python.cc
@@ -94,7 +94,7 @@ Status DeserializeDict(PyObject* context, const Array& array, int64_t start_idx,
 Status DeserializeArray(const Array& array, int64_t offset, PyObject* base,
                         const SerializedPyObject& blobs, PyObject** out) {
   int32_t index = static_cast<const Int32Array&>(array).Value(offset);
-  RETURN_NOT_OK(py::TensorToNdarray(*blobs.tensors[index], base, out));
+  RETURN_NOT_OK(py::TensorToNdarray(blobs.tensors[index], base, out));
   // Mark the array as immutable
   OwnedRef flags(PyObject_GetAttrString(*out, "flags"));
   DCHECK(flags.obj() != NULL) << "Could not mark Numpy array immutable";
diff --git a/cpp/src/arrow/python/numpy_convert.cc b/cpp/src/arrow/python/numpy_convert.cc
index 7ba1387..0cd616a 100644
--- a/cpp/src/arrow/python/numpy_convert.cc
+++ b/cpp/src/arrow/python/numpy_convert.cc
@@ -30,6 +30,7 @@
 #include "arrow/type.h"
 
 #include "arrow/python/common.h"
+#include "arrow/python/pyarrow.h"
 #include "arrow/python/type_traits.h"
 
 namespace arrow {
@@ -251,50 +252,54 @@ Status NdarrayToTensor(MemoryPool* pool, PyObject* ao, std::shared_ptr<Tensor>*
   return Status::OK();
 }
 
-Status TensorToNdarray(const Tensor& tensor, PyObject* base, PyObject** out) {
+Status TensorToNdarray(const std::shared_ptr<Tensor>& tensor, PyObject* base,
+                       PyObject** out) {
   PyAcquireGIL lock;
 
   int type_num;
-  RETURN_NOT_OK(GetNumPyType(*tensor.type(), &type_num));
+  RETURN_NOT_OK(GetNumPyType(*tensor->type(), &type_num));
   PyArray_Descr* dtype = PyArray_DescrNewFromType(type_num);
   RETURN_IF_PYERROR();
 
-  std::vector<npy_intp> npy_shape(tensor.ndim());
-  std::vector<npy_intp> npy_strides(tensor.ndim());
+  const int ndim = tensor->ndim();
+  std::vector<npy_intp> npy_shape(ndim);
+  std::vector<npy_intp> npy_strides(ndim);
 
-  for (int i = 0; i < tensor.ndim(); ++i) {
-    npy_shape[i] = tensor.shape()[i];
-    npy_strides[i] = tensor.strides()[i];
+  for (int i = 0; i < ndim; ++i) {
+    npy_shape[i] = tensor->shape()[i];
+    npy_strides[i] = tensor->strides()[i];
   }
 
   const void* immutable_data = nullptr;
-  if (tensor.data()) {
-    immutable_data = tensor.data()->data();
+  if (tensor->data()) {
+    immutable_data = tensor->data()->data();
   }
 
   // Remove const =(
   void* mutable_data = const_cast<void*>(immutable_data);
 
   int array_flags = 0;
-  if (tensor.is_row_major()) {
+  if (tensor->is_row_major()) {
     array_flags |= NPY_ARRAY_C_CONTIGUOUS;
   }
-  if (tensor.is_column_major()) {
+  if (tensor->is_column_major()) {
     array_flags |= NPY_ARRAY_F_CONTIGUOUS;
   }
-  if (tensor.is_mutable()) {
+  if (tensor->is_mutable()) {
     array_flags |= NPY_ARRAY_WRITEABLE;
   }
 
   PyObject* result =
-      PyArray_NewFromDescr(&PyArray_Type, dtype, tensor.ndim(), npy_shape.data(),
+      PyArray_NewFromDescr(&PyArray_Type, dtype, ndim, npy_shape.data(),
                            npy_strides.data(), mutable_data, array_flags, nullptr);
   RETURN_IF_PYERROR()
 
-  if (base != Py_None) {
-    PyArray_SetBaseObject(reinterpret_cast<PyArrayObject*>(result), base);
+  if (base == Py_None || base == nullptr) {
+    base = py::wrap_tensor(tensor);
+  } else {
     Py_XINCREF(base);
   }
+  PyArray_SetBaseObject(reinterpret_cast<PyArrayObject*>(result), base);
   *out = result;
   return Status::OK();
 }
diff --git a/cpp/src/arrow/python/numpy_convert.h b/cpp/src/arrow/python/numpy_convert.h
index 220e38f..dfdb1ac 100644
--- a/cpp/src/arrow/python/numpy_convert.h
+++ b/cpp/src/arrow/python/numpy_convert.h
@@ -65,7 +65,8 @@ Status GetNumPyType(const DataType& type, int* type_num);
 ARROW_EXPORT Status NdarrayToTensor(MemoryPool* pool, PyObject* ao,
                                     std::shared_ptr<Tensor>* out);
 
-ARROW_EXPORT Status TensorToNdarray(const Tensor& tensor, PyObject* base, PyObject** out);
+ARROW_EXPORT Status TensorToNdarray(const std::shared_ptr<Tensor>& tensor, PyObject* base,
+                                    PyObject** out);
 
 }  // namespace py
 }  // namespace arrow
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index a43bfb9..5b8621f 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -518,7 +518,7 @@ strides: {2}""".format(self.type, self.shape, self.strides)
             PyObject* out
 
         with nogil:
-            check_status(TensorToNdarray(deref(self.tp), self, &out))
+            check_status(TensorToNdarray(self.sp_tensor, self, &out))
         return PyObject_to_object(out)
 
     def equals(self, Tensor other):
diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd
index f323fea..4d799ec 100644
--- a/python/pyarrow/includes/common.pxd
+++ b/python/pyarrow/includes/common.pxd
@@ -32,6 +32,7 @@ cdef extern from "arrow/python/platform.h":
 
 cdef extern from "<Python.h>":
     void Py_XDECREF(PyObject* o)
+    Py_ssize_t Py_REFCNT(PyObject* o)
 
 cdef extern from "arrow/api.h" namespace "arrow" nogil:
     # We can later add more of the common status factory methods as needed
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 8da126a..900c3a5 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -871,7 +871,7 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
     CStatus NdarrayToTensor(CMemoryPool* pool, object ao,
                             shared_ptr[CTensor]* out)
 
-    CStatus TensorToNdarray(const CTensor& tensor, object base,
+    CStatus TensorToNdarray(const shared_ptr[CTensor]& tensor, object base,
                             PyObject** out)
 
     CStatus ConvertArrayToPandas(PandasOptions options,
diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py
index 3ee02cb..72315d2 100644
--- a/python/pyarrow/tests/test_serialization.py
+++ b/python/pyarrow/tests/test_serialization.py
@@ -372,6 +372,23 @@ def test_numpy_immutable(large_buffer):
         result[0] = 1.0
 
 
+def test_numpy_base_object(tmpdir):
+    # ARROW-2040: deserialized Numpy array should keep a reference to the
+    # owner of its memory
+    path = os.path.join(str(tmpdir), 'zzz.bin')
+    data = np.arange(12, dtype=np.int32)
+
+    with open(path, 'wb') as f:
+        f.write(pa.serialize(data).to_buffer())
+
+    serialized = pa.read_serialized(pa.OSFile(path))
+    result = serialized.deserialize()
+    assert_equal(result, data)
+    serialized = None
+    assert_equal(result, data)
+    assert result.base is not None
+
+
 # see https://issues.apache.org/jira/browse/ARROW-1695
 def test_serialization_callback_numpy():
 

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to