This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new f56fdc9  ARROW-2270: [Python] Fix lifetime of ForeignBuffer base object
f56fdc9 is described below

commit f56fdc9ccd77987af22093d0d711b5c1a566a094
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Mar 8 23:32:26 2018 -0500

    ARROW-2270: [Python] Fix lifetime of ForeignBuffer base object
    
    Author: Antoine Pitrou <[email protected]>
    
    Closes #1714 from pitrou/ARROW-2270-pyforeignbuffer and squashes the 
following commits:
    
    51f2d85e <Antoine Pitrou> ARROW-2270:  Fix lifetime of ForeignBuffer base 
object
---
 cpp/src/arrow/python/io.cc           | 14 ++++++++++++++
 cpp/src/arrow/python/io.h            | 21 +++++++++++++++++++++
 python/doc/source/api.rst            |  2 ++
 python/pyarrow/__init__.py           |  2 +-
 python/pyarrow/includes/libarrow.pxd |  5 +++++
 python/pyarrow/io.pxi                | 27 +++++++++++++++------------
 python/pyarrow/lib.pxd               |  5 -----
 python/pyarrow/tests/test_io.py      | 16 ++++++++++------
 8 files changed, 68 insertions(+), 24 deletions(-)

diff --git a/cpp/src/arrow/python/io.cc b/cpp/src/arrow/python/io.cc
index 801a325..36c193d 100644
--- a/cpp/src/arrow/python/io.cc
+++ b/cpp/src/arrow/python/io.cc
@@ -216,5 +216,19 @@ Status PyOutputStream::Write(const void* data, int64_t 
nbytes) {
   return file_->Write(data, nbytes);
 }
 
+// ----------------------------------------------------------------------
+// Foreign buffer
+
+Status PyForeignBuffer::Make(const uint8_t* data, int64_t size, PyObject* base,
+                             std::shared_ptr<Buffer>* out) {
+  PyForeignBuffer* buf = new PyForeignBuffer(data, size, base);
+  if (buf == NULL) {
+    return Status::OutOfMemory("could not allocate foreign buffer object");
+  } else {
+    *out = std::shared_ptr<Buffer>(buf);
+    return Status::OK();
+  }
+}
+
 }  // namespace py
 }  // namespace arrow
diff --git a/cpp/src/arrow/python/io.h b/cpp/src/arrow/python/io.h
index 6960556..5c76fe9 100644
--- a/cpp/src/arrow/python/io.h
+++ b/cpp/src/arrow/python/io.h
@@ -81,6 +81,27 @@ class ARROW_EXPORT PyOutputStream : public io::OutputStream {
 
 // TODO(wesm): seekable output files
 
+// A Buffer subclass that keeps a PyObject reference throughout its
+// lifetime, such that the Python object is kept alive as long as the
+// C++ buffer is still needed.
+// Keeping the reference in a Python wrapper would be incorrect as
+// the Python wrapper can get destroyed even though the wrapped C++
+// buffer is still alive (ARROW-2270).
+class ARROW_EXPORT PyForeignBuffer : public Buffer {
+ public:
+  static Status Make(const uint8_t* data, int64_t size, PyObject* base,
+                     std::shared_ptr<Buffer>* out);
+
+ private:
+  PyForeignBuffer(const uint8_t* data, int64_t size, PyObject* base)
+      : Buffer(data, size) {
+    Py_INCREF(base);
+    base_.reset(base);
+  }
+
+  OwnedRefNoGIL base_;
+};
+
 }  // namespace py
 }  // namespace arrow
 
diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst
index a71e92b..3db1a04 100644
--- a/python/doc/source/api.rst
+++ b/python/doc/source/api.rst
@@ -186,6 +186,7 @@ Tables and Record Batches
 
    column
    chunked_array
+   concat_tables
    ChunkedArray
    Column
    RecordBatch
@@ -213,6 +214,7 @@ Input / Output and Shared Memory
    compress
    decompress
    frombuffer
+   foreign_buffer
    Buffer
    ResizableBuffer
    BufferReader
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 28ac98e..225dfd0 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -86,7 +86,7 @@ from pyarrow.lib import (null, bool_,
 from pyarrow.lib import TimestampType
 
 # Buffers, allocation
-from pyarrow.lib import (Buffer, ForeignBuffer, ResizableBuffer, compress,
+from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, compress,
                          decompress, allocate_buffer, frombuffer)
 
 from pyarrow.lib import (MemoryPool, total_allocated_bytes,
diff --git a/python/pyarrow/includes/libarrow.pxd 
b/python/pyarrow/includes/libarrow.pxd
index 456fcca..22c39a8 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -904,6 +904,11 @@ cdef extern from "arrow/python/api.h" namespace 
"arrow::py" nogil:
         @staticmethod
         CStatus FromPyObject(object obj, shared_ptr[CBuffer]* out)
 
+    cdef cppclass PyForeignBuffer(CBuffer):
+        @staticmethod
+        CStatus Make(const uint8_t* data, int64_t size, object base,
+                     shared_ptr[CBuffer]* out)
+
     cdef cppclass PyReadableFile(RandomAccessFile):
         PyReadableFile(object fo)
 
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 611c8a8..15ecd01 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -726,18 +726,6 @@ cdef class Buffer:
         return self.size
 
 
-cdef class ForeignBuffer(Buffer):
-
-    def __init__(self, addr, size, base):
-        cdef:
-            intptr_t c_addr = addr
-            int64_t c_size = size
-        self.base = base
-        cdef shared_ptr[CBuffer] buffer = make_shared[CBuffer](
-            <uint8_t*>c_addr, c_size)
-        self.init(<shared_ptr[CBuffer]> buffer)
-
-
 cdef class ResizableBuffer(Buffer):
 
     cdef void init_rz(self, const shared_ptr[CResizableBuffer]& buffer):
@@ -861,6 +849,21 @@ def frombuffer(object obj):
     return pyarrow_wrap_buffer(buf)
 
 
+def foreign_buffer(address, size, base):
+    """
+    Construct an Arrow buffer with the given *address* and *size*,
+    backed by the Python *base* object.
+    """
+    cdef:
+        intptr_t c_addr = address
+        int64_t c_size = size
+        shared_ptr[CBuffer] buf
+
+    check_status(PyForeignBuffer.Make(<uint8_t*> c_addr, c_size,
+                                      base, &buf))
+    return pyarrow_wrap_buffer(buf)
+
+
 cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader):
     cdef NativeFile nf
 
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index c37bc2b..e4d574f 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -324,11 +324,6 @@ cdef class Buffer:
     cdef int _check_nullptr(self) except -1
 
 
-cdef class ForeignBuffer(Buffer):
-    cdef:
-        object base
-
-
 cdef class ResizableBuffer(Buffer):
 
     cdef void init_rz(self, const shared_ptr[CResizableBuffer]& buffer)
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index e42914f..fe68013 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -24,7 +24,6 @@ import sys
 import weakref
 
 import numpy as np
-import numpy.testing as npt
 
 import pandas as pd
 
@@ -271,11 +270,16 @@ def test_buffer_hashing():
 
 
 def test_foreign_buffer():
-    n = np.array([1, 2])
-    addr = n.__array_interface__["data"][0]
-    size = n.nbytes
-    fb = pa.ForeignBuffer(addr, size, n)
-    npt.assert_array_equal(np.asarray(fb), n.view(dtype=np.int8))
+    obj = np.array([1, 2], dtype=np.int32)
+    addr = obj.__array_interface__["data"][0]
+    size = obj.nbytes
+    buf = pa.foreign_buffer(addr, size, obj)
+    wr = weakref.ref(obj)
+    del obj
+    assert np.frombuffer(buf, dtype=np.int32).tolist() == [1, 2]
+    assert wr() is not None
+    del buf
+    assert wr() is None
 
 
 def test_allocate_buffer():

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to