This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new f56fdc9 ARROW-2270: [Python] Fix lifetime of ForeignBuffer base object
f56fdc9 is described below
commit f56fdc9ccd77987af22093d0d711b5c1a566a094
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Mar 8 23:32:26 2018 -0500
ARROW-2270: [Python] Fix lifetime of ForeignBuffer base object
Author: Antoine Pitrou <[email protected]>
Closes #1714 from pitrou/ARROW-2270-pyforeignbuffer and squashes the
following commits:
51f2d85e <Antoine Pitrou> ARROW-2270: Fix lifetime of ForeignBuffer base
object
---
cpp/src/arrow/python/io.cc | 14 ++++++++++++++
cpp/src/arrow/python/io.h | 21 +++++++++++++++++++++
python/doc/source/api.rst | 2 ++
python/pyarrow/__init__.py | 2 +-
python/pyarrow/includes/libarrow.pxd | 5 +++++
python/pyarrow/io.pxi | 27 +++++++++++++++------------
python/pyarrow/lib.pxd | 5 -----
python/pyarrow/tests/test_io.py | 16 ++++++++++------
8 files changed, 68 insertions(+), 24 deletions(-)
diff --git a/cpp/src/arrow/python/io.cc b/cpp/src/arrow/python/io.cc
index 801a325..36c193d 100644
--- a/cpp/src/arrow/python/io.cc
+++ b/cpp/src/arrow/python/io.cc
@@ -216,5 +216,19 @@ Status PyOutputStream::Write(const void* data, int64_t
nbytes) {
return file_->Write(data, nbytes);
}
+// ----------------------------------------------------------------------
+// Foreign buffer
+
+Status PyForeignBuffer::Make(const uint8_t* data, int64_t size, PyObject* base,
+ std::shared_ptr<Buffer>* out) {
+ PyForeignBuffer* buf = new PyForeignBuffer(data, size, base);
+ if (buf == NULL) {
+ return Status::OutOfMemory("could not allocate foreign buffer object");
+ } else {
+ *out = std::shared_ptr<Buffer>(buf);
+ return Status::OK();
+ }
+}
+
} // namespace py
} // namespace arrow
diff --git a/cpp/src/arrow/python/io.h b/cpp/src/arrow/python/io.h
index 6960556..5c76fe9 100644
--- a/cpp/src/arrow/python/io.h
+++ b/cpp/src/arrow/python/io.h
@@ -81,6 +81,27 @@ class ARROW_EXPORT PyOutputStream : public io::OutputStream {
// TODO(wesm): seekable output files
+// A Buffer subclass that keeps a PyObject reference throughout its
+// lifetime, such that the Python object is kept alive as long as the
+// C++ buffer is still needed.
+// Keeping the reference in a Python wrapper would be incorrect as
+// the Python wrapper can get destroyed even though the wrapped C++
+// buffer is still alive (ARROW-2270).
+class ARROW_EXPORT PyForeignBuffer : public Buffer {
+ public:
+ static Status Make(const uint8_t* data, int64_t size, PyObject* base,
+ std::shared_ptr<Buffer>* out);
+
+ private:
+ PyForeignBuffer(const uint8_t* data, int64_t size, PyObject* base)
+ : Buffer(data, size) {
+ Py_INCREF(base);
+ base_.reset(base);
+ }
+
+ OwnedRefNoGIL base_;
+};
+
} // namespace py
} // namespace arrow
diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst
index a71e92b..3db1a04 100644
--- a/python/doc/source/api.rst
+++ b/python/doc/source/api.rst
@@ -186,6 +186,7 @@ Tables and Record Batches
column
chunked_array
+ concat_tables
ChunkedArray
Column
RecordBatch
@@ -213,6 +214,7 @@ Input / Output and Shared Memory
compress
decompress
frombuffer
+ foreign_buffer
Buffer
ResizableBuffer
BufferReader
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 28ac98e..225dfd0 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -86,7 +86,7 @@ from pyarrow.lib import (null, bool_,
from pyarrow.lib import TimestampType
# Buffers, allocation
-from pyarrow.lib import (Buffer, ForeignBuffer, ResizableBuffer, compress,
+from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, compress,
decompress, allocate_buffer, frombuffer)
from pyarrow.lib import (MemoryPool, total_allocated_bytes,
diff --git a/python/pyarrow/includes/libarrow.pxd
b/python/pyarrow/includes/libarrow.pxd
index 456fcca..22c39a8 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -904,6 +904,11 @@ cdef extern from "arrow/python/api.h" namespace
"arrow::py" nogil:
@staticmethod
CStatus FromPyObject(object obj, shared_ptr[CBuffer]* out)
+ cdef cppclass PyForeignBuffer(CBuffer):
+ @staticmethod
+ CStatus Make(const uint8_t* data, int64_t size, object base,
+ shared_ptr[CBuffer]* out)
+
cdef cppclass PyReadableFile(RandomAccessFile):
PyReadableFile(object fo)
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 611c8a8..15ecd01 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -726,18 +726,6 @@ cdef class Buffer:
return self.size
-cdef class ForeignBuffer(Buffer):
-
- def __init__(self, addr, size, base):
- cdef:
- intptr_t c_addr = addr
- int64_t c_size = size
- self.base = base
- cdef shared_ptr[CBuffer] buffer = make_shared[CBuffer](
- <uint8_t*>c_addr, c_size)
- self.init(<shared_ptr[CBuffer]> buffer)
-
-
cdef class ResizableBuffer(Buffer):
cdef void init_rz(self, const shared_ptr[CResizableBuffer]& buffer):
@@ -861,6 +849,21 @@ def frombuffer(object obj):
return pyarrow_wrap_buffer(buf)
+def foreign_buffer(address, size, base):
+ """
+ Construct an Arrow buffer with the given *address* and *size*,
+ backed by the Python *base* object.
+ """
+ cdef:
+ intptr_t c_addr = address
+ int64_t c_size = size
+ shared_ptr[CBuffer] buf
+
+ check_status(PyForeignBuffer.Make(<uint8_t*> c_addr, c_size,
+ base, &buf))
+ return pyarrow_wrap_buffer(buf)
+
+
cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader):
cdef NativeFile nf
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index c37bc2b..e4d574f 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -324,11 +324,6 @@ cdef class Buffer:
cdef int _check_nullptr(self) except -1
-cdef class ForeignBuffer(Buffer):
- cdef:
- object base
-
-
cdef class ResizableBuffer(Buffer):
cdef void init_rz(self, const shared_ptr[CResizableBuffer]& buffer)
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index e42914f..fe68013 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -24,7 +24,6 @@ import sys
import weakref
import numpy as np
-import numpy.testing as npt
import pandas as pd
@@ -271,11 +270,16 @@ def test_buffer_hashing():
def test_foreign_buffer():
- n = np.array([1, 2])
- addr = n.__array_interface__["data"][0]
- size = n.nbytes
- fb = pa.ForeignBuffer(addr, size, n)
- npt.assert_array_equal(np.asarray(fb), n.view(dtype=np.int8))
+ obj = np.array([1, 2], dtype=np.int32)
+ addr = obj.__array_interface__["data"][0]
+ size = obj.nbytes
+ buf = pa.foreign_buffer(addr, size, obj)
+ wr = weakref.ref(obj)
+ del obj
+ assert np.frombuffer(buf, dtype=np.int32).tolist() == [1, 2]
+ assert wr() is not None
+ del buf
+ assert wr() is None
def test_allocate_buffer():
--
To stop receiving notification emails like this one, please contact
[email protected].