[
https://issues.apache.org/jira/browse/ARROW-2270?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16392395#comment-16392395
]
ASF GitHub Bot commented on ARROW-2270:
---------------------------------------
wesm closed pull request #1714: ARROW-2270: [Python] Fix lifetime of
ForeignBuffer base object
URL: https://github.com/apache/arrow/pull/1714
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/cpp/src/arrow/python/io.cc b/cpp/src/arrow/python/io.cc
index 801a32574..36c193dbf 100644
--- a/cpp/src/arrow/python/io.cc
+++ b/cpp/src/arrow/python/io.cc
@@ -216,5 +216,19 @@ Status PyOutputStream::Write(const void* data, int64_t
nbytes) {
return file_->Write(data, nbytes);
}
+// ----------------------------------------------------------------------
+// Foreign buffer
+
+Status PyForeignBuffer::Make(const uint8_t* data, int64_t size, PyObject* base,
+ std::shared_ptr<Buffer>* out) {
+ PyForeignBuffer* buf = new PyForeignBuffer(data, size, base);
+ if (buf == NULL) {
+ return Status::OutOfMemory("could not allocate foreign buffer object");
+ } else {
+ *out = std::shared_ptr<Buffer>(buf);
+ return Status::OK();
+ }
+}
+
} // namespace py
} // namespace arrow
diff --git a/cpp/src/arrow/python/io.h b/cpp/src/arrow/python/io.h
index 696055610..5c76fe9fe 100644
--- a/cpp/src/arrow/python/io.h
+++ b/cpp/src/arrow/python/io.h
@@ -81,6 +81,27 @@ class ARROW_EXPORT PyOutputStream : public io::OutputStream {
// TODO(wesm): seekable output files
+// A Buffer subclass that keeps a PyObject reference throughout its
+// lifetime, such that the Python object is kept alive as long as the
+// C++ buffer is still needed.
+// Keeping the reference in a Python wrapper would be incorrect as
+// the Python wrapper can get destroyed even though the wrapped C++
+// buffer is still alive (ARROW-2270).
+class ARROW_EXPORT PyForeignBuffer : public Buffer {
+ public:
+ static Status Make(const uint8_t* data, int64_t size, PyObject* base,
+ std::shared_ptr<Buffer>* out);
+
+ private:
+ PyForeignBuffer(const uint8_t* data, int64_t size, PyObject* base)
+ : Buffer(data, size) {
+ Py_INCREF(base);
+ base_.reset(base);
+ }
+
+ OwnedRefNoGIL base_;
+};
+
} // namespace py
} // namespace arrow
diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst
index a71e92b0b..3db1a04b6 100644
--- a/python/doc/source/api.rst
+++ b/python/doc/source/api.rst
@@ -186,6 +186,7 @@ Tables and Record Batches
column
chunked_array
+ concat_tables
ChunkedArray
Column
RecordBatch
@@ -213,6 +214,7 @@ Input / Output and Shared Memory
compress
decompress
frombuffer
+ foreign_buffer
Buffer
ResizableBuffer
BufferReader
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 28ac98ea0..225dfd0b2 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -86,7 +86,7 @@ def parse_version(root):
from pyarrow.lib import TimestampType
# Buffers, allocation
-from pyarrow.lib import (Buffer, ForeignBuffer, ResizableBuffer, compress,
+from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, compress,
decompress, allocate_buffer, frombuffer)
from pyarrow.lib import (MemoryPool, total_allocated_bytes,
diff --git a/python/pyarrow/includes/libarrow.pxd
b/python/pyarrow/includes/libarrow.pxd
index 456fcca36..22c39a865 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -904,6 +904,11 @@ cdef extern from "arrow/python/api.h" namespace
"arrow::py" nogil:
@staticmethod
CStatus FromPyObject(object obj, shared_ptr[CBuffer]* out)
+ cdef cppclass PyForeignBuffer(CBuffer):
+ @staticmethod
+ CStatus Make(const uint8_t* data, int64_t size, object base,
+ shared_ptr[CBuffer]* out)
+
cdef cppclass PyReadableFile(RandomAccessFile):
PyReadableFile(object fo)
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 611c8a86d..15ecd0164 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -726,18 +726,6 @@ cdef class Buffer:
return self.size
-cdef class ForeignBuffer(Buffer):
-
- def __init__(self, addr, size, base):
- cdef:
- intptr_t c_addr = addr
- int64_t c_size = size
- self.base = base
- cdef shared_ptr[CBuffer] buffer = make_shared[CBuffer](
- <uint8_t*>c_addr, c_size)
- self.init(<shared_ptr[CBuffer]> buffer)
-
-
cdef class ResizableBuffer(Buffer):
cdef void init_rz(self, const shared_ptr[CResizableBuffer]& buffer):
@@ -861,6 +849,21 @@ def frombuffer(object obj):
return pyarrow_wrap_buffer(buf)
+def foreign_buffer(address, size, base):
+ """
+ Construct an Arrow buffer with the given *address* and *size*,
+ backed by the Python *base* object.
+ """
+ cdef:
+ intptr_t c_addr = address
+ int64_t c_size = size
+ shared_ptr[CBuffer] buf
+
+ check_status(PyForeignBuffer.Make(<uint8_t*> c_addr, c_size,
+ base, &buf))
+ return pyarrow_wrap_buffer(buf)
+
+
cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader):
cdef NativeFile nf
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index c37bc2beb..e4d574f18 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -324,11 +324,6 @@ cdef class Buffer:
cdef int _check_nullptr(self) except -1
-cdef class ForeignBuffer(Buffer):
- cdef:
- object base
-
-
cdef class ResizableBuffer(Buffer):
cdef void init_rz(self, const shared_ptr[CResizableBuffer]& buffer)
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index e42914ffc..fe680133b 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -24,7 +24,6 @@
import weakref
import numpy as np
-import numpy.testing as npt
import pandas as pd
@@ -271,11 +270,16 @@ def test_buffer_hashing():
def test_foreign_buffer():
- n = np.array([1, 2])
- addr = n.__array_interface__["data"][0]
- size = n.nbytes
- fb = pa.ForeignBuffer(addr, size, n)
- npt.assert_array_equal(np.asarray(fb), n.view(dtype=np.int8))
+ obj = np.array([1, 2], dtype=np.int32)
+ addr = obj.__array_interface__["data"][0]
+ size = obj.nbytes
+ buf = pa.foreign_buffer(addr, size, obj)
+ wr = weakref.ref(obj)
+ del obj
+ assert np.frombuffer(buf, dtype=np.int32).tolist() == [1, 2]
+ assert wr() is not None
+ del buf
+ assert wr() is None
def test_allocate_buffer():
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> [Python] ForeignBuffer doesn't tie Python object lifetime to C++ buffer
> lifetime
> --------------------------------------------------------------------------------
>
> Key: ARROW-2270
> URL: https://issues.apache.org/jira/browse/ARROW-2270
> Project: Apache Arrow
> Issue Type: Bug
> Components: Python
> Reporter: Antoine Pitrou
> Assignee: Antoine Pitrou
> Priority: Major
> Labels: pull-request-available
> Fix For: 0.9.0
>
>
> {{ForeignBuffer}} keeps the reference to the Python base object in the Python
> wrapper class, not in the C++ buffer instance, meaning if the C++ buffer gets
> passed around but the Python wrapper gets destroyed, the reference to the
> original Python base object will be released.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)