This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 5dce01f ARROW-2155: [Python] frombuffer() should respect mutability
of argument
5dce01f is described below
commit 5dce01f76f0569d820089682aef32d87c806b61d
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Feb 15 10:09:53 2018 -0500
ARROW-2155: [Python] frombuffer() should respect mutability of argument
Also raise TypeError instead of ValueError for invalid arguments to
frombuffer()
Author: Antoine Pitrou <[email protected]>
Closes #1608 from pitrou/ARROW-2155-frombuffer-mutable2 and squashes the
following commits:
b3a66364 [Antoine Pitrou] ARROW-2155: [Python] frombuffer() should respect
mutability of argument
---
cpp/src/arrow/python/common.cc | 38 ++++++++++++++++++++++++++----------
cpp/src/arrow/python/common.h | 13 +++++++-----
cpp/src/arrow/python/io.cc | 19 ++++--------------
cpp/src/arrow/python/io.h | 7 -------
cpp/src/arrow/python/python-test.cc | 9 ++++++++-
python/pyarrow/includes/libarrow.pxd | 6 ++----
python/pyarrow/io.pxi | 8 ++------
python/pyarrow/tests/test_io.py | 26 ++++++++++++++++++++++--
8 files changed, 76 insertions(+), 50 deletions(-)
diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc
index 14a8ae6..1ded880 100644
--- a/cpp/src/arrow/python/common.cc
+++ b/cpp/src/arrow/python/common.cc
@@ -23,6 +23,7 @@
#include "arrow/memory_pool.h"
#include "arrow/status.h"
+#include "arrow/util/logging.h"
namespace arrow {
namespace py {
@@ -47,22 +48,39 @@ MemoryPool* get_memory_pool() {
// ----------------------------------------------------------------------
// PyBuffer
-PyBuffer::PyBuffer(PyObject* obj) : Buffer(nullptr, 0), obj_(nullptr) {
- if (PyObject_CheckBuffer(obj)) {
- obj_ = PyMemoryView_FromObject(obj);
- Py_buffer* buffer = PyMemoryView_GET_BUFFER(obj_);
- data_ = reinterpret_cast<const uint8_t*>(buffer->buf);
- size_ = buffer->len;
- capacity_ = buffer->len;
- is_mutable_ = false;
+PyBuffer::PyBuffer() : Buffer(nullptr, 0) {}
+
+Status PyBuffer::Init(PyObject* obj) {
+ if (!PyObject_GetBuffer(obj, &py_buf_, PyBUF_ANY_CONTIGUOUS)) {
+ data_ = reinterpret_cast<const uint8_t*>(py_buf_.buf);
+ DCHECK(data_ != nullptr);
+ size_ = py_buf_.len;
+ capacity_ = py_buf_.len;
+ is_mutable_ = !py_buf_.readonly;
+ return Status::OK();
+ } else {
+ return Status(StatusCode::PythonError, "");
}
}
+Status PyBuffer::FromPyObject(PyObject* obj, std::shared_ptr<Buffer>* out) {
+ PyBuffer* buf = new PyBuffer();
+ std::shared_ptr<Buffer> res(buf);
+ RETURN_NOT_OK(buf->Init(obj));
+ *out = res;
+ return Status::OK();
+}
+
PyBuffer::~PyBuffer() {
- PyAcquireGIL lock;
- Py_XDECREF(obj_);
+ if (data_ != nullptr) {
+ PyAcquireGIL lock;
+ PyBuffer_Release(&py_buf_);
+ }
}
+// ----------------------------------------------------------------------
+// Python exception -> Status
+
Status CheckPyError(StatusCode code) {
if (PyErr_Occurred()) {
PyObject* exc_type = nullptr;
diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h
index b1e0888..269385c 100644
--- a/cpp/src/arrow/python/common.h
+++ b/cpp/src/arrow/python/common.h
@@ -18,6 +18,7 @@
#ifndef ARROW_PYTHON_COMMON_H
#define ARROW_PYTHON_COMMON_H
+#include <memory>
#include <string>
#include "arrow/python/config.h"
@@ -140,15 +141,17 @@ ARROW_EXPORT MemoryPool* get_memory_pool();
class ARROW_EXPORT PyBuffer : public Buffer {
public:
- /// Note that the GIL must be held when calling the PyBuffer constructor.
- ///
- /// While memoryview objects support multi-demensional buffers, PyBuffer
only supports
+ /// While memoryview objects support multi-dimensional buffers, PyBuffer
only supports
/// one-dimensional byte buffers.
- explicit PyBuffer(PyObject* obj);
~PyBuffer();
+ static Status FromPyObject(PyObject* obj, std::shared_ptr<Buffer>* out);
+
private:
- PyObject* obj_;
+ PyBuffer();
+ Status Init(PyObject*);
+
+ Py_buffer py_buf_;
};
} // namespace py
diff --git a/cpp/src/arrow/python/io.cc b/cpp/src/arrow/python/io.cc
index 2cff046..801a325 100644
--- a/cpp/src/arrow/python/io.cc
+++ b/cpp/src/arrow/python/io.cc
@@ -149,14 +149,11 @@ Status PyReadableFile::Read(int64_t nbytes, int64_t*
bytes_read, void* out) {
Status PyReadableFile::Read(int64_t nbytes, std::shared_ptr<Buffer>* out) {
PyAcquireGIL lock;
- PyObject* bytes_obj = NULL;
- ARROW_RETURN_NOT_OK(file_->Read(nbytes, &bytes_obj));
- DCHECK(bytes_obj != NULL);
-
- *out = std::make_shared<PyBuffer>(bytes_obj);
- Py_XDECREF(bytes_obj);
+ OwnedRef bytes_obj;
+ ARROW_RETURN_NOT_OK(file_->Read(nbytes, bytes_obj.ref()));
+ DCHECK(bytes_obj.obj() != NULL);
- return Status::OK();
+ return PyBuffer::FromPyObject(bytes_obj.obj(), out);
}
Status PyReadableFile::ReadAt(int64_t position, int64_t nbytes, int64_t*
bytes_read,
@@ -219,13 +216,5 @@ Status PyOutputStream::Write(const void* data, int64_t
nbytes) {
return file_->Write(data, nbytes);
}
-// ----------------------------------------------------------------------
-// A readable file that is backed by a PyBuffer
-
-PyBytesReader::PyBytesReader(PyObject* obj)
- : io::BufferReader(std::make_shared<PyBuffer>(obj)) {}
-
-PyBytesReader::~PyBytesReader() {}
-
} // namespace py
} // namespace arrow
diff --git a/cpp/src/arrow/python/io.h b/cpp/src/arrow/python/io.h
index 0632d28..648f6de 100644
--- a/cpp/src/arrow/python/io.h
+++ b/cpp/src/arrow/python/io.h
@@ -79,13 +79,6 @@ class ARROW_EXPORT PyOutputStream : public io::OutputStream {
int64_t position_;
};
-// A zero-copy reader backed by a PyBuffer object
-class ARROW_EXPORT PyBytesReader : public io::BufferReader {
- public:
- explicit PyBytesReader(PyObject* obj);
- virtual ~PyBytesReader();
-};
-
// TODO(wesm): seekable output files
} // namespace py
diff --git a/cpp/src/arrow/python/python-test.cc
b/cpp/src/arrow/python/python-test.cc
index d9919ee..bcf89a4 100644
--- a/cpp/src/arrow/python/python-test.cc
+++ b/cpp/src/arrow/python/python-test.cc
@@ -33,7 +33,14 @@
namespace arrow {
namespace py {
-TEST(PyBuffer, InvalidInputObject) { PyBuffer buffer(Py_None); }
+TEST(PyBuffer, InvalidInputObject) {
+ std::shared_ptr<Buffer> res;
+ PyObject* input = Py_None;
+ auto old_refcnt = Py_REFCNT(input);
+ ASSERT_RAISES(PythonError, PyBuffer::FromPyObject(input, &res));
+ PyErr_Clear();
+ ASSERT_EQ(old_refcnt, Py_REFCNT(input));
+}
class DecimalTest : public ::testing::Test {
public:
diff --git a/python/pyarrow/includes/libarrow.pxd
b/python/pyarrow/includes/libarrow.pxd
index 2e83f07..81cc4d2 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -894,7 +894,8 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py"
nogil:
" arrow::py::get_memory_pool"()
cdef cppclass PyBuffer(CBuffer):
- PyBuffer(object o)
+ @staticmethod
+ CStatus FromPyObject(object obj, shared_ptr[CBuffer]* out)
cdef cppclass PyReadableFile(RandomAccessFile):
PyReadableFile(object fo)
@@ -902,9 +903,6 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py"
nogil:
cdef cppclass PyOutputStream(OutputStream):
PyOutputStream(object fo)
- cdef cppclass PyBytesReader(CBufferReader):
- PyBytesReader(object fo)
-
cdef struct PandasOptions:
c_bool strings_to_categorical
c_bool zero_copy_only
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index bd508cf..b0996f8 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -802,12 +802,8 @@ def frombuffer(object obj):
Construct an Arrow buffer from a Python bytes object
"""
cdef shared_ptr[CBuffer] buf
- try:
- memoryview(obj)
- buf.reset(new PyBuffer(obj))
- return pyarrow_wrap_buffer(buf)
- except TypeError:
- raise ValueError('Must pass object that implements buffer protocol')
+ check_status(PyBuffer.FromPyObject(obj, &buf))
+ return pyarrow_wrap_buffer(buf)
cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader):
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index da26b10..0947cb7 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -104,7 +104,7 @@ def test_bytes_reader():
def test_bytes_reader_non_bytes():
- with pytest.raises(ValueError):
+ with pytest.raises(TypeError):
pa.BufferReader(u('some sample data'))
@@ -132,6 +132,7 @@ def test_buffer_bytes():
buf = pa.frombuffer(val)
assert isinstance(buf, pa.Buffer)
+ assert not buf.is_mutable
result = buf.to_pybytes()
@@ -143,6 +144,7 @@ def test_buffer_memoryview():
buf = pa.frombuffer(val)
assert isinstance(buf, pa.Buffer)
+ assert not buf.is_mutable
result = memoryview(buf)
@@ -154,13 +156,20 @@ def test_buffer_bytearray():
buf = pa.frombuffer(val)
assert isinstance(buf, pa.Buffer)
+ assert buf.is_mutable
result = bytearray(buf)
assert result == val
-def test_buffer_numpy():
+def test_buffer_invalid():
+ with pytest.raises(TypeError,
+ match="(bytes-like object|buffer interface)"):
+ pa.frombuffer(None)
+
+
+def test_buffer_to_numpy():
# Make sure creating a numpy array from an arrow buffer works
byte_array = bytearray(20)
byte_array[0] = 42
@@ -170,6 +179,19 @@ def test_buffer_numpy():
assert array.base == buf
+def test_buffer_from_numpy():
+ # C-contiguous
+ arr = np.arange(12, dtype=np.int8).reshape((3, 4))
+ buf = pa.frombuffer(arr)
+ assert buf.to_pybytes() == arr.tobytes()
+ # F-contiguous; note strides informations is lost
+ buf = pa.frombuffer(arr.T)
+ assert buf.to_pybytes() == arr.tobytes()
+ # Non-contiguous
+ with pytest.raises(ValueError, match="not contiguous"):
+ buf = pa.frombuffer(arr.T[::2])
+
+
def test_allocate_buffer():
buf = pa.allocate_buffer(100)
assert buf.size == 100
--
To stop receiving notification emails like this one, please contact
[email protected].