This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 5dce01f  ARROW-2155: [Python] frombuffer() should respect mutability 
of argument
5dce01f is described below

commit 5dce01f76f0569d820089682aef32d87c806b61d
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Feb 15 10:09:53 2018 -0500

    ARROW-2155: [Python] frombuffer() should respect mutability of argument
    
    Also raise TypeError instead of ValueError for invalid arguments to 
frombuffer()
    
    Author: Antoine Pitrou <[email protected]>
    
    Closes #1608 from pitrou/ARROW-2155-frombuffer-mutable2 and squashes the 
following commits:
    
    b3a66364 [Antoine Pitrou] ARROW-2155: [Python] frombuffer() should respect 
mutability of argument
---
 cpp/src/arrow/python/common.cc       | 38 ++++++++++++++++++++++++++----------
 cpp/src/arrow/python/common.h        | 13 +++++++-----
 cpp/src/arrow/python/io.cc           | 19 ++++--------------
 cpp/src/arrow/python/io.h            |  7 -------
 cpp/src/arrow/python/python-test.cc  |  9 ++++++++-
 python/pyarrow/includes/libarrow.pxd |  6 ++----
 python/pyarrow/io.pxi                |  8 ++------
 python/pyarrow/tests/test_io.py      | 26 ++++++++++++++++++++++--
 8 files changed, 76 insertions(+), 50 deletions(-)

diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc
index 14a8ae6..1ded880 100644
--- a/cpp/src/arrow/python/common.cc
+++ b/cpp/src/arrow/python/common.cc
@@ -23,6 +23,7 @@
 
 #include "arrow/memory_pool.h"
 #include "arrow/status.h"
+#include "arrow/util/logging.h"
 
 namespace arrow {
 namespace py {
@@ -47,22 +48,39 @@ MemoryPool* get_memory_pool() {
 // ----------------------------------------------------------------------
 // PyBuffer
 
-PyBuffer::PyBuffer(PyObject* obj) : Buffer(nullptr, 0), obj_(nullptr) {
-  if (PyObject_CheckBuffer(obj)) {
-    obj_ = PyMemoryView_FromObject(obj);
-    Py_buffer* buffer = PyMemoryView_GET_BUFFER(obj_);
-    data_ = reinterpret_cast<const uint8_t*>(buffer->buf);
-    size_ = buffer->len;
-    capacity_ = buffer->len;
-    is_mutable_ = false;
+PyBuffer::PyBuffer() : Buffer(nullptr, 0) {}
+
+Status PyBuffer::Init(PyObject* obj) {
+  if (!PyObject_GetBuffer(obj, &py_buf_, PyBUF_ANY_CONTIGUOUS)) {
+    data_ = reinterpret_cast<const uint8_t*>(py_buf_.buf);
+    DCHECK(data_ != nullptr);
+    size_ = py_buf_.len;
+    capacity_ = py_buf_.len;
+    is_mutable_ = !py_buf_.readonly;
+    return Status::OK();
+  } else {
+    return Status(StatusCode::PythonError, "");
   }
 }
 
+Status PyBuffer::FromPyObject(PyObject* obj, std::shared_ptr<Buffer>* out) {
+  PyBuffer* buf = new PyBuffer();
+  std::shared_ptr<Buffer> res(buf);
+  RETURN_NOT_OK(buf->Init(obj));
+  *out = res;
+  return Status::OK();
+}
+
 PyBuffer::~PyBuffer() {
-  PyAcquireGIL lock;
-  Py_XDECREF(obj_);
+  if (data_ != nullptr) {
+    PyAcquireGIL lock;
+    PyBuffer_Release(&py_buf_);
+  }
 }
 
+// ----------------------------------------------------------------------
+// Python exception -> Status
+
 Status CheckPyError(StatusCode code) {
   if (PyErr_Occurred()) {
     PyObject* exc_type = nullptr;
diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h
index b1e0888..269385c 100644
--- a/cpp/src/arrow/python/common.h
+++ b/cpp/src/arrow/python/common.h
@@ -18,6 +18,7 @@
 #ifndef ARROW_PYTHON_COMMON_H
 #define ARROW_PYTHON_COMMON_H
 
+#include <memory>
 #include <string>
 
 #include "arrow/python/config.h"
@@ -140,15 +141,17 @@ ARROW_EXPORT MemoryPool* get_memory_pool();
 
 class ARROW_EXPORT PyBuffer : public Buffer {
  public:
-  /// Note that the GIL must be held when calling the PyBuffer constructor.
-  ///
-  /// While memoryview objects support multi-demensional buffers, PyBuffer 
only supports
+  /// While memoryview objects support multi-dimensional buffers, PyBuffer 
only supports
   /// one-dimensional byte buffers.
-  explicit PyBuffer(PyObject* obj);
   ~PyBuffer();
 
+  static Status FromPyObject(PyObject* obj, std::shared_ptr<Buffer>* out);
+
  private:
-  PyObject* obj_;
+  PyBuffer();
+  Status Init(PyObject*);
+
+  Py_buffer py_buf_;
 };
 
 }  // namespace py
diff --git a/cpp/src/arrow/python/io.cc b/cpp/src/arrow/python/io.cc
index 2cff046..801a325 100644
--- a/cpp/src/arrow/python/io.cc
+++ b/cpp/src/arrow/python/io.cc
@@ -149,14 +149,11 @@ Status PyReadableFile::Read(int64_t nbytes, int64_t* 
bytes_read, void* out) {
 Status PyReadableFile::Read(int64_t nbytes, std::shared_ptr<Buffer>* out) {
   PyAcquireGIL lock;
 
-  PyObject* bytes_obj = NULL;
-  ARROW_RETURN_NOT_OK(file_->Read(nbytes, &bytes_obj));
-  DCHECK(bytes_obj != NULL);
-
-  *out = std::make_shared<PyBuffer>(bytes_obj);
-  Py_XDECREF(bytes_obj);
+  OwnedRef bytes_obj;
+  ARROW_RETURN_NOT_OK(file_->Read(nbytes, bytes_obj.ref()));
+  DCHECK(bytes_obj.obj() != NULL);
 
-  return Status::OK();
+  return PyBuffer::FromPyObject(bytes_obj.obj(), out);
 }
 
 Status PyReadableFile::ReadAt(int64_t position, int64_t nbytes, int64_t* 
bytes_read,
@@ -219,13 +216,5 @@ Status PyOutputStream::Write(const void* data, int64_t 
nbytes) {
   return file_->Write(data, nbytes);
 }
 
-// ----------------------------------------------------------------------
-// A readable file that is backed by a PyBuffer
-
-PyBytesReader::PyBytesReader(PyObject* obj)
-    : io::BufferReader(std::make_shared<PyBuffer>(obj)) {}
-
-PyBytesReader::~PyBytesReader() {}
-
 }  // namespace py
 }  // namespace arrow
diff --git a/cpp/src/arrow/python/io.h b/cpp/src/arrow/python/io.h
index 0632d28..648f6de 100644
--- a/cpp/src/arrow/python/io.h
+++ b/cpp/src/arrow/python/io.h
@@ -79,13 +79,6 @@ class ARROW_EXPORT PyOutputStream : public io::OutputStream {
   int64_t position_;
 };
 
-// A zero-copy reader backed by a PyBuffer object
-class ARROW_EXPORT PyBytesReader : public io::BufferReader {
- public:
-  explicit PyBytesReader(PyObject* obj);
-  virtual ~PyBytesReader();
-};
-
 // TODO(wesm): seekable output files
 
 }  // namespace py
diff --git a/cpp/src/arrow/python/python-test.cc 
b/cpp/src/arrow/python/python-test.cc
index d9919ee..bcf89a4 100644
--- a/cpp/src/arrow/python/python-test.cc
+++ b/cpp/src/arrow/python/python-test.cc
@@ -33,7 +33,14 @@
 namespace arrow {
 namespace py {
 
-TEST(PyBuffer, InvalidInputObject) { PyBuffer buffer(Py_None); }
+TEST(PyBuffer, InvalidInputObject) {
+  std::shared_ptr<Buffer> res;
+  PyObject* input = Py_None;
+  auto old_refcnt = Py_REFCNT(input);
+  ASSERT_RAISES(PythonError, PyBuffer::FromPyObject(input, &res));
+  PyErr_Clear();
+  ASSERT_EQ(old_refcnt, Py_REFCNT(input));
+}
 
 class DecimalTest : public ::testing::Test {
  public:
diff --git a/python/pyarrow/includes/libarrow.pxd 
b/python/pyarrow/includes/libarrow.pxd
index 2e83f07..81cc4d2 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -894,7 +894,8 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" 
nogil:
         " arrow::py::get_memory_pool"()
 
     cdef cppclass PyBuffer(CBuffer):
-        PyBuffer(object o)
+        @staticmethod
+        CStatus FromPyObject(object obj, shared_ptr[CBuffer]* out)
 
     cdef cppclass PyReadableFile(RandomAccessFile):
         PyReadableFile(object fo)
@@ -902,9 +903,6 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" 
nogil:
     cdef cppclass PyOutputStream(OutputStream):
         PyOutputStream(object fo)
 
-    cdef cppclass PyBytesReader(CBufferReader):
-        PyBytesReader(object fo)
-
     cdef struct PandasOptions:
         c_bool strings_to_categorical
         c_bool zero_copy_only
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index bd508cf..b0996f8 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -802,12 +802,8 @@ def frombuffer(object obj):
     Construct an Arrow buffer from a Python bytes object
     """
     cdef shared_ptr[CBuffer] buf
-    try:
-        memoryview(obj)
-        buf.reset(new PyBuffer(obj))
-        return pyarrow_wrap_buffer(buf)
-    except TypeError:
-        raise ValueError('Must pass object that implements buffer protocol')
+    check_status(PyBuffer.FromPyObject(obj, &buf))
+    return pyarrow_wrap_buffer(buf)
 
 
 cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader):
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index da26b10..0947cb7 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -104,7 +104,7 @@ def test_bytes_reader():
 
 
 def test_bytes_reader_non_bytes():
-    with pytest.raises(ValueError):
+    with pytest.raises(TypeError):
         pa.BufferReader(u('some sample data'))
 
 
@@ -132,6 +132,7 @@ def test_buffer_bytes():
 
     buf = pa.frombuffer(val)
     assert isinstance(buf, pa.Buffer)
+    assert not buf.is_mutable
 
     result = buf.to_pybytes()
 
@@ -143,6 +144,7 @@ def test_buffer_memoryview():
 
     buf = pa.frombuffer(val)
     assert isinstance(buf, pa.Buffer)
+    assert not buf.is_mutable
 
     result = memoryview(buf)
 
@@ -154,13 +156,20 @@ def test_buffer_bytearray():
 
     buf = pa.frombuffer(val)
     assert isinstance(buf, pa.Buffer)
+    assert buf.is_mutable
 
     result = bytearray(buf)
 
     assert result == val
 
 
-def test_buffer_numpy():
+def test_buffer_invalid():
+    with pytest.raises(TypeError,
+                       match="(bytes-like object|buffer interface)"):
+        pa.frombuffer(None)
+
+
+def test_buffer_to_numpy():
     # Make sure creating a numpy array from an arrow buffer works
     byte_array = bytearray(20)
     byte_array[0] = 42
@@ -170,6 +179,19 @@ def test_buffer_numpy():
     assert array.base == buf
 
 
+def test_buffer_from_numpy():
+    # C-contiguous
+    arr = np.arange(12, dtype=np.int8).reshape((3, 4))
+    buf = pa.frombuffer(arr)
+    assert buf.to_pybytes() == arr.tobytes()
+    # F-contiguous; note strides informations is lost
+    buf = pa.frombuffer(arr.T)
+    assert buf.to_pybytes() == arr.tobytes()
+    # Non-contiguous
+    with pytest.raises(ValueError, match="not contiguous"):
+        buf = pa.frombuffer(arr.T[::2])
+
+
 def test_allocate_buffer():
     buf = pa.allocate_buffer(100)
     assert buf.size == 100

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to