This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new e15f545  ARROW-2039: [Python] Avoid crashing on uninitialized Buffer
e15f545 is described below

commit e15f54505d4d92810c8c216a89eee3e703013d1c
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Feb 15 11:34:59 2018 -0500

    ARROW-2039: [Python] Avoid crashing on uninitialized Buffer
    
    Author: Antoine Pitrou <[email protected]>
    
    Closes #1605 from pitrou/ARROW-2039-uninitialized-buffer and squashes the following commits:
    
    358443c6 [Antoine Pitrou] ARROW-2039: [Python] Avoid crashing on uninitialized Buffer
---
 python/pyarrow/io.pxi           | 16 ++++++++++++++
 python/pyarrow/lib.pxd          |  1 +
 python/pyarrow/tests/test_io.py | 47 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+)

diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index b0996f8..aa2f7ed 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -595,22 +595,30 @@ cdef class Buffer:
         self.shape[0] = self.size
         self.strides[0] = <Py_ssize_t>(1)
 
+    cdef int _check_nullptr(self) except -1:
+        if self.buffer.get() == NULL:
+            raise ReferenceError("operation on uninitialized Buffer object")
+        return 0
+
     def __len__(self):
         return self.size
 
     property size:
 
         def __get__(self):
+            self._check_nullptr()
             return self.buffer.get().size()
 
     property is_mutable:
 
         def __get__(self):
+            self._check_nullptr()
             return self.buffer.get().is_mutable()
 
     property parent:
 
         def __get__(self):
+            self._check_nullptr()
             cdef shared_ptr[CBuffer] parent_buf = self.buffer.get().parent()
 
             if parent_buf.get() == NULL:
@@ -620,6 +628,7 @@ cdef class Buffer:
 
     def __getitem__(self, key):
         # TODO(wesm): buffer slicing
+        self._check_nullptr()
         raise NotImplementedError
 
     def equals(self, Buffer other):
@@ -634,17 +643,21 @@ cdef class Buffer:
         -------
         are_equal : True if buffer contents and size are equal
         """
+        self._check_nullptr()
+        other._check_nullptr()
         cdef c_bool result = False
         with nogil:
             result = self.buffer.get().Equals(deref(other.buffer.get()))
         return result
 
     def to_pybytes(self):
+        self._check_nullptr()
         return cp.PyBytes_FromStringAndSize(
             <const char*>self.buffer.get().data(),
             self.buffer.get().size())
 
     def __getbuffer__(self, cp.Py_buffer* buffer, int flags):
+        self._check_nullptr()
 
         buffer.buf = <char *>self.buffer.get().data()
         buffer.format = 'b'
@@ -662,11 +675,13 @@ cdef class Buffer:
         buffer.suboffsets = NULL
 
     def __getsegcount__(self, Py_ssize_t *len_out):
+        self._check_nullptr()
         if len_out != NULL:
             len_out[0] = <Py_ssize_t>self.size
         return 1
 
     def __getreadbuffer__(self, Py_ssize_t idx, void **p):
+        self._check_nullptr()
         if idx != 0:
             raise SystemError("accessing non-existent buffer segment")
         if p != NULL:
@@ -674,6 +689,7 @@ cdef class Buffer:
         return self.size
 
     def __getwritebuffer__(self, Py_ssize_t idx, void **p):
+        self._check_nullptr()
         if not self.buffer.get().is_mutable():
             raise SystemError("trying to write an immutable buffer")
         if idx != 0:
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index b1433ec..31732a6 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -321,6 +321,7 @@ cdef class Buffer:
         Py_ssize_t strides[1]
 
     cdef void init(self, const shared_ptr[CBuffer]& buffer)
+    cdef int _check_nullptr(self) except -1
 
 
 cdef class ResizableBuffer(Buffer):
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index 0947cb7..736020f 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from functools import partial
 from io import BytesIO, TextIOWrapper
 import gc
 import os
@@ -176,6 +177,8 @@ def test_buffer_to_numpy():
     buf = pa.frombuffer(byte_array)
     array = np.frombuffer(buf, dtype="uint8")
     assert array[0] == byte_array[0]
+    byte_array[0] += 1
+    assert array[0] == byte_array[0]
     assert array.base == buf
 
 
@@ -192,6 +195,25 @@ def test_buffer_from_numpy():
         buf = pa.frombuffer(arr.T[::2])
 
 
+def test_buffer_equals():
+    # Buffer.equals() returns true iff the buffers have the same contents
+    b1 = b'some data!'
+    b2 = bytearray(b1)
+    b3 = bytearray(b1)
+    b3[0] = 42
+    buf1 = pa.frombuffer(b1)
+    buf2 = pa.frombuffer(b2)
+    buf3 = pa.frombuffer(b2)
+    buf4 = pa.frombuffer(b3)
+    buf5 = pa.frombuffer(np.frombuffer(b2, dtype=np.int16))
+    assert buf1.equals(buf1)
+    assert buf1.equals(buf2)
+    assert buf2.equals(buf3)
+    assert not buf2.equals(buf4)
+    # Data type is indifferent
+    assert buf2.equals(buf5)
+
+
 def test_allocate_buffer():
     buf = pa.allocate_buffer(100)
     assert buf.size == 100
@@ -244,9 +266,11 @@ def test_buffer_memoryview_is_immutable():
     val = b'some data'
 
     buf = pa.frombuffer(val)
+    assert not buf.is_mutable
     assert isinstance(buf, pa.Buffer)
 
     result = memoryview(buf)
+    assert result.readonly
 
     with pytest.raises(TypeError) as exc:
         result[0] = b'h'
@@ -258,6 +282,29 @@ def test_buffer_memoryview_is_immutable():
         assert 'cannot modify read-only' in str(exc.value)
 
 
+def test_uninitialized_buffer():
+    # ARROW-2039: calling Buffer() directly creates an uninitialized object
+    check_uninitialized = partial(pytest.raises,
+                                  ReferenceError, match="uninitialized")
+    buf = pa.Buffer()
+    with check_uninitialized():
+        buf.size
+    with check_uninitialized():
+        len(buf)
+    with check_uninitialized():
+        buf.is_mutable
+    with check_uninitialized():
+        buf.parent
+    with check_uninitialized():
+        buf.to_pybytes()
+    with check_uninitialized():
+        memoryview(buf)
+    with check_uninitialized():
+        buf.equals(pa.frombuffer(b''))
+    with check_uninitialized():
+        pa.frombuffer(b'').equals(buf)
+
+
 def test_memory_output_stream():
     # 10 bytes
     val = b'dataabcdef'

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to