This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new e15f545 ARROW-2039: [Python] Avoid crashing on uninitialized Buffer
e15f545 is described below
commit e15f54505d4d92810c8c216a89eee3e703013d1c
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Feb 15 11:34:59 2018 -0500
ARROW-2039: [Python] Avoid crashing on uninitialized Buffer
Author: Antoine Pitrou <[email protected]>
Closes #1605 from pitrou/ARROW-2039-uninitialized-buffer and squashes the
following commits:
358443c6 [Antoine Pitrou] ARROW-2039: [Python] Avoid crashing on
uninitialized Buffer
---
python/pyarrow/io.pxi | 16 ++++++++++++++
python/pyarrow/lib.pxd | 1 +
python/pyarrow/tests/test_io.py | 47 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 64 insertions(+)
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index b0996f8..aa2f7ed 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -595,22 +595,30 @@ cdef class Buffer:
self.shape[0] = self.size
self.strides[0] = <Py_ssize_t>(1)
+ cdef int _check_nullptr(self) except -1:
+ if self.buffer.get() == NULL:
+ raise ReferenceError("operation on uninitialized Buffer object")
+ return 0
+
def __len__(self):
return self.size
property size:
def __get__(self):
+ self._check_nullptr()
return self.buffer.get().size()
property is_mutable:
def __get__(self):
+ self._check_nullptr()
return self.buffer.get().is_mutable()
property parent:
def __get__(self):
+ self._check_nullptr()
cdef shared_ptr[CBuffer] parent_buf = self.buffer.get().parent()
if parent_buf.get() == NULL:
@@ -620,6 +628,7 @@ cdef class Buffer:
def __getitem__(self, key):
# TODO(wesm): buffer slicing
+ self._check_nullptr()
raise NotImplementedError
def equals(self, Buffer other):
@@ -634,17 +643,21 @@ cdef class Buffer:
-------
are_equal : True if buffer contents and size are equal
"""
+ self._check_nullptr()
+ other._check_nullptr()
cdef c_bool result = False
with nogil:
result = self.buffer.get().Equals(deref(other.buffer.get()))
return result
def to_pybytes(self):
+ self._check_nullptr()
return cp.PyBytes_FromStringAndSize(
<const char*>self.buffer.get().data(),
self.buffer.get().size())
def __getbuffer__(self, cp.Py_buffer* buffer, int flags):
+ self._check_nullptr()
buffer.buf = <char *>self.buffer.get().data()
buffer.format = 'b'
@@ -662,11 +675,13 @@ cdef class Buffer:
buffer.suboffsets = NULL
def __getsegcount__(self, Py_ssize_t *len_out):
+ self._check_nullptr()
if len_out != NULL:
len_out[0] = <Py_ssize_t>self.size
return 1
def __getreadbuffer__(self, Py_ssize_t idx, void **p):
+ self._check_nullptr()
if idx != 0:
raise SystemError("accessing non-existent buffer segment")
if p != NULL:
@@ -674,6 +689,7 @@ cdef class Buffer:
return self.size
def __getwritebuffer__(self, Py_ssize_t idx, void **p):
+ self._check_nullptr()
if not self.buffer.get().is_mutable():
raise SystemError("trying to write an immutable buffer")
if idx != 0:
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index b1433ec..31732a6 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -321,6 +321,7 @@ cdef class Buffer:
Py_ssize_t strides[1]
cdef void init(self, const shared_ptr[CBuffer]& buffer)
+ cdef int _check_nullptr(self) except -1
cdef class ResizableBuffer(Buffer):
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index 0947cb7..736020f 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
+from functools import partial
from io import BytesIO, TextIOWrapper
import gc
import os
@@ -176,6 +177,8 @@ def test_buffer_to_numpy():
buf = pa.frombuffer(byte_array)
array = np.frombuffer(buf, dtype="uint8")
assert array[0] == byte_array[0]
+ byte_array[0] += 1
+ assert array[0] == byte_array[0]
assert array.base == buf
@@ -192,6 +195,25 @@ def test_buffer_from_numpy():
buf = pa.frombuffer(arr.T[::2])
+def test_buffer_equals():
+ # Buffer.equals() returns true iff the buffers have the same contents
+ b1 = b'some data!'
+ b2 = bytearray(b1)
+ b3 = bytearray(b1)
+ b3[0] = 42
+ buf1 = pa.frombuffer(b1)
+ buf2 = pa.frombuffer(b2)
+ buf3 = pa.frombuffer(b2)
+ buf4 = pa.frombuffer(b3)
+ buf5 = pa.frombuffer(np.frombuffer(b2, dtype=np.int16))
+ assert buf1.equals(buf1)
+ assert buf1.equals(buf2)
+ assert buf2.equals(buf3)
+ assert not buf2.equals(buf4)
+ # Data type is irrelevant: only the buffer contents and size are compared
+ assert buf2.equals(buf5)
+
+
def test_allocate_buffer():
buf = pa.allocate_buffer(100)
assert buf.size == 100
@@ -244,9 +266,11 @@ def test_buffer_memoryview_is_immutable():
val = b'some data'
buf = pa.frombuffer(val)
+ assert not buf.is_mutable
assert isinstance(buf, pa.Buffer)
result = memoryview(buf)
+ assert result.readonly
with pytest.raises(TypeError) as exc:
result[0] = b'h'
@@ -258,6 +282,29 @@ def test_buffer_memoryview_is_immutable():
assert 'cannot modify read-only' in str(exc.value)
+def test_uninitialized_buffer():
+ # ARROW-2039: calling Buffer() directly creates an uninitialized object
+ check_uninitialized = partial(pytest.raises,
+ ReferenceError, match="uninitialized")
+ buf = pa.Buffer()
+ with check_uninitialized():
+ buf.size
+ with check_uninitialized():
+ len(buf)
+ with check_uninitialized():
+ buf.is_mutable
+ with check_uninitialized():
+ buf.parent
+ with check_uninitialized():
+ buf.to_pybytes()
+ with check_uninitialized():
+ memoryview(buf)
+ with check_uninitialized():
+ buf.equals(pa.frombuffer(b''))
+ with check_uninitialized():
+ pa.frombuffer(b'').equals(buf)
+
+
def test_memory_output_stream():
# 10 bytes
val = b'dataabcdef'
--
To stop receiving notification emails like this one, please contact
[email protected].