This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 134c66d5 fix(python): Fix use of memoryview to write fill to the
buffer builder (#477)
134c66d5 is described below
commit 134c66d5f818edac6213d6831472dc22678a18e2
Author: Dewey Dunnington <[email protected]>
AuthorDate: Sun May 19 09:27:25 2024 -0300
fix(python): Fix use of memoryview to write fill to the buffer builder
(#477)
At least one test is failing on pypy because of an unreleased buffer.
This was due to the line:
```python
memoryview(builder)[out_start : out_start + length] = b"\x01" * length
```
...which relied on the deletion of `memoryview(builder)` to release the
`builder` buffer. One solution is:
```python
with memoryview(builder) as mv:
mv[out_start : out_start + length] = b"\x01" * length
```
However, we also have `ArrowBufferAppendFill()` in the C library, so
instead, this PR wires that up and uses it (which should be faster,
anyway).
---
python/src/nanoarrow/_lib.pyi | 12 ++++++++++--
python/src/nanoarrow/_lib.pyx | 9 +++++++++
python/src/nanoarrow/visitor.py | 30 +++++++++++-------------------
python/tests/test_c_buffer.py | 22 ++++++++--------------
4 files changed, 38 insertions(+), 35 deletions(-)
diff --git a/python/src/nanoarrow/_lib.pyi b/python/src/nanoarrow/_lib.pyi
index 31d2d0a6..ed6cf32f 100644
--- a/python/src/nanoarrow/_lib.pyi
+++ b/python/src/nanoarrow/_lib.pyi
@@ -236,7 +236,7 @@ class CBuffer:
data_type_id: Incomplete
element_size_bits: Incomplete
format: Incomplete
- item_size: Incomplete
+ itemsize: Incomplete
n_elements: Incomplete
size_bytes: Incomplete
@classmethod
@@ -264,6 +264,7 @@ class CBufferBuilder:
__pyx_vtable__: ClassVar[PyCapsule] = ...
capacity_bytes: Incomplete
format: Incomplete
+ itemsize: Incomplete
size_bytes: Incomplete
@classmethod
def __init__(cls, *args, **kwargs) -> None:
@@ -310,8 +311,15 @@ class CBufferBuilder:
This method returns the number of elements that were written.
"""
+ def write_fill(self, *args, **kwargs):
+ """Write fill bytes to this buffer
+
+ Appends the byte ``value`` to this buffer ``size_bytes`` times.
+ """
def __buffer__(self, *args, **kwargs):
"""Return a buffer object that exposes the underlying memory of the
object."""
+ def __len__(self) -> int:
+ """Return len(self)."""
def __reduce__(self): ...
def __release_buffer__(self, *args, **kwargs):
"""Release the buffer object that exposes the underlying memory of the
object."""
@@ -323,7 +331,7 @@ class CBufferView:
device: Incomplete
element_size_bits: Incomplete
format: Incomplete
- item_size: Incomplete
+ itemsize: Incomplete
n_elements: Incomplete
size_bytes: Incomplete
@classmethod
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 0b2fd7b2..590406a0 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -2394,6 +2394,15 @@ cdef class CBufferBuilder:
self._buffer._ptr.size_bytes = new_size
return self
+ def write_fill(self, uint8_t value, int64_t size_bytes):
+ """Write fill bytes to this buffer
+
+ Appends the byte ``value`` to this buffer ``size_bytes`` times.
+ """
+ self._assert_unlocked()
+ cdef int code = ArrowBufferAppendFill(self._buffer._ptr, value,
size_bytes)
+ Error.raise_error_not_ok("ArrowBufferAppendFill", code)
+
def write(self, content):
"""Write bytes to this buffer
diff --git a/python/src/nanoarrow/visitor.py b/python/src/nanoarrow/visitor.py
index 0b0d76d7..7adaf471 100644
--- a/python/src/nanoarrow/visitor.py
+++ b/python/src/nanoarrow/visitor.py
@@ -339,13 +339,13 @@ class ToPyBufferConverter(ArrayViewVisitor):
self._builder.reserve_bytes(total_elements * element_size_bytes)
def visit_chunk_view(self, array_view: CArrayView) -> None:
- converter = self._builder
+ builder = self._builder
offset, length = array_view.offset, array_view.length
- dst_bytes = length * converter.itemsize
+ dst_bytes = length * builder.itemsize
- converter.reserve_bytes(dst_bytes)
- array_view.buffer(1).copy_into(converter, offset, length,
len(converter))
- converter.advance(dst_bytes)
+ builder.reserve_bytes(dst_bytes)
+ array_view.buffer(1).copy_into(builder, offset, length, len(builder))
+ builder.advance(dst_bytes)
def finish(self) -> Any:
return self._builder.finish()
@@ -355,16 +355,15 @@ class ToBooleanBufferConverter(ArrayViewVisitor):
def begin(self, total_elements: Union[int, None]):
self._builder = CBufferBuilder()
self._builder.set_format("?")
-
if total_elements is not None:
self._builder.reserve_bytes(total_elements)
def visit_chunk_view(self, array_view: CArrayView) -> None:
- converter = self._builder
+ builder = self._builder
offset, length = array_view.offset, array_view.length
- converter.reserve_bytes(length)
- array_view.buffer(1).unpack_bits_into(converter, offset, length,
len(converter))
- converter.advance(length)
+ builder.reserve_bytes(length)
+ array_view.buffer(1).unpack_bits_into(builder, offset, length,
len(builder))
+ builder.advance(length)
def finish(self) -> Any:
return self._builder.finish()
@@ -404,7 +403,7 @@ class ToNullableSequenceConverter(ArrayViewVisitor):
if chunk_contains_nulls:
current_length = self._length
if not bitmap_allocated:
- self._fill_valid(current_length)
+ builder.write_fill(1, current_length)
builder.reserve_bytes(length)
array_view.buffer(0).unpack_bits_into(
@@ -413,7 +412,7 @@ class ToNullableSequenceConverter(ArrayViewVisitor):
builder.advance(length)
elif bitmap_allocated:
- self._fill_valid(length)
+ builder.write_fill(1, length)
self._length += length
self._converter.visit_chunk_view(array_view)
@@ -423,13 +422,6 @@ class ToNullableSequenceConverter(ArrayViewVisitor):
data = self._converter.finish()
return self._handle_nulls(is_valid if len(is_valid) > 0 else None,
data)
- def _fill_valid(self, length):
- builder = self._builder
- builder.reserve_bytes(length)
- out_start = len(builder)
- memoryview(builder)[out_start : out_start + length] = b"\x01" * length
- builder.advance(length)
-
def _resolve_converter_cls(schema, handle_nulls=None):
schema_view = c_schema_view(schema)
diff --git a/python/tests/test_c_buffer.py b/python/tests/test_c_buffer.py
index c84d866e..fea83dc4 100644
--- a/python/tests/test_c_buffer.py
+++ b/python/tests/test_c_buffer.py
@@ -222,28 +222,22 @@ def test_c_buffer_builder():
def test_c_buffer_builder_buffer_protocol():
- import platform
-
builder = CBufferBuilder()
builder.reserve_bytes(1)
- mv = memoryview(builder)
- assert len(mv) == 1
+ with memoryview(builder) as mv:
+ assert len(mv) == 1
- with pytest.raises(BufferError, match="CBufferBuilder is locked"):
- memoryview(builder)
+ with pytest.raises(BufferError, match="CBufferBuilder is locked"):
+ memoryview(builder)
- with pytest.raises(BufferError, match="CBufferBuilder is locked"):
- assert bytes(builder.finish()) == b"abcdefghij"
+ with pytest.raises(BufferError, match="CBufferBuilder is locked"):
+ assert bytes(builder.finish()) == b"abcdefghij"
- # On at least some versions of PyPy the call to mv.release() does not seem
- # to deterministically call the CBufferBuilder's __releasebuffer__().
- if platform.python_implementation() == "PyPy":
- pytest.skip("CBufferBuilder buffer release is non-deterministic on
PyPy")
+ mv[builder.size_bytes] = ord("k")
- mv[builder.size_bytes] = ord("k")
builder.advance(1)
- mv.release()
+
assert bytes(builder.finish()) == b"k"