This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 134c66d5 fix(python): Fix use of memoryview to write fill to the 
buffer builder (#477)
134c66d5 is described below

commit 134c66d5f818edac6213d6831472dc22678a18e2
Author: Dewey Dunnington <de...@dunnington.ca>
AuthorDate: Sun May 19 09:27:25 2024 -0300

    fix(python): Fix use of memoryview to write fill to the buffer builder 
(#477)
    
    At least one test is failing on pypy because of an unreleased buffer.
    This was due to the line:
    
    ```python
    memoryview(builder)[out_start : out_start + length] = b"\x01" * length
    ```
    
    ...which relied on the deletion of `memoryview(builder)` to release the
    `builder` buffer. One solution is:
    
    ```python
    with memoryview(builder) as mv:
      mv[out_start : out_start + length] = b"\x01" * length
    ```
    
    However, we also have `ArrowBufferAppendFill()` in the C library, so
    instead, this PR wires that up and uses it (which should be faster,
    anyway).
---
 python/src/nanoarrow/_lib.pyi   | 12 ++++++++++--
 python/src/nanoarrow/_lib.pyx   |  9 +++++++++
 python/src/nanoarrow/visitor.py | 30 +++++++++++-------------------
 python/tests/test_c_buffer.py   | 22 ++++++++--------------
 4 files changed, 38 insertions(+), 35 deletions(-)

diff --git a/python/src/nanoarrow/_lib.pyi b/python/src/nanoarrow/_lib.pyi
index 31d2d0a6..ed6cf32f 100644
--- a/python/src/nanoarrow/_lib.pyi
+++ b/python/src/nanoarrow/_lib.pyi
@@ -236,7 +236,7 @@ class CBuffer:
     data_type_id: Incomplete
     element_size_bits: Incomplete
     format: Incomplete
-    item_size: Incomplete
+    itemsize: Incomplete
     n_elements: Incomplete
     size_bytes: Incomplete
     @classmethod
@@ -264,6 +264,7 @@ class CBufferBuilder:
     __pyx_vtable__: ClassVar[PyCapsule] = ...
     capacity_bytes: Incomplete
     format: Incomplete
+    itemsize: Incomplete
     size_bytes: Incomplete
     @classmethod
     def __init__(cls, *args, **kwargs) -> None:
@@ -310,8 +311,15 @@ class CBufferBuilder:
 
         This method returns the number of elements that were written.
         """
+    def write_fill(self, *args, **kwargs):
+        """Write fill bytes to this buffer
+
+        Appends the byte ``value`` to this buffer ``size_bytes`` times.
+        """
     def __buffer__(self, *args, **kwargs):
         """Return a buffer object that exposes the underlying memory of the 
object."""
+    def __len__(self) -> int:
+        """Return len(self)."""
     def __reduce__(self): ...
     def __release_buffer__(self, *args, **kwargs):
         """Release the buffer object that exposes the underlying memory of the 
object."""
@@ -323,7 +331,7 @@ class CBufferView:
     device: Incomplete
     element_size_bits: Incomplete
     format: Incomplete
-    item_size: Incomplete
+    itemsize: Incomplete
     n_elements: Incomplete
     size_bytes: Incomplete
     @classmethod
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 0b2fd7b2..590406a0 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -2394,6 +2394,15 @@ cdef class CBufferBuilder:
         self._buffer._ptr.size_bytes = new_size
         return self
 
+    def write_fill(self, uint8_t value, int64_t size_bytes):
+        """Write fill bytes to this buffer
+
+        Appends the byte ``value`` to this buffer ``size_bytes`` times.
+        """
+        self._assert_unlocked()
+        cdef int code = ArrowBufferAppendFill(self._buffer._ptr, value, 
size_bytes)
+        Error.raise_error_not_ok("ArrowBufferAppendFill", code)
+
     def write(self, content):
         """Write bytes to this buffer
 
diff --git a/python/src/nanoarrow/visitor.py b/python/src/nanoarrow/visitor.py
index 0b0d76d7..7adaf471 100644
--- a/python/src/nanoarrow/visitor.py
+++ b/python/src/nanoarrow/visitor.py
@@ -339,13 +339,13 @@ class ToPyBufferConverter(ArrayViewVisitor):
             self._builder.reserve_bytes(total_elements * element_size_bytes)
 
     def visit_chunk_view(self, array_view: CArrayView) -> None:
-        converter = self._builder
+        builder = self._builder
         offset, length = array_view.offset, array_view.length
-        dst_bytes = length * converter.itemsize
+        dst_bytes = length * builder.itemsize
 
-        converter.reserve_bytes(dst_bytes)
-        array_view.buffer(1).copy_into(converter, offset, length, 
len(converter))
-        converter.advance(dst_bytes)
+        builder.reserve_bytes(dst_bytes)
+        array_view.buffer(1).copy_into(builder, offset, length, len(builder))
+        builder.advance(dst_bytes)
 
     def finish(self) -> Any:
         return self._builder.finish()
@@ -355,16 +355,15 @@ class ToBooleanBufferConverter(ArrayViewVisitor):
     def begin(self, total_elements: Union[int, None]):
         self._builder = CBufferBuilder()
         self._builder.set_format("?")
-
         if total_elements is not None:
             self._builder.reserve_bytes(total_elements)
 
     def visit_chunk_view(self, array_view: CArrayView) -> None:
-        converter = self._builder
+        builder = self._builder
         offset, length = array_view.offset, array_view.length
-        converter.reserve_bytes(length)
-        array_view.buffer(1).unpack_bits_into(converter, offset, length, 
len(converter))
-        converter.advance(length)
+        builder.reserve_bytes(length)
+        array_view.buffer(1).unpack_bits_into(builder, offset, length, 
len(builder))
+        builder.advance(length)
 
     def finish(self) -> Any:
         return self._builder.finish()
@@ -404,7 +403,7 @@ class ToNullableSequenceConverter(ArrayViewVisitor):
         if chunk_contains_nulls:
             current_length = self._length
             if not bitmap_allocated:
-                self._fill_valid(current_length)
+                builder.write_fill(1, current_length)
 
             builder.reserve_bytes(length)
             array_view.buffer(0).unpack_bits_into(
@@ -413,7 +412,7 @@ class ToNullableSequenceConverter(ArrayViewVisitor):
             builder.advance(length)
 
         elif bitmap_allocated:
-            self._fill_valid(length)
+            builder.write_fill(1, length)
 
         self._length += length
         self._converter.visit_chunk_view(array_view)
@@ -423,13 +422,6 @@ class ToNullableSequenceConverter(ArrayViewVisitor):
         data = self._converter.finish()
         return self._handle_nulls(is_valid if len(is_valid) > 0 else None, 
data)
 
-    def _fill_valid(self, length):
-        builder = self._builder
-        builder.reserve_bytes(length)
-        out_start = len(builder)
-        memoryview(builder)[out_start : out_start + length] = b"\x01" * length
-        builder.advance(length)
-
 
 def _resolve_converter_cls(schema, handle_nulls=None):
     schema_view = c_schema_view(schema)
diff --git a/python/tests/test_c_buffer.py b/python/tests/test_c_buffer.py
index c84d866e..fea83dc4 100644
--- a/python/tests/test_c_buffer.py
+++ b/python/tests/test_c_buffer.py
@@ -222,28 +222,22 @@ def test_c_buffer_builder():
 
 
 def test_c_buffer_builder_buffer_protocol():
-    import platform
-
     builder = CBufferBuilder()
     builder.reserve_bytes(1)
 
-    mv = memoryview(builder)
-    assert len(mv) == 1
+    with memoryview(builder) as mv:
+        assert len(mv) == 1
 
-    with pytest.raises(BufferError, match="CBufferBuilder is locked"):
-        memoryview(builder)
+        with pytest.raises(BufferError, match="CBufferBuilder is locked"):
+            memoryview(builder)
 
-    with pytest.raises(BufferError, match="CBufferBuilder is locked"):
-        assert bytes(builder.finish()) == b"abcdefghij"
+        with pytest.raises(BufferError, match="CBufferBuilder is locked"):
+            assert bytes(builder.finish()) == b"abcdefghij"
 
-    # On at least some versions of PyPy the call to mv.release() does not seem
-    # to deterministically call the CBufferBuilder's __releasebuffer__().
-    if platform.python_implementation() == "PyPy":
-        pytest.skip("CBufferBuilder buffer release is non-deterministic on 
PyPy")
+        mv[builder.size_bytes] = ord("k")
 
-    mv[builder.size_bytes] = ord("k")
     builder.advance(1)
-    mv.release()
+
     assert bytes(builder.finish()) == b"k"
 
 

Reply via email to