This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new a451a953 feat(python): Add copy_into() to CBufferView (#455)
a451a953 is described below

commit a451a953b2b93346d77c0addaacc43f026053f42
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri May 10 16:15:50 2024 -0300

    feat(python): Add copy_into() to CBufferView (#455)
    
    This is the non-bitmap equivalent of #450, useful for the same purpose
    (concatenating one big data buffer from chunks).
---
 python/src/nanoarrow/_lib.pyx      | 82 ++++++++++++++++++++++++++++++--------
 python/tests/test_c_buffer_view.py | 51 +++++++++++++++++++++++-
 2 files changed, 114 insertions(+), 19 deletions(-)

diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index e2555667..539135aa 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -1948,6 +1948,33 @@ cdef class CBufferView:
         else:
             return self._iter_dispatch(offset, length)
 
+    def copy_into(self, dest, offset=0, length=None, dest_offset=0):
+        if length is None:
+            length = self.n_elements
+
+        cdef Py_buffer buffer
+        PyObject_GetBuffer(dest, &buffer, PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS)
+
+        cdef int64_t c_offset = offset
+        cdef int64_t c_length = length
+        cdef int64_t c_item_size = self.item_size
+        cdef int64_t c_dest_offset = dest_offset
+        self._check_copy_into_bounds(&buffer, c_offset, c_length, dest_offset, c_item_size)
+
+        cdef uint8_t* dest_uint8 = <uint8_t*>buffer.buf
+        cdef int64_t dest_offset_bytes = c_dest_offset * c_item_size
+        cdef int64_t src_offset_bytes = c_offset * c_item_size
+        cdef int64_t bytes_to_copy = c_length * c_item_size
+
+        memcpy(
+            &(dest_uint8[dest_offset_bytes]),
+            &(self._ptr.data.as_uint8[src_offset_bytes]),
+            bytes_to_copy
+        )
+
+        PyBuffer_Release(&buffer)
+        return bytes_to_copy
+
     def unpack_bits_into(self, dest, offset=0, length=None, dest_offset=0):
         if self._data_type != NANOARROW_TYPE_BOOL:
             raise ValueError("Can't unpack non-boolean buffer")
@@ -1955,25 +1982,9 @@ cdef class CBufferView:
         if length is None:
             length = self.n_elements
 
-        if offset < 0 or length < 0 or (offset + length) > self.n_elements:
-            raise IndexError(
-                f"offset {offset} and length {length} do not describe a valid slice "
-                f"of buffer with {self.n_elements} elements"
-            )
-
         cdef Py_buffer buffer
         PyObject_GetBuffer(dest, &buffer, PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS)
-        if buffer.itemsize != 1:
-            PyBuffer_Release(&buffer)
-            raise ValueError("Destination buffer has itemsize != 1")
-
-        if dest_offset < 0 or buffer.len < (dest_offset + length):
-            buffer_len = buffer.len
-            PyBuffer_Release(&buffer)
-            raise IndexError(
-                f"Can't unpack {length} elements into buffer of size {buffer_len} "
-                f"with dest_offset = {dest_offset}"
-            )
+        self._check_copy_into_bounds(&buffer, offset, length, dest_offset, 1)
 
         ArrowBitsUnpackInt8(
             self._ptr.data.as_uint8,
@@ -1983,6 +1994,7 @@ cdef class CBufferView:
         )
 
         PyBuffer_Release(&buffer)
+        return length
 
     def unpack_bits(self, offset=0, length=None):
         if length is None:
@@ -1994,6 +2006,42 @@ cdef class CBufferView:
         out.advance(length)
         return out.finish()
 
+    def copy(self, offset=0, length=None):
+        if length is None:
+            length = self.n_elements
+
+        cdef int64_t bytes_to_copy = length * self.item_size
+        out = CBufferBuilder().set_data_type(self.data_type_id)
+        out.reserve_bytes(bytes_to_copy)
+        self.copy_into(out, offset, length)
+        out.advance(bytes_to_copy)
+        return out.finish()
+
+    cdef _check_copy_into_bounds(self, Py_buffer* dest, int64_t offset, int64_t length,
+                                 int64_t dest_offset, int64_t dest_itemsize):
+        if offset < 0 or length < 0 or (offset + length) > self.n_elements:
+            PyBuffer_Release(dest)
+            raise IndexError(
+                f"offset {offset} and length {length} do not describe a valid slice "
+                f"of buffer with {self.n_elements} elements"
+            )
+
+        if dest.itemsize != 1 and dest.itemsize != dest_itemsize:
+            raise ValueError(
+                "Destination buffer must have itemsize == 1 or "
+                f"itemsize == {dest_itemsize}"
+            )
+
+        cdef int64_t dest_offset_bytes = dest_offset * dest_itemsize
+        cdef int64_t bytes_to_copy = dest_itemsize * length
+        if dest_offset < 0 or dest.len < (dest_offset_bytes + bytes_to_copy):
+            buffer_len = dest.len
+            PyBuffer_Release(dest)
+            raise IndexError(
+                f"Can't unpack {length} elements into buffer of size {buffer_len} "
+                f"with dest_offset = {dest_offset}"
+            )
+
     def _iter_bitmap(self, int64_t offset, int64_t length):
         cdef uint8_t item
         cdef int64_t i
diff --git a/python/tests/test_c_buffer_view.py b/python/tests/test_c_buffer_view.py
index 25973309..fa01d14a 100644
--- a/python/tests/test_c_buffer_view.py
+++ b/python/tests/test_c_buffer_view.py
@@ -81,7 +81,7 @@ def test_buffer_view_bool_unpack():
 
     # Check with non-zero destination offset
     out = bytearray([255] * 10)
-    view.unpack_bits_into(out, dest_offset=2)
+    assert view.unpack_bits_into(out, dest_offset=2) == 8
     assert list(out) == [255, 255, 1, 0, 0, 1, 0, 0, 0, 0]
 
     # Check error requesting out-of-bounds dest_offset
@@ -105,7 +105,7 @@ def test_buffer_view_bool_unpack():
 
     # Check errors from an output buffer with the wrong data type
     out = array("i", [0, 0, 0, 0])
-    msg = "Destination buffer has itemsize != 1"
+    msg = "Destination buffer must have itemsize == 1"
     with pytest.raises(ValueError, match=msg):
         view.unpack_bits_into(out)
 
@@ -153,3 +153,50 @@ def test_buffer_view_non_bool():
 
     # Check repr
     assert "1 2 3 5" in repr(view)
+
+
+def test_buffer_view_copy():
+    from array import array
+
+    array_view = na.c_array([1, 2, 3, 4], na.int32()).view()
+    view = array_view.buffer(1)
+
+    # Check copying
+    copied_all = view.copy()
+    assert len(copied_all) == view.n_elements
+    assert copied_all.data_type == "int32"
+    assert list(copied_all) == [1, 2, 3, 4]
+
+    copied_some = view.copy(1, 3)
+    assert len(copied_some) == 3
+    assert list(copied_some) == [2, 3, 4]
+
+    # Check with non-zero destination offset
+    out = array(view.format, [0, 0, 0, 0, 0, 0])
+    assert view.copy_into(out, dest_offset=2) == 16
+    assert list(out) == [0, 0, 1, 2, 3, 4]
+
+    # Check error requesting out-of-bounds dest_offset
+    with pytest.raises(IndexError, match="Can't unpack"):
+        view.copy_into(out, dest_offset=-1)
+
+    # Check errors from requesting out-of-bounds slices
+    msg = "do not describe a valid slice"
+    with pytest.raises(IndexError, match=msg):
+        view.copy(-1, None)
+    with pytest.raises(IndexError, match=msg):
+        view.copy(0, -1)
+    with pytest.raises(IndexError, match=msg):
+        view.copy(0, 9)
+
+    # Check errors from an output buffer of insufficient length
+    out = array("i")
+    msg = "Can't unpack 4 elements into buffer of size 0"
+    with pytest.raises(IndexError, match=msg):
+        view.copy_into(out)
+
+    # Check errors from an output buffer with the wrong data type
+    out = array("d", [0, 0, 0, 0])
+    msg = "Destination buffer must have itemsize == 1 or itemsize == 4"
+    with pytest.raises(ValueError, match=msg):
+        view.copy_into(out)

Reply via email to