This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new a451a953 feat(python): Add copy_into() to CBufferView (#455)
a451a953 is described below
commit a451a953b2b93346d77c0addaacc43f026053f42
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri May 10 16:15:50 2024 -0300
feat(python): Add copy_into() to CBufferView (#455)
This is the non-bitmap equivalent of #450, useful for the same purpose
(concatenating one big data buffer from chunks).
---
python/src/nanoarrow/_lib.pyx | 82 ++++++++++++++++++++++++++++++--------
python/tests/test_c_buffer_view.py | 51 +++++++++++++++++++++++-
2 files changed, 114 insertions(+), 19 deletions(-)
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index e2555667..539135aa 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -1948,6 +1948,33 @@ cdef class CBufferView:
else:
return self._iter_dispatch(offset, length)
+    def copy_into(self, dest, offset=0, length=None, dest_offset=0):
+        """Copy a slice of this buffer's elements into a writable buffer
+
+        Parameters
+        ----------
+        dest : buffer-like
+            Object exporting a writable, contiguous buffer (it is requested
+            with ``PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS``).
+        offset : int, optional
+            Index of the first element of this view to copy.
+        length : int, optional
+            Number of elements to copy. Defaults to ``self.n_elements``.
+        dest_offset : int, optional
+            Position in ``dest`` at which writing starts, expressed in units
+            of this view's item size.
+
+        Returns
+        -------
+        int
+            The number of bytes copied.
+
+        Raises
+        ------
+        IndexError, ValueError
+            Propagated from ``_check_copy_into_bounds()`` when the source
+            slice or the destination is not acceptable.
+        """
+        if length is None:
+            length = self.n_elements
+
+        # Convert every argument to its C type *before* requesting the buffer.
+        # A failed conversion raises, and raising while the export is held
+        # would skip PyBuffer_Release() and leave dest locked by the export.
+        cdef int64_t c_offset = offset
+        cdef int64_t c_length = length
+        cdef int64_t c_item_size = self.item_size
+        cdef int64_t c_dest_offset = dest_offset
+
+        cdef Py_buffer buffer
+        PyObject_GetBuffer(dest, &buffer, PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS)
+
+        # The bounds check releases the buffer before raising its IndexErrors,
+        # so no cleanup is attempted here for those failures.
+        self._check_copy_into_bounds(
+            &buffer, c_offset, c_length, c_dest_offset, c_item_size
+        )
+
+        # All offsets below are in bytes; the arguments were in elements.
+        cdef uint8_t* dest_uint8 = <uint8_t*>buffer.buf
+        cdef int64_t dest_offset_bytes = c_dest_offset * c_item_size
+        cdef int64_t src_offset_bytes = c_offset * c_item_size
+        cdef int64_t bytes_to_copy = c_length * c_item_size
+
+        memcpy(
+            &(dest_uint8[dest_offset_bytes]),
+            &(self._ptr.data.as_uint8[src_offset_bytes]),
+            bytes_to_copy
+        )
+
+        PyBuffer_Release(&buffer)
+        return bytes_to_copy
+
def unpack_bits_into(self, dest, offset=0, length=None, dest_offset=0):
if self._data_type != NANOARROW_TYPE_BOOL:
raise ValueError("Can't unpack non-boolean buffer")
@@ -1955,25 +1982,9 @@ cdef class CBufferView:
if length is None:
length = self.n_elements
- if offset < 0 or length < 0 or (offset + length) > self.n_elements:
- raise IndexError(
- f"offset {offset} and length {length} do not describe a valid
slice "
- f"of buffer with {self.n_elements} elements"
- )
-
cdef Py_buffer buffer
PyObject_GetBuffer(dest, &buffer, PyBUF_WRITABLE |
PyBUF_ANY_CONTIGUOUS)
- if buffer.itemsize != 1:
- PyBuffer_Release(&buffer)
- raise ValueError("Destination buffer has itemsize != 1")
-
- if dest_offset < 0 or buffer.len < (dest_offset + length):
- buffer_len = buffer.len
- PyBuffer_Release(&buffer)
- raise IndexError(
- f"Can't unpack {length} elements into buffer of size
{buffer_len} "
- f"with dest_offset = {dest_offset}"
- )
+ self._check_copy_into_bounds(&buffer, offset, length, dest_offset, 1)
ArrowBitsUnpackInt8(
self._ptr.data.as_uint8,
@@ -1983,6 +1994,7 @@ cdef class CBufferView:
)
PyBuffer_Release(&buffer)
+ return length
def unpack_bits(self, offset=0, length=None):
if length is None:
@@ -1994,6 +2006,42 @@ cdef class CBufferView:
out.advance(length)
return out.finish()
+    def copy(self, offset=0, length=None):
+        """Copy a slice of this buffer's elements into a newly built buffer
+
+        Parameters
+        ----------
+        offset : int, optional
+            Index of the first element to copy.
+        length : int, optional
+            Number of elements to copy. Defaults to ``self.n_elements``.
+
+        Returns
+        -------
+        The result of ``CBufferBuilder.finish()`` for a builder that was given
+        this view's data type and filled via ``copy_into()``.
+        """
+        length = self.n_elements if length is None else length
+
+        # Size of the requested slice in bytes (length is in elements)
+        cdef int64_t n_bytes = length * self.item_size
+
+        builder = CBufferBuilder().set_data_type(self.data_type_id)
+        builder.reserve_bytes(n_bytes)
+
+        # copy_into() validates offset/length and writes through the builder's
+        # buffer export; advance() then accounts for the bytes just written.
+        self.copy_into(builder, offset, length)
+        builder.advance(n_bytes)
+
+        return builder.finish()
+
+    cdef _check_copy_into_bounds(self, Py_buffer* dest, int64_t offset, int64_t length,
+                                 int64_t dest_offset, int64_t dest_itemsize):
+        # Validate a copy of `length` elements starting at `offset` in this view
+        # into the already-acquired buffer `dest`, starting `dest_offset`
+        # elements in, where each element occupies `dest_itemsize` bytes.
+        #
+        # On every failure path the destination buffer is released before the
+        # exception is raised, so callers must not release it again when this
+        # raises. On success the buffer is left acquired and nothing is returned.
+        #
+        # Raises IndexError if the source slice or destination range is out of
+        # bounds and ValueError if the destination item size is incompatible.
+        if offset < 0 or length < 0 or (offset + length) > self.n_elements:
+            PyBuffer_Release(dest)
+            raise IndexError(
+                f"offset {offset} and length {length} do not describe a valid slice "
+                f"of buffer with {self.n_elements} elements"
+            )
+
+        if dest.itemsize != 1 and dest.itemsize != dest_itemsize:
+            # Release here as well: without it the export on the destination
+            # object would be leaked whenever the item size does not match.
+            PyBuffer_Release(dest)
+            raise ValueError(
+                "Destination buffer must have itemsize == 1 or "
+                f"itemsize == {dest_itemsize}"
+            )
+
+        # dest.len is in bytes, so compare against byte quantities
+        cdef int64_t dest_offset_bytes = dest_offset * dest_itemsize
+        cdef int64_t bytes_to_copy = dest_itemsize * length
+        if dest_offset < 0 or dest.len < (dest_offset_bytes + bytes_to_copy):
+            # Read the length before releasing; it is needed for the message
+            buffer_len = dest.len
+            PyBuffer_Release(dest)
+            raise IndexError(
+                f"Can't unpack {length} elements into buffer of size {buffer_len} "
+                f"with dest_offset = {dest_offset}"
+            )
+
def _iter_bitmap(self, int64_t offset, int64_t length):
cdef uint8_t item
cdef int64_t i
diff --git a/python/tests/test_c_buffer_view.py b/python/tests/test_c_buffer_view.py
index 25973309..fa01d14a 100644
--- a/python/tests/test_c_buffer_view.py
+++ b/python/tests/test_c_buffer_view.py
@@ -81,7 +81,7 @@ def test_buffer_view_bool_unpack():
# Check with non-zero destination offset
out = bytearray([255] * 10)
- view.unpack_bits_into(out, dest_offset=2)
+ assert view.unpack_bits_into(out, dest_offset=2) == 8
assert list(out) == [255, 255, 1, 0, 0, 1, 0, 0, 0, 0]
# Check error requesting out-of-bounds dest_offset
@@ -105,7 +105,7 @@ def test_buffer_view_bool_unpack():
# Check errors from an output buffer with the wrong data type
out = array("i", [0, 0, 0, 0])
- msg = "Destination buffer has itemsize != 1"
+ msg = "Destination buffer must have itemsize == 1"
with pytest.raises(ValueError, match=msg):
view.unpack_bits_into(out)
@@ -153,3 +153,50 @@ def test_buffer_view_non_bool():
# Check repr
assert "1 2 3 5" in repr(view)
+
+
+def test_buffer_view_copy():
+    """Exercise CBufferView.copy() and copy_into() on an int32 data buffer."""
+    from array import array
+
+    int32_view = na.c_array([1, 2, 3, 4], na.int32()).view()
+    view = int32_view.buffer(1)
+
+    # A full copy keeps the element count, data type, and values
+    full_copy = view.copy()
+    assert len(full_copy) == view.n_elements
+    assert full_copy.data_type == "int32"
+    assert list(full_copy) == [1, 2, 3, 4]
+
+    # A partial copy contains only the requested slice
+    partial_copy = view.copy(1, 3)
+    assert len(partial_copy) == 3
+    assert list(partial_copy) == [2, 3, 4]
+
+    # copy_into() honours a non-zero destination offset and reports bytes
+    dest = array(view.format, [0] * 6)
+    assert view.copy_into(dest, dest_offset=2) == 16
+    assert list(dest) == [0, 0, 1, 2, 3, 4]
+
+    # A negative dest_offset is out of bounds
+    with pytest.raises(IndexError, match="Can't unpack"):
+        view.copy_into(dest, dest_offset=-1)
+
+    # Out-of-bounds source slices are rejected
+    expected = "do not describe a valid slice"
+    for bad_offset, bad_length in [(-1, None), (0, -1), (0, 9)]:
+        with pytest.raises(IndexError, match=expected):
+            view.copy(bad_offset, bad_length)
+
+    # A destination that is too small is rejected
+    empty_dest = array("i")
+    expected = "Can't unpack 4 elements into buffer of size 0"
+    with pytest.raises(IndexError, match=expected):
+        view.copy_into(empty_dest)
+
+    # A destination with an incompatible item size is rejected
+    double_dest = array("d", [0, 0, 0, 0])
+    expected = "Destination buffer must have itemsize == 1 or itemsize == 4"
+    with pytest.raises(ValueError, match=expected):
+        view.copy_into(double_dest)