This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 2f2450a8 feat(python): Ensure that buffer produced by
`CBufferView.unpack_bits()` has a boolean type (#457)
2f2450a8 is described below
commit 2f2450a89d90e9154264c62832e6178aeeb515a4
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri May 10 16:09:06 2024 -0300
feat(python): Ensure that buffer produced by `CBufferView.unpack_bits()`
has a boolean type (#457)
This is a small change to ensure that
`np.array(some_buffer.unpack_bits())` "just works" without nanoarrow
having to know about numpy dtypes. Basically we just need to ensure that
we can create/export a buffer with a `"?"` format string.
```python
import nanoarrow as na
import numpy as np
bool_array = na.Array([True, True, True, False, False, True], na.bool_())
np.array(bool_array.buffer(1).unpack_bits(0, len(bool_array)))
#> array([ True, True, True, False, False, True])
```
---
python/src/nanoarrow/_lib.pyx | 15 ++++++++++++---
python/tests/test_c_buffer_view.py | 1 +
2 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 451bd205..e2555667 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -335,7 +335,7 @@ cdef c_arrow_type_from_format(format):
return item_size, NANOARROW_TYPE_DOUBLE
# Check for signed integers
- if format in ("b", "?", "h", "i", "l", "q", "n"):
+ if format in ("b", "h", "i", "l", "q", "n"):
if item_size == 1:
return item_size, NANOARROW_TYPE_INT8
elif item_size == 2:
@@ -346,7 +346,7 @@ cdef c_arrow_type_from_format(format):
return item_size, NANOARROW_TYPE_INT64
# Check for unsinged integers
- if format in ("B", "H", "I", "L", "Q", "N"):
+ if format in ("B", "?", "H", "I", "L", "Q", "N"):
if item_size == 1:
return item_size, NANOARROW_TYPE_UINT8
elif item_size == 2:
@@ -1988,7 +1988,7 @@ cdef class CBufferView:
if length is None:
length = self.n_elements
- out = CBufferBuilder().set_data_type(NANOARROW_TYPE_UINT8)
+ out = CBufferBuilder().set_format("?")
out.reserve_bytes(length)
self.unpack_bits_into(out, offset, length)
out.advance(length)
@@ -2108,6 +2108,8 @@ cdef class CBuffer:
self._device
)
+ snprintf(self._view._format, sizeof(self._view._format), "%s", self._format)
+
@staticmethod
def empty():
cdef CBuffer out = CBuffer()
@@ -2272,6 +2274,13 @@ cdef class CBufferBuilder:
self._buffer._set_data_type(type_id, element_size_bits)
return self
+ def set_format(self, str format):
+ """Set the Python buffer format used to interpret elements in
+ :meth:`write_elements`.
+ """
+ self._buffer._set_format(format)
+ return self
+
@property
def format(self):
"""The ``struct`` format code of the underlying buffer"""
diff --git a/python/tests/test_c_buffer_view.py b/python/tests/test_c_buffer_view.py
index 6e1a4142..25973309 100644
--- a/python/tests/test_c_buffer_view.py
+++ b/python/tests/test_c_buffer_view.py
@@ -72,6 +72,7 @@ def test_buffer_view_bool_unpack():
unpacked_all = view.unpack_bits()
assert len(unpacked_all) == view.n_elements
assert unpacked_all.data_type == "uint8"
+ assert unpacked_all.format == "?"
assert list(unpacked_all) == [1, 0, 0, 1, 0, 0, 0, 0]
unpacked_some = view.unpack_bits(1, 4)