paleolimbot commented on code in PR #378:
URL: https://github.com/apache/arrow-nanoarrow/pull/378#discussion_r1491498274
##########
python/src/nanoarrow/_lib.pyx:
##########
@@ -1177,11 +1429,395 @@ cdef class CBufferView:
buffer.strides = &self._strides
buffer.suboffsets = NULL
- def __releasebuffer__(self, Py_buffer *buffer):
+ cdef _do_releasebuffer(self, Py_buffer* buffer):
+ # No-op hook; CBuffer.__releasebuffer__ calls this on its cached view.
pass
def __repr__(self):
- return f"<nanoarrow.c_lib.CBufferView>\n
{_lib_utils.buffer_view_repr(self)[1:]}"
+ # Render as "CBufferView(...)" around the text from _lib_utils.buffer_view_repr()
+ return f"CBufferView({_lib_utils.buffer_view_repr(self)})"
+
+
+cdef class CBuffer:
+    """Wrapper around readable owned buffer content
+
+    Like the CBufferView, the CBuffer represents readable buffer content;
+    however, unlike the CBufferView, the CBuffer always represents a valid
+    ArrowBuffer C object.
+
+    A freshly constructed instance wraps no buffer until ``set_empty()`` or
+    ``set_pybuffer()`` is called; until then, methods that need the buffer
+    raise ``RuntimeError``.
+
+    Element access (``len()``, indexing, iteration and the buffer protocol)
+    is delegated to a cached CBufferView that is rebuilt on demand when the
+    buffer address, size or element type no longer match it. While exports
+    handed out by ``__getbuffer__`` are outstanding, the ``set_*()`` methods
+    raise ``RuntimeError``.
+    """
+    # Object returned by alloc_c_buffer() (presumably owns the memory behind _ptr)
+    cdef object _base
+    # The wrapped ArrowBuffer; NULL until set_empty()/set_pybuffer() is called
+    cdef ArrowBuffer* _ptr
+    # Arrow type of the elements and the width of one element in bits
+    cdef ArrowType _data_type
+    cdef int _element_size_bits
+    # NUL-terminated format string, written by set_format()/set_data_type()
+    cdef char _format[32]
+    cdef CDevice _device
+    # Cached view of the current content; see _refresh_view_if_needed()
+    cdef CBufferView _view
+    # Number of exports handed out by __getbuffer__ and not yet released
+    cdef int _get_buffer_count
+
+    def __cinit__(self):
+        self._base = None
+        self._ptr = NULL
+        self._data_type = NANOARROW_TYPE_BINARY
+        self._element_size_bits = 0
+        self._device = CDEVICE_CPU
+        self._format[0] = 0
+        self._get_buffer_count = 0
+        self._reset_view()
+
+    cdef _assert_valid(self):
+        # Raise RuntimeError if no ArrowBuffer has been attached yet.
+        if self._ptr == NULL:
+            raise RuntimeError("CBuffer is not valid")
+
+    cdef _assert_buffer_count_zero(self):
+        # Raise RuntimeError if any export handed out by __getbuffer__ has
+        # not been released yet. The message is a single string built only
+        # from the counter this class actually declares.
+        if self._get_buffer_count != 0:
+            raise RuntimeError(
+                f"CBuffer already open ({self._get_buffer_count} references)"
+            )
+
+    cdef _reset_view(self):
+        # Replace the cached view with an empty binary view on this device.
+        self._view = CBufferView(None, 0, 0, NANOARROW_TYPE_BINARY, 8, self._device)
+
+    cdef _populate_view(self):
+        # Rebuild the cached view from the current ArrowBuffer address/size
+        # and the current element type. Not allowed while exports are open.
+        self._assert_valid()
+        self._assert_buffer_count_zero()
+        self._view = CBufferView(
+            self._base, <uintptr_t>self._ptr.data,
+            self._ptr.size_bytes, self._data_type, self._element_size_bits,
+            self._device
+        )
+
+    cdef _refresh_view_if_needed(self):
+        # Keep the existing view while exports are outstanding.
+        if self._get_buffer_count > 0:
+            return
+
+        self._assert_valid()
+        # Rebuild only when the cached view no longer matches the buffer
+        # address, its size, or the element type/width.
+        cdef int addr_equal = self._ptr.data == self._view._ptr.data.as_uint8
+        cdef int size_equal = self._ptr.size_bytes == self._view._ptr.size_bytes
+        cdef int types_equal = self._data_type == self._view._data_type
+        cdef int element_size_equal = self._element_size_bits == self._view.element_size_bits
+        if addr_equal and size_equal and types_equal and element_size_equal:
+            return
+
+        self._populate_view()
+
+    def set_empty(self):
+        """Reset this object to an empty binary CPU buffer
+
+        Allocates the underlying ArrowBuffer if there is none yet, resets it
+        with ``ArrowBufferReset()`` and returns ``self``. Raises
+        ``RuntimeError`` if buffer exports are outstanding.
+        """
+        self._assert_buffer_count_zero()
+        if self._ptr == NULL:
+            self._base = alloc_c_buffer(&self._ptr)
+        ArrowBufferReset(self._ptr)
+
+        self._data_type = NANOARROW_TYPE_BINARY
+        self._element_size_bits = 0
+        self._device = CDEVICE_CPU
+        self._reset_view()
+        return self
+
+    def set_pybuffer(self, obj):
+        """Populate this buffer from ``obj`` via ``c_buffer_set_pybuffer()``
+
+        The value returned by ``c_buffer_set_pybuffer()`` is passed to
+        ``set_format()`` to set the element type. Returns ``self``. Raises
+        ``RuntimeError`` if buffer exports are outstanding.
+        """
+        self._assert_buffer_count_zero()
+        if self._ptr == NULL:
+            self._base = alloc_c_buffer(&self._ptr)
+
+        self.set_format(c_buffer_set_pybuffer(obj, &self._ptr))
+        self._device = CDEVICE_CPU
+        self._reset_view()
+        return self
+
+    def set_format(self, str format):
+        """Set the element type from a format string
+
+        The data type and element size (in bytes) are obtained from
+        ``c_arrow_type_from_format()``; the UTF-8 encoded format is copied
+        into the fixed-size internal format field. Returns ``self``. Raises
+        ``RuntimeError`` if buffer exports are outstanding.
+        """
+        self._assert_buffer_count_zero()
+        element_size_bytes, data_type = c_arrow_type_from_format(format)
+        self._data_type = data_type
+        self._element_size_bits = element_size_bytes * 8
+        format_bytes = format.encode("UTF-8")
+        # snprintf() bounds the copy to the size of the internal field
+        snprintf(self._format, sizeof(self._format), "%s", <const char*>format_bytes)
+        return self
+
+    def set_data_type(self, ArrowType type_id, int element_size_bits=0):
+        """Set the element type from an Arrow type identifier
+
+        The element size in bits is taken from the return value of
+        ``c_format_from_arrow_type()``, which is also handed the internal
+        format field (presumably to fill it in). Returns ``self``. Raises
+        ``RuntimeError`` if buffer exports are outstanding.
+        """
+        self._assert_buffer_count_zero()
+        self._element_size_bits = c_format_from_arrow_type(
+            type_id,
+            element_size_bits,
+            sizeof(self._format),
+            self._format
+        )
+        self._data_type = type_id
+
+        return self
+
+    def _addr(self):
+        """Address of the underlying ArrowBuffer data as an integer"""
+        self._assert_valid()
+        return <uintptr_t>self._ptr.data
+
+    @property
+    def size_bytes(self):
+        """The ``size_bytes`` field of the underlying ArrowBuffer"""
+        self._assert_valid()
+        return self._ptr.size_bytes
+
+    @property
+    def capacity_bytes(self):
+        """The ``capacity_bytes`` field of the underlying ArrowBuffer"""
+        self._assert_valid()
+        return self._ptr.capacity_bytes
+
+    @property
+    def data_type(self):
+        """The element type as the string given by ``ArrowTypeString()``"""
+        return ArrowTypeString(self._data_type).decode("UTF-8")
+
+    @property
+    def data_type_id(self):
+        """The element type as an ArrowType identifier"""
+        return self._data_type
+
+    @property
+    def element_size_bits(self):
+        """The element size in bits as currently configured"""
+        return self._element_size_bits
+
+    @property
+    def item_size(self):
+        """The ``item_size`` reported by the (refreshed) cached view"""
+        self._refresh_view_if_needed()
+        return self._view.item_size
+
+    @property
+    def format(self):
+        """The internal format field decoded as a ``str``"""
+        return self._format.decode("UTF-8")
+
+    def __len__(self):
+        self._refresh_view_if_needed()
+        return len(self._view)
+
+    def __getitem__(self, k):
+        self._refresh_view_if_needed()
+        return self._view[k]
+
+    def __iter__(self):
+        self._refresh_view_if_needed()
+        return iter(self._view)
+
+    def __getbuffer__(self, Py_buffer* buffer, int flags):
+        # Export through the cached view and count the export so that
+        # modifying methods are refused until it is released.
+        self._refresh_view_if_needed()
+        self._view._do_getbuffer(buffer, flags)
+        self._get_buffer_count += 1
+
+    def __releasebuffer__(self, Py_buffer* buffer):
+        if self._get_buffer_count <= 0:
+            raise RuntimeError("CBuffer buffer reference count underflow (releasebuffer)")
+
+        self._view._do_releasebuffer(buffer)
+        self._get_buffer_count -= 1
+
+    def __repr__(self):
+        if self._ptr == NULL:
+            return "CBuffer(<invalid>)"
+
+        self._refresh_view_if_needed()
+        return f"CBuffer({_lib_utils.buffer_view_repr(self._view)})"
+
+
+cdef class CBufferBuilder(CBuffer):
+    """Wrapper around writable owned buffer CPU content
+
+    Adds methods that grow the underlying ArrowBuffer. Methods that append
+    to or reserve space in the buffer raise ``RuntimeError`` while exports
+    handed out through the buffer protocol are outstanding.
+    """
+
+    def reserve_bytes(self, int64_t additional_bytes):
+        """Call ``ArrowBufferReserve()`` for ``additional_bytes`` more bytes
+
+        Returns ``self``. A non-OK return code is reported through
+        ``Error.raise_error_not_ok()``.
+        """
+        self._assert_valid()
+        self._assert_buffer_count_zero()
+        cdef int code = ArrowBufferReserve(self._ptr, additional_bytes)
+        Error.raise_error_not_ok("ArrowBufferReserve()", code)
+        return self
+
+    def write(self, content):
+        """Append the bytes of a buffer-protocol object to this buffer
+
+        ``content`` is acquired with ``PyBUF_ANY_CONTIGUOUS``, space for
+        ``buffer.len`` bytes is reserved, and the bytes are copied in
+        C order to the end of the current content. Returns the number of
+        bytes written.
+        """
+        self._assert_valid()
+        self._assert_buffer_count_zero()
+
+        cdef Py_buffer buffer
+        cdef int64_t out
+        PyObject_GetBuffer(content, &buffer, PyBUF_ANY_CONTIGUOUS)
+
+        cdef int code = ArrowBufferReserve(self._ptr, buffer.len)
+        if code != NANOARROW_OK:
+            # Release the acquired buffer before reporting the failure
+            PyBuffer_Release(&buffer)
+            Error.raise_error("ArrowBufferReserve()", code)
+
+        code = PyBuffer_ToContiguous(
+            self._ptr.data + self._ptr.size_bytes,
+            &buffer,
+            buffer.len,
+            # C (row-major) order; c'C' is a Cython C char literal. The
+            # previous value 43 is ASCII '+', not 'C' (67 / 0x43).
+            c'C'
+        )
+        # Remember the length before the Py_buffer is released
+        out = buffer.len
+        PyBuffer_Release(&buffer)
+        Error.raise_error_not_ok("PyBuffer_ToContiguous()", code)
+
+        self._ptr.size_bytes += out
+        return out
+
+    def write_values(self, obj):
+        """Append each item of an iterable, packed using the current format
+
+        For the boolean type, items are packed as bits (see
+        ``_write_bits()``). Otherwise each item is packed with
+        ``Struct(self._format)`` and appended with ``write()``; items of the
+        two interval types listed below are unpacked into multiple
+        ``pack()`` arguments. Returns the number of items consumed.
+        """
+        self._assert_valid()
+
+        if self._data_type == NANOARROW_TYPE_BOOL:
+            return self._write_bits(obj)
+
+        cdef int64_t n_values = 0
+        struct_obj = Struct(self._format)
+        # Bind the bound methods once, outside the per-item loops
+        pack = struct_obj.pack
+        write = self.write
+
+        if self._data_type in (NANOARROW_TYPE_INTERVAL_DAY_TIME,
+                               NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO):
+            for item in obj:
+                n_values += 1
+                write(pack(*item))
+        else:
+            for item in obj:
+                n_values += 1
+                write(pack(item))
+
+        return n_values
+
+    cdef _write_bits(self, obj):
+        # Pack the truthiness of each item into bytes, least-significant bit
+        # first, appending one byte per eight items (plus a final partial
+        # byte). Only supported when the buffer is currently empty.
+        #
+        # Like write() and reserve_bytes(), refuse to grow the buffer while
+        # exports handed out by __getbuffer__ are outstanding.
+        self._assert_buffer_count_zero()
+
+        if self._ptr.size_bytes != 0:
+            raise NotImplementedError("Append to bitmap that has already been appended to")
+
+        cdef char buffer_item = 0
+        cdef int buffer_item_i = 0
+        cdef int code
+        cdef int64_t n_values = 0
+        for item in obj:
+            n_values += 1
+            if item:
+                buffer_item |= (<char>1 << buffer_item_i)
+
+            buffer_item_i += 1
+            if buffer_item_i == 8:
+                # A full byte has been accumulated: flush it and start over
+                code = ArrowBufferAppendInt8(self._ptr, buffer_item)
+                Error.raise_error_not_ok("ArrowBufferAppendInt8()", code)
+                buffer_item = 0
+                buffer_item_i = 0
+
+        # Flush the trailing partial byte, if any
+        if buffer_item_i != 0:
+            code = ArrowBufferAppendInt8(self._ptr, buffer_item)
+            Error.raise_error_not_ok("ArrowBufferAppendInt8()", code)
+
+        return n_values
+
+    def finish(self):
+        """Return this builder itself
+
+        NOTE(review): the returned object is the builder, so it shares all
+        state with it; later writes through the builder remain visible.
+        """
+        return self
Review Comment:
Good call! I fixed this: `finish()` now returns a completely independent `CBuffer`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]