paleolimbot commented on code in PR #340:
URL: https://github.com/apache/arrow-nanoarrow/pull/340#discussion_r1447985898
##########
python/src/nanoarrow/_lib.pyx:
##########
@@ -890,50 +790,129 @@ cdef class BufferView:
self._element_size_bits = element_size_bits
self._strides = self._item_size()
self._shape = self._ptr.size_bytes // self._strides
+ self._format[0] = 0
+ self._populate_format()
+
+ def _addr(self):
+ return <uintptr_t>self._ptr.data.data
+ @property
+ def device_type(self):
+ return self._device.device_type
+
+ @property
+ def device_id(self):
+ return self._device.device_id
+
+ @property
+ def element_size_bits(self):
+ return self._element_size_bits
+
+ @property
+ def size_bytes(self):
+ return self._ptr.size_bytes
+
+ @property
+ def type(self):
+ if self._buffer_type == NANOARROW_BUFFER_TYPE_VALIDITY:
+ return "validity"
+ elif self._buffer_type == NANOARROW_BUFFER_TYPE_TYPE_ID:
+ return "type_id"
+ elif self._buffer_type == NANOARROW_BUFFER_TYPE_UNION_OFFSET:
+ return "union_offset"
+ elif self._buffer_type == NANOARROW_BUFFER_TYPE_DATA_OFFSET:
+ return "data_offset"
+ elif self._buffer_type == NANOARROW_BUFFER_TYPE_DATA:
+ return "data"
+
+ @property
+ def data_type(self):
+ return ArrowTypeString(self._buffer_data_type).decode("UTF-8")
+
+ @property
+ def format(self):
+ return self._format.decode("UTF-8")
+
+ @property
+ def item_size(self):
+ return self._strides
+
+ def __len__(self):
+ return self._shape
+
+ def __getitem__(self, int64_t i):
+ if i < 0 or i >= self._shape:
+ raise IndexError(f"Index {i} out of range")
+ cdef int64_t offset = self._strides * i
+ value = unpack_from(self.format, buffer=self, offset=offset)
+ if len(value) == 1:
+ return value[0]
+ else:
+ return value
+
+ def __iter__(self):
+ for value in iter_unpack(self.format, self):
+ if len(value) == 1:
+ yield value[0]
+ else:
+ yield value
cdef Py_ssize_t _item_size(self):
- if self._buffer_data_type == NANOARROW_TYPE_BOOL:
- return 1
- elif self._buffer_data_type == NANOARROW_TYPE_STRING:
- return 1
- elif self._buffer_data_type == NANOARROW_TYPE_BINARY:
+ if self._element_size_bits < 8:
return 1
else:
return self._element_size_bits // 8
- cdef const char* _get_format(self):
- if self._buffer_data_type == NANOARROW_TYPE_INT8:
- return "b"
+ cdef void _populate_format(self):
+ cdef const char* format_const = NULL
+ if self._element_size_bits == 0:
+ # Variable-size elements (e.g., data buffer for string or binary) export as
+ # one byte per element (character if string, unspecified binary otherwise)
+ if self._buffer_data_type == NANOARROW_TYPE_STRING:
+ format_const = "c"
+ else:
+ format_const = "B"
+ elif self._element_size_bits < 8:
+ # Bitmaps export as unspecified binary
+ format_const = "B"
+ elif self._buffer_data_type == NANOARROW_TYPE_INT8:
+ format_const = "b"
elif self._buffer_data_type == NANOARROW_TYPE_UINT8:
- return "B"
+ format_const = "B"
elif self._buffer_data_type == NANOARROW_TYPE_INT16:
- return "h"
+ format_const = "=h"
elif self._buffer_data_type == NANOARROW_TYPE_UINT16:
- return "H"
+ format_const = "=H"
elif self._buffer_data_type == NANOARROW_TYPE_INT32:
- return "i"
+ format_const = "=i"
elif self._buffer_data_type == NANOARROW_TYPE_UINT32:
- return "I"
+ format_const = "=I"
elif self._buffer_data_type == NANOARROW_TYPE_INT64:
- return "l"
+ format_const = "=q"
elif self._buffer_data_type == NANOARROW_TYPE_UINT64:
- return "L"
+ format_const = "=Q"
+ elif self._buffer_data_type == NANOARROW_TYPE_HALF_FLOAT:
+ format_const = "=e"
elif self._buffer_data_type == NANOARROW_TYPE_FLOAT:
- return "f"
+ format_const = "=f"
elif self._buffer_data_type == NANOARROW_TYPE_DOUBLE:
- return "d"
- elif self._buffer_data_type == NANOARROW_TYPE_STRING:
- return "c"
+ format_const = "=d"
+ elif self._buffer_data_type == NANOARROW_TYPE_INTERVAL_DAY_TIME:
+ format_const = "=ii"
+ elif self._buffer_data_type == NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
+ format_const = "=iiq"
+
+ if format_const != NULL:
+ snprintf(self._format, sizeof(self._format), "%s", format_const)
else:
- return "B"
+ snprintf(self._format, sizeof(self._format), "%ds", self._element_size_bits // 8)
Review Comment:
For fixed-size binary/decimal types, the format string has to be generated
dynamically (e.g., `10s`), so it's slightly easier to just always point at
`self._format` after doing this step.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]