joellubi commented on code in PR #43488: URL: https://github.com/apache/arrow/pull/43488#discussion_r1706080065
########## python/pyarrow/array.pxi: ########## @@ -4447,6 +4447,69 @@ cdef class FixedShapeTensorArray(ExtensionArray): FixedSizeListArray.from_arrays(values, shape[1:].prod()) ) +cdef class Bool8Array(ExtensionArray): + """ + Concrete class for bool8 extension arrays. + Examples + -------- + Define the extension type for an bool8 array + >>> import pyarrow as pa + >>> bool8_type = pa.bool8() + Create an extension array + >>> arr = [-1, 0, 1, 2, None] + >>> storage = pa.array(arr, pa.int8()) + >>> pa.ExtensionArray.from_storage(bool8_type, storage) + <pyarrow.lib.Bool8Array object at ...> + [ + -1, + 0, + 1, + 2, + null + ] + """ + + def to_numpy(self, zero_copy_only=True, writable=False): + try: + return self.storage.to_numpy().view(np.bool_) + except ArrowInvalid as e: + if zero_copy_only: + raise e + + return _pc().not_equal(self.storage, 0).to_numpy(zero_copy_only=zero_copy_only, writable=writable) + + @staticmethod + def from_numpy(obj): + """ + Convert numpy array to a bool8 extension array without making a copy. + The input array must be 1-dimensional, with either bool_ or int8 dtype. + + Parameters + ---------- + obj : numpy.ndarray + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> arr = np.array([True, False, True], dtype=np.bool_) + >>> pa.Bool8Array.from_numpy(arr) + <pyarrow.lib.Bool8Array object at ...> + [ + 1, + 0, + 1 + ] + """ + + if obj.ndim != 1: + raise ValueError(f"Cannot convert {obj.ndim}-D array to bool8 array") + + if obj.dtype not in [np.bool_, np.int8]: + raise TypeError(f"Array dtype {obj.dtype} incompatible with bool8 storage") + + buf = foreign_buffer(obj.ctypes.data, obj.size) + return Array.from_buffers(bool8(), obj.size, [None, buf]) Review Comment: Actually now that I think about it I don't think a casting kernel is what's needed in this specific scenario since that goes between Arrow types and we're not trying to convert Arrow Boolean to Arrow Int8. 
I think what we need is to reinterpret the NumPy bool array as a NumPy int8 array, then continue the same way as above for the int8 Arrow array. I'll give that a try now. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org