paleolimbot commented on code in PR #117:
URL: https://github.com/apache/arrow-nanoarrow/pull/117#discussion_r1229775507


##########
python/nanoarrow/_lib.pyx:
##########
@@ -0,0 +1,869 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: language_level = 3
+# cython: linetrace=True
+
+"""Low-level nanoarrow Python bindings
+
+This Cython extension provides low-level Python wrappers around the
+Arrow C Data and Arrow C Stream interface structs. In general, there
+is one wrapper per C struct and pointer validity is managed by keeping
+strong references to Python objects. These wrappers are intended to
+be literal and stay close to the structure definitions.
+"""
+
+from libc.stdint cimport uintptr_t, int64_t
+from cpython.mem cimport PyMem_Malloc, PyMem_Free
+from cpython.bytes cimport PyBytes_FromStringAndSize
+from cpython cimport Py_buffer
+from nanoarrow_c cimport *
+
+def c_version():
+    """Return the nanoarrow C library version as a Python string
+
+    The value reported by ArrowNanoarrowVersion() is decoded as UTF-8.
+    """
+    cdef const char* version = ArrowNanoarrowVersion()
+    return version.decode("UTF-8")
+
+cdef class SchemaHolder:
+    """Memory holder for an ArrowSchema
+
+    The ArrowSchema struct is stored inline in this object, which is
+    responsible for its lifecycle. The release callback starts out as
+    NULL; when this object is deallocated, a non-NULL release callback
+    is invoked on the struct.
+    """
+    cdef ArrowSchema c_schema
+
+    def __cinit__(self):
+        # NULL release callback: there is nothing to release yet
+        self.c_schema.release = NULL
+
+    def __dealloc__(self):
+        if self.c_schema.release == NULL:
+            return
+        self.c_schema.release(&self.c_schema)
+
+    def _addr(self):
+        """Return the address of the held ArrowSchema as an integer
+        """
+        cdef ArrowSchema* ptr = &self.c_schema
+        return <uintptr_t>ptr
+
+cdef class ArrayHolder:
+    """Memory holder for an ArrowArray
+
+    The ArrowArray struct is stored inline in this object, which is
+    responsible for its lifecycle. The release callback starts out as
+    NULL; when this object is deallocated, a non-NULL release callback
+    is invoked on the struct.
+    """
+    cdef ArrowArray c_array
+
+    def __cinit__(self):
+        # NULL release callback: there is nothing to release yet
+        self.c_array.release = NULL
+
+    def __dealloc__(self):
+        if self.c_array.release == NULL:
+            return
+        self.c_array.release(&self.c_array)
+
+    def _addr(self):
+        """Return the address of the held ArrowArray as an integer
+        """
+        cdef ArrowArray* ptr = &self.c_array
+        return <uintptr_t>ptr
+
+cdef class ArrayStreamHolder:
+    """Memory holder for an ArrowArrayStream
+
+    The ArrowArrayStream struct is stored inline in this object, which
+    is responsible for its lifecycle. The release callback starts out as
+    NULL; when this object is deallocated, a non-NULL release callback
+    is invoked on the struct.
+    """
+    cdef ArrowArrayStream c_array_stream
+
+    def __cinit__(self):
+        # NULL release callback: there is nothing to release yet
+        self.c_array_stream.release = NULL
+
+    def __dealloc__(self):
+        if self.c_array_stream.release == NULL:
+            return
+        self.c_array_stream.release(&self.c_array_stream)
+
+    def _addr(self):
+        """Return the address of the held ArrowArrayStream as an integer
+        """
+        cdef ArrowArrayStream* ptr = &self.c_array_stream
+        return <uintptr_t>ptr
+
+cdef class ArrayViewHolder:
+    """Memory holder for an ArrowArrayView
+
+    This class is responsible for the lifecycle of the ArrowArrayView
+    that it contains. When this object is deleted,
+    ArrowArrayViewReset() is called on the contents.
+    """
+    cdef ArrowArrayView c_array_view
+
+    def __cinit__(self):
+        # Initialize in __cinit__ rather than __init__: __dealloc__ always
+        # calls ArrowArrayViewReset(), so the struct must be initialized
+        # even when __init__ is never run (e.g., a subclass that overrides
+        # __init__ without calling the parent implementation).
+        ArrowArrayViewInitFromType(&self.c_array_view, NANOARROW_TYPE_UNINITIALIZED)
+
+    def __dealloc__(self):
+        ArrowArrayViewReset(&self.c_array_view)
+
+    def _addr(self):
+        """Return the address of the held ArrowArrayView as an integer
+        """
+        return <uintptr_t>&self.c_array_view
+
+
+class NanoarrowException(RuntimeError):
+    """An error resulting from a call to the nanoarrow C library
+
+    Calls to the nanoarrow C library and/or the Arrow C Stream interface
+    callbacks return an errno error code and sometimes a message with extra
+    detail. This exception subclasses RuntimeError to format a suitable
+    message and store the components of the original error.
+
+    Parameters
+    ----------
+    what
+        Description of the call that failed (e.g., the C function name).
+        Stored as ``.what``.
+    code
+        The error code returned by the call. Stored as ``.code``.
+    message
+        Extra detail, or ``""`` if there is none, in which case the
+        formatted message omits the detail suffix. Stored as ``.message``.
+    """
+
+    def __init__(self, what, code, message):
+        self.what = what
+        self.code = code
+        self.message = message
+
+        if self.message == "":
+            super().__init__(f"{self.what} failed ({self.code})")
+        else:
+            super().__init__(f"{self.what} failed ({self.code}): {self.message}")
+
+
+cdef class Error:
+    """Memory holder for an ArrowError
+
+    ArrowError is the C struct that is optionally passed to nanoarrow
+    functions when a detailed error message might be returned. This class
+    stores the struct inline and offers helpers that raise a
+    NanoarrowException, with or without the message held in the struct.
+    """
+    cdef ArrowError c_error
+
+    def __cinit__(self):
+        # Start with an empty (zero-length) C string as the message
+        self.c_error.message[0] = 0
+
+    def raise_message(self, what, code):
+        """Raise a NanoarrowException carrying the message held by this object
+        """
+        detail = self.c_error.message.decode("UTF-8")
+        raise NanoarrowException(what, code, detail)
+
+    @staticmethod
+    def raise_error(what, code):
+        """Raise a NanoarrowException that has no detail message
+        """
+        no_detail = ""
+        raise NanoarrowException(what, code, no_detail)
+
+
+cdef class Schema:
+    """ArrowSchema wrapper
+
+    This class provides a user-facing interface to access the fields of
+    an ArrowSchema as defined in the Arrow C Data interface. These objects
+    are usually created using `nanoarrow.schema()`. This Python wrapper
+    allows access to schema fields but does not automatically deserialize
+    their content: use `.view()` to validate and deserialize the content
+    into a more easily inspectable object.
+
+    Examples
+    --------
+
+    >>> import pyarrow as pa
+    >>> import nanoarrow as na
+    >>> schema = na.schema(pa.int32())
+    >>> schema.is_valid()
+    True
+    >>> schema.format
+    'i'
+    >>> schema.name
+    ''
+    >>> schema_view = schema.view()
+    >>> schema_view.type
+    'int32'
+    """
+    # Strong reference to the object that keeps the memory at _ptr alive
+    cdef object _base
+    cdef ArrowSchema* _ptr
+
+    @staticmethod
+    def allocate():
+        """Create a Schema backed by a freshly created SchemaHolder
+        """
+        base = SchemaHolder()
+        return Schema(base, base._addr())
+
+    def __init__(self, object base, uintptr_t addr):
+        """Wrap the ArrowSchema at address `addr`, holding a reference to `base`
+        """
+        # No trailing comma here: a trailing comma would store a 1-tuple
+        # containing base instead of base itself.
+        self._base = base
+        self._ptr = <ArrowSchema*>addr
+
+    def _addr(self):
+        """Return the address of the wrapped ArrowSchema as an integer
+        """
+        return <uintptr_t>self._ptr
+
+    def is_valid(self):
+        """Return True if the pointer and its release callback are non-NULL
+        """
+        return self._ptr != NULL and self._ptr.release != NULL
+
+    def _assert_valid(self):
+        """Raise RuntimeError if the pointer or its release callback is NULL
+        """
+        if self._ptr == NULL:
+            raise RuntimeError("schema is NULL")
+        if self._ptr.release == NULL:
+            raise RuntimeError("schema is released")
+
+    def __repr__(self):
+        # First call with a NULL output computes the number of characters
+        # required; the second call fills the allocated buffer.
+        cdef int64_t n_chars = ArrowSchemaToString(self._ptr, NULL, 0, True)
+        cdef char* out = <char*>PyMem_Malloc(n_chars + 1)
+        if not out:
+            raise MemoryError()
+
+        # Free the buffer even if decoding raises
+        try:
+            ArrowSchemaToString(self._ptr, out, n_chars + 1, True)
+            return out.decode("UTF-8")
+        finally:
+            PyMem_Free(out)
+
+    @property
+    def format(self):
+        """The format string decoded as UTF-8, or None if it is NULL
+        """
+        self._assert_valid()
+        if self._ptr.format != NULL:
+            return self._ptr.format.decode("UTF-8")
+        else:
+            return None
+
+    @property
+    def name(self):
+        """The name decoded as UTF-8, or None if it is NULL
+        """
+        self._assert_valid()
+        if self._ptr.name != NULL:
+            return self._ptr.name.decode("UTF-8")
+        else:
+            return None
+
+    @property
+    def flags(self):
+        """The value of the flags field
+        """
+        # Validate first (like the other properties) so that a NULL or
+        # released schema raises instead of being dereferenced.
+        self._assert_valid()
+        return self._ptr.flags
+
+    @property
+    def metadata(self):
+        """A SchemaMetadata wrapper for the metadata field, or None if NULL
+        """
+        self._assert_valid()
+        if self._ptr.metadata != NULL:
+            return SchemaMetadata(self, <uintptr_t>self._ptr.metadata)
+        else:
+            return None
+
+    @property
+    def children(self):
+        """A SchemaChildren wrapper around this schema
+        """
+        self._assert_valid()
+        return SchemaChildren(self)
+
+    @property
+    def dictionary(self):
+        """A Schema wrapping the dictionary field, or None if it is NULL
+
+        The returned Schema holds a reference to this object as its base.
+        """
+        self._assert_valid()
+        if self._ptr.dictionary != NULL:
+            return Schema(self, <uintptr_t>self._ptr.dictionary)
+        else:
+            return None
+
+    def view(self):
+        """Initialize and return a SchemaView for this schema
+
+        Raises a NanoarrowException (via Error.raise_message()) if
+        ArrowSchemaViewInit() does not return NANOARROW_OK.
+        """
+        self._assert_valid()
+        schema_view = SchemaView()
+        cdef Error error = Error()
+        cdef int result = ArrowSchemaViewInit(&schema_view._schema_view, self._ptr, &error.c_error)
+        if result != NANOARROW_OK:
+            error.raise_message("ArrowSchemaViewInit()", result)
+
+        return schema_view
+
+
+cdef class SchemaView:
+    """ArrowSchemaView wrapper
+
+    The ArrowSchemaView is a nanoarrow C library structure that facilitates
+    access to the deserialized content of an ArrowSchema (e.g., parameter
+    values for parameterized types). This wrapper extends that facility to
+    Python. Properties that only apply to some types return None when the
+    view's type is not one of them.
+
+    Examples
+    --------
+
+    >>> import pyarrow as pa
+    >>> import nanoarrow as na
+    >>> schema = na.schema(pa.decimal128(10, 3))
+    >>> schema_view = schema.view()
+    >>> schema_view.type
+    'decimal128'
+    >>> schema_view.decimal_bitwidth
+    128
+    >>> schema_view.decimal_precision
+    10
+    >>> schema_view.decimal_scale
+    3
+    """
+    cdef ArrowSchemaView _schema_view
+
+    # Types for which .fixed_size is reported
+    _fixed_size_types = (
+        NANOARROW_TYPE_FIXED_SIZE_LIST,
+        NANOARROW_TYPE_FIXED_SIZE_BINARY
+    )
+
+    # Types for which the .decimal_* properties are reported
+    _decimal_types = (
+        NANOARROW_TYPE_DECIMAL128,
+        NANOARROW_TYPE_DECIMAL256
+    )
+
+    # Types for which .time_unit is reported
+    _time_unit_types = (
+        NANOARROW_TYPE_TIME32,
+        NANOARROW_TYPE_TIME64,
+        NANOARROW_TYPE_DURATION,
+        NANOARROW_TYPE_TIMESTAMP
+    )
+
+    # Types for which .union_type_ids is reported
+    _union_types = (
+        NANOARROW_TYPE_DENSE_UNION,
+        NANOARROW_TYPE_SPARSE_UNION
+    )
+
+    def __cinit__(self):
+        # Use __cinit__ (like the holder classes in this module) so the
+        # type fields are always set, even if __init__ is never run.
+        self._schema_view.type = NANOARROW_TYPE_UNINITIALIZED
+        self._schema_view.storage_type = NANOARROW_TYPE_UNINITIALIZED
+
+    @property
+    def type(self):
+        """The type name from ArrowTypeString(), or None if it returns NULL
+        """
+        cdef const char* type_str = ArrowTypeString(self._schema_view.type)
+        if type_str != NULL:
+            return type_str.decode('UTF-8')
+
+    @property
+    def storage_type(self):
+        """The storage type name from ArrowTypeString(), or None if NULL
+        """
+        cdef const char* type_str = ArrowTypeString(self._schema_view.storage_type)
+        if type_str != NULL:
+            return type_str.decode('UTF-8')
+
+    @property
+    def fixed_size(self):
+        if self._schema_view.type in SchemaView._fixed_size_types:
+            return self._schema_view.fixed_size
+
+    @property
+    def decimal_bitwidth(self):
+        if self._schema_view.type in SchemaView._decimal_types:
+            return self._schema_view.decimal_bitwidth
+
+    @property
+    def decimal_precision(self):
+        if self._schema_view.type in SchemaView._decimal_types:
+            return self._schema_view.decimal_precision
+
+    @property
+    def decimal_scale(self):
+        if self._schema_view.type in SchemaView._decimal_types:
+            return self._schema_view.decimal_scale
+
+    @property
+    def time_unit(self):
+        if self._schema_view.type in SchemaView._time_unit_types:
+            return ArrowTimeUnitString(self._schema_view.time_unit).decode('UTF-8')
+
+    @property
+    def timezone(self):
+        if self._schema_view.type == NANOARROW_TYPE_TIMESTAMP:
+            return self._schema_view.timezone.decode('UTF-8')
+
+    @property
+    def union_type_ids(self):
+        """Union type ids parsed from the comma-separated type id string
+
+        Note that the result is a generator expression, so it can only be
+        iterated over once; wrap it in tuple() or list() to keep the values.
+        """
+        if self._schema_view.type in SchemaView._union_types:
+            type_ids_str = self._schema_view.union_type_ids.decode('UTF-8').split(',')
+            return (int(type_id) for type_id in type_ids_str)
+
+    @property
+    def extension_name(self):
+        """The extension name decoded as UTF-8, or None if its data is NULL
+        """
+        if self._schema_view.extension_name.data != NULL:
+            name_bytes = PyBytes_FromStringAndSize(
+                self._schema_view.extension_name.data,
+                self._schema_view.extension_name.size_bytes
+            )
+            return name_bytes.decode('UTF-8')
+
+    @property
+    def extension_metadata(self):
+        """The extension metadata as bytes, or None if its data is NULL
+        """
+        # Guard on the metadata pointer itself (not on extension_name) so
+        # that the pointer that is read is the one that was checked.
+        if self._schema_view.extension_metadata.data != NULL:
+            return PyBytes_FromStringAndSize(
+                self._schema_view.extension_metadata.data,
+                self._schema_view.extension_metadata.size_bytes
+            )
+
+cdef class Array:
+    """ArrowArray wrapper
+
+    This class provides a user-facing interface to access the fields of
+    an ArrowArray as defined in the Arrow C Data interface, holding an
+    optional reference to a Schema that can be used to safely deserialize
+    the content. These objects are usually created using `nanoarrow.array()`.
+    This Python wrapper allows access to array fields but does not
+    automatically deserialize their content: use `.view()` to validate and
+    deserialize the content into a more easily inspectable object.
+
+    Examples
+    --------
+
+    >>> import pyarrow as pa
+    >>> import numpy as np
+    >>> import nanoarrow as na
+    >>> array = na.array(pa.array(["one", "two", "three", None]))
+    >>> array.length
+    4
+    >>> array.null_count
+    1
+    >>> array_view = array.view()
+    """
+    # Strong reference to the object that keeps the memory at _ptr alive
+    cdef object _base
+    cdef ArrowArray* _ptr
+    cdef Schema _schema
+
+    @staticmethod
+    def allocate(Schema schema):
+        """Create an Array backed by a freshly created ArrayHolder
+        """
+        base = ArrayHolder()
+        return Array(base, base._addr(), schema)
+
+    def __init__(self, object base, uintptr_t addr, Schema schema):
+        """Wrap the ArrowArray at address `addr`, holding references to
+        `base` and `schema`
+        """
+        # No trailing comma here: a trailing comma would store a 1-tuple
+        # containing base instead of base itself.
+        self._base = base
+        self._ptr = <ArrowArray*>addr
+        self._schema = schema
+
+    def _addr(self):
+        """Return the address of the wrapped ArrowArray as an integer
+        """
+        return <uintptr_t>self._ptr
+
+    def is_valid(self):
+        """Return True if the pointer and its release callback are non-NULL
+        """
+        return self._ptr != NULL and self._ptr.release != NULL
+
+    def _assert_valid(self):
+        """Raise RuntimeError if the pointer or its release callback is NULL
+        """
+        if self._ptr == NULL:
+            raise RuntimeError("Array is NULL")
+        if self._ptr.release == NULL:
+            raise RuntimeError("Array is released")
+
+    @property
+    def schema(self):
+        return self._schema
+
+    @property
+    def length(self):
+        self._assert_valid()
+        return self._ptr.length
+
+    @property
+    def offset(self):
+        self._assert_valid()
+        return self._ptr.offset
+
+    @property
+    def null_count(self):
+        # Validate first (like length and offset) so that a NULL or
+        # released array raises instead of being dereferenced.
+        self._assert_valid()
+        return self._ptr.null_count
+
+    @property
+    def buffers(self):
+        """The buffer pointers of the ArrowArray as a tuple of integer addresses
+        """
+        # Validate first so that a NULL or released array raises instead
+        # of being dereferenced.
+        self._assert_valid()
+        return tuple(<uintptr_t>self._ptr.buffers[i] for i in range(self._ptr.n_buffers))

Review Comment:
   Possibly just to stay philosophically aligned with what the `ArrowArray`
actually provides. If you're trying to debug something, this would tell you
whether one of your pointers is NULL; to get the `view().buffers`, the array
would have to be validated against a `Schema`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to