WillAyd commented on code in PR #366:
URL: https://github.com/apache/arrow-nanoarrow/pull/366#discussion_r1462030981
##########
python/src/nanoarrow/_lib.pyx:
##########
@@ -220,6 +220,64 @@ cdef class Error:
"""
raise NanoarrowException(what, code, "")
+cdef class CArrowType:
+ """
+ Wrapper around ArrowType to provide implementations in Python access
+ to the values.
+ """
+
+ UNINITIALIZED = NANOARROW_TYPE_UNINITIALIZED
+ NA = NANOARROW_TYPE_NA
+ BOOL = NANOARROW_TYPE_BOOL
+ UINT8 = NANOARROW_TYPE_UINT8
+ INT8 = NANOARROW_TYPE_INT8
+ UINT16 = NANOARROW_TYPE_UINT16
+ INT16 = NANOARROW_TYPE_INT16
+ UINT32 = NANOARROW_TYPE_UINT32
+ INT32 = NANOARROW_TYPE_INT32
+ UINT64 = NANOARROW_TYPE_UINT64
+ INT64 = NANOARROW_TYPE_INT64
+ HALF_FLOAT = NANOARROW_TYPE_HALF_FLOAT
+ FLOAT = NANOARROW_TYPE_FLOAT
+ DOUBLE = NANOARROW_TYPE_DOUBLE
+ STRING = NANOARROW_TYPE_STRING
+ BINARY = NANOARROW_TYPE_BINARY
+ FIXED_SIZE_BINARY = NANOARROW_TYPE_FIXED_SIZE_BINARY
+ DATE32 = NANOARROW_TYPE_DATE32
+ DATE64 = NANOARROW_TYPE_DATE64
+ TIMESTAMP = NANOARROW_TYPE_TIMESTAMP
+ TIME32 = NANOARROW_TYPE_TIME32
+ TIME64 = NANOARROW_TYPE_TIME64
+ INTERVAL_MONTHS = NANOARROW_TYPE_INTERVAL_MONTHS
+ INTERVAL_DAY_TIME = NANOARROW_TYPE_INTERVAL_DAY_TIME
+ DECIMAL128 = NANOARROW_TYPE_DECIMAL128
+ DECIMAL256 = NANOARROW_TYPE_DECIMAL256
+ LIST = NANOARROW_TYPE_LIST
+ STRUCT = NANOARROW_TYPE_STRUCT
+ SPARSE_UNION = NANOARROW_TYPE_SPARSE_UNION
+ DENSE_UNION = NANOARROW_TYPE_DENSE_UNION
+ DICTIONARY = NANOARROW_TYPE_DICTIONARY
+ MAP = NANOARROW_TYPE_MAP
+ EXTENSION = NANOARROW_TYPE_EXTENSION
+ FIXED_SIZE_LIST = NANOARROW_TYPE_FIXED_SIZE_LIST
+ DURATION = NANOARROW_TYPE_DURATION
+ LARGE_STRING = NANOARROW_TYPE_LARGE_STRING
+ LARGE_BINARY = NANOARROW_TYPE_LARGE_BINARY
+ LARGE_LIST = NANOARROW_TYPE_LARGE_LIST
+ INTERVAL_MONTH_DAY_NANO = NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
+
+
+cdef class CArrowTimeUnit:
Review Comment:
I think this would fit more naturally if you just declared a `cpdef enum`
The Cython docs has an example of this:
https://cython.readthedocs.io/en/latest/src/userguide/language_basics.html#structs-unions-enums
##########
python/src/nanoarrow/_lib.pyx:
##########
@@ -478,6 +593,130 @@ cdef class CSchemaView:
def __repr__(self):
return _lib_utils.schema_view_repr(self)
+
+cdef class CSchemaBuilder:
+ cdef CSchema c_schema
+ cdef ArrowSchema* _ptr
+
+ def __cinit__(self, CSchema schema):
+ self.c_schema = schema
+ self._ptr = schema._ptr
+ if self._ptr.release == NULL:
+ ArrowSchemaInit(self._ptr)
+
+ @staticmethod
+ def allocate():
+ return CSchemaBuilder(CSchema.allocate())
+
+ def child(self, int64_t i):
+ return CSchemaBuilder(self.c_schema.child(i))
+
+ def set_type(self, int type):
+ self.c_schema._assert_valid()
+
+ cdef int result = ArrowSchemaSetType(self._ptr, <ArrowType>type)
+ if result != NANOARROW_OK:
+ Error.raise_error("ArrowSchemaSetType()", result)
+
+ return self
+
+ def set_type_decimal(self, int type, int precision, int scale):
+ self.c_schema._assert_valid()
+
+ cdef int result = ArrowSchemaSetTypeDecimal(self._ptr,
<ArrowType>type, precision, scale)
Review Comment:
Do you actually need the casts here for the `type` argument?
##########
python/src/nanoarrow/_lib.pyx:
##########
@@ -376,6 +458,10 @@ cdef class CSchemaView:
# lifetime guarantees that the pointed-to data from ArrowStringViews
remains valid
cdef object _base
cdef ArrowSchemaView _schema_view
+ # Not part of the ArrowSchemaView (but possibly should be)
+ cdef int _dictionary_ordered
Review Comment:
You could declare these as `bool` instead of `int`
##########
python/src/nanoarrow/_lib.pyx:
##########
@@ -478,6 +593,130 @@ cdef class CSchemaView:
def __repr__(self):
return _lib_utils.schema_view_repr(self)
+
+cdef class CSchemaBuilder:
+ cdef CSchema c_schema
+ cdef ArrowSchema* _ptr
+
+ def __cinit__(self, CSchema schema):
+ self.c_schema = schema
+ self._ptr = schema._ptr
+ if self._ptr.release == NULL:
+ ArrowSchemaInit(self._ptr)
+
+ @staticmethod
+ def allocate():
+ return CSchemaBuilder(CSchema.allocate())
+
+ def child(self, int64_t i):
+ return CSchemaBuilder(self.c_schema.child(i))
+
+ def set_type(self, int type):
+ self.c_schema._assert_valid()
+
+ cdef int result = ArrowSchemaSetType(self._ptr, <ArrowType>type)
+ if result != NANOARROW_OK:
+ Error.raise_error("ArrowSchemaSetType()", result)
+
+ return self
+
+ def set_type_decimal(self, int type, int precision, int scale):
Review Comment:
I don't think it matters for cython but as a matter of style `type` is a
reserved keyword in Python, so you typically don't use it as a variable name. I
would suggest `type_` instead
##########
python/src/nanoarrow/__init__.py:
##########
@@ -26,4 +26,40 @@
allocate_c_array,
allocate_c_array_stream,
)
+from nanoarrow.schema import ( # noqa: F401
+ Schema,
+ Type,
+ TimeUnit,
+ schema,
+ null,
+ bool,
+ int8,
+ uint8,
+ int16,
+ uint16,
+ int32,
+ uint32,
+ int64,
+ uint64,
+ float16,
+ float32,
+ float64,
+ string,
+ large_string,
+ binary,
+ large_binary,
+ fixed_size_binary,
+ date32,
+ date64,
+ time32,
+ time64,
+ timestamp,
+ duration,
+ interval_months,
Review Comment:
Probably more of a pyarrow issue than something here but I don't think there
is a pyarrow exposure for any interval aside from month/day/nano
##########
python/src/nanoarrow/_lib.pyx:
##########
@@ -478,6 +593,130 @@ cdef class CSchemaView:
def __repr__(self):
return _lib_utils.schema_view_repr(self)
+
+cdef class CSchemaBuilder:
+ cdef CSchema c_schema
+ cdef ArrowSchema* _ptr
+
+ def __cinit__(self, CSchema schema):
Review Comment:
The RAII equivalent of cinit is `__dealloc__` - you might want to implement
that to potentially cleanup the schema in case ownership is never transfered
when this object gets destroyed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]