(arrow-nanoarrow) branch main updated: feat(python): Allow creation of dictionary and list types (#445)

paleolimbot Wed, 01 May 2024 05:16:47 -0700

This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git



The following commit(s) were added to refs/heads/main by this push:
     new fe5082f0 feat(python): Allow creation of dictionary and list types 
(#445)
fe5082f0 is described below

commit fe5082f0c71d57d55770e0175fa40ecb6f1028c6
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed May 1 09:16:39 2024 -0300

    feat(python): Allow creation of dictionary and list types (#445)
    
    This PR adds support for creating dictionary and list types:
    
    ```python
    import nanoarrow as na
    na.list_(na.int32())
    #> Schema(LIST, value_type=Schema(INT32, name='item'))
    na.dictionary(na.int32(), na.string())
    #> Schema(DICTIONARY, index_type=Schema(INT32), value_type=Schema(STRING), dictionary_ordered=False)
    ```
    
    Before, creating these types (or associated arrays from buffer) was not
    possible. This required some changes to `modify()` to ensure we could
    also set `children` and `dictionary` there.
---
 python/src/nanoarrow/__init__.py   |  12 +-
 python/src/nanoarrow/_lib.pyx      |  80 ++++++++--
 python/src/nanoarrow/schema.py     | 289 +++++++++++++++++++++++++++++--------
 python/tests/test_c_array.py       |   2 +-
 python/tests/test_c_buffer.py      |   6 +-
 python/tests/test_c_buffer_view.py |   4 +-
 python/tests/test_c_schema.py      |  42 ++++++
 python/tests/test_iterator.py      |  28 ++--
 python/tests/test_schema.py        |  48 ++++--
 9 files changed, 405 insertions(+), 106 deletions(-)

diff --git a/python/src/nanoarrow/__init__.py b/python/src/nanoarrow/__init__.py
index c1cd12dd..d96ed5c2 100644
--- a/python/src/nanoarrow/__init__.py
+++ b/python/src/nanoarrow/__init__.py
@@ -43,7 +43,7 @@ from nanoarrow.schema import (
     Type,
     TimeUnit,
     null,
-    bool,
+    bool_,
     int8,
     uint8,
     int16,
@@ -57,6 +57,10 @@ from nanoarrow.schema import (
     float64,
     string,
     large_string,
+    list_,
+    large_list,
+    fixed_size_list,
+    dictionary,
     binary,
     large_binary,
     fixed_size_binary,
@@ -88,7 +92,7 @@ __all__ = [
     "allocate_c_array_stream",
     "allocate_c_schema",
     "binary",
-    "bool",
+    "bool_",
     "c_array",
     "c_array_from_buffers",
     "c_array_stream",
@@ -102,9 +106,11 @@ __all__ = [
     "date64",
     "decimal128",
     "decimal256",
+    "dictionary",
     "duration",
     "extension_type",
     "fixed_size_binary",
+    "fixed_size_list",
     "float16",
     "float32",
     "float64",
@@ -117,6 +123,8 @@ __all__ = [
     "interval_months",
     "large_binary",
     "large_string",
+    "large_list",
+    "list_",
     "null",
     "string",
     "struct",
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 99127087..04602120 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -769,23 +769,55 @@ cdef class CSchema:
         else:
             return None
 
-    def modify(self, *, name=None, flags=None, nullable=None, metadata=None,
-               validate=True):
-        builder = CSchemaBuilder.copy(self)
+    def modify(self, *, format=None, name=None, flags=None, nullable=None,
+               metadata=None, children=None, dictionary=None, validate=True):
+        cdef CSchemaBuilder builder = CSchemaBuilder.allocate()
 
-        if name is not None:
+        if format is None:
+            builder.set_format(self.format)
+        else:
+            builder.set_format(format)
+
+        if name is None:
+            builder.set_name(self.name)
+        elif name is not False:
             builder.set_name(name)
 
-        if flags is not None:
+        if flags is None:
+            builder.set_flags(self.flags)
+        else:
             builder.set_flags(flags)
 
         if nullable is not None:
             builder.set_nullable(nullable)
 
-        if metadata is not None:
-            builder.clear_metadata()
+        if metadata is None:
+            if self.metadata is not None:
+                builder.append_metadata(self.metadata)
+        else:
             builder.append_metadata(metadata)
 
+        if children is None:
+            if self.n_children > 0:
+                builder.allocate_children(self.n_children)
+                for i, child in enumerate(self.children):
+                    builder.set_child(i, None, child)
+        elif hasattr(children, "items"):
+            builder.allocate_children(len(children))
+            for i, item in enumerate(children.items()):
+                name, child = item
+                builder.set_child(i, name, child)
+        else:
+            builder.allocate_children(len(children))
+            for i, child in enumerate(children):
+                builder.set_child(i, None, child)
+
+        if dictionary is None:
+            if self.dictionary:
+                builder.set_dictionary(self.dictionary)
+        elif dictionary is not False:
+            builder.set_dictionary(dictionary)
+
         if validate:
             builder.validate()
 
@@ -1036,19 +1068,10 @@ cdef class CSchemaBuilder:
         if self._ptr.release == NULL:
             ArrowSchemaInit(self._ptr)
 
-    @staticmethod
-    def copy(CSchema schema):
-        return CSchemaBuilder(schema.__deepcopy__())
-
     @staticmethod
     def allocate():
         return CSchemaBuilder(CSchema.allocate())
 
-    def clear_metadata(self):
-        cdef int code = ArrowSchemaSetMetadata(self.c_schema._ptr, NULL)
-        Error.raise_error_not_ok("ArrowSchemaSetMetadata()", code)
-        return self
-
     def append_metadata(self, metadata):
         cdef CBuffer buffer = CBuffer.empty()
 
@@ -1164,6 +1187,23 @@ cdef class CSchemaBuilder:
         if name is not None:
             name = str(name)
             code = ArrowSchemaSetName(self._ptr.children[i], 
name.encode("UTF-8"))
+            Error.raise_error_not_ok("ArrowSchemaSetName()", code)
+
+        return self
+
+    def set_dictionary(self, CSchema dictionary):
+        self.c_schema._assert_valid()
+
+        cdef int code
+        if self._ptr.dictionary == NULL:
+            code = ArrowSchemaAllocateDictionary(self._ptr)
+            Error.raise_error_not_ok("ArrowSchemaAllocateDictionary()", code)
+
+        if self._ptr.dictionary.release != NULL:
+            ArrowSchemaRelease(self._ptr.dictionary)
+
+        code = ArrowSchemaDeepCopy(dictionary._ptr, self._ptr.dictionary)
+        Error.raise_error_not_ok("ArrowSchemaDeepCopy()", code)
 
         return self
 
@@ -1179,6 +1219,14 @@ cdef class CSchemaBuilder:
 
         return self
 
+    def set_dictionary_ordered(self, dictionary_ordered):
+        if dictionary_ordered:
+            self._ptr.flags = self._ptr.flags | ARROW_FLAG_DICTIONARY_ORDERED
+        else:
+            self._ptr.flags = self._ptr.flags & ~ARROW_FLAG_DICTIONARY_ORDERED
+
+        return self
+
     def validate(self):
         return CSchemaView(self.c_schema)
 
diff --git a/python/src/nanoarrow/schema.py b/python/src/nanoarrow/schema.py
index 97bb45b8..94d1a8c8 100644
--- a/python/src/nanoarrow/schema.py
+++ b/python/src/nanoarrow/schema.py
@@ -193,23 +193,27 @@ class Schema:
         name=None,
         nullable=None,
         metadata=None,
+        fields=None,
         **params,
     ) -> None:
         if isinstance(obj, Type):
-            self._c_schema = _c_schema_from_type_and_params(
-                obj, params, name, nullable, metadata
-            )
-            self._c_schema_view = CSchemaView(self._c_schema)
-            return
-
-        if params:
-            raise ValueError("params are only supported for obj of class Type")
-
-        self._c_schema = c_schema(obj)
-
-        if name is not None or nullable is not None or metadata is not None:
+            self._c_schema = _c_schema_from_type_and_params(obj, params)
+        else:
+            if params:
+                raise ValueError("params are only supported for obj of class 
Type")
+            self._c_schema = c_schema(obj)
+
+        if (
+            name is not None
+            or nullable is not None
+            or metadata is not None
+            or fields is not None
+        ):
             self._c_schema = self._c_schema.modify(
-                name=name, nullable=nullable, metadata=metadata
+                name=name,
+                nullable=nullable,
+                metadata=metadata,
+                children=_clean_fields(fields),
             )
 
         self._c_schema_view = CSchemaView(self._c_schema)
@@ -343,6 +347,72 @@ class Schema:
 
         return self._c_schema_view.decimal_scale
 
+    @property
+    def index_type(self) -> Union["Schema", None]:
+        """Dictionary index type
+
+        For dictionary types, the type corresponding to the indices.
+        See also :attr:`value_type`.
+
+        >>> import nanoarrow as na
+        >>> na.dictionary(na.int32(), na.string()).index_type
+        Schema(INT32)
+        """
+        if self._c_schema_view.type_id == CArrowType.DICTIONARY:
+            index_schema = self._c_schema.modify(
+                dictionary=False, flags=0, nullable=self.nullable
+            )
+            return Schema(index_schema)
+        else:
+            return None
+
+    @property
+    def dictionary_ordered(self) -> Union[bool, None]:
+        """Dictionary ordering
+
+        For dictionary types, returns ``True`` if the order of dictionary values
+        is meaningful.
+
+        >>> import nanoarrow as na
+        >>> na.dictionary(na.int32(), na.string()).dictionary_ordered
+        False
+        """
+        return self._c_schema_view.dictionary_ordered
+
+    @property
+    def value_type(self):
+        """Dictionary or list value type
+
+        >>> import nanoarrow as na
+        >>> na.list_(na.int32()).value_type
+        Schema(INT32, name='item')
+        >>> na.dictionary(na.int32(), na.string()).value_type
+        Schema(STRING)
+        """
+        if self._c_schema_view.type_id in (
+            CArrowType.LIST,
+            CArrowType.LARGE_LIST,
+            CArrowType.FIXED_SIZE_LIST,
+        ):
+            return self.field(0)
+        elif self._c_schema_view.type_id == CArrowType.DICTIONARY:
+            return Schema(self._c_schema.dictionary)
+        else:
+            return None
+
+    @property
+    def list_size(self) -> Union[int, None]:
+        """Fixed-size list element size
+
+        >>> import nanoarrow as na
+        >>> na.fixed_size_list(na.int32(), 123).list_size
+        123
+        """
+        if self._c_schema_view.type_id == CArrowType.FIXED_SIZE_LIST:
+            return self._c_schema_view.fixed_size
+        else:
+            return None
+
     @property
     def n_fields(self) -> int:
         """Number of child Schemas
@@ -408,7 +478,7 @@ def null(nullable: bool = True) -> Schema:
     return Schema(Type.NULL, nullable=nullable)
 
 
-def bool(nullable: bool = True) -> Schema:
+def bool_(nullable: bool = True) -> Schema:
     """Create an instance of a boolean type.
 
     Parameters
@@ -420,7 +490,7 @@ def bool(nullable: bool = True) -> Schema:
     --------
 
     >>> import nanoarrow as na
-    >>> na.bool()
+    >>> na.bool_()
     Schema(BOOL)
     """
     return Schema(Type.BOOL, nullable=nullable)
@@ -945,9 +1015,8 @@ def struct(fields, nullable=True) -> Schema:
     ----------
     fields :
         * A dictionary whose keys are field names and values are schema-like 
objects
-        * An iterable whose items are a schema like object or a two-tuple of 
the
-          field name and a schema-like object. If a field name is not specified
-          from the tuple, the field name is inherited from the schema-like 
object.
+        * An iterable whose items are schema-like objects, where the field name is
+          inherited from the schema-like object.
     nullable : bool, optional
         Use ``False`` to mark this field as non-nullable.
 
@@ -957,14 +1026,113 @@ def struct(fields, nullable=True) -> Schema:
     >>> import nanoarrow as na
     >>> na.struct([na.int32()])
     Schema(STRUCT, fields=[Schema(INT32)])
-    >>> na.struct([("col1", na.int32())])
-    Schema(STRUCT, fields=[Schema(INT32, name='col1')])
     >>> na.struct({"col1": na.int32()})
     Schema(STRUCT, fields=[Schema(INT32, name='col1')])
     """
     return Schema(Type.STRUCT, fields=fields, nullable=nullable)
 
 
+def list_(value_type, nullable=True) -> Schema:
+    """Create a type representing a variable-size list of some other type.
+
+    Parameters
+    ----------
+    value_type : schema-like
+        The type of values in each list element.
+    nullable : bool, optional
+        Use ``False`` to mark this field as non-nullable.
+
+    Examples
+    --------
+
+    >>> import nanoarrow as na
+    >>> na.list_(na.int32())
+    Schema(LIST, value_type=Schema(INT32, name='item'))
+    """
+    return Schema(Type.LIST, value_type=value_type, nullable=nullable)
+
+
+def large_list(value_type, nullable=True) -> Schema:
+    """Create a type representing a variable-size list of some other type.
+
+    Unlike :func:`list_`, :func:`large_list` can accommodate arrays
+    with more than ``2 ** 31 - 1`` items in the values array.
+
+    Parameters
+    ----------
+    value_type : schema-like
+        The type of values in each list element.
+    nullable : bool, optional
+        Use ``False`` to mark this field as non-nullable.
+
+    Examples
+    --------
+
+    >>> import nanoarrow as na
+    >>> na.large_list(na.int32())
+    Schema(LARGE_LIST, value_type=Schema(INT32, name='item'))
+    """
+    return Schema(Type.LARGE_LIST, value_type=value_type, nullable=nullable)
+
+
+def fixed_size_list(value_type, list_size, nullable=True) -> Schema:
+    """Create a type representing a fixed-size list of some other type.
+
+    Parameters
+    ----------
+    value_type : schema-like
+        The type of values in each list element.
+    list_size : int
+        The number of values in each list element.
+    nullable : bool, optional
+        Use ``False`` to mark this field as non-nullable.
+
+    Examples
+    --------
+
+    >>> import nanoarrow as na
+    >>> na.fixed_size_list(na.int32(), 123)
+    Schema(FIXED_SIZE_LIST, value_type=Schema(INT32, name='item'), 
list_size=123)
+    """
+    return Schema(
+        Type.FIXED_SIZE_LIST,
+        value_type=value_type,
+        list_size=list_size,
+        nullable=nullable,
+    )
+
+
+def dictionary(index_type, value_type, dictionary_ordered=False):
+    """Create a type representing dictionary-encoded values
+
+    Parameters
+    ----------
+    index_type : schema-like
+        The data type of the indices. Must be an integral type.
+    value_type : schema-like
+        The type of the dictionary array.
+    dictionary_ordered : bool, optional
+        Use ``True`` if the order of values in the dictionary array is
+        meaningful.
+    nullable : bool, optional
+        Use ``False`` to mark this field as non-nullable.
+
+    Examples
+    --------
+
+    >>> import nanoarrow as na
+    >>> na.dictionary(na.int32(), na.string())
+    Schema(DICTIONARY, index_type=Schema(INT32), value_type=Schema(STRING), \
+dictionary_ordered=False)
+    """
+    return Schema(
+        Type.DICTIONARY,
+        index_type=index_type,
+        value_type=value_type,
+        dictionary_ordered=dictionary_ordered,
+    )
+
+
 def extension_type(
     storage_schema,
     extension_name: str,
@@ -993,24 +1161,10 @@ def extension_type(
     return Schema(storage_schema, nullable=nullable, metadata=metadata)
 
 
-def _c_schema_from_type_and_params(
-    type: Type,
-    params: dict,
-    name: Union[bool, None, bool],
-    nullable: Union[bool, None],
-    metadata: Mapping[Union[str, bytes], Union[str, bytes]],
-):
+def _c_schema_from_type_and_params(type: Type, params: dict):
     factory = CSchemaBuilder.allocate()
 
-    if type == Type.STRUCT:
-        fields = _clean_fields(params.pop("fields"))
-
-        factory.set_format("+s").allocate_children(len(fields))
-        for i, item in enumerate(fields):
-            child_name, c_schema = item
-            factory.set_child(i, child_name, c_schema)
-
-    elif type.value in CSchemaView._decimal_types:
+    if type.value in CSchemaView._decimal_types:
         precision = int(params.pop("precision"))
         scale = int(params.pop("scale"))
         factory.set_type_decimal(type.value, precision, scale)
@@ -1029,6 +1183,32 @@ def _c_schema_from_type_and_params(
     elif type == Type.FIXED_SIZE_BINARY:
         factory.set_type_fixed_size(type.value, int(params.pop("byte_width")))
 
+    elif type == Type.LIST:
+        factory.set_format("+l")
+        factory.allocate_children(1)
+        factory.set_child(0, "item", c_schema(params.pop("value_type")))
+
+    elif type == Type.LARGE_LIST:
+        factory.set_format("+L")
+        factory.allocate_children(1)
+        factory.set_child(0, "item", c_schema(params.pop("value_type")))
+
+    elif type == Type.FIXED_SIZE_LIST:
+        fixed_size = int(params.pop("list_size"))
+        factory.set_format(f"+w:{fixed_size}")
+        factory.allocate_children(1)
+        factory.set_child(0, "item", c_schema(params.pop("value_type")))
+
+    elif type == Type.DICTIONARY:
+        index_type = c_schema(params.pop("index_type"))
+        factory.set_format(index_type.format)
+
+        value_type = c_schema(params.pop("value_type"))
+        factory.set_dictionary(value_type)
+
+        if "dictionary_ordered" in params and 
bool(params.pop("dictionary_ordered")):
+            factory.set_dictionary_ordered(True)
+
     else:
         factory.set_type(type.value)
 
@@ -1036,38 +1216,19 @@ def _c_schema_from_type_and_params(
         unused = ", ".join(f"'{item}'" for item in params.keys())
         raise ValueError(f"Unused parameters whilst constructing Schema: 
{unused}")
 
-    # Apply default nullability (True)
-    if nullable is None:
-        nullable = True
-    factory.set_nullable(nullable)
-
-    # Apply default name (an empty string). To explicitly set a NULL
-    # name, a caller would have to specify False.
-    if name is None:
-        name = ""
-    elif name is False:
-        name = None
-    factory.set_name(name)
-
-    # Apply metadata
-    if metadata is not None:
-        factory.append_metadata(metadata)
+    # Better default than NULL, which causes some implementations to crash
+    factory.set_name("")
 
     return factory.finish()
 
 
 def _clean_fields(fields):
-    if isinstance(fields, dict):
-        return [(str(k), c_schema(v)) for k, v in fields.items()]
+    if fields is None:
+        return None
+    elif hasattr(fields, "items"):
+        return {k: c_schema(v) for k, v in fields.items()}
     else:
-        fields_clean = []
-        for item in fields:
-            if isinstance(item, tuple) and len(item) == 2:
-                fields_clean.append((str(item[0]), c_schema(item[1])))
-            else:
-                fields_clean.append((None, c_schema(item)))
-
-        return fields_clean
+        return [c_schema(v) for v in fields]
 
 
 def _schema_repr(obj):
@@ -1120,4 +1281,8 @@ _PARAM_NAMES = {
     CArrowType.DECIMAL128: ("precision", "scale"),
     CArrowType.DECIMAL256: ("precision", "scale"),
     CArrowType.STRUCT: ("fields",),
+    CArrowType.LIST: ("value_type",),
+    CArrowType.LARGE_LIST: ("value_type",),
+    CArrowType.FIXED_SIZE_LIST: ("value_type", "list_size"),
+    CArrowType.DICTIONARY: ("index_type", "value_type", "dictionary_ordered"),
 }
diff --git a/python/tests/test_c_array.py b/python/tests/test_c_array.py
index b64370da..b5ec3b90 100644
--- a/python/tests/test_c_array.py
+++ b/python/tests/test_c_array.py
@@ -303,7 +303,7 @@ def test_c_array_from_iterable_float_with_nulls():
 
 
 def test_c_array_from_iterable_bool_with_nulls():
-    c_array = na.c_array([True, None, False], na.bool())
+    c_array = na.c_array([True, None, False], na.bool_())
     assert c_array.length == 3
     assert c_array.null_count == 1
 
diff --git a/python/tests/test_c_buffer.py b/python/tests/test_c_buffer.py
index 40ba4a3b..d18435cc 100644
--- a/python/tests/test_c_buffer.py
+++ b/python/tests/test_c_buffer.py
@@ -322,7 +322,7 @@ def test_c_buffer_from_decimal256_iterable():
 
 def test_c_buffer_bitmap_from_iterable():
     # Check something less than one byte
-    buffer = na.c_buffer([True, False, False, True], na.bool())
+    buffer = na.c_buffer([True, False, False, True], na.bool_())
     assert "10010000" in repr(buffer)
     assert buffer.size_bytes == 1
     assert buffer.data_type == "bool"
@@ -343,13 +343,13 @@ def test_c_buffer_bitmap_from_iterable():
     )
 
     # Check something exactly one byte
-    buffer = na.c_buffer([True, False, False, True] * 2, na.bool())
+    buffer = na.c_buffer([True, False, False, True] * 2, na.bool_())
     assert "10011001" in repr(buffer)
     assert buffer.size_bytes == 1
     assert list(buffer.elements()) == [True, False, False, True] * 2
 
     # Check something more than one byte
-    buffer = na.c_buffer([True, False, False, True] * 3, na.bool())
+    buffer = na.c_buffer([True, False, False, True] * 3, na.bool_())
     assert "1001100110010000" in repr(buffer)
     assert buffer.size_bytes == 2
     assert list(buffer.elements()) == [True, False, False, True] * 3 + [
diff --git a/python/tests/test_c_buffer_view.py 
b/python/tests/test_c_buffer_view.py
index b885488e..e84c04df 100644
--- a/python/tests/test_c_buffer_view.py
+++ b/python/tests/test_c_buffer_view.py
@@ -20,8 +20,8 @@ import pytest
 import nanoarrow as na
 
 
-def test_buffer_view_bool():
-    bool_array_view = na.c_array_view([1, 0, 0, 1], na.bool())
+def test_buffer_view_bool_():
+    bool_array_view = na.c_array_view([1, 0, 0, 1], na.bool_())
     view = bool_array_view.buffer(1)
 
     assert view.element_size_bits == 1
diff --git a/python/tests/test_c_schema.py b/python/tests/test_c_schema.py
index 5617fe7b..e299157f 100644
--- a/python/tests/test_c_schema.py
+++ b/python/tests/test_c_schema.py
@@ -133,6 +133,9 @@ def test_c_schema_modify():
     assert schema_clone is not schema
     assert schema._addr() != schema_clone._addr()
 
+    schema_formatted = schema.modify(format="i")
+    assert schema_formatted.format == "i"
+
     schema_named = schema.modify(name="something else")
     assert schema_named.name == "something else"
     assert schema_named.format == schema.format
@@ -155,3 +158,42 @@ def test_c_schema_modify():
 
     schema_no_metad = schema_metad.modify(metadata={})
     assert schema_no_metad.metadata is None
+
+
+def test_c_schema_modify_children():
+    schema = na.c_schema(na.struct({"col1": na.null()}))
+
+    schema_same_children = schema.modify()
+    assert schema_same_children.n_children == 1
+    assert schema_same_children.child(0).name == "col1"
+    assert schema_same_children.child(0).format == "n"
+
+    schema_new_children_list = schema.modify(
+        children=[na.c_schema(na.int32()).modify(name="new name")]
+    )
+    assert schema_new_children_list.n_children == 1
+    assert schema_new_children_list.child(0).name == "new name"
+    assert schema_new_children_list.child(0).format == "i"
+
+    schema_new_children_dict = schema.modify(
+        children={"new name": na.c_schema(na.int32())}
+    )
+    assert schema_new_children_dict.n_children == 1
+    assert schema_new_children_dict.child(0).name == "new name"
+    assert schema_new_children_dict.child(0).format == "i"
+
+
+def test_c_schema_modify_dictionary():
+    schema = na.c_schema(na.int32())
+
+    schema_dictionary = schema.modify(dictionary=na.c_schema(na.string()))
+    assert schema_dictionary.format == "i"
+    assert schema_dictionary.dictionary.format == "u"
+
+    schema_same_dictionary = schema_dictionary.modify()
+    assert schema_same_dictionary.format == "i"
+    assert schema_same_dictionary.dictionary.format == "u"
+
+    schema_no_dictionary = schema_dictionary.modify(dictionary=False)
+    assert schema_no_dictionary.format == "i"
+    assert schema.dictionary is None
diff --git a/python/tests/test_iterator.py b/python/tests/test_iterator.py
index 826f9be2..ff0b34e2 100644
--- a/python/tests/test_iterator.py
+++ b/python/tests/test_iterator.py
@@ -106,10 +106,13 @@ def test_iterator_nullable_binary():
 
 def test_iter_tuples():
     array = na.c_array_from_buffers(
-        na.struct({"col1": na.int32(), "col2": na.bool()}),
+        na.struct({"col1": na.int32(), "col2": na.bool_()}),
         length=3,
         buffers=[None],
-        children=[na.c_array([1, 2, 3], na.int32()), na.c_array([1, 0, 1], 
na.bool())],
+        children=[
+            na.c_array([1, 2, 3], na.int32()),
+            na.c_array([1, 0, 1], na.bool_()),
+        ],
     )
 
     assert list(iter_tuples(array)) == [(1, True), (2, False), (3, True)]
@@ -131,12 +134,12 @@ def test_iter_tuples():
 
 def test_iter_tuples_nullable():
     array = na.c_array_from_buffers(
-        na.struct({"col1": na.int32(), "col2": na.bool()}),
+        na.struct({"col1": na.int32(), "col2": na.bool_()}),
         length=4,
-        buffers=[na.c_buffer([True, True, True, False], na.bool())],
+        buffers=[na.c_buffer([True, True, True, False], na.bool_())],
         children=[
             na.c_array([1, 2, 3, 4], na.int32()),
-            na.c_array([1, 0, 1, 0], na.bool()),
+            na.c_array([1, 0, 1, 0], na.bool_()),
         ],
     )
 
@@ -148,7 +151,7 @@ def test_iter_tuples_nullable():
     sliced_child = na.c_array_from_buffers(
         array.schema,
         length=3,
-        buffers=[na.c_buffer([True, True, False], na.bool())],
+        buffers=[na.c_buffer([True, True, False], na.bool_())],
         children=[array.child(0)[1:], array.child(1)[1:]],
     )
     assert list(iter_tuples(sliced_child)) == [(2, False), (3, True), None]
@@ -164,10 +167,13 @@ def test_iter_tuples_errors():
 
 def test_iterator_struct():
     array = na.c_array_from_buffers(
-        na.struct({"col1": na.int32(), "col2": na.bool()}),
+        na.struct({"col1": na.int32(), "col2": na.bool_()}),
         length=3,
         buffers=[None],
-        children=[na.c_array([1, 2, 3], na.int32()), na.c_array([1, 0, 1], 
na.bool())],
+        children=[
+            na.c_array([1, 2, 3], na.int32()),
+            na.c_array([1, 0, 1], na.bool_()),
+        ],
     )
 
     assert list(iter_py(array)) == [
@@ -185,12 +191,12 @@ def test_iterator_struct():
 
 def test_iterator_nullable_struct():
     array = na.c_array_from_buffers(
-        na.struct({"col1": na.int32(), "col2": na.bool()}),
+        na.struct({"col1": na.int32(), "col2": na.bool_()}),
         length=4,
-        buffers=[na.c_buffer([True, True, True, False], na.bool())],
+        buffers=[na.c_buffer([True, True, True, False], na.bool_())],
         children=[
             na.c_array([1, 2, 3, 4], na.int32()),
-            na.c_array([1, 0, 1, 0], na.bool()),
+            na.c_array([1, 0, 1, 0], na.bool_()),
         ],
     )
 
diff --git a/python/tests/test_schema.py b/python/tests/test_schema.py
index cc9e42b2..38c412f3 100644
--- a/python/tests/test_schema.py
+++ b/python/tests/test_schema.py
@@ -77,13 +77,17 @@ def test_schema_create_no_params():
     assert schema_obj.name == "not empty"
     assert "name='not empty'" in repr(schema_obj)
 
+    msg = "params are only supported for obj of class Type"
+    with pytest.raises(ValueError, match=msg):
+        na.Schema(na.fixed_size_binary(123), byte_width=12)
+
     with pytest.raises(ValueError, match=r"^Unused parameter"):
         na.Schema(na.Type.INT32, unused_param="unused_value")
 
 
 def test_schema_simple():
     assert na.null().type == na.Type.NULL
-    assert na.bool().type == na.Type.BOOL
+    assert na.bool_().type == na.Type.BOOL
     assert na.int8().type == na.Type.INT8
     assert na.uint8().type == na.Type.UINT8
     assert na.int16().type == na.Type.INT16
@@ -171,13 +175,6 @@ def test_schema_struct():
 
     assert "fields=[Schema(INT32)]" in repr(schema_obj)
 
-    # Make sure we can use a list of two-tuples
-    schema_obj = na.struct([("col_name", na.Type.INT32)])
-    assert schema_obj.type == na.Type.STRUCT
-    assert schema_obj.field(0).type == na.Type.INT32
-    assert schema_obj.field(0).name == "col_name"
-    assert "fields=[Schema(INT32, name='col_name')]" in repr(schema_obj)
-
     # Make sure we can use a dictionary to specify fields
     schema_obj = na.struct({"col_name": na.Type.INT32})
     assert schema_obj.type == na.Type.STRUCT
@@ -185,13 +182,46 @@ def test_schema_struct():
     assert schema_obj.field(0).name == "col_name"
 
     # Make sure we can use a Schema when constructing fields (and that
-    # fild names are taken from the input)
+    # field names are taken from the input)
     schema_obj = na.struct([schema_obj.field(0)])
     assert schema_obj.type == na.Type.STRUCT
     assert schema_obj.field(0).type == na.Type.INT32
     assert schema_obj.field(0).name == "col_name"
 
 
+def test_schema_list_():
+    schema_obj = na.list_(na.null())
+    assert schema_obj.type == na.Type.LIST
+    assert schema_obj.value_type.type == na.Type.NULL
+
+
+def test_schema_large_list():
+    schema_obj = na.large_list(na.null())
+    assert schema_obj.type == na.Type.LARGE_LIST
+    assert schema_obj.value_type.type == na.Type.NULL
+
+
+def test_schema_fixed_size_list():
+    schema_obj = na.fixed_size_list(na.null(), 123)
+    assert schema_obj.type == na.Type.FIXED_SIZE_LIST
+    assert schema_obj.value_type.type == na.Type.NULL
+    assert schema_obj.list_size == 123
+
+
+def test_schema_dictionary():
+    schema_obj = na.dictionary(na.int8(), na.null())
+    assert schema_obj.type == na.Type.DICTIONARY
+    assert schema_obj.index_type.type == na.Type.INT8
+    assert schema_obj.value_type.type == na.Type.NULL
+    assert schema_obj.dictionary_ordered is False
+
+    schema_obj_ordered = na.dictionary(na.int8(), na.null(), 
dictionary_ordered=True)
+    assert schema_obj_ordered.type == na.Type.DICTIONARY
+    assert schema_obj_ordered.index_type.type == na.Type.INT8
+    assert schema_obj_ordered.value_type.type == na.Type.NULL
+    assert schema_obj_ordered.dictionary_ordered is True
+
+
 def test_schema_extension():
     schema_obj = na.int32()
     assert schema_obj.extension is None

(arrow-nanoarrow) branch main updated: feat(python): Allow creation of dictionary and list types (#445)

Reply via email to