(arrow-nanoarrow) branch main updated: feat(python): Unify printing of type information across classes (#458)

paleolimbot Fri, 10 May 2024 10:05:51 -0700

This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git



The following commit(s) were added to refs/heads/main by this push:
     new f47e8309 feat(python): Unify printing of type information across 
classes (#458)
f47e8309 is described below

commit f47e83095d3ba18daecaae0d296f66fc5993141d
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri May 10 14:04:25 2024 -0300

    feat(python): Unify printing of type information across classes (#458)
    
    This PR unifies how types are printed, which previously differed among
    the `Array`, `Scalar`, `ArrayStream`, and `Schema` classes. These now all
    use the same function so that type information is communicated in a
    consistent way for all user-facing classes.
    
    The general approach is to print all possible information on the first
    line. Most types have descriptions like `int32`, so this is a good fit
    and there's no need for multiple lines. This makes a list of schemas
    (e.g., `schema.fields`) have a nice repr, too. The Schema repr
    additionally prints out metadata if present (this is skipped by the
    other classes).
    
    Some examples:
    
    ```python
    import nanoarrow as na
    
    # Schema with no parameters
    na.int32()
    #> <Schema> int32
    
    # Type parameters are already part of the schema's to_string
    na.fixed_size_binary(123)
    #> <Schema> fixed_size_binary(123)
    
    # non-nullable schema
    na.int32(nullable=False)
    #> <Schema> non-nullable int32
    
    # named schema
    na.Schema(na.int32(), name="some_col")
    #> <Schema> 'some_col': int32
    
    # ordered dictionary
    na.dictionary(na.int32(), na.string(), True)
    #> <Schema> ordered dictionary(int32)<string>
    
    # Schema with metadata
    na.Schema(na.int32(), metadata={"some key": "some value"})
    #> <Schema> int32
    #> - metadata: {b'some key': b'some value'}
    
    # fields are already a part of the schema's to_string
    struct = na.struct({"some col": na.int32(), "some other col": na.string()})
    struct
    #> <Schema> struct<some col: int32, some other col: string>
    
    # ...and you can inspect them more closely with .fields
    struct.fields
    #> [<Schema> 'some col': int32, <Schema> 'some other col': string]
    ```
---
 python/src/nanoarrow/_lib.pyx          |  13 +++
 python/src/nanoarrow/_repr_utils.py    |  23 +++--
 python/src/nanoarrow/array.py          |  28 +++---
 python/src/nanoarrow/array_stream.py   |  15 +--
 python/src/nanoarrow/c_array_stream.py |   4 +-
 python/src/nanoarrow/ipc.py            |   8 +-
 python/src/nanoarrow/iterator.py       |   2 +-
 python/src/nanoarrow/schema.py         | 174 ++++++++++++++++++---------------
 python/tests/test_c_schema.py          |  17 +++-
 python/tests/test_nanoarrow.py         |  12 +--
 python/tests/test_schema.py            |  18 +---
 11 files changed, 175 insertions(+), 139 deletions(-)

diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index b99a6505..451bd205 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -1782,6 +1782,12 @@ cdef class SchemaMetadata:
         for key, _ in self.items():
             yield key
 
+    def keys(self):
+        return list(self)
+
+    def values(self):
+        return [value for _, value in self.items()]
+
     def items(self):
         cdef ArrowStringView key
         cdef ArrowStringView value
@@ -1792,6 +1798,13 @@ cdef class SchemaMetadata:
             value_obj = PyBytes_FromStringAndSize(value.data, value.size_bytes)
             yield key_obj, value_obj
 
+    def __repr__(self):
+        lines = [
+            f"<{_repr_utils.make_class_label(self)}>",
+            _repr_utils.metadata_repr(self)
+        ]
+        return "\n".join(lines)
+
 
 cdef class CBufferView:
     """Wrapper for Array buffer content
diff --git a/python/src/nanoarrow/_repr_utils.py 
b/python/src/nanoarrow/_repr_utils.py
index 2c3ce0da..7658ae79 100644
--- a/python/src/nanoarrow/_repr_utils.py
+++ b/python/src/nanoarrow/_repr_utils.py
@@ -35,9 +35,19 @@ def c_schema_to_string(obj, max_char_width=80):
         return c_schema_string
 
 
+def metadata_repr(obj, indent=0, max_char_width=80):
+    indent_str = " " * indent
+    lines = []
+    for key, value in obj.items():
+        line = f"{indent_str}- {repr(key)}: {repr(value)}"
+        lines.append(line[:max_char_width])
+
+    return "\n".join(lines)
+
+
 def schema_repr(schema, indent=0):
     indent_str = " " * indent
-    class_label = make_class_label(schema, module="nanoarrow.c_lib")
+    class_label = make_class_label(schema, module="nanoarrow.c_schema")
     if schema._addr() == 0:
         return f"<{class_label} <NULL>>"
     elif not schema.is_valid():
@@ -54,8 +64,7 @@ def schema_repr(schema, indent=0):
         lines.append(f"{indent_str}- metadata: NULL")
     else:
         lines.append(f"{indent_str}- metadata:")
-        for key, value in metadata.items():
-            lines.append(f"{indent_str}  - {repr(key)}: {repr(value)}")
+        lines.append(metadata_repr(metadata, indent + 2))
 
     if schema.dictionary:
         dictionary_repr = schema_repr(schema.dictionary, indent=indent + 2)
@@ -76,7 +85,7 @@ def array_repr(array, indent=0, max_char_width=80):
         max_char_width = 20
 
     indent_str = " " * indent
-    class_label = make_class_label(array, module="nanoarrow.c_lib")
+    class_label = make_class_label(array, module="nanoarrow.c_array")
     if array._addr() == 0:
         return f"<{class_label} <NULL>>"
     elif not array.is_valid():
@@ -105,7 +114,7 @@ def array_repr(array, indent=0, max_char_width=80):
 
 
 def schema_view_repr(schema_view):
-    class_label = make_class_label(schema_view, module="nanoarrow.c_lib")
+    class_label = make_class_label(schema_view, module="nanoarrow.c_schema")
 
     lines = [
         f"<{class_label}>",
@@ -128,7 +137,7 @@ def schema_view_repr(schema_view):
 
 def array_view_repr(array_view, max_char_width=80, indent=0):
     indent_str = " " * indent
-    class_label = make_class_label(array_view, module="nanoarrow.c_lib")
+    class_label = make_class_label(array_view, module="nanoarrow.c_array")
 
     lines = [f"<{class_label}>"]
 
@@ -210,7 +219,7 @@ def buffer_view_preview_cpu(buffer_view, max_char_width):
 
 
 def array_stream_repr(array_stream, max_char_width=80):
-    class_label = make_class_label(array_stream, module="nanoarrow.c_lib")
+    class_label = make_class_label(array_stream, 
module="nanoarrow.c_array_stream")
 
     if array_stream._addr() == 0:
         return f"<{class_label} <NULL>>"
diff --git a/python/src/nanoarrow/array.py b/python/src/nanoarrow/array.py
index eabe8f2a..43ff0dc2 100644
--- a/python/src/nanoarrow/array.py
+++ b/python/src/nanoarrow/array.py
@@ -31,7 +31,7 @@ from nanoarrow.c_array import c_array, c_array_view
 from nanoarrow.c_array_stream import c_array_stream
 from nanoarrow.c_schema import c_schema
 from nanoarrow.iterator import iter_array_views, iter_py, iter_tuples
-from nanoarrow.schema import Schema
+from nanoarrow.schema import Schema, _schema_repr
 
 from nanoarrow import _repr_utils
 
@@ -58,7 +58,7 @@ class Scalar:
     >>> array[0].as_py()
     1
     >>> array[0].schema
-    Schema(INT32)
+    <Schema> int32
     """
 
     def __init__(self):
@@ -82,11 +82,14 @@ class Scalar:
         return next(iter_py(self))
 
     def to_string(self, width_hint=80) -> str:
-        c_schema_string = _repr_utils.c_schema_to_string(
-            self._c_array.schema, width_hint // 4
+        schema_repr = _schema_repr(
+            self.schema,
+            max_char_width=width_hint // 4,
+            prefix="",
+            include_metadata=False,
         )
 
-        prefix = f"Scalar<{c_schema_string}> "
+        prefix = f"Scalar<{schema_repr}> "
         width_hint -= len(prefix)
 
         py_repr = repr(self.as_py())
@@ -379,7 +382,7 @@ class Array:
         ... )
         >>> array = na.Array(batch)
         >>> array.child(1)
-        nanoarrow.Array<string>[3]
+        nanoarrow.Array<'col2': string>[3]
         'a'
         'b'
         'c'
@@ -401,11 +404,11 @@ class Array:
         >>> array = na.Array(batch)
         >>> for child in array.iter_children():
         ...     print(child)
-        nanoarrow.Array<int64>[3]
+        nanoarrow.Array<'col1': int64>[3]
         1
         2
         3
-        nanoarrow.Array<string>[3]
+        nanoarrow.Array<'col2': string>[3]
         'a'
         'b'
         'c'
@@ -545,11 +548,14 @@ class Array:
     def to_string(self, width_hint=80, items_hint=10) -> str:
         cls_name = _repr_utils.make_class_label(self, module="nanoarrow")
         len_text = f"[{len(self)}]"
-        c_schema_string = _repr_utils.c_schema_to_string(
-            self._data.schema, width_hint - len(cls_name) - len(len_text) - 2
+        schema_repr = _schema_repr(
+            self.schema,
+            max_char_width=width_hint - len(cls_name) - len(len_text) - 2,
+            prefix="",
+            include_metadata=False,
         )
 
-        lines = [f"{cls_name}<{c_schema_string}>{len_text}"]
+        lines = [f"{cls_name}<{schema_repr}>{len_text}"]
 
         for i, item in enumerate(self.iter_py()):
             if i >= items_hint:
diff --git a/python/src/nanoarrow/array_stream.py 
b/python/src/nanoarrow/array_stream.py
index 6fa1e0f2..deaaece2 100644
--- a/python/src/nanoarrow/array_stream.py
+++ b/python/src/nanoarrow/array_stream.py
@@ -23,7 +23,7 @@ from nanoarrow._repr_utils import make_class_label
 from nanoarrow.array import Array
 from nanoarrow.c_array_stream import c_array_stream
 from nanoarrow.iterator import iter_py, iter_tuples
-from nanoarrow.schema import Schema
+from nanoarrow.schema import Schema, _schema_repr
 
 
 class ArrayStream:
@@ -52,7 +52,7 @@ class ArrayStream:
 
     >>> import nanoarrow as na
     >>> na.ArrayStream([1, 2, 3], na.int32())
-    <nanoarrow.ArrayStream: Schema(INT32)>
+    nanoarrow.ArrayStream<int32>
     """
 
     def __init__(self, obj, schema=None) -> None:
@@ -65,7 +65,7 @@ class ArrayStream:
         >>> import nanoarrow as na
         >>> stream = na.ArrayStream([1, 2, 3], na.int32())
         >>> stream.schema
-        Schema(INT32)
+        <Schema> int32
         """
         return Schema(self._c_array_stream._get_cached_schema())
 
@@ -200,7 +200,8 @@ class ArrayStream:
 
     def __repr__(self) -> str:
         cls = make_class_label(self, "nanoarrow")
-        return f"<{cls}: {self.schema}>"
+        schema_repr = _schema_repr(self.schema, prefix="", 
include_metadata=False)
+        return f"{cls}<{schema_repr}>"
 
     @staticmethod
     def from_readable(obj):
@@ -212,7 +213,7 @@ class ArrayStream:
         >>> from nanoarrow.ipc import Stream
         >>> with na.ArrayStream.from_readable(Stream.example_bytes()) as 
stream:
         ...     stream.read_all()
-        nanoarrow.Array<struct<some_col: int32>>[3]
+        nanoarrow.Array<non-nullable struct<some_col: int32>>[3]
         {'some_col': 1}
         {'some_col': 2}
         {'some_col': 3}
@@ -239,7 +240,7 @@ class ArrayStream:
         ...
         ...     with na.ArrayStream.from_path(path) as stream:
         ...         stream.read_all()
-        nanoarrow.Array<struct<some_col: int32>>[3]
+        nanoarrow.Array<non-nullable struct<some_col: int32>>[3]
         {'some_col': 1}
         {'some_col': 2}
         {'some_col': 3}
@@ -268,7 +269,7 @@ class ArrayStream:
         ...     uri = pathlib.Path(path).as_uri()
         ...     with na.ArrayStream.from_url(uri) as stream:
         ...         stream.read_all()
-        nanoarrow.Array<struct<some_col: int32>>[3]
+        nanoarrow.Array<non-nullable struct<some_col: int32>>[3]
         {'some_col': 1}
         {'some_col': 2}
         {'some_col': 3}
diff --git a/python/src/nanoarrow/c_array_stream.py 
b/python/src/nanoarrow/c_array_stream.py
index 77eeaaf6..411f41fd 100644
--- a/python/src/nanoarrow/c_array_stream.py
+++ b/python/src/nanoarrow/c_array_stream.py
@@ -37,14 +37,14 @@ def c_array_stream(obj=None, schema=None) -> CArrayStream:
     >>> pa_reader = pa.RecordBatchReader.from_batches(pa_batch.schema, 
[pa_batch])
     >>> array_stream = na.c_array_stream(pa_reader)
     >>> array_stream.get_schema()
-    <nanoarrow.c_lib.CSchema struct>
+    <nanoarrow.c_schema.CSchema struct>
     - format: '+s'
     - name: ''
     - flags: 0
     - metadata: NULL
     - dictionary: NULL
     - children[1]:
-      'col1': <nanoarrow.c_lib.CSchema int32>
+      'col1': <nanoarrow.c_schema.CSchema int32>
         - format: 'i'
         - name: 'col1'
         - flags: 2
diff --git a/python/src/nanoarrow/ipc.py b/python/src/nanoarrow/ipc.py
index 5125f771..794ecf36 100644
--- a/python/src/nanoarrow/ipc.py
+++ b/python/src/nanoarrow/ipc.py
@@ -43,7 +43,7 @@ class Stream:
     >>> from nanoarrow.ipc import Stream
     >>> with Stream.example() as inp, na.c_array_stream(inp) as stream:
     ...     stream
-    <nanoarrow.c_lib.CArrayStream>
+    <nanoarrow.c_array_stream.CArrayStream>
     - get_schema(): struct<some_col: int32>
     """
 
@@ -96,7 +96,7 @@ class Stream:
         >>> from nanoarrow.ipc import Stream
         >>> ipc_stream = Stream.from_readable(Stream.example_bytes())
         >>> na.c_array_stream(ipc_stream)
-        <nanoarrow.c_lib.CArrayStream>
+        <nanoarrow.c_array_stream.CArrayStream>
         - get_schema(): struct<some_col: int32>
         """
         if not hasattr(obj, "readinto") and _obj_is_buffer(obj):
@@ -137,7 +137,7 @@ class Stream:
         ...
         ...     with Stream.from_path(path) as inp, na.c_array_stream(inp) as 
stream:
         ...         stream
-        <nanoarrow.c_lib.CArrayStream>
+        <nanoarrow.c_array_stream.CArrayStream>
         - get_schema(): struct<some_col: int32>
         """
         out = Stream()
@@ -176,7 +176,7 @@ class Stream:
         ...     uri = pathlib.Path(path).as_uri()
         ...     with Stream.from_url(uri) as inp, na.c_array_stream(inp) as 
stream:
         ...         stream
-        <nanoarrow.c_lib.CArrayStream>
+        <nanoarrow.c_array_stream.CArrayStream>
         - get_schema(): struct<some_col: int32>
         """
         import urllib.request
diff --git a/python/src/nanoarrow/iterator.py b/python/src/nanoarrow/iterator.py
index 76f2a775..5f85724d 100644
--- a/python/src/nanoarrow/iterator.py
+++ b/python/src/nanoarrow/iterator.py
@@ -101,7 +101,7 @@ def iter_array_views(obj, schema=None) -> 
Iterable[CArrayView]:
     >>> from nanoarrow import iterator
     >>> array = na.c_array([1, 2, 3], na.int32())
     >>> list(iterator.iter_array_views(array))
-    [<nanoarrow.c_lib.CArrayView>
+    [<nanoarrow.c_array.CArrayView>
     - storage_type: 'int32'
     - length: 3
     - offset: 0
diff --git a/python/src/nanoarrow/schema.py b/python/src/nanoarrow/schema.py
index c4c34fe8..b7e1fe2f 100644
--- a/python/src/nanoarrow/schema.py
+++ b/python/src/nanoarrow/schema.py
@@ -18,7 +18,7 @@
 import enum
 import reprlib
 from functools import cached_property
-from typing import Mapping, Union
+from typing import List, Mapping, Union
 
 from nanoarrow._lib import (
     CArrowTimeUnit,
@@ -29,6 +29,8 @@ from nanoarrow._lib import (
 )
 from nanoarrow.c_schema import c_schema
 
+from nanoarrow import _repr_utils
+
 
 class Type(enum.Enum):
     """The Type enumerator provides a means by which the various type
@@ -179,11 +181,11 @@ class Schema:
     >>> import nanoarrow as na
     >>> import pyarrow as pa
     >>> na.Schema(na.Type.INT32)
-    Schema(INT32)
+    <Schema> int32
     >>> na.Schema(na.Type.DURATION, unit=na.TimeUnit.SECOND)
-    Schema(DURATION, unit=SECOND)
+    <Schema> duration('s')
     >>> na.Schema(pa.int32())
-    Schema(INT32)
+    <Schema> int32
     """
 
     def __init__(
@@ -218,6 +220,23 @@ class Schema:
 
         self._c_schema_view = CSchemaView(self._c_schema)
 
+    @property
+    def params(self) -> Mapping:
+        """Get parameter names and values for this type
+
+        Returns a dictionary of parameters that can be used to reconstruct
+        this type together with its type identifier.
+
+        >>> import nanoarrow as na
+        >>> na.fixed_size_binary(123).params
+        {'byte_width': 123}
+        """
+        if self._c_schema_view.type_id not in _PARAM_NAMES:
+            return {}
+
+        param_names = _PARAM_NAMES[self._c_schema_view.type_id]
+        return {k: getattr(self, k) for k in param_names}
+
     @property
     def type(self) -> Type:
         """Type enumerator value of this Schema
@@ -356,7 +375,7 @@ class Schema:
 
         >>> import nanoarrow as na
         >>> na.dictionary(na.int32(), na.string()).index_type
-        Schema(INT32)
+        <Schema> int32
         """
         if self._c_schema_view.type_id == CArrowType.DICTIONARY:
             index_schema = self._c_schema.modify(
@@ -385,9 +404,9 @@ class Schema:
 
         >>> import nanoarrow as na
         >>> na.list_(na.int32()).value_type
-        Schema(INT32, name='item')
+        <Schema> 'item': int32
         >>> na.dictionary(na.int32(), na.string()).value_type
-        Schema(STRING)
+        <Schema> string
         """
         if self._c_schema_view.type_id in (
             CArrowType.LIST,
@@ -425,13 +444,13 @@ class Schema:
 
         return self._c_schema.n_children
 
-    def field(self, i):
+    def field(self, i) -> "Schema":
         """Extract a child Schema
 
         >>> import nanoarrow as na
         >>> schema = na.struct({"col1": na.int32()})
         >>> schema.field(0)
-        Schema(INT32, name='col1')
+        <Schema> 'col1': int32
         """
 
         # Returning a copy to reduce interdependence between Schema instances:
@@ -440,7 +459,7 @@ class Schema:
         return Schema(self._c_schema.child(i).__deepcopy__())
 
     @property
-    def fields(self):
+    def fields(self) -> List["Schema"]:
         """Iterate over child Schemas
 
         >>> import nanoarrow as na
@@ -450,8 +469,7 @@ class Schema:
         ...
         col1
         """
-        for i in range(self.n_fields):
-            yield self.field(i)
+        return [self.field(i) for i in range(self.n_fields)]
 
     def __repr__(self) -> str:
         return _schema_repr(self)
@@ -481,7 +499,7 @@ def null(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.null()
-    Schema(NULL)
+    <Schema> na
     """
     return Schema(Type.NULL, nullable=nullable)
 
@@ -499,7 +517,7 @@ def bool_(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.bool_()
-    Schema(BOOL)
+    <Schema> bool
     """
     return Schema(Type.BOOL, nullable=nullable)
 
@@ -517,7 +535,7 @@ def int8(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.int8()
-    Schema(INT8)
+    <Schema> int8
     """
     return Schema(Type.INT8, nullable=nullable)
 
@@ -535,7 +553,7 @@ def uint8(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.uint8()
-    Schema(UINT8)
+    <Schema> uint8
     """
     return Schema(Type.UINT8, nullable=nullable)
 
@@ -553,7 +571,7 @@ def int16(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.int16()
-    Schema(INT16)
+    <Schema> int16
     """
     return Schema(Type.INT16, nullable=nullable)
 
@@ -571,7 +589,7 @@ def uint16(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.uint16()
-    Schema(UINT16)
+    <Schema> uint16
     """
     return Schema(Type.UINT16, nullable=nullable)
 
@@ -589,7 +607,7 @@ def int32(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.int32()
-    Schema(INT32)
+    <Schema> int32
     """
     return Schema(Type.INT32, nullable=nullable)
 
@@ -607,7 +625,7 @@ def uint32(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.uint32()
-    Schema(UINT32)
+    <Schema> uint32
     """
     return Schema(Type.UINT32, nullable=nullable)
 
@@ -625,7 +643,7 @@ def int64(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.int64()
-    Schema(INT64)
+    <Schema> int64
     """
     return Schema(Type.INT64, nullable=nullable)
 
@@ -643,7 +661,7 @@ def uint64(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.uint64()
-    Schema(UINT64)
+    <Schema> uint64
     """
     return Schema(Type.UINT64, nullable=nullable)
 
@@ -661,7 +679,7 @@ def float16(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.float16()
-    Schema(HALF_FLOAT)
+    <Schema> half_float
     """
     return Schema(Type.HALF_FLOAT, nullable=nullable)
 
@@ -679,7 +697,7 @@ def float32(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.float32()
-    Schema(FLOAT)
+    <Schema> float
     """
     return Schema(Type.FLOAT, nullable=nullable)
 
@@ -697,7 +715,7 @@ def float64(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.float64()
-    Schema(DOUBLE)
+    <Schema> double
     """
     return Schema(Type.DOUBLE, nullable=nullable)
 
@@ -715,7 +733,7 @@ def string(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.string()
-    Schema(STRING)
+    <Schema> string
     """
     return Schema(Type.STRING, nullable=nullable)
 
@@ -734,7 +752,7 @@ def large_string(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.large_string()
-    Schema(LARGE_STRING)
+    <Schema> large_string
     """
     return Schema(Type.LARGE_STRING, nullable=nullable)
 
@@ -752,7 +770,7 @@ def binary(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.binary()
-    Schema(BINARY)
+    <Schema> binary
     """
     return Schema(Type.BINARY, nullable=nullable)
 
@@ -770,7 +788,7 @@ def large_binary(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.large_binary()
-    Schema(LARGE_BINARY)
+    <Schema> large_binary
     """
     return Schema(Type.LARGE_BINARY, nullable=nullable)
 
@@ -790,7 +808,7 @@ def fixed_size_binary(byte_width: int, nullable: bool = 
True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.fixed_size_binary(123)
-    Schema(FIXED_SIZE_BINARY, byte_width=123)
+    <Schema> fixed_size_binary(123)
     """
     return Schema(Type.FIXED_SIZE_BINARY, byte_width=byte_width, 
nullable=nullable)
 
@@ -808,7 +826,7 @@ def date32(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.date32()
-    Schema(DATE32)
+    <Schema> date32
     """
     return Schema(Type.DATE32, nullable=nullable)
 
@@ -826,7 +844,7 @@ def date64(nullable: bool = True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.date64()
-    Schema(DATE64)
+    <Schema> date64
     """
     return Schema(Type.DATE64, nullable=nullable)
 
@@ -846,7 +864,7 @@ def time32(unit: Union[str, TimeUnit], nullable: bool = 
True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.time32("s")
-    Schema(TIME32, unit=SECOND)
+    <Schema> time32('s')
     """
     return Schema(Type.TIME32, unit=unit, nullable=nullable)
 
@@ -866,7 +884,7 @@ def time64(unit: Union[str, TimeUnit], nullable: bool = 
True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.time64("us")
-    Schema(TIME64, unit=MICRO)
+    <Schema> time64('us')
     """
     return Schema(Type.TIME64, unit=unit, nullable=nullable)
 
@@ -888,9 +906,9 @@ def timestamp(
 
     >>> import nanoarrow as na
     >>> na.timestamp("s")
-    Schema(TIMESTAMP, unit=SECOND)
+    <Schema> timestamp('s', '')
     >>> na.timestamp("s", timezone="America/Halifax")
-    Schema(TIMESTAMP, unit=SECOND, timezone='America/Halifax')
+    <Schema> timestamp('s', 'America/Halifax')
     """
     return Schema(Type.TIMESTAMP, timezone=timezone, unit=unit, 
nullable=nullable)
 
@@ -910,7 +928,7 @@ def duration(unit, nullable: bool = True):
 
     >>> import nanoarrow as na
     >>> na.duration("s")
-    Schema(DURATION, unit=SECOND)
+    <Schema> duration('s')
     """
     return Schema(Type.DURATION, unit=unit, nullable=nullable)
 
@@ -928,7 +946,7 @@ def interval_months(nullable: bool = True):
 
     >>> import nanoarrow as na
     >>> na.interval_months()
-    Schema(INTERVAL_MONTHS)
+    <Schema> interval_months
     """
     return Schema(Type.INTERVAL_MONTHS, nullable=nullable)
 
@@ -946,7 +964,7 @@ def interval_day_time(nullable: bool = True):
 
     >>> import nanoarrow as na
     >>> na.interval_day_time()
-    Schema(INTERVAL_DAY_TIME)
+    <Schema> interval_day_time
     """
     return Schema(Type.INTERVAL_DAY_TIME, nullable=nullable)
 
@@ -965,7 +983,7 @@ def interval_month_day_nano(nullable: bool = True):
 
     >>> import nanoarrow as na
     >>> na.interval_month_day_nano()
-    Schema(INTERVAL_MONTH_DAY_NANO)
+    <Schema> interval_month_day_nano
     """
     return Schema(Type.INTERVAL_MONTH_DAY_NANO, nullable=nullable)
 
@@ -988,7 +1006,7 @@ def decimal128(precision: int, scale: int, nullable: bool 
= True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.decimal128(10, 3)
-    Schema(DECIMAL128, precision=10, scale=3)
+    <Schema> decimal128(10, 3)
     """
     return Schema(Type.DECIMAL128, precision=precision, scale=scale, 
nullable=nullable)
 
@@ -1011,7 +1029,7 @@ def decimal256(precision: int, scale: int, nullable: bool 
= True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.decimal256(10, 3)
-    Schema(DECIMAL256, precision=10, scale=3)
+    <Schema> decimal256(10, 3)
     """
     return Schema(Type.DECIMAL256, precision=precision, scale=scale, 
nullable=nullable)
 
@@ -1033,9 +1051,9 @@ def struct(fields, nullable=True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.struct([na.int32()])
-    Schema(STRUCT, fields=[Schema(INT32)])
+    <Schema> struct<: int32>
     >>> na.struct({"col1": na.int32()})
-    Schema(STRUCT, fields=[Schema(INT32, name='col1')])
+    <Schema> struct<col1: int32>
     """
     return Schema(Type.STRUCT, fields=fields, nullable=nullable)
 
@@ -1055,7 +1073,7 @@ def list_(value_type, nullable=True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.list_(na.int32())
-    Schema(LIST, value_type=Schema(INT32, name='item'))
+    <Schema> list<item: int32>
     """
     return Schema(Type.LIST, value_type=value_type, nullable=nullable)
 
@@ -1078,7 +1096,7 @@ def large_list(value_type, nullable=True) -> Schema:
 
     >>> import nanoarrow as na
     >>> na.large_list(na.int32())
-    Schema(LARGE_LIST, value_type=Schema(INT32, name='item'))
+    <Schema> large_list<item: int32>
     """
     return Schema(Type.LARGE_LIST, value_type=value_type, nullable=nullable)
 
@@ -1100,7 +1118,7 @@ def fixed_size_list(value_type, list_size, nullable=True) 
-> Schema:
 
     >>> import nanoarrow as na
     >>> na.fixed_size_list(na.int32(), 123)
-    Schema(FIXED_SIZE_LIST, value_type=Schema(INT32, name='item'), 
list_size=123)
+    <Schema> fixed_size_list(123)<item: int32>
     """
     return Schema(
         Type.FIXED_SIZE_LIST,
@@ -1130,8 +1148,7 @@ def dictionary(index_type, value_type, 
dictionary_ordered=False):
 
     >>> import nanoarrow as na
     >>> na.dictionary(na.int32(), na.string())
-    Schema(DICTIONARY, index_type=Schema(INT32), value_type=Schema(STRING), \
-dictionary_ordered=False)
+    <Schema> dictionary(int32)<string>
     """
     return Schema(
         Type.DICTIONARY,
@@ -1239,45 +1256,40 @@ def _clean_fields(fields):
         return [c_schema(v) for v in fields]
 
 
-def _schema_repr(obj):
-    out = f"Schema({_schema_param_repr('type', obj.type)}"
+def _schema_repr(obj, max_char_width=80, prefix="<Schema> ", 
include_metadata=True):
+    lines = []
 
-    if obj.name is None:
-        out += ", name=False"
-    elif obj.name:
-        out += f", name={_schema_param_repr('name', obj.name)}"
+    modifiers = []
 
-    if obj._c_schema_view.type_id not in _PARAM_NAMES:
-        param_names = []
-    else:
-        param_names = _PARAM_NAMES[obj._c_schema_view.type_id]
-
-    for name in param_names:
-        value = getattr(obj, name)
-        if value is None:
-            continue
-        out += ", "
-        param_repr = f"{name}={_schema_param_repr(name, getattr(obj, name))}"
-        out += param_repr
+    if obj.name:
+        name = reprlib.Repr().repr(obj.name)
+        modifiers.append(f"{name}:")
 
     if not obj.nullable:
-        out += ", nullable=False"
+        modifiers.append("non-nullable")
 
-    out += ")"
-    return out
+    if obj.dictionary_ordered:
+        modifiers.append("ordered")
 
+    # Ensure extra space at the end of the modifiers
+    modifiers.append("")
 
-def _schema_param_repr(name, value):
-    if name == "type":
-        return f"{value.name}"
-    elif name == "unit":
-        return f"{value.name}"
-    elif name == "fields":
-        # It would be nice to indent this/get it on multiple lines since
-        # most output will be uncomfortably wide even with the abbreviated repr
-        return reprlib.Repr().repr(list(value))
-    else:
-        return reprlib.Repr().repr(value)
+    modifiers_str = " ".join(modifiers)
+    first_line_prefix = f"{prefix}{modifiers_str}"
+
+    schema_str = _repr_utils.c_schema_to_string(
+        obj._c_schema, max_char_width - len(first_line_prefix)
+    )
+    lines.append(f"{first_line_prefix}{schema_str}")
+
+    if include_metadata:
+        metadata_dict = dict(obj.metadata.items())
+        if metadata_dict:
+            metadata_dict_repr = reprlib.Repr().repr(metadata_dict)
+            metadata_line = f"- metadata: 
{metadata_dict_repr[:max_char_width]}"
+            lines.append(metadata_line[:max_char_width])
+
+    return "\n".join(lines)
 
 
 _PARAM_NAMES = {
diff --git a/python/tests/test_c_schema.py b/python/tests/test_c_schema.py
index f70f49ab..a9144051 100644
--- a/python/tests/test_c_schema.py
+++ b/python/tests/test_c_schema.py
@@ -25,7 +25,7 @@ def test_c_schema_basic():
     schema = allocate_c_schema()
     assert schema.is_valid() is False
     assert schema._to_string() == "[invalid: schema is released]"
-    assert repr(schema) == "<nanoarrow.c_lib.CSchema <released>>"
+    assert repr(schema) == "<nanoarrow.c_schema.CSchema <released>>"
 
     schema = na.c_schema(na.struct({"some_name": na.int32()}))
 
@@ -37,7 +37,7 @@ def test_c_schema_basic():
     assert schema.child(0).format == "i"
     assert schema.child(0).name == "some_name"
     assert schema.child(0)._to_string() == "int32"
-    assert "<nanoarrow.c_lib.CSchema int32>" in repr(schema)
+    assert "<nanoarrow.c_schema.CSchema int32>" in repr(schema)
     assert schema.dictionary is None
 
     with pytest.raises(IndexError):
@@ -50,7 +50,7 @@ def test_c_schema_dictionary():
     schema = na.c_schema(pa.dictionary(pa.int32(), pa.utf8()))
     assert schema.format == "i"
     assert schema.dictionary.format == "u"
-    assert "dictionary: <nanoarrow.c_lib.CSchema string" in repr(schema)
+    assert "dictionary: <nanoarrow.c_schema.CSchema string" in repr(schema)
 
 
 def test_schema_metadata():
@@ -117,11 +117,18 @@ def test_c_schema_view_extra_params():
 
 def test_c_schema_metadata():
     meta = {
-        "ARROW:extension:name": "some_name",
-        "ARROW:extension:metadata": "some_metadata",
+        b"ARROW:extension:name": b"some_name",
+        b"ARROW:extension:metadata": b"some_metadata",
     }
 
     schema = na.c_schema(na.int32()).modify(metadata=meta)
+    assert "b'some_name'" in repr(schema)
+    assert "b'some_name'" in repr(schema.metadata)
+    assert list(schema.metadata) == list(meta)
+    assert list(schema.metadata.items()) == list(meta.items())
+    assert list(schema.metadata.keys()) == list(meta.keys())
+    assert list(schema.metadata.values()) == list(meta.values())
+
     view = c_schema_view(schema)
     assert view.extension_name == "some_name"
     assert view.extension_metadata == b"some_metadata"
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index eccd2378..2fb805bd 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -82,7 +82,7 @@ def test_c_array_empty():
 
     array = allocate_c_array()
     assert array.is_valid() is False
-    assert repr(array) == "<nanoarrow.c_lib.CArray <released>>"
+    assert repr(array) == "<nanoarrow.c_array.CArray <released>>"
 
 
 def test_c_array():
@@ -98,7 +98,7 @@ def test_c_array():
     assert array.n_children == 0
     assert len(list(array.children)) == 0
     assert array.dictionary is None
-    assert "<nanoarrow.c_lib.CArray int32" in repr(array)
+    assert "<nanoarrow.c_array.CArray int32" in repr(array)
 
 
 def test_c_array_recursive():
@@ -107,7 +107,7 @@ def test_c_array_recursive():
     assert len(list(array.children)) == 1
     assert array.child(0).length == 3
     assert array.child(0).schema._to_string() == "int32"
-    assert "'col': <nanoarrow.c_lib.CArray int32" in repr(array)
+    assert "'col': <nanoarrow.c_array.CArray int32" in repr(array)
 
     with pytest.raises(IndexError):
         array.child(-1)
@@ -117,7 +117,7 @@ def test_c_array_dictionary():
     array = na.c_array(pa.array(["a", "b", "b"]).dictionary_encode())
     assert array.length == 3
     assert array.dictionary.length == 2
-    assert "dictionary: <nanoarrow.c_lib.CArray string>" in repr(array)
+    assert "dictionary: <nanoarrow.c_array.CArray string>" in repr(array)
 
 
 def test_c_array_view():
@@ -180,7 +180,7 @@ def test_c_array_view_dictionary():
     view = array.view()
     assert view.n_buffers == 2
     assert view.dictionary.n_buffers == 3
-    assert "- dictionary: <nanoarrow.c_lib.CArrayView>" in repr(view)
+    assert "- dictionary: <nanoarrow.c_array.CArrayView>" in repr(view)
 
 
 def test_buffers_integer():
@@ -381,7 +381,7 @@ def test_c_array_stream():
 
     array_stream = allocate_c_array_stream()
     assert na.c_array_stream(array_stream) is array_stream
-    assert repr(array_stream) == "<nanoarrow.c_lib.CArrayStream <released>>"
+    assert repr(array_stream) == "<nanoarrow.c_array_stream.CArrayStream <released>>"
 
     assert array_stream.is_valid() is False
     with pytest.raises(RuntimeError):
diff --git a/python/tests/test_schema.py b/python/tests/test_schema.py
index 04aba78d..86360369 100644
--- a/python/tests/test_schema.py
+++ b/python/tests/test_schema.py
@@ -63,19 +63,18 @@ def test_schema_create_no_params():
     schema_obj = na.int32()
     assert schema_obj.type == na.Type.INT32
     assert schema_obj.nullable is True
-    assert repr(schema_obj) == "Schema(INT32)"
+    assert repr(schema_obj) == "<Schema> int32"
 
     schema_obj = na.int32(nullable=False)
     assert schema_obj.nullable is False
-    assert "nullable=False" in repr(schema_obj)
+    assert "non-nullable" in repr(schema_obj)
 
     schema_obj = na.Schema(na.Type.INT32, name=False)
     assert schema_obj.name is None
-    assert "name=False" in repr(schema_obj)
 
     schema_obj = na.Schema(na.Type.INT32, name="not empty")
     assert schema_obj.name == "not empty"
-    assert "name='not empty'" in repr(schema_obj)
+    assert "'not empty': " in repr(schema_obj)
 
     msg = "params are only supported for obj of class Type"
     with pytest.raises(ValueError, match=msg):
@@ -114,19 +113,16 @@ def test_schema_fixed_size_binary():
     schema_obj = na.fixed_size_binary(byte_width=123)
     assert schema_obj.type == na.Type.FIXED_SIZE_BINARY
     assert schema_obj.byte_width == 123
-    assert "byte_width=123" in repr(schema_obj)
 
 
 def test_schema_time():
     schema_obj = na.time32(na.TimeUnit.SECOND)
     assert schema_obj.type == na.Type.TIME32
     assert schema_obj.unit == na.TimeUnit.SECOND
-    assert "unit=SECOND" in repr(schema_obj)
 
     schema_obj = na.time64(na.TimeUnit.MICRO)
     assert schema_obj.type == na.Type.TIME64
     assert schema_obj.unit == na.TimeUnit.MICRO
-    assert "unit=MICRO" in repr(schema_obj)
 
 
 def test_schema_timestamp():
@@ -137,14 +133,12 @@ def test_schema_timestamp():
 
     schema_obj = na.timestamp(na.TimeUnit.SECOND, timezone="America/Halifax")
     assert schema_obj.timezone == "America/Halifax"
-    assert "timezone='America/Halifax'" in repr(schema_obj)
 
 
 def test_schema_duration():
     schema_obj = na.duration(na.TimeUnit.SECOND)
     assert schema_obj.type == na.Type.DURATION
     assert schema_obj.unit == na.TimeUnit.SECOND
-    assert "unit=SECOND" in repr(schema_obj)
 
 
 def test_schema_decimal():
@@ -152,15 +146,11 @@ def test_schema_decimal():
     assert schema_obj.type == na.Type.DECIMAL128
     assert schema_obj.precision == 10
     assert schema_obj.scale == 3
-    assert "precision=10" in repr(schema_obj)
-    assert "scale=3" in repr(schema_obj)
 
     schema_obj = na.decimal256(10, 3)
     assert schema_obj.type == na.Type.DECIMAL256
     assert schema_obj.precision == 10
     assert schema_obj.scale == 3
-    assert "precision=10" in repr(schema_obj)
-    assert "scale=3" in repr(schema_obj)
 
 
 def test_schema_struct():
@@ -173,8 +163,6 @@ def test_schema_struct():
     for field in schema_obj.fields:
         assert isinstance(field, na.Schema)
 
-    assert "fields=[Schema(INT32)]" in repr(schema_obj)
-
     # Make sure we can use a dictionary to specify fields
     schema_obj = na.struct({"col_name": na.Type.INT32})
     assert schema_obj.type == na.Type.STRUCT

(arrow-nanoarrow) branch main updated: feat(python): Unify printing of type information across classes (#458)

Reply via email to