This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new f47e8309 feat(python): Unify printing of type information across
classes (#458)
f47e8309 is described below
commit f47e83095d3ba18daecaae0d296f66fc5993141d
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri May 10 14:04:25 2024 -0300
feat(python): Unify printing of type information across classes (#458)
This PR unifies how types are printed, which previously differed among the
`Array`, `Scalar`, `ArrayStream`, and `Schema` classes. These now all use the
same function, so type information is communicated in a consistent
way across all user-facing classes.
The general approach is to print all possible information on the first
line. Most types have descriptions like `int32`, so this is a good fit
and there's no need for multiple lines. This makes a list of schemas
(e.g., `schema.fields`) have a nice repr, too. The Schema repr
additionally prints out metadata if present (this is skipped by the
other classes).
Some examples:
```python
import nanoarrow as na
# Schema with no parameters
na.int32()
#> <Schema> int32
# Type parameters are already part of the schema's to_string
na.fixed_size_binary(123)
#> <Schema> fixed_size_binary(123)
# non-nullable schema
na.int32(nullable=False)
#> <Schema> non-nullable int32
# named schema
na.Schema(na.int32(), name="some_col")
#> <Schema> 'some_col': int32
# ordered dictionary
na.dictionary(na.int32(), na.string(), True)
#> <Schema> ordered dictionary(int32)<string>
# Schema with metadata
na.Schema(na.int32(), metadata={"some key": "some value"})
#> <Schema> int32
#> - metadata: {b'some key': b'some value'}
# fields are already a part of the schema's to_string
struct = na.struct({"some col": na.int32(), "some other col": na.string()})
struct
#> <Schema> struct<some col: int32, some other col: string>
# ...and you can inspect them more closely with .fields
struct.fields
#> [<Schema> 'some col': int32, <Schema> 'some other col': string]
```
---
python/src/nanoarrow/_lib.pyx | 13 +++
python/src/nanoarrow/_repr_utils.py | 23 +++--
python/src/nanoarrow/array.py | 28 +++---
python/src/nanoarrow/array_stream.py | 15 +--
python/src/nanoarrow/c_array_stream.py | 4 +-
python/src/nanoarrow/ipc.py | 8 +-
python/src/nanoarrow/iterator.py | 2 +-
python/src/nanoarrow/schema.py | 174 ++++++++++++++++++---------------
python/tests/test_c_schema.py | 17 +++-
python/tests/test_nanoarrow.py | 12 +--
python/tests/test_schema.py | 18 +---
11 files changed, 175 insertions(+), 139 deletions(-)
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index b99a6505..451bd205 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -1782,6 +1782,12 @@ cdef class SchemaMetadata:
for key, _ in self.items():
yield key
+ def keys(self):
+ return list(self)
+
+ def values(self):
+ return [value for _, value in self.items()]
+
def items(self):
cdef ArrowStringView key
cdef ArrowStringView value
@@ -1792,6 +1798,13 @@ cdef class SchemaMetadata:
value_obj = PyBytes_FromStringAndSize(value.data, value.size_bytes)
yield key_obj, value_obj
+ def __repr__(self):
+ lines = [
+ f"<{_repr_utils.make_class_label(self)}>",
+ _repr_utils.metadata_repr(self)
+ ]
+ return "\n".join(lines)
+
cdef class CBufferView:
"""Wrapper for Array buffer content
diff --git a/python/src/nanoarrow/_repr_utils.py b/python/src/nanoarrow/_repr_utils.py
index 2c3ce0da..7658ae79 100644
--- a/python/src/nanoarrow/_repr_utils.py
+++ b/python/src/nanoarrow/_repr_utils.py
@@ -35,9 +35,19 @@ def c_schema_to_string(obj, max_char_width=80):
return c_schema_string
+def metadata_repr(obj, indent=0, max_char_width=80):
+ indent_str = " " * indent
+ lines = []
+ for key, value in obj.items():
+ line = f"{indent_str}- {repr(key)}: {repr(value)}"
+ lines.append(line[:max_char_width])
+
+ return "\n".join(lines)
+
+
def schema_repr(schema, indent=0):
indent_str = " " * indent
- class_label = make_class_label(schema, module="nanoarrow.c_lib")
+ class_label = make_class_label(schema, module="nanoarrow.c_schema")
if schema._addr() == 0:
return f"<{class_label} <NULL>>"
elif not schema.is_valid():
@@ -54,8 +64,7 @@ def schema_repr(schema, indent=0):
lines.append(f"{indent_str}- metadata: NULL")
else:
lines.append(f"{indent_str}- metadata:")
- for key, value in metadata.items():
- lines.append(f"{indent_str} - {repr(key)}: {repr(value)}")
+ lines.append(metadata_repr(metadata, indent + 2))
if schema.dictionary:
dictionary_repr = schema_repr(schema.dictionary, indent=indent + 2)
@@ -76,7 +85,7 @@ def array_repr(array, indent=0, max_char_width=80):
max_char_width = 20
indent_str = " " * indent
- class_label = make_class_label(array, module="nanoarrow.c_lib")
+ class_label = make_class_label(array, module="nanoarrow.c_array")
if array._addr() == 0:
return f"<{class_label} <NULL>>"
elif not array.is_valid():
@@ -105,7 +114,7 @@ def array_repr(array, indent=0, max_char_width=80):
def schema_view_repr(schema_view):
- class_label = make_class_label(schema_view, module="nanoarrow.c_lib")
+ class_label = make_class_label(schema_view, module="nanoarrow.c_schema")
lines = [
f"<{class_label}>",
@@ -128,7 +137,7 @@ def schema_view_repr(schema_view):
def array_view_repr(array_view, max_char_width=80, indent=0):
indent_str = " " * indent
- class_label = make_class_label(array_view, module="nanoarrow.c_lib")
+ class_label = make_class_label(array_view, module="nanoarrow.c_array")
lines = [f"<{class_label}>"]
@@ -210,7 +219,7 @@ def buffer_view_preview_cpu(buffer_view, max_char_width):
def array_stream_repr(array_stream, max_char_width=80):
- class_label = make_class_label(array_stream, module="nanoarrow.c_lib")
+ class_label = make_class_label(array_stream, module="nanoarrow.c_array_stream")
if array_stream._addr() == 0:
return f"<{class_label} <NULL>>"
diff --git a/python/src/nanoarrow/array.py b/python/src/nanoarrow/array.py
index eabe8f2a..43ff0dc2 100644
--- a/python/src/nanoarrow/array.py
+++ b/python/src/nanoarrow/array.py
@@ -31,7 +31,7 @@ from nanoarrow.c_array import c_array, c_array_view
from nanoarrow.c_array_stream import c_array_stream
from nanoarrow.c_schema import c_schema
from nanoarrow.iterator import iter_array_views, iter_py, iter_tuples
-from nanoarrow.schema import Schema
+from nanoarrow.schema import Schema, _schema_repr
from nanoarrow import _repr_utils
@@ -58,7 +58,7 @@ class Scalar:
>>> array[0].as_py()
1
>>> array[0].schema
- Schema(INT32)
+ <Schema> int32
"""
def __init__(self):
@@ -82,11 +82,14 @@ class Scalar:
return next(iter_py(self))
def to_string(self, width_hint=80) -> str:
- c_schema_string = _repr_utils.c_schema_to_string(
- self._c_array.schema, width_hint // 4
+ schema_repr = _schema_repr(
+ self.schema,
+ max_char_width=width_hint // 4,
+ prefix="",
+ include_metadata=False,
)
- prefix = f"Scalar<{c_schema_string}> "
+ prefix = f"Scalar<{schema_repr}> "
width_hint -= len(prefix)
py_repr = repr(self.as_py())
@@ -379,7 +382,7 @@ class Array:
... )
>>> array = na.Array(batch)
>>> array.child(1)
- nanoarrow.Array<string>[3]
+ nanoarrow.Array<'col2': string>[3]
'a'
'b'
'c'
@@ -401,11 +404,11 @@ class Array:
>>> array = na.Array(batch)
>>> for child in array.iter_children():
... print(child)
- nanoarrow.Array<int64>[3]
+ nanoarrow.Array<'col1': int64>[3]
1
2
3
- nanoarrow.Array<string>[3]
+ nanoarrow.Array<'col2': string>[3]
'a'
'b'
'c'
@@ -545,11 +548,14 @@ class Array:
def to_string(self, width_hint=80, items_hint=10) -> str:
cls_name = _repr_utils.make_class_label(self, module="nanoarrow")
len_text = f"[{len(self)}]"
- c_schema_string = _repr_utils.c_schema_to_string(
- self._data.schema, width_hint - len(cls_name) - len(len_text) - 2
+ schema_repr = _schema_repr(
+ self.schema,
+ max_char_width=width_hint - len(cls_name) - len(len_text) - 2,
+ prefix="",
+ include_metadata=False,
)
- lines = [f"{cls_name}<{c_schema_string}>{len_text}"]
+ lines = [f"{cls_name}<{schema_repr}>{len_text}"]
for i, item in enumerate(self.iter_py()):
if i >= items_hint:
diff --git a/python/src/nanoarrow/array_stream.py b/python/src/nanoarrow/array_stream.py
index 6fa1e0f2..deaaece2 100644
--- a/python/src/nanoarrow/array_stream.py
+++ b/python/src/nanoarrow/array_stream.py
@@ -23,7 +23,7 @@ from nanoarrow._repr_utils import make_class_label
from nanoarrow.array import Array
from nanoarrow.c_array_stream import c_array_stream
from nanoarrow.iterator import iter_py, iter_tuples
-from nanoarrow.schema import Schema
+from nanoarrow.schema import Schema, _schema_repr
class ArrayStream:
@@ -52,7 +52,7 @@ class ArrayStream:
>>> import nanoarrow as na
>>> na.ArrayStream([1, 2, 3], na.int32())
- <nanoarrow.ArrayStream: Schema(INT32)>
+ nanoarrow.ArrayStream<int32>
"""
def __init__(self, obj, schema=None) -> None:
@@ -65,7 +65,7 @@ class ArrayStream:
>>> import nanoarrow as na
>>> stream = na.ArrayStream([1, 2, 3], na.int32())
>>> stream.schema
- Schema(INT32)
+ <Schema> int32
"""
return Schema(self._c_array_stream._get_cached_schema())
@@ -200,7 +200,8 @@ class ArrayStream:
def __repr__(self) -> str:
cls = make_class_label(self, "nanoarrow")
- return f"<{cls}: {self.schema}>"
+ schema_repr = _schema_repr(self.schema, prefix="", include_metadata=False)
+ return f"{cls}<{schema_repr}>"
@staticmethod
def from_readable(obj):
@@ -212,7 +213,7 @@ class ArrayStream:
>>> from nanoarrow.ipc import Stream
>>> with na.ArrayStream.from_readable(Stream.example_bytes()) as
stream:
... stream.read_all()
- nanoarrow.Array<struct<some_col: int32>>[3]
+ nanoarrow.Array<non-nullable struct<some_col: int32>>[3]
{'some_col': 1}
{'some_col': 2}
{'some_col': 3}
@@ -239,7 +240,7 @@ class ArrayStream:
...
... with na.ArrayStream.from_path(path) as stream:
... stream.read_all()
- nanoarrow.Array<struct<some_col: int32>>[3]
+ nanoarrow.Array<non-nullable struct<some_col: int32>>[3]
{'some_col': 1}
{'some_col': 2}
{'some_col': 3}
@@ -268,7 +269,7 @@ class ArrayStream:
... uri = pathlib.Path(path).as_uri()
... with na.ArrayStream.from_url(uri) as stream:
... stream.read_all()
- nanoarrow.Array<struct<some_col: int32>>[3]
+ nanoarrow.Array<non-nullable struct<some_col: int32>>[3]
{'some_col': 1}
{'some_col': 2}
{'some_col': 3}
diff --git a/python/src/nanoarrow/c_array_stream.py b/python/src/nanoarrow/c_array_stream.py
index 77eeaaf6..411f41fd 100644
--- a/python/src/nanoarrow/c_array_stream.py
+++ b/python/src/nanoarrow/c_array_stream.py
@@ -37,14 +37,14 @@ def c_array_stream(obj=None, schema=None) -> CArrayStream:
>>> pa_reader = pa.RecordBatchReader.from_batches(pa_batch.schema,
[pa_batch])
>>> array_stream = na.c_array_stream(pa_reader)
>>> array_stream.get_schema()
- <nanoarrow.c_lib.CSchema struct>
+ <nanoarrow.c_schema.CSchema struct>
- format: '+s'
- name: ''
- flags: 0
- metadata: NULL
- dictionary: NULL
- children[1]:
- 'col1': <nanoarrow.c_lib.CSchema int32>
+ 'col1': <nanoarrow.c_schema.CSchema int32>
- format: 'i'
- name: 'col1'
- flags: 2
diff --git a/python/src/nanoarrow/ipc.py b/python/src/nanoarrow/ipc.py
index 5125f771..794ecf36 100644
--- a/python/src/nanoarrow/ipc.py
+++ b/python/src/nanoarrow/ipc.py
@@ -43,7 +43,7 @@ class Stream:
>>> from nanoarrow.ipc import Stream
>>> with Stream.example() as inp, na.c_array_stream(inp) as stream:
... stream
- <nanoarrow.c_lib.CArrayStream>
+ <nanoarrow.c_array_stream.CArrayStream>
- get_schema(): struct<some_col: int32>
"""
@@ -96,7 +96,7 @@ class Stream:
>>> from nanoarrow.ipc import Stream
>>> ipc_stream = Stream.from_readable(Stream.example_bytes())
>>> na.c_array_stream(ipc_stream)
- <nanoarrow.c_lib.CArrayStream>
+ <nanoarrow.c_array_stream.CArrayStream>
- get_schema(): struct<some_col: int32>
"""
if not hasattr(obj, "readinto") and _obj_is_buffer(obj):
@@ -137,7 +137,7 @@ class Stream:
...
... with Stream.from_path(path) as inp, na.c_array_stream(inp) as
stream:
... stream
- <nanoarrow.c_lib.CArrayStream>
+ <nanoarrow.c_array_stream.CArrayStream>
- get_schema(): struct<some_col: int32>
"""
out = Stream()
@@ -176,7 +176,7 @@ class Stream:
... uri = pathlib.Path(path).as_uri()
... with Stream.from_url(uri) as inp, na.c_array_stream(inp) as
stream:
... stream
- <nanoarrow.c_lib.CArrayStream>
+ <nanoarrow.c_array_stream.CArrayStream>
- get_schema(): struct<some_col: int32>
"""
import urllib.request
diff --git a/python/src/nanoarrow/iterator.py b/python/src/nanoarrow/iterator.py
index 76f2a775..5f85724d 100644
--- a/python/src/nanoarrow/iterator.py
+++ b/python/src/nanoarrow/iterator.py
@@ -101,7 +101,7 @@ def iter_array_views(obj, schema=None) ->
Iterable[CArrayView]:
>>> from nanoarrow import iterator
>>> array = na.c_array([1, 2, 3], na.int32())
>>> list(iterator.iter_array_views(array))
- [<nanoarrow.c_lib.CArrayView>
+ [<nanoarrow.c_array.CArrayView>
- storage_type: 'int32'
- length: 3
- offset: 0
diff --git a/python/src/nanoarrow/schema.py b/python/src/nanoarrow/schema.py
index c4c34fe8..b7e1fe2f 100644
--- a/python/src/nanoarrow/schema.py
+++ b/python/src/nanoarrow/schema.py
@@ -18,7 +18,7 @@
import enum
import reprlib
from functools import cached_property
-from typing import Mapping, Union
+from typing import List, Mapping, Union
from nanoarrow._lib import (
CArrowTimeUnit,
@@ -29,6 +29,8 @@ from nanoarrow._lib import (
)
from nanoarrow.c_schema import c_schema
+from nanoarrow import _repr_utils
+
class Type(enum.Enum):
"""The Type enumerator provides a means by which the various type
@@ -179,11 +181,11 @@ class Schema:
>>> import nanoarrow as na
>>> import pyarrow as pa
>>> na.Schema(na.Type.INT32)
- Schema(INT32)
+ <Schema> int32
>>> na.Schema(na.Type.DURATION, unit=na.TimeUnit.SECOND)
- Schema(DURATION, unit=SECOND)
+ <Schema> duration('s')
>>> na.Schema(pa.int32())
- Schema(INT32)
+ <Schema> int32
"""
def __init__(
@@ -218,6 +220,23 @@ class Schema:
self._c_schema_view = CSchemaView(self._c_schema)
+ @property
+ def params(self) -> Mapping:
+ """Get parameter names and values for this type
+
+ Returns a dictionary of parameters that can be used to reconstruct
+ this type together with its type identifier.
+
+ >>> import nanoarrow as na
+ >>> na.fixed_size_binary(123).params
+ {'byte_width': 123}
+ """
+ if self._c_schema_view.type_id not in _PARAM_NAMES:
+ return {}
+
+ param_names = _PARAM_NAMES[self._c_schema_view.type_id]
+ return {k: getattr(self, k) for k in param_names}
+
@property
def type(self) -> Type:
"""Type enumerator value of this Schema
@@ -356,7 +375,7 @@ class Schema:
>>> import nanoarrow as na
>>> na.dictionary(na.int32(), na.string()).index_type
- Schema(INT32)
+ <Schema> int32
"""
if self._c_schema_view.type_id == CArrowType.DICTIONARY:
index_schema = self._c_schema.modify(
@@ -385,9 +404,9 @@ class Schema:
>>> import nanoarrow as na
>>> na.list_(na.int32()).value_type
- Schema(INT32, name='item')
+ <Schema> 'item': int32
>>> na.dictionary(na.int32(), na.string()).value_type
- Schema(STRING)
+ <Schema> string
"""
if self._c_schema_view.type_id in (
CArrowType.LIST,
@@ -425,13 +444,13 @@ class Schema:
return self._c_schema.n_children
- def field(self, i):
+ def field(self, i) -> "Schema":
"""Extract a child Schema
>>> import nanoarrow as na
>>> schema = na.struct({"col1": na.int32()})
>>> schema.field(0)
- Schema(INT32, name='col1')
+ <Schema> 'col1': int32
"""
# Returning a copy to reduce interdependence between Schema instances:
@@ -440,7 +459,7 @@ class Schema:
return Schema(self._c_schema.child(i).__deepcopy__())
@property
- def fields(self):
+ def fields(self) -> List["Schema"]:
"""Iterate over child Schemas
>>> import nanoarrow as na
@@ -450,8 +469,7 @@ class Schema:
...
col1
"""
- for i in range(self.n_fields):
- yield self.field(i)
+ return [self.field(i) for i in range(self.n_fields)]
def __repr__(self) -> str:
return _schema_repr(self)
@@ -481,7 +499,7 @@ def null(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.null()
- Schema(NULL)
+ <Schema> na
"""
return Schema(Type.NULL, nullable=nullable)
@@ -499,7 +517,7 @@ def bool_(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.bool_()
- Schema(BOOL)
+ <Schema> bool
"""
return Schema(Type.BOOL, nullable=nullable)
@@ -517,7 +535,7 @@ def int8(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.int8()
- Schema(INT8)
+ <Schema> int8
"""
return Schema(Type.INT8, nullable=nullable)
@@ -535,7 +553,7 @@ def uint8(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.uint8()
- Schema(UINT8)
+ <Schema> uint8
"""
return Schema(Type.UINT8, nullable=nullable)
@@ -553,7 +571,7 @@ def int16(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.int16()
- Schema(INT16)
+ <Schema> int16
"""
return Schema(Type.INT16, nullable=nullable)
@@ -571,7 +589,7 @@ def uint16(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.uint16()
- Schema(UINT16)
+ <Schema> uint16
"""
return Schema(Type.UINT16, nullable=nullable)
@@ -589,7 +607,7 @@ def int32(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.int32()
- Schema(INT32)
+ <Schema> int32
"""
return Schema(Type.INT32, nullable=nullable)
@@ -607,7 +625,7 @@ def uint32(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.uint32()
- Schema(UINT32)
+ <Schema> uint32
"""
return Schema(Type.UINT32, nullable=nullable)
@@ -625,7 +643,7 @@ def int64(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.int64()
- Schema(INT64)
+ <Schema> int64
"""
return Schema(Type.INT64, nullable=nullable)
@@ -643,7 +661,7 @@ def uint64(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.uint64()
- Schema(UINT64)
+ <Schema> uint64
"""
return Schema(Type.UINT64, nullable=nullable)
@@ -661,7 +679,7 @@ def float16(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.float16()
- Schema(HALF_FLOAT)
+ <Schema> half_float
"""
return Schema(Type.HALF_FLOAT, nullable=nullable)
@@ -679,7 +697,7 @@ def float32(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.float32()
- Schema(FLOAT)
+ <Schema> float
"""
return Schema(Type.FLOAT, nullable=nullable)
@@ -697,7 +715,7 @@ def float64(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.float64()
- Schema(DOUBLE)
+ <Schema> double
"""
return Schema(Type.DOUBLE, nullable=nullable)
@@ -715,7 +733,7 @@ def string(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.string()
- Schema(STRING)
+ <Schema> string
"""
return Schema(Type.STRING, nullable=nullable)
@@ -734,7 +752,7 @@ def large_string(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.large_string()
- Schema(LARGE_STRING)
+ <Schema> large_string
"""
return Schema(Type.LARGE_STRING, nullable=nullable)
@@ -752,7 +770,7 @@ def binary(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.binary()
- Schema(BINARY)
+ <Schema> binary
"""
return Schema(Type.BINARY, nullable=nullable)
@@ -770,7 +788,7 @@ def large_binary(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.large_binary()
- Schema(LARGE_BINARY)
+ <Schema> large_binary
"""
return Schema(Type.LARGE_BINARY, nullable=nullable)
@@ -790,7 +808,7 @@ def fixed_size_binary(byte_width: int, nullable: bool =
True) -> Schema:
>>> import nanoarrow as na
>>> na.fixed_size_binary(123)
- Schema(FIXED_SIZE_BINARY, byte_width=123)
+ <Schema> fixed_size_binary(123)
"""
return Schema(Type.FIXED_SIZE_BINARY, byte_width=byte_width,
nullable=nullable)
@@ -808,7 +826,7 @@ def date32(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.date32()
- Schema(DATE32)
+ <Schema> date32
"""
return Schema(Type.DATE32, nullable=nullable)
@@ -826,7 +844,7 @@ def date64(nullable: bool = True) -> Schema:
>>> import nanoarrow as na
>>> na.date64()
- Schema(DATE64)
+ <Schema> date64
"""
return Schema(Type.DATE64, nullable=nullable)
@@ -846,7 +864,7 @@ def time32(unit: Union[str, TimeUnit], nullable: bool =
True) -> Schema:
>>> import nanoarrow as na
>>> na.time32("s")
- Schema(TIME32, unit=SECOND)
+ <Schema> time32('s')
"""
return Schema(Type.TIME32, unit=unit, nullable=nullable)
@@ -866,7 +884,7 @@ def time64(unit: Union[str, TimeUnit], nullable: bool =
True) -> Schema:
>>> import nanoarrow as na
>>> na.time64("us")
- Schema(TIME64, unit=MICRO)
+ <Schema> time64('us')
"""
return Schema(Type.TIME64, unit=unit, nullable=nullable)
@@ -888,9 +906,9 @@ def timestamp(
>>> import nanoarrow as na
>>> na.timestamp("s")
- Schema(TIMESTAMP, unit=SECOND)
+ <Schema> timestamp('s', '')
>>> na.timestamp("s", timezone="America/Halifax")
- Schema(TIMESTAMP, unit=SECOND, timezone='America/Halifax')
+ <Schema> timestamp('s', 'America/Halifax')
"""
return Schema(Type.TIMESTAMP, timezone=timezone, unit=unit,
nullable=nullable)
@@ -910,7 +928,7 @@ def duration(unit, nullable: bool = True):
>>> import nanoarrow as na
>>> na.duration("s")
- Schema(DURATION, unit=SECOND)
+ <Schema> duration('s')
"""
return Schema(Type.DURATION, unit=unit, nullable=nullable)
@@ -928,7 +946,7 @@ def interval_months(nullable: bool = True):
>>> import nanoarrow as na
>>> na.interval_months()
- Schema(INTERVAL_MONTHS)
+ <Schema> interval_months
"""
return Schema(Type.INTERVAL_MONTHS, nullable=nullable)
@@ -946,7 +964,7 @@ def interval_day_time(nullable: bool = True):
>>> import nanoarrow as na
>>> na.interval_day_time()
- Schema(INTERVAL_DAY_TIME)
+ <Schema> interval_day_time
"""
return Schema(Type.INTERVAL_DAY_TIME, nullable=nullable)
@@ -965,7 +983,7 @@ def interval_month_day_nano(nullable: bool = True):
>>> import nanoarrow as na
>>> na.interval_month_day_nano()
- Schema(INTERVAL_MONTH_DAY_NANO)
+ <Schema> interval_month_day_nano
"""
return Schema(Type.INTERVAL_MONTH_DAY_NANO, nullable=nullable)
@@ -988,7 +1006,7 @@ def decimal128(precision: int, scale: int, nullable: bool
= True) -> Schema:
>>> import nanoarrow as na
>>> na.decimal128(10, 3)
- Schema(DECIMAL128, precision=10, scale=3)
+ <Schema> decimal128(10, 3)
"""
return Schema(Type.DECIMAL128, precision=precision, scale=scale,
nullable=nullable)
@@ -1011,7 +1029,7 @@ def decimal256(precision: int, scale: int, nullable: bool
= True) -> Schema:
>>> import nanoarrow as na
>>> na.decimal256(10, 3)
- Schema(DECIMAL256, precision=10, scale=3)
+ <Schema> decimal256(10, 3)
"""
return Schema(Type.DECIMAL256, precision=precision, scale=scale,
nullable=nullable)
@@ -1033,9 +1051,9 @@ def struct(fields, nullable=True) -> Schema:
>>> import nanoarrow as na
>>> na.struct([na.int32()])
- Schema(STRUCT, fields=[Schema(INT32)])
+ <Schema> struct<: int32>
>>> na.struct({"col1": na.int32()})
- Schema(STRUCT, fields=[Schema(INT32, name='col1')])
+ <Schema> struct<col1: int32>
"""
return Schema(Type.STRUCT, fields=fields, nullable=nullable)
@@ -1055,7 +1073,7 @@ def list_(value_type, nullable=True) -> Schema:
>>> import nanoarrow as na
>>> na.list_(na.int32())
- Schema(LIST, value_type=Schema(INT32, name='item'))
+ <Schema> list<item: int32>
"""
return Schema(Type.LIST, value_type=value_type, nullable=nullable)
@@ -1078,7 +1096,7 @@ def large_list(value_type, nullable=True) -> Schema:
>>> import nanoarrow as na
>>> na.large_list(na.int32())
- Schema(LARGE_LIST, value_type=Schema(INT32, name='item'))
+ <Schema> large_list<item: int32>
"""
return Schema(Type.LARGE_LIST, value_type=value_type, nullable=nullable)
@@ -1100,7 +1118,7 @@ def fixed_size_list(value_type, list_size, nullable=True)
-> Schema:
>>> import nanoarrow as na
>>> na.fixed_size_list(na.int32(), 123)
- Schema(FIXED_SIZE_LIST, value_type=Schema(INT32, name='item'),
list_size=123)
+ <Schema> fixed_size_list(123)<item: int32>
"""
return Schema(
Type.FIXED_SIZE_LIST,
@@ -1130,8 +1148,7 @@ def dictionary(index_type, value_type,
dictionary_ordered=False):
>>> import nanoarrow as na
>>> na.dictionary(na.int32(), na.string())
- Schema(DICTIONARY, index_type=Schema(INT32), value_type=Schema(STRING), \
-dictionary_ordered=False)
+ <Schema> dictionary(int32)<string>
"""
return Schema(
Type.DICTIONARY,
@@ -1239,45 +1256,40 @@ def _clean_fields(fields):
return [c_schema(v) for v in fields]
-def _schema_repr(obj):
- out = f"Schema({_schema_param_repr('type', obj.type)}"
+def _schema_repr(obj, max_char_width=80, prefix="<Schema> ", include_metadata=True):
+ lines = []
- if obj.name is None:
- out += ", name=False"
- elif obj.name:
- out += f", name={_schema_param_repr('name', obj.name)}"
+ modifiers = []
- if obj._c_schema_view.type_id not in _PARAM_NAMES:
- param_names = []
- else:
- param_names = _PARAM_NAMES[obj._c_schema_view.type_id]
-
- for name in param_names:
- value = getattr(obj, name)
- if value is None:
- continue
- out += ", "
- param_repr = f"{name}={_schema_param_repr(name, getattr(obj, name))}"
- out += param_repr
+ if obj.name:
+ name = reprlib.Repr().repr(obj.name)
+ modifiers.append(f"{name}:")
if not obj.nullable:
- out += ", nullable=False"
+ modifiers.append("non-nullable")
- out += ")"
- return out
+ if obj.dictionary_ordered:
+ modifiers.append("ordered")
+ # Ensure extra space at the end of the modifiers
+ modifiers.append("")
-def _schema_param_repr(name, value):
- if name == "type":
- return f"{value.name}"
- elif name == "unit":
- return f"{value.name}"
- elif name == "fields":
- # It would be nice to indent this/get it on multiple lines since
- # most output will be uncomfortably wide even with the abbreviated repr
- return reprlib.Repr().repr(list(value))
- else:
- return reprlib.Repr().repr(value)
+ modifiers_str = " ".join(modifiers)
+ first_line_prefix = f"{prefix}{modifiers_str}"
+
+ schema_str = _repr_utils.c_schema_to_string(
+ obj._c_schema, max_char_width - len(first_line_prefix)
+ )
+ lines.append(f"{first_line_prefix}{schema_str}")
+
+ if include_metadata:
+ metadata_dict = dict(obj.metadata.items())
+ if metadata_dict:
+ metadata_dict_repr = reprlib.Repr().repr(metadata_dict)
+ metadata_line = f"- metadata: {metadata_dict_repr[:max_char_width]}"
+ lines.append(metadata_line[:max_char_width])
+
+ return "\n".join(lines)
_PARAM_NAMES = {
diff --git a/python/tests/test_c_schema.py b/python/tests/test_c_schema.py
index f70f49ab..a9144051 100644
--- a/python/tests/test_c_schema.py
+++ b/python/tests/test_c_schema.py
@@ -25,7 +25,7 @@ def test_c_schema_basic():
schema = allocate_c_schema()
assert schema.is_valid() is False
assert schema._to_string() == "[invalid: schema is released]"
- assert repr(schema) == "<nanoarrow.c_lib.CSchema <released>>"
+ assert repr(schema) == "<nanoarrow.c_schema.CSchema <released>>"
schema = na.c_schema(na.struct({"some_name": na.int32()}))
@@ -37,7 +37,7 @@ def test_c_schema_basic():
assert schema.child(0).format == "i"
assert schema.child(0).name == "some_name"
assert schema.child(0)._to_string() == "int32"
- assert "<nanoarrow.c_lib.CSchema int32>" in repr(schema)
+ assert "<nanoarrow.c_schema.CSchema int32>" in repr(schema)
assert schema.dictionary is None
with pytest.raises(IndexError):
@@ -50,7 +50,7 @@ def test_c_schema_dictionary():
schema = na.c_schema(pa.dictionary(pa.int32(), pa.utf8()))
assert schema.format == "i"
assert schema.dictionary.format == "u"
- assert "dictionary: <nanoarrow.c_lib.CSchema string" in repr(schema)
+ assert "dictionary: <nanoarrow.c_schema.CSchema string" in repr(schema)
def test_schema_metadata():
@@ -117,11 +117,18 @@ def test_c_schema_view_extra_params():
def test_c_schema_metadata():
meta = {
- "ARROW:extension:name": "some_name",
- "ARROW:extension:metadata": "some_metadata",
+ b"ARROW:extension:name": b"some_name",
+ b"ARROW:extension:metadata": b"some_metadata",
}
schema = na.c_schema(na.int32()).modify(metadata=meta)
+ assert "b'some_name'" in repr(schema)
+ assert "b'some_name'" in repr(schema.metadata)
+ assert list(schema.metadata) == list(meta)
+ assert list(schema.metadata.items()) == list(meta.items())
+ assert list(schema.metadata.keys()) == list(meta.keys())
+ assert list(schema.metadata.values()) == list(meta.values())
+
view = c_schema_view(schema)
assert view.extension_name == "some_name"
assert view.extension_metadata == b"some_metadata"
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index eccd2378..2fb805bd 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -82,7 +82,7 @@ def test_c_array_empty():
array = allocate_c_array()
assert array.is_valid() is False
- assert repr(array) == "<nanoarrow.c_lib.CArray <released>>"
+ assert repr(array) == "<nanoarrow.c_array.CArray <released>>"
def test_c_array():
@@ -98,7 +98,7 @@ def test_c_array():
assert array.n_children == 0
assert len(list(array.children)) == 0
assert array.dictionary is None
- assert "<nanoarrow.c_lib.CArray int32" in repr(array)
+ assert "<nanoarrow.c_array.CArray int32" in repr(array)
def test_c_array_recursive():
@@ -107,7 +107,7 @@ def test_c_array_recursive():
assert len(list(array.children)) == 1
assert array.child(0).length == 3
assert array.child(0).schema._to_string() == "int32"
- assert "'col': <nanoarrow.c_lib.CArray int32" in repr(array)
+ assert "'col': <nanoarrow.c_array.CArray int32" in repr(array)
with pytest.raises(IndexError):
array.child(-1)
@@ -117,7 +117,7 @@ def test_c_array_dictionary():
array = na.c_array(pa.array(["a", "b", "b"]).dictionary_encode())
assert array.length == 3
assert array.dictionary.length == 2
- assert "dictionary: <nanoarrow.c_lib.CArray string>" in repr(array)
+ assert "dictionary: <nanoarrow.c_array.CArray string>" in repr(array)
def test_c_array_view():
@@ -180,7 +180,7 @@ def test_c_array_view_dictionary():
view = array.view()
assert view.n_buffers == 2
assert view.dictionary.n_buffers == 3
- assert "- dictionary: <nanoarrow.c_lib.CArrayView>" in repr(view)
+ assert "- dictionary: <nanoarrow.c_array.CArrayView>" in repr(view)
def test_buffers_integer():
@@ -381,7 +381,7 @@ def test_c_array_stream():
array_stream = allocate_c_array_stream()
assert na.c_array_stream(array_stream) is array_stream
- assert repr(array_stream) == "<nanoarrow.c_lib.CArrayStream <released>>"
+ assert repr(array_stream) == "<nanoarrow.c_array_stream.CArrayStream <released>>"
assert array_stream.is_valid() is False
with pytest.raises(RuntimeError):
diff --git a/python/tests/test_schema.py b/python/tests/test_schema.py
index 04aba78d..86360369 100644
--- a/python/tests/test_schema.py
+++ b/python/tests/test_schema.py
@@ -63,19 +63,18 @@ def test_schema_create_no_params():
schema_obj = na.int32()
assert schema_obj.type == na.Type.INT32
assert schema_obj.nullable is True
- assert repr(schema_obj) == "Schema(INT32)"
+ assert repr(schema_obj) == "<Schema> int32"
schema_obj = na.int32(nullable=False)
assert schema_obj.nullable is False
- assert "nullable=False" in repr(schema_obj)
+ assert "non-nullable" in repr(schema_obj)
schema_obj = na.Schema(na.Type.INT32, name=False)
assert schema_obj.name is None
- assert "name=False" in repr(schema_obj)
schema_obj = na.Schema(na.Type.INT32, name="not empty")
assert schema_obj.name == "not empty"
- assert "name='not empty'" in repr(schema_obj)
+ assert "'not empty': " in repr(schema_obj)
msg = "params are only supported for obj of class Type"
with pytest.raises(ValueError, match=msg):
@@ -114,19 +113,16 @@ def test_schema_fixed_size_binary():
schema_obj = na.fixed_size_binary(byte_width=123)
assert schema_obj.type == na.Type.FIXED_SIZE_BINARY
assert schema_obj.byte_width == 123
- assert "byte_width=123" in repr(schema_obj)
def test_schema_time():
schema_obj = na.time32(na.TimeUnit.SECOND)
assert schema_obj.type == na.Type.TIME32
assert schema_obj.unit == na.TimeUnit.SECOND
- assert "unit=SECOND" in repr(schema_obj)
schema_obj = na.time64(na.TimeUnit.MICRO)
assert schema_obj.type == na.Type.TIME64
assert schema_obj.unit == na.TimeUnit.MICRO
- assert "unit=MICRO" in repr(schema_obj)
def test_schema_timestamp():
@@ -137,14 +133,12 @@ def test_schema_timestamp():
schema_obj = na.timestamp(na.TimeUnit.SECOND, timezone="America/Halifax")
assert schema_obj.timezone == "America/Halifax"
- assert "timezone='America/Halifax'" in repr(schema_obj)
def test_schema_duration():
schema_obj = na.duration(na.TimeUnit.SECOND)
assert schema_obj.type == na.Type.DURATION
assert schema_obj.unit == na.TimeUnit.SECOND
- assert "unit=SECOND" in repr(schema_obj)
def test_schema_decimal():
@@ -152,15 +146,11 @@ def test_schema_decimal():
assert schema_obj.type == na.Type.DECIMAL128
assert schema_obj.precision == 10
assert schema_obj.scale == 3
- assert "precision=10" in repr(schema_obj)
- assert "scale=3" in repr(schema_obj)
schema_obj = na.decimal256(10, 3)
assert schema_obj.type == na.Type.DECIMAL256
assert schema_obj.precision == 10
assert schema_obj.scale == 3
- assert "precision=10" in repr(schema_obj)
- assert "scale=3" in repr(schema_obj)
def test_schema_struct():
@@ -173,8 +163,6 @@ def test_schema_struct():
for field in schema_obj.fields:
assert isinstance(field, na.Schema)
- assert "fields=[Schema(INT32)]" in repr(schema_obj)
-
# Make sure we can use a dictionary to specify fields
schema_obj = na.struct({"col_name": na.Type.INT32})
assert schema_obj.type == na.Type.STRUCT