This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 503548b9 fix: python schema repr does not truncate output (#628)
503548b9 is described below
commit 503548b9f3ca5e62fceb1cc44088c1857578ddaa
Author: Dane Pitkin <[email protected]>
AuthorDate: Fri Sep 20 18:15:33 2024 -0400
fix: python schema repr does not truncate output (#628)
Fixes #466.
```
>>> import nanoarrow as na
>>> url = "https://github.com/apache/arrow-experiments/raw/main/data/arrow-commits/arrow-commits.arrows"
>>> schema = na.ArrayStream.from_url(url).schema
>>> schema
<Schema> non-nullable struct<commit: string, time: timestamp('us', 'UTC'), files: int32, merge: bool, message: string>
```
---
python/src/nanoarrow/_repr_utils.py | 13 ++++++++-----
python/src/nanoarrow/schema.py | 8 ++++----
python/tests/test_schema.py | 18 ++++++++++++++++++
3 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/python/src/nanoarrow/_repr_utils.py b/python/src/nanoarrow/_repr_utils.py
index c695d807..604736d9 100644
--- a/python/src/nanoarrow/_repr_utils.py
+++ b/python/src/nanoarrow/_repr_utils.py
@@ -27,12 +27,15 @@ def make_class_label(obj, module=None):
def c_schema_to_string(obj, max_char_width=80):
- max_char_width = max(max_char_width, 10)
- c_schema_string = obj._to_string(recursive=True, max_chars=max_char_width + 1)
- if len(c_schema_string) > max_char_width:
- return c_schema_string[: (max_char_width - 3)] + "..."
+ c_schema_string = ""
+ if max_char_width == 0:
+ c_schema_string = obj._to_string(recursive=True, max_chars=max_char_width)
else:
- return c_schema_string
+ max_char_width = max(max_char_width, 10)
+ c_schema_string = obj._to_string(recursive=True, max_chars=max_char_width + 1)
+ if len(c_schema_string) > max_char_width:
+ c_schema_string = c_schema_string[: (max_char_width - 3)] + "..."
+ return c_schema_string
def metadata_repr(obj, indent=0, max_char_width=80):
diff --git a/python/src/nanoarrow/schema.py b/python/src/nanoarrow/schema.py
index 8adae418..c62efc81 100644
--- a/python/src/nanoarrow/schema.py
+++ b/python/src/nanoarrow/schema.py
@@ -498,7 +498,8 @@ class Schema:
writer.write_stream(empty)
def __repr__(self) -> str:
- return _schema_repr(self)
+ # An empty max_char_width prints the entire schema
+ return _schema_repr(self, max_char_width=0)
def __arrow_c_schema__(self):
return self._c_schema.__arrow_c_schema__()
@@ -1302,10 +1303,9 @@ def _schema_repr(obj, max_char_width=80, prefix="<Schema> ", include_metadata=Tr
modifiers_str = " ".join(modifiers)
first_line_prefix = f"{prefix}{modifiers_str}"
+ max_char_width = max(max_char_width - len(first_line_prefix), 0)
- schema_str = _repr_utils.c_schema_to_string(
- obj._c_schema, max_char_width - len(first_line_prefix)
- )
+ schema_str = _repr_utils.c_schema_to_string(obj._c_schema, max_char_width)
lines.append(f"{first_line_prefix}{schema_str}")
if include_metadata:
diff --git a/python/tests/test_schema.py b/python/tests/test_schema.py
index abfe6518..e5fbbcef 100644
--- a/python/tests/test_schema.py
+++ b/python/tests/test_schema.py
@@ -259,3 +259,21 @@ def test_schema_serialize():
schema.serialize(out)
schema_roundtrip = na.ArrayStream.from_readable(out.getvalue()).schema
assert repr(schema_roundtrip) == repr(schema)
+
+
+def test_schema_repr():
+ schema = na.struct(
+ {
+ "col1": na.int32(),
+ "col2": na.int16(),
+ "col3": na.string(),
+ "col4": na.timestamp(unit=na.TimeUnit.SECOND),
+ },
+ nullable=False,
+ )
+
+ assert repr(schema) == (
+ "<Schema> non-nullable struct"
+ "<col1: int32, col2: int16, col3: string, "
+ "col4: timestamp('s', '')>"
+ )