This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 503548b9 fix: python schema repr does not truncate output (#628)
503548b9 is described below

commit 503548b9f3ca5e62fceb1cc44088c1857578ddaa
Author: Dane Pitkin <[email protected]>
AuthorDate: Fri Sep 20 18:15:33 2024 -0400

    fix: python schema repr does not truncate output (#628)
    
    Fixes #466.
    
    ```
    >>> import nanoarrow as na
    >>> url = "https://github.com/apache/arrow-experiments/raw/main/data/arrow-commits/arrow-commits.arrows"
    >>> schema = na.ArrayStream.from_url(url).schema
    >>> schema
    <Schema> non-nullable struct<commit: string, time: timestamp('us', 'UTC'), files: int32, merge: bool, message: string>
    ```
---
 python/src/nanoarrow/_repr_utils.py | 13 ++++++++-----
 python/src/nanoarrow/schema.py      |  8 ++++----
 python/tests/test_schema.py         | 18 ++++++++++++++++++
 3 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/python/src/nanoarrow/_repr_utils.py b/python/src/nanoarrow/_repr_utils.py
index c695d807..604736d9 100644
--- a/python/src/nanoarrow/_repr_utils.py
+++ b/python/src/nanoarrow/_repr_utils.py
@@ -27,12 +27,15 @@ def make_class_label(obj, module=None):
 
 
 def c_schema_to_string(obj, max_char_width=80):
-    max_char_width = max(max_char_width, 10)
-    c_schema_string = obj._to_string(recursive=True, max_chars=max_char_width + 1)
-    if len(c_schema_string) > max_char_width:
-        return c_schema_string[: (max_char_width - 3)] + "..."
+    c_schema_string = ""
+    if max_char_width == 0:
+        c_schema_string = obj._to_string(recursive=True, max_chars=max_char_width)
     else:
-        return c_schema_string
+        max_char_width = max(max_char_width, 10)
+        c_schema_string = obj._to_string(recursive=True, max_chars=max_char_width + 1)
+        if len(c_schema_string) > max_char_width:
+            c_schema_string = c_schema_string[: (max_char_width - 3)] + "..."
+    return c_schema_string
 
 
 def metadata_repr(obj, indent=0, max_char_width=80):
diff --git a/python/src/nanoarrow/schema.py b/python/src/nanoarrow/schema.py
index 8adae418..c62efc81 100644
--- a/python/src/nanoarrow/schema.py
+++ b/python/src/nanoarrow/schema.py
@@ -498,7 +498,8 @@ class Schema:
             writer.write_stream(empty)
 
     def __repr__(self) -> str:
-        return _schema_repr(self)
+        # An empty max_char_width prints the entire schema
+        return _schema_repr(self, max_char_width=0)
 
     def __arrow_c_schema__(self):
         return self._c_schema.__arrow_c_schema__()
@@ -1302,10 +1303,9 @@ def _schema_repr(obj, max_char_width=80, prefix="<Schema> ", include_metadata=Tr
 
     modifiers_str = " ".join(modifiers)
     first_line_prefix = f"{prefix}{modifiers_str}"
+    max_char_width = max(max_char_width - len(first_line_prefix), 0)
 
-    schema_str = _repr_utils.c_schema_to_string(
-        obj._c_schema, max_char_width - len(first_line_prefix)
-    )
+    schema_str = _repr_utils.c_schema_to_string(obj._c_schema, max_char_width)
     lines.append(f"{first_line_prefix}{schema_str}")
 
     if include_metadata:
diff --git a/python/tests/test_schema.py b/python/tests/test_schema.py
index abfe6518..e5fbbcef 100644
--- a/python/tests/test_schema.py
+++ b/python/tests/test_schema.py
@@ -259,3 +259,21 @@ def test_schema_serialize():
     schema.serialize(out)
     schema_roundtrip = na.ArrayStream.from_readable(out.getvalue()).schema
     assert repr(schema_roundtrip) == repr(schema)
+
+
+def test_schema_repr():
+    schema = na.struct(
+        {
+            "col1": na.int32(),
+            "col2": na.int16(),
+            "col3": na.string(),
+            "col4": na.timestamp(unit=na.TimeUnit.SECOND),
+        },
+        nullable=False,
+    )
+
+    assert repr(schema) == (
+        "<Schema> non-nullable struct"
+        "<col1: int32, col2: int16, col3: string, "
+        "col4: timestamp('s', '')>"
+    )

Reply via email to