This is an automated email from the ASF dual-hosted git repository. timsaucer pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion-python.git
The following commit(s) were added to refs/heads/main by this push: new 1ba8807c Improve `show()` output for empty DataFrames (#1208) 1ba8807c is described below commit 1ba8807cfd3ea878dbd866554a547da7148e2d4e Author: kosiew <kos...@gmail.com> AuthorDate: Mon Aug 25 21:03:37 2025 +0800 Improve `show()` output for empty DataFrames (#1208) * Add test for showing empty DataFrame and improve print output for empty DataFrames * Add tests for handling empty DataFrames and zero-row queries * Add tests for showing DataFrames with no rows and improve output messages * Fix assertion in test_show_from_empty_batch to ensure proper output for empty DataFrames * feat(tests): add a blank line before test_show_select_where_no_rows function for improved readability --- python/tests/test_dataframe.py | 23 +++++++++++++++++++++++ src/dataframe.rs | 11 +++++++---- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 1fd99b33..0cd56219 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -252,6 +252,13 @@ def test_filter(df): assert result.column(2) == pa.array([5]) +def test_show_empty(df, capsys): + df_empty = df.filter(column("a") > literal(3)) + df_empty.show() + captured = capsys.readouterr() + assert "DataFrame has no rows" in captured.out + + def test_sort(df): df = df.sort(column("b").sort(ascending=False)) @@ -2657,3 +2664,19 @@ def test_collect_interrupted(): # Make sure the interrupt thread has finished interrupt_thread.join(timeout=1.0) + + +def test_show_select_where_no_rows(capsys) -> None: + ctx = SessionContext() + df = ctx.sql("SELECT 1 WHERE 1=0") + df.show() + out = capsys.readouterr().out + assert "DataFrame has no rows" in out + + +def test_show_from_empty_batch(capsys) -> None: + ctx = SessionContext() + batch = pa.record_batch([pa.array([], type=pa.int32())], names=["a"]) + ctx.create_dataframe([[batch]]).show() + out = capsys.readouterr().out + assert "| a |" in out diff --git a/src/dataframe.rs b/src/dataframe.rs index 05f665cd..1437f5f8 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -998,10 +998,13 @@ impl PyDataFrame { fn print_dataframe(py: Python, df: DataFrame) -> PyDataFusionResult<()> { // Get string representation of record batches let batches = wait_for_future(py, df.collect())??; - let batches_as_string = pretty::pretty_format_batches(&batches); - let result = match batches_as_string { - Ok(batch) => format!("DataFrame()\n{batch}"), - Err(err) => format!("Error: {:?}", err.to_string()), + let result = if batches.is_empty() { + "DataFrame has no rows".to_string() + } else { + match pretty::pretty_format_batches(&batches) { + Ok(batch) => format!("DataFrame()\n{batch}"), + Err(err) => format!("Error: {:?}", err.to_string()), + } }; // Import the Python 'builtins' module to access the print function --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org