This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new bdf5e9d810 Fix `datafusion-cli` print output (#8895)
bdf5e9d810 is described below
commit bdf5e9d81091971328fbcf9f9517ae38c07d64c4
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Jan 18 06:34:04 2024 -0500
Fix `datafusion-cli` print output (#8895)
* Fix datafusion-cli print output
* fmt
* Do not print header if only empty batches, test for same
---
datafusion-cli/src/print_format.rs | 127 +++++++++++++++++++++++++++++++++++--
1 file changed, 120 insertions(+), 7 deletions(-)
diff --git a/datafusion-cli/src/print_format.rs b/datafusion-cli/src/print_format.rs
index ea41856249..0a8c7b4b3e 100644
--- a/datafusion-cli/src/print_format.rs
+++ b/datafusion-cli/src/print_format.rs
@@ -161,23 +161,29 @@ impl PrintFormat {
maxrows: MaxRows,
with_header: bool,
) -> Result<()> {
- if batches.is_empty() || batches[0].num_rows() == 0 {
+ // filter out any empty batches
+ let batches: Vec<_> = batches
+ .iter()
+ .filter(|b| b.num_rows() > 0)
+ .cloned()
+ .collect();
+ if batches.is_empty() {
return Ok(());
}
match self {
Self::Csv | Self::Automatic => {
- print_batches_with_sep(writer, batches, b',', with_header)
+ print_batches_with_sep(writer, &batches, b',', with_header)
}
- Self::Tsv => print_batches_with_sep(writer, batches, b'\t', with_header),
+ Self::Tsv => print_batches_with_sep(writer, &batches, b'\t', with_header),
Self::Table => {
if maxrows == MaxRows::Limited(0) {
return Ok(());
}
- format_batches_with_maxrows(writer, batches, maxrows)
+ format_batches_with_maxrows(writer, &batches, maxrows)
}
- Self::Json => batches_to_json!(ArrayWriter, writer, batches),
- Self::NdJson => batches_to_json!(LineDelimitedWriter, writer, batches),
+ Self::Json => batches_to_json!(ArrayWriter, writer, &batches),
+ Self::NdJson => batches_to_json!(LineDelimitedWriter, writer, &batches),
}
}
}
@@ -189,7 +195,7 @@ mod tests {
use super::*;
- use arrow::array::Int32Array;
+ use arrow::array::{ArrayRef, Int32Array};
use arrow::datatypes::{DataType, Field, Schema};
use datafusion::error::Result;
@@ -351,4 +357,111 @@ mod tests {
Ok(())
}
+
+ #[test]
+ fn test_print_batches_empty_batches() -> Result<()> {
+ let batch = one_column_batch();
+ let empty_batch = RecordBatch::new_empty(batch.schema());
+
+ #[rustfmt::skip]
+ let expected =&[
+ "+---+",
+ "| a |",
+ "+---+",
+ "| 1 |",
+ "| 2 |",
+ "| 3 |",
+ "+---+\n",
+ ];
+
+ PrintBatchesTest::new()
+ .with_format(PrintFormat::Table)
+ .with_batches(vec![empty_batch.clone(), batch, empty_batch])
+ .with_expected(expected)
+ .run();
+ Ok(())
+ }
+
+ #[test]
+ fn test_print_batches_empty_batches_no_header() -> Result<()> {
+ let empty_batch = RecordBatch::new_empty(one_column_batch().schema());
+
+ // empty batches should not print a header
+ let expected = &[""];
+
+ PrintBatchesTest::new()
+ .with_format(PrintFormat::Table)
+ .with_batches(vec![empty_batch])
+ .with_header(true)
+ .with_expected(expected)
+ .run();
+ Ok(())
+ }
+
+ struct PrintBatchesTest {
+ format: PrintFormat,
+ batches: Vec<RecordBatch>,
+ maxrows: MaxRows,
+ with_header: bool,
+ expected: Vec<&'static str>,
+ }
+
+ impl PrintBatchesTest {
+ fn new() -> Self {
+ Self {
+ format: PrintFormat::Table,
+ batches: vec![],
+ maxrows: MaxRows::Unlimited,
+ with_header: false,
+ expected: vec![],
+ }
+ }
+
+ /// set the format
+ fn with_format(mut self, format: PrintFormat) -> Self {
+ self.format = format;
+ self
+ }
+
+ /// set the batches to convert
+ fn with_batches(mut self, batches: Vec<RecordBatch>) -> Self {
+ self.batches = batches;
+ self
+ }
+
+ /// set whether to include a header
+ fn with_header(mut self, with_header: bool) -> Self {
+ self.with_header = with_header;
+ self
+ }
+
+ /// set expected output
+ fn with_expected(mut self, expected: &[&'static str]) -> Self {
+ self.expected = expected.to_vec();
+ self
+ }
+
+ /// run the test
+ fn run(self) {
+ let mut buffer: Vec<u8> = vec![];
+ self.format
+ .print_batches(&mut buffer, &self.batches, self.maxrows, self.with_header)
+ .unwrap();
+ let actual = String::from_utf8(buffer).unwrap();
+ let expected = self.expected.join("\n");
+ assert_eq!(
+ actual, expected,
+ "actual:\n\n{actual}expected:\n\n{expected}"
+ );
+ }
+ }
+
+ /// return a batch with one column and three rows
+ fn one_column_batch() -> RecordBatch {
+ RecordBatch::try_from_iter(vec![(
+ "a",
+ Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef,
+ )])
+ .unwrap()
+ }
}