This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 3a9e23d138 Improve datafusion-cli print format tests (#8896)
3a9e23d138 is described below
commit 3a9e23d138c935ffea68408899016c9323aa0f36
Author: Andrew Lamb <[email protected]>
AuthorDate: Fri Jan 19 04:56:58 2024 -0500
Improve datafusion-cli print format tests (#8896)
---
datafusion-cli/src/print_format.rs | 415 +++++++++++++++++++++++++------------
1 file changed, 283 insertions(+), 132 deletions(-)
diff --git a/datafusion-cli/src/print_format.rs b/datafusion-cli/src/print_format.rs
index 0a8c7b4b3e..2de52be612 100644
--- a/datafusion-cli/src/print_format.rs
+++ b/datafusion-cli/src/print_format.rs
@@ -190,117 +190,212 @@ impl PrintFormat {
#[cfg(test)]
mod tests {
- use std::io::{Cursor, Read, Write};
- use std::sync::Arc;
-
use super::*;
+ use std::sync::Arc;
use arrow::array::{ArrayRef, Int32Array};
use arrow::datatypes::{DataType, Field, Schema};
- use datafusion::error::Result;
-
- fn run_test<F>(batches: &[RecordBatch], test_fn: F) -> Result<String>
- where
- F: Fn(&mut Cursor<Vec<u8>>, &[RecordBatch]) -> Result<()>,
- {
- let mut buffer = Cursor::new(Vec::new());
- test_fn(&mut buffer, batches)?;
- buffer.set_position(0);
- let mut contents = String::new();
- buffer.read_to_string(&mut contents)?;
- Ok(contents)
+
+ #[test]
+ fn print_empty() {
+ for format in [
+ PrintFormat::Csv,
+ PrintFormat::Tsv,
+ PrintFormat::Table,
+ PrintFormat::Json,
+ PrintFormat::NdJson,
+ PrintFormat::Automatic,
+ ] {
+ // no output for empty batches, even with header set
+ PrintBatchesTest::new()
+ .with_format(format)
+ .with_batches(vec![])
+ .with_expected(&[""])
+ .run();
+ }
}
#[test]
- fn test_print_batches_with_sep() -> Result<()> {
- let contents = run_test(&[], |buffer, batches| {
- print_batches_with_sep(buffer, batches, b',', true)
- })?;
- assert_eq!(contents, "");
+ fn print_csv_no_header() {
+ #[rustfmt::skip]
+ let expected = &[
+ "1,4,7",
+ "2,5,8",
+ "3,6,9",
+ ];
- let schema = Arc::new(Schema::new(vec![
- Field::new("a", DataType::Int32, false),
- Field::new("b", DataType::Int32, false),
- Field::new("c", DataType::Int32, false),
- ]));
- let batch = RecordBatch::try_new(
- schema,
- vec![
- Arc::new(Int32Array::from(vec![1, 2, 3])),
- Arc::new(Int32Array::from(vec![4, 5, 6])),
- Arc::new(Int32Array::from(vec![7, 8, 9])),
- ],
- )?;
+ PrintBatchesTest::new()
+ .with_format(PrintFormat::Csv)
+ .with_batches(split_batch(three_column_batch()))
+ .with_header(WithHeader::No)
+ .with_expected(expected)
+ .run();
+ }
- let contents = run_test(&[batch], |buffer, batches| {
- print_batches_with_sep(buffer, batches, b',', true)
- })?;
- assert_eq!(contents, "a,b,c\n1,4,7\n2,5,8\n3,6,9\n");
+ #[test]
+ fn print_csv_with_header() {
+ #[rustfmt::skip]
+ let expected = &[
+ "a,b,c",
+ "1,4,7",
+ "2,5,8",
+ "3,6,9",
+ ];
- Ok(())
+ PrintBatchesTest::new()
+ .with_format(PrintFormat::Csv)
+ .with_batches(split_batch(three_column_batch()))
+ .with_header(WithHeader::Yes)
+ .with_expected(expected)
+ .run();
}
#[test]
- fn test_print_batches_to_json_empty() -> Result<()> {
- let contents = run_test(&[], |buffer, batches| {
- batches_to_json!(ArrayWriter, buffer, batches)
- })?;
- assert_eq!(contents, "");
+ fn print_tsv_no_header() {
+ #[rustfmt::skip]
+ let expected = &[
+ "1\t4\t7",
+ "2\t5\t8",
+ "3\t6\t9",
+ ];
- let contents = run_test(&[], |buffer, batches| {
- batches_to_json!(LineDelimitedWriter, buffer, batches)
- })?;
- assert_eq!(contents, "");
+ PrintBatchesTest::new()
+ .with_format(PrintFormat::Tsv)
+ .with_batches(split_batch(three_column_batch()))
+ .with_header(WithHeader::No)
+ .with_expected(expected)
+ .run();
+ }
- let schema = Arc::new(Schema::new(vec![
- Field::new("a", DataType::Int32, false),
- Field::new("b", DataType::Int32, false),
- Field::new("c", DataType::Int32, false),
- ]));
- let batch = RecordBatch::try_new(
- schema,
- vec![
- Arc::new(Int32Array::from(vec![1, 2, 3])),
- Arc::new(Int32Array::from(vec![4, 5, 6])),
- Arc::new(Int32Array::from(vec![7, 8, 9])),
- ],
- )?;
- let batches = vec![batch];
+ #[test]
+ fn print_tsv_with_header() {
+ #[rustfmt::skip]
+ let expected = &[
+ "a\tb\tc",
+ "1\t4\t7",
+ "2\t5\t8",
+ "3\t6\t9",
+ ];
+
+ PrintBatchesTest::new()
+ .with_format(PrintFormat::Tsv)
+ .with_batches(split_batch(three_column_batch()))
+ .with_header(WithHeader::Yes)
+ .with_expected(expected)
+ .run();
+ }
- let contents = run_test(&batches, |buffer, batches| {
- batches_to_json!(ArrayWriter, buffer, batches)
- })?;
- assert_eq!(contents, "[{\"a\":1,\"b\":4,\"c\":7},{\"a\":2,\"b\":5,\"c\":8},{\"a\":3,\"b\":6,\"c\":9}]\n");
+ #[test]
+ fn print_table() {
+ let expected = &[
+ "+---+---+---+",
+ "| a | b | c |",
+ "+---+---+---+",
+ "| 1 | 4 | 7 |",
+ "| 2 | 5 | 8 |",
+ "| 3 | 6 | 9 |",
+ "+---+---+---+",
+ ];
- let contents = run_test(&batches, |buffer, batches| {
- batches_to_json!(LineDelimitedWriter, buffer, batches)
- })?;
- assert_eq!(contents, "{\"a\":1,\"b\":4,\"c\":7}\n{\"a\":2,\"b\":5,\"c\":8}\n{\"a\":3,\"b\":6,\"c\":9}\n");
+ PrintBatchesTest::new()
+ .with_format(PrintFormat::Table)
+ .with_batches(split_batch(three_column_batch()))
+ .with_header(WithHeader::Ignored)
+ .with_expected(expected)
+ .run();
+ }
+ #[test]
+ fn print_json() {
+ let expected =
+ &[r#"[{"a":1,"b":4,"c":7},{"a":2,"b":5,"c":8},{"a":3,"b":6,"c":9}]"#];
- Ok(())
+ PrintBatchesTest::new()
+ .with_format(PrintFormat::Json)
+ .with_batches(split_batch(three_column_batch()))
+ .with_header(WithHeader::Ignored)
+ .with_expected(expected)
+ .run();
}
#[test]
- fn test_format_batches_with_maxrows() -> Result<()> {
- let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
- let batch = RecordBatch::try_new(
- schema,
- vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
- )?;
+ fn print_ndjson() {
+ let expected = &[
+ r#"{"a":1,"b":4,"c":7}"#,
+ r#"{"a":2,"b":5,"c":8}"#,
+ r#"{"a":3,"b":6,"c":9}"#,
+ ];
+
+ PrintBatchesTest::new()
+ .with_format(PrintFormat::NdJson)
+ .with_batches(split_batch(three_column_batch()))
+ .with_header(WithHeader::Ignored)
+ .with_expected(expected)
+ .run();
+ }
+ #[test]
+ fn print_automatic_no_header() {
#[rustfmt::skip]
- let all_rows_expected = [
+ let expected = &[
+ "1,4,7",
+ "2,5,8",
+ "3,6,9",
+ ];
+
+ PrintBatchesTest::new()
+ .with_format(PrintFormat::Automatic)
+ .with_batches(split_batch(three_column_batch()))
+ .with_header(WithHeader::No)
+ .with_expected(expected)
+ .run();
+ }
+ #[test]
+ fn print_automatic_with_header() {
+ #[rustfmt::skip]
+ let expected = &[
+ "a,b,c",
+ "1,4,7",
+ "2,5,8",
+ "3,6,9",
+ ];
+
+ PrintBatchesTest::new()
+ .with_format(PrintFormat::Automatic)
+ .with_batches(split_batch(three_column_batch()))
+ .with_header(WithHeader::Yes)
+ .with_expected(expected)
+ .run();
+ }
+
+ #[test]
+ fn print_maxrows_unlimited() {
+ #[rustfmt::skip]
+ let expected = &[
"+---+",
"| a |",
"+---+",
"| 1 |",
"| 2 |",
"| 3 |",
- "+---+\n",
- ].join("\n");
+ "+---+",
+ ];
+
+ // should print out entire output with no truncation if unlimited or
+ // limit greater than number of batches or equal to the number of batches
+ for max_rows in [MaxRows::Unlimited, MaxRows::Limited(5), MaxRows::Limited(3)] {
+ PrintBatchesTest::new()
+ .with_format(PrintFormat::Table)
+ .with_batches(vec![one_column_batch()])
+ .with_maxrows(max_rows)
+ .with_expected(expected)
+ .run();
+ }
+ }
+ #[test]
+ fn print_maxrows_limited_one_batch() {
#[rustfmt::skip]
- let one_row_expected = [
+ let expected = &[
"+---+",
"| a |",
"+---+",
@@ -308,11 +403,21 @@ mod tests {
"| . |",
"| . |",
"| . |",
- "+---+\n",
- ].join("\n");
+ "+---+",
+ ];
+ PrintBatchesTest::new()
+ .with_format(PrintFormat::Table)
+ .with_batches(vec![one_column_batch()])
+ .with_maxrows(MaxRows::Limited(1))
+ .with_expected(expected)
+ .run();
+ }
+
+ #[test]
+ fn print_maxrows_limited_multi_batched() {
#[rustfmt::skip]
- let multi_batches_expected = [
+ let expected = &[
"+---+",
"| a |",
"+---+",
@@ -324,42 +429,23 @@ mod tests {
"| . |",
"| . |",
"| . |",
- "+---+\n",
- ].join("\n");
-
- let no_limit = run_test(&[batch.clone()], |buffer, batches| {
- format_batches_with_maxrows(buffer, batches, MaxRows::Unlimited)
- })?;
- assert_eq!(no_limit, all_rows_expected);
-
- let maxrows_less_than_actual = run_test(&[batch.clone()], |buffer, batches| {
- format_batches_with_maxrows(buffer, batches, MaxRows::Limited(1))
- })?;
- assert_eq!(maxrows_less_than_actual, one_row_expected);
-
- let maxrows_more_than_actual = run_test(&[batch.clone()], |buffer, batches| {
- format_batches_with_maxrows(buffer, batches, MaxRows::Limited(5))
- })?;
- assert_eq!(maxrows_more_than_actual, all_rows_expected);
-
- let maxrows_equals_actual = run_test(&[batch.clone()], |buffer, batches| {
- format_batches_with_maxrows(buffer, batches, MaxRows::Limited(3))
- })?;
- assert_eq!(maxrows_equals_actual, all_rows_expected);
-
- let multi_batches = run_test(
- &[batch.clone(), batch.clone(), batch.clone()],
- |buffer, batches| {
- format_batches_with_maxrows(buffer, batches, MaxRows::Limited(5))
- },
- )?;
- assert_eq!(multi_batches, multi_batches_expected);
-
- Ok(())
+ "+---+",
+ ];
+
+ PrintBatchesTest::new()
+ .with_format(PrintFormat::Table)
+ .with_batches(vec![
+ one_column_batch(),
+ one_column_batch(),
+ one_column_batch(),
+ ])
+ .with_maxrows(MaxRows::Limited(5))
+ .with_expected(expected)
+ .run();
}
#[test]
- fn test_print_batches_empty_batches() -> Result<()> {
+ fn test_print_batches_empty_batches() {
let batch = one_column_batch();
let empty_batch = RecordBatch::new_empty(batch.schema());
@@ -371,7 +457,7 @@ mod tests {
"| 1 |",
"| 2 |",
"| 3 |",
- "+---+\n",
+ "+---+",
];
PrintBatchesTest::new()
@@ -379,11 +465,10 @@ mod tests {
.with_batches(vec![empty_batch.clone(), batch, empty_batch])
.with_expected(expected)
.run();
- Ok(())
}
#[test]
- fn test_print_batches_empty_batches_no_header() -> Result<()> {
+ fn test_print_batches_empty_batches_no_header() {
let empty_batch = RecordBatch::new_empty(one_column_batch().schema());
// empty batches should not print a header
@@ -392,27 +477,36 @@ mod tests {
PrintBatchesTest::new()
.with_format(PrintFormat::Table)
.with_batches(vec![empty_batch])
- .with_header(true)
+ .with_header(WithHeader::Yes)
.with_expected(expected)
.run();
- Ok(())
}
+ #[derive(Debug)]
struct PrintBatchesTest {
format: PrintFormat,
batches: Vec<RecordBatch>,
maxrows: MaxRows,
- with_header: bool,
+ with_header: WithHeader,
expected: Vec<&'static str>,
}
+ /// How to test with_header
+ #[derive(Debug, Clone)]
+ enum WithHeader {
+ Yes,
+ No,
+ /// output should be the same with or without header
+ Ignored,
+ }
+
impl PrintBatchesTest {
fn new() -> Self {
Self {
format: PrintFormat::Table,
batches: vec![],
maxrows: MaxRows::Unlimited,
- with_header: false,
+ with_header: WithHeader::Ignored,
expected: vec![],
}
}
@@ -429,8 +523,14 @@ mod tests {
self
}
- /// set whether to include a header
- fn with_header(mut self, with_header: bool) -> Self {
+ /// set maxrows
+ fn with_maxrows(mut self, maxrows: MaxRows) -> Self {
+ self.maxrows = maxrows;
+ self
+ }
+
+ /// set with_header
+ fn with_header(mut self, with_header: WithHeader) -> Self {
self.with_header = with_header;
self
}
@@ -443,17 +543,58 @@ mod tests {
/// run the test
fn run(self) {
- let mut buffer: Vec<u8> = vec![];
- self.format
- .print_batches(&mut buffer, &self.batches, self.maxrows, self.with_header)
- .unwrap();
- let actual = String::from_utf8(buffer).unwrap();
- let expected = self.expected.join("\n");
+ let actual = self.output();
+ let actual: Vec<_> = actual.trim_end().split('\n').collect();
+ let expected = self.expected;
assert_eq!(
actual, expected,
- "actual:\n\n{actual}expected:\n\n{expected}"
+ "\n\nactual:\n{actual:#?}\n\nexpected:\n{expected:#?}"
);
}
+
+ /// formats batches using parameters and returns the resulting output
+ fn output(&self) -> String {
+ match self.with_header {
+ WithHeader::Yes => self.output_with_header(true),
+ WithHeader::No => self.output_with_header(false),
+ WithHeader::Ignored => {
+ let output = self.output_with_header(true);
+ // ensure the output is the same without header
+ let output_without_header = self.output_with_header(false);
+ assert_eq!(
+ output, output_without_header,
+ "Expected output to be the same with or without header"
+ );
+ output
+ }
+ }
+ }
+
+ fn output_with_header(&self, with_header: bool) -> String {
+ let mut buffer: Vec<u8> = vec![];
+ self.format
+ .print_batches(&mut buffer, &self.batches, self.maxrows, with_header)
+ .unwrap();
+ String::from_utf8(buffer).unwrap()
+ }
+ }
+
+ /// Return a batch with three columns and three rows
+ fn three_column_batch() -> RecordBatch {
+ let schema = Arc::new(Schema::new(vec![
+ Field::new("a", DataType::Int32, false),
+ Field::new("b", DataType::Int32, false),
+ Field::new("c", DataType::Int32, false),
+ ]));
+ RecordBatch::try_new(
+ schema,
+ vec![
+ Arc::new(Int32Array::from(vec![1, 2, 3])),
+ Arc::new(Int32Array::from(vec![4, 5, 6])),
+ Arc::new(Int32Array::from(vec![7, 8, 9])),
+ ],
+ )
+ .unwrap()
}
/// return a batch with one column and three rows
@@ -464,4 +605,14 @@ mod tests {
)])
.unwrap()
}
+
+ /// Slice the record batch into 2 batches
+ fn split_batch(batch: RecordBatch) -> Vec<RecordBatch> {
+ assert!(batch.num_rows() > 1);
+ let split = batch.num_rows() / 2;
+ vec![
+ batch.slice(0, split),
+ batch.slice(split, batch.num_rows() - split),
+ ]
+ }
}