This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 7133c33 Add json print format mode to datafusion cli (#295)
7133c33 is described below
commit 7133c330d1aabef750d9c42e0dab7e3a260ed69d
Author: Jiayu Liu <[email protected]>
AuthorDate: Tue May 11 18:52:53 2021 +0800
Add json print format mode to datafusion cli (#295)
* add csv mode to datafusion cli
* add license
* fix per comments
* update help
* adding tsv format
* use Self where possible
* add json support
* adding unit test
* remove redundant clone
* add csv mode to datafusion cli
* adding tsv format
* use Self where possible
* prune import
---
datafusion-cli/src/format/print_format.rs | 99 ++++++++++++++++++++++++++++++-
datafusion-cli/src/main.rs | 2 +-
2 files changed, 99 insertions(+), 2 deletions(-)
diff --git a/datafusion-cli/src/format/print_format.rs b/datafusion-cli/src/format/print_format.rs
index a9fc56b..85caaa3 100644
--- a/datafusion-cli/src/format/print_format.rs
+++ b/datafusion-cli/src/format/print_format.rs
@@ -17,17 +17,19 @@
//! Print format variants
use arrow::csv::writer::WriterBuilder;
+use arrow::json::ArrayWriter;
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::arrow::util::pretty;
use datafusion::error::{DataFusionError, Result};
use std::str::FromStr;
/// Allow records to be printed in different formats
-#[derive(Debug, Clone)]
+#[derive(Debug, PartialEq, Eq, Clone)]
pub enum PrintFormat {
Csv,
Tsv,
Table,
+ Json,
}
impl FromStr for PrintFormat {
@@ -37,11 +39,24 @@ impl FromStr for PrintFormat {
"csv" => Ok(Self::Csv),
"tsv" => Ok(Self::Tsv),
"table" => Ok(Self::Table),
+ "json" => Ok(Self::Json),
_ => Err(()),
}
}
}
+fn print_batches_to_json(batches: &[RecordBatch]) -> Result<String> {
+ let mut bytes = vec![];
+ {
+ let mut writer = ArrayWriter::new(&mut bytes);
+ writer.write_batches(batches)?;
+ writer.finish()?;
+ }
+ let formatted = String::from_utf8(bytes)
+ .map_err(|e| DataFusionError::Execution(e.to_string()))?;
+ Ok(formatted)
+}
+
fn print_batches_with_sep(batches: &[RecordBatch], delimiter: u8) ->
Result<String> {
let mut bytes = vec![];
{
@@ -65,7 +80,89 @@ impl PrintFormat {
Self::Csv => println!("{}", print_batches_with_sep(batches,
b',')?),
Self::Tsv => println!("{}", print_batches_with_sep(batches,
b'\t')?),
Self::Table => pretty::print_batches(batches)?,
+ Self::Json => println!("{}", print_batches_to_json(batches)?),
}
Ok(())
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use arrow::array::Int32Array;
+ use arrow::datatypes::{DataType, Field, Schema};
+ use std::sync::Arc;
+
+ #[test]
+ fn test_from_str() {
+ let format = "csv".parse::<PrintFormat>().unwrap();
+ assert_eq!(PrintFormat::Csv, format);
+
+ let format = "tsv".parse::<PrintFormat>().unwrap();
+ assert_eq!(PrintFormat::Tsv, format);
+
+ let format = "json".parse::<PrintFormat>().unwrap();
+ assert_eq!(PrintFormat::Json, format);
+
+ let format = "table".parse::<PrintFormat>().unwrap();
+ assert_eq!(PrintFormat::Table, format);
+ }
+
+ #[test]
+ fn test_from_str_failure() {
+ assert_eq!(true, "pretty".parse::<PrintFormat>().is_err());
+ }
+
+ #[test]
+ fn test_print_batches_with_sep() {
+ let batches = vec![];
+ assert_eq!("", print_batches_with_sep(&batches, b',').unwrap());
+
+ let schema = Arc::new(Schema::new(vec![
+ Field::new("a", DataType::Int32, false),
+ Field::new("b", DataType::Int32, false),
+ Field::new("c", DataType::Int32, false),
+ ]));
+
+ let batch = RecordBatch::try_new(
+ schema,
+ vec![
+ Arc::new(Int32Array::from(vec![1, 2, 3])),
+ Arc::new(Int32Array::from(vec![4, 5, 6])),
+ Arc::new(Int32Array::from(vec![7, 8, 9])),
+ ],
+ )
+ .unwrap();
+
+ let batches = vec![batch];
+ let r = print_batches_with_sep(&batches, b',').unwrap();
+ assert_eq!("a,b,c\n1,4,7\n2,5,8\n3,6,9\n", r);
+ }
+
+ #[test]
+ fn test_print_batches_to_json_empty() {
+ let batches = vec![];
+ let r = print_batches_to_json(&batches).unwrap();
+ assert_eq!("", r);
+
+ let schema = Arc::new(Schema::new(vec![
+ Field::new("a", DataType::Int32, false),
+ Field::new("b", DataType::Int32, false),
+ Field::new("c", DataType::Int32, false),
+ ]));
+
+ let batch = RecordBatch::try_new(
+ schema,
+ vec![
+ Arc::new(Int32Array::from(vec![1, 2, 3])),
+ Arc::new(Int32Array::from(vec![4, 5, 6])),
+ Arc::new(Int32Array::from(vec![7, 8, 9])),
+ ],
+ )
+ .unwrap();
+
+ let batches = vec![batch];
+ let r = print_batches_to_json(&batches).unwrap();
+
+ assert_eq!("[{\"a\":1,\"b\":4,\"c\":7},{\"a\":2,\"b\":5,\"c\":8},{\"a\":3,\"b\":6,\"c\":9}]", r);
+ }
+}
diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs
index 52d3ccc..2360d46 100644
--- a/datafusion-cli/src/main.rs
+++ b/datafusion-cli/src/main.rs
@@ -66,7 +66,7 @@ pub async fn main() {
)
.arg(
Arg::with_name("format")
- .help("Output format (possible values: table, csv, tsv)")
+ .help("Output format (possible values: table, csv, tsv, json)")
.long("format")
.default_value("table")
.validator(is_valid_format)