This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 7133c33  Add json print format mode to datafusion cli (#295)
7133c33 is described below

commit 7133c330d1aabef750d9c42e0dab7e3a260ed69d
Author: Jiayu Liu <[email protected]>
AuthorDate: Tue May 11 18:52:53 2021 +0800

    Add json print format mode to datafusion cli (#295)
    
    * add csv mode to datafusion cli
    
    * add license
    
    * fix per comments
    
    * update help
    
    * adding tsv format
    
    * use Self whereas possible
    
    * add json support
    
    * adding unit test
    
    * remove redundant clone
    
    * add csv mode to datafusion cli
    
    * adding tsv format
    
    * use Self whereas possible
    
    * prune import
---
 datafusion-cli/src/format/print_format.rs | 99 ++++++++++++++++++++++++++++++-
 datafusion-cli/src/main.rs                |  2 +-
 2 files changed, 99 insertions(+), 2 deletions(-)

diff --git a/datafusion-cli/src/format/print_format.rs b/datafusion-cli/src/format/print_format.rs
index a9fc56b..85caaa3 100644
--- a/datafusion-cli/src/format/print_format.rs
+++ b/datafusion-cli/src/format/print_format.rs
@@ -17,17 +17,19 @@
 
 //! Print format variants
 use arrow::csv::writer::WriterBuilder;
+use arrow::json::ArrayWriter;
 use datafusion::arrow::record_batch::RecordBatch;
 use datafusion::arrow::util::pretty;
 use datafusion::error::{DataFusionError, Result};
 use std::str::FromStr;
 
 /// Allow records to be printed in different formats
-#[derive(Debug, Clone)]
+#[derive(Debug, PartialEq, Eq, Clone)]
 pub enum PrintFormat {
     Csv,
     Tsv,
     Table,
+    Json,
 }
 
 impl FromStr for PrintFormat {
@@ -37,11 +39,24 @@ impl FromStr for PrintFormat {
             "csv" => Ok(Self::Csv),
             "tsv" => Ok(Self::Tsv),
             "table" => Ok(Self::Table),
+            "json" => Ok(Self::Json),
             _ => Err(()),
         }
     }
 }
 
+fn print_batches_to_json(batches: &[RecordBatch]) -> Result<String> {
+    let mut bytes = vec![];
+    {
+        let mut writer = ArrayWriter::new(&mut bytes);
+        writer.write_batches(batches)?;
+        writer.finish()?;
+    }
+    let formatted = String::from_utf8(bytes)
+        .map_err(|e| DataFusionError::Execution(e.to_string()))?;
+    Ok(formatted)
+}
+
 fn print_batches_with_sep(batches: &[RecordBatch], delimiter: u8) -> Result<String> {
     let mut bytes = vec![];
     {
@@ -65,7 +80,89 @@ impl PrintFormat {
             Self::Csv => println!("{}", print_batches_with_sep(batches, b',')?),
             Self::Tsv => println!("{}", print_batches_with_sep(batches, b'\t')?),
             Self::Table => pretty::print_batches(batches)?,
+            Self::Json => println!("{}", print_batches_to_json(batches)?),
         }
         Ok(())
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow::array::Int32Array;
+    use arrow::datatypes::{DataType, Field, Schema};
+    use std::sync::Arc;
+
+    #[test]
+    fn test_from_str() {
+        let format = "csv".parse::<PrintFormat>().unwrap();
+        assert_eq!(PrintFormat::Csv, format);
+
+        let format = "tsv".parse::<PrintFormat>().unwrap();
+        assert_eq!(PrintFormat::Tsv, format);
+
+        let format = "json".parse::<PrintFormat>().unwrap();
+        assert_eq!(PrintFormat::Json, format);
+
+        let format = "table".parse::<PrintFormat>().unwrap();
+        assert_eq!(PrintFormat::Table, format);
+    }
+
+    #[test]
+    fn test_from_str_failure() {
+        assert_eq!(true, "pretty".parse::<PrintFormat>().is_err());
+    }
+
+    #[test]
+    fn test_print_batches_with_sep() {
+        let batches = vec![];
+        assert_eq!("", print_batches_with_sep(&batches, b',').unwrap());
+
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("a", DataType::Int32, false),
+            Field::new("b", DataType::Int32, false),
+            Field::new("c", DataType::Int32, false),
+        ]));
+
+        let batch = RecordBatch::try_new(
+            schema,
+            vec![
+                Arc::new(Int32Array::from(vec![1, 2, 3])),
+                Arc::new(Int32Array::from(vec![4, 5, 6])),
+                Arc::new(Int32Array::from(vec![7, 8, 9])),
+            ],
+        )
+        .unwrap();
+
+        let batches = vec![batch];
+        let r = print_batches_with_sep(&batches, b',').unwrap();
+        assert_eq!("a,b,c\n1,4,7\n2,5,8\n3,6,9\n", r);
+    }
+
+    #[test]
+    fn test_print_batches_to_json_empty() {
+        let batches = vec![];
+        let r = print_batches_to_json(&batches).unwrap();
+        assert_eq!("", r);
+
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("a", DataType::Int32, false),
+            Field::new("b", DataType::Int32, false),
+            Field::new("c", DataType::Int32, false),
+        ]));
+
+        let batch = RecordBatch::try_new(
+            schema,
+            vec![
+                Arc::new(Int32Array::from(vec![1, 2, 3])),
+                Arc::new(Int32Array::from(vec![4, 5, 6])),
+                Arc::new(Int32Array::from(vec![7, 8, 9])),
+            ],
+        )
+        .unwrap();
+
+        let batches = vec![batch];
+        let r = print_batches_to_json(&batches).unwrap();
+        assert_eq!("[{\"a\":1,\"b\":4,\"c\":7},{\"a\":2,\"b\":5,\"c\":8},{\"a\":3,\"b\":6,\"c\":9}]", r);
+    }
+}
diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs
index 52d3ccc..2360d46 100644
--- a/datafusion-cli/src/main.rs
+++ b/datafusion-cli/src/main.rs
@@ -66,7 +66,7 @@ pub async fn main() {
         )
         .arg(
             Arg::with_name("format")
-                .help("Output format (possible values: table, csv, tsv)")
+                .help("Output format (possible values: table, csv, tsv, json)")
                 .long("format")
                 .default_value("table")
                 .validator(is_valid_format)

Reply via email to