This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a commit to branch clap-3
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git

commit 6948a0a4175ec2946de616a5b5bb094b5cb767ff
Author: Jiayu Liu <[email protected]>
AuthorDate: Sat Feb 5 00:06:30 2022 +0800

    use clap 3 style args parsing for datafusion cli
---
 datafusion-cli/Cargo.toml          |   3 +
 datafusion-cli/src/command.rs      |  12 ++-
 datafusion-cli/src/exec.rs         |  10 +--
 datafusion-cli/src/functions.rs    |   2 +-
 datafusion-cli/src/lib.rs          |   1 -
 datafusion-cli/src/main.rs         | 176 ++++++++++++++++---------------------
 datafusion-cli/src/print_format.rs |  70 +--------------
 7 files changed, 96 insertions(+), 178 deletions(-)

diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml
index e8f1e30..2e27e4a 100644
--- a/datafusion-cli/Cargo.toml
+++ b/datafusion-cli/Cargo.toml
@@ -17,6 +17,7 @@
 
 [package]
 name = "datafusion-cli"
+description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model. It supports executing SQL queries against CSV and Parquet files as well as querying directly against in-memory data."
 version = "5.1.0"
 authors = ["Apache Arrow <[email protected]>"]
 edition = "2021"
@@ -30,6 +31,8 @@ rust-version = "1.58"
 clap = { version = "3", features = ["derive", "cargo"] }
 rustyline = "9.0"
 tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] }
+log = "~0.4"
+env_logger = "~0.9"
 datafusion = { path = "../datafusion", version = "6.0.0" }
 arrow = { version = "8.0.0" }
 ballista = { path = "../ballista/rust/client", version = "0.6.0" }
diff --git a/datafusion-cli/src/command.rs b/datafusion-cli/src/command.rs
index ef6f67d..27c5bfc 100644
--- a/datafusion-cli/src/command.rs
+++ b/datafusion-cli/src/command.rs
@@ -20,11 +20,13 @@
 use crate::context::Context;
 use crate::functions::{display_all_functions, Function};
 use crate::print_format::PrintFormat;
-use crate::print_options::{self, PrintOptions};
+use crate::print_options::PrintOptions;
+use clap::ArgEnum;
 use datafusion::arrow::array::{ArrayRef, StringArray};
 use datafusion::arrow::datatypes::{DataType, Field, Schema};
 use datafusion::arrow::record_batch::RecordBatch;
 use datafusion::error::{DataFusionError, Result};
+use log::info;
 use std::str::FromStr;
 use std::sync::Arc;
 use std::time::Instant;
@@ -206,10 +208,14 @@ impl OutputFormat {
             Self::ChangeFormat(format) => {
                 if let Ok(format) = format.parse::<PrintFormat>() {
                     print_options.format = format;
-                    println!("Output format is {}.", print_options.format);
+                    println!("Output format is {:?}.", print_options.format);
                     Ok(())
                 } else {
-                    Err(DataFusionError::Execution(format!("{} is not a valid format type [possible values: csv, tsv, table, json, ndjson]", format)))
+                    Err(DataFusionError::Execution(format!(
+                        "{:?} is not a valid format type [possible values: {:?}]",
+                        format,
+                        PrintFormat::value_variants()
+                    )))
                 }
             }
         }
diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs
index dad6d6e..17b329b 100644
--- a/datafusion-cli/src/exec.rs
+++ b/datafusion-cli/src/exec.rs
@@ -21,20 +21,14 @@ use crate::{
     command::{Command, OutputFormat},
     context::Context,
     helper::CliHelper,
-    print_format::{all_print_formats, PrintFormat},
     print_options::PrintOptions,
 };
-use datafusion::arrow::record_batch::RecordBatch;
-use datafusion::arrow::util::pretty;
-use datafusion::error::{DataFusionError, Result};
-use rustyline::config::Config;
+use datafusion::error::Result;
 use rustyline::error::ReadlineError;
 use rustyline::Editor;
 use std::fs::File;
 use std::io::prelude::*;
 use std::io::BufReader;
-use std::str::FromStr;
-use std::sync::Arc;
 use std::time::Instant;
 
 /// run and execute SQL statements and commands from a file, against a context with the given print options
@@ -109,7 +103,7 @@ pub async fn exec_from_repl(ctx: &mut Context, print_options: &mut PrintOptions)
                                     );
                                 }
                             } else {
-                                println!("Output format is {}.", print_options.format);
+                                println!("Output format is {:?}.", print_options.format);
                             }
                         }
                         _ => {
diff --git a/datafusion-cli/src/functions.rs b/datafusion-cli/src/functions.rs
index 2372e64..98b698a 100644
--- a/datafusion-cli/src/functions.rs
+++ b/datafusion-cli/src/functions.rs
@@ -20,7 +20,7 @@ use arrow::array::StringArray;
 use arrow::datatypes::{DataType, Field, Schema};
 use arrow::record_batch::RecordBatch;
 use arrow::util::pretty::pretty_format_batches;
-use datafusion::error::{DataFusionError, Result};
+use datafusion::error::Result;
 use std::fmt;
 use std::str::FromStr;
 use std::sync::Arc;
diff --git a/datafusion-cli/src/lib.rs b/datafusion-cli/src/lib.rs
index b2bcdd3..b75be33 100644
--- a/datafusion-cli/src/lib.rs
+++ b/datafusion-cli/src/lib.rs
@@ -16,7 +16,6 @@
 // under the License.
 
 #![doc = include_str!("../README.md")]
-#![allow(unused_imports)]
 pub const DATAFUSION_CLI_VERSION: &str = env!("CARGO_PKG_VERSION");
 
 pub mod command;
diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs
index 4cb9e9d..c3ceb32 100644
--- a/datafusion-cli/src/main.rs
+++ b/datafusion-cli/src/main.rs
@@ -15,132 +15,110 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use clap::{crate_version, App, Arg};
+use clap::Parser;
 use datafusion::error::Result;
 use datafusion::execution::context::ExecutionConfig;
 use datafusion_cli::{
-    context::Context,
-    exec,
-    print_format::{all_print_formats, PrintFormat},
-    print_options::PrintOptions,
+    context::Context, exec, print_format::PrintFormat, print_options::PrintOptions,
     DATAFUSION_CLI_VERSION,
 };
+use env_logger::Builder;
+use log::info;
+use log::LevelFilter;
 use std::env;
 use std::fs::File;
 use std::io::BufReader;
 use std::path::Path;
 
+#[derive(Debug, Parser, PartialEq)]
+#[clap(author, version, about, long_about= None)]
+struct Args {
+    #[clap(
+        short = 'p',
+        long,
+        help = "Path to your data, default to current directory",
+        validator(is_valid_data_dir)
+    )]
+    data_path: Option<String>,
+
+    #[clap(
+        short = 'c',
+        long,
+        help = "The batch size of each query, or use DataFusion default",
+        validator(is_valid_batch_size)
+    )]
+    batch_size: Option<usize>,
+
+    #[clap(
+        short,
+        long,
+        multiple_values = true,
+        help = "Execute commands from file(s), then exit",
+        validator(is_valid_file)
+    )]
+    file: Vec<String>,
+
+    #[clap(long, arg_enum, default_value_t = PrintFormat::Table)]
+    format: PrintFormat,
+
+    #[clap(long, help = "Ballista scheduler host")]
+    host: Option<String>,
+
+    #[clap(long, help = "Ballista scheduler port")]
+    port: Option<u16>,
+
+    #[clap(
+        short,
+        long,
+        help = "Reduce printing other than the results and work quietly"
+    )]
+    quiet: bool,
+}
+
+fn init_logger(quite: bool) {
+    let mut builder = Builder::from_default_env();
+    builder.filter_level(if quite {
+        LevelFilter::Error
+    } else {
+        LevelFilter::Info
+    });
+    builder.init();
+}
+
 #[tokio::main]
 pub async fn main() -> Result<()> {
-    let matches = App::new("DataFusion")
-        .version(crate_version!())
-        .about(
-            "DataFusion is an in-memory query engine that uses Apache Arrow \
-             as the memory model. It supports executing SQL queries against CSV and \
-             Parquet files as well as querying directly against in-memory data.",
-        )
-        .arg(
-            Arg::new("data-path")
-                .help("Path to your data, default to current directory")
-                .short('p')
-                .long("data-path")
-                .validator(is_valid_data_dir)
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("batch-size")
-                .help("The batch size of each query, or use DataFusion default")
-                .short('c')
-                .long("batch-size")
-                .validator(is_valid_batch_size)
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("file")
-                .help("Execute commands from file(s), then exit")
-                .short('f')
-                .long("file")
-                .multiple_occurrences(true)
-                .validator(is_valid_file)
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("format")
-                .help("Output format")
-                .long("format")
-                .default_value("table")
-                .possible_values(
-                    &all_print_formats()
-                        .iter()
-                        .map(|format| format.to_string())
-                        .collect::<Vec<_>>()
-                        .iter()
-                        .map(|i| i.as_str())
-                        .collect::<Vec<_>>(),
-                )
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("host")
-                .help("Ballista scheduler host")
-                .long("host")
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("port")
-                .help("Ballista scheduler port")
-                .long("port")
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("quiet")
-                .help("Reduce printing other than the results and work quietly")
-                .short('q')
-                .long("quiet")
-                .takes_value(false),
-        )
-        .get_matches();
-
-    let quiet = matches.is_present("quiet");
-
-    if !quiet {
-        println!("DataFusion CLI v{}\n", DATAFUSION_CLI_VERSION);
-    }
+    let args = Args::parse();
+    init_logger(args.quiet);
 
-    let host = matches.value_of("host");
-    let port = matches
-        .value_of("port")
-        .and_then(|port| port.parse::<u16>().ok());
+    if !args.quiet {
+        println!("DataFusion CLI v{}", DATAFUSION_CLI_VERSION);
+    }
 
-    if let Some(path) = matches.value_of("data-path") {
+    if let Some(ref path) = args.data_path {
         let p = Path::new(path);
         env::set_current_dir(&p).unwrap();
     };
 
     let mut execution_config = ExecutionConfig::new().with_information_schema(true);
 
-    if let Some(batch_size) = matches
-        .value_of("batch-size")
-        .and_then(|size| size.parse::<usize>().ok())
-    {
+    if let Some(batch_size) = args.batch_size {
         execution_config = execution_config.with_batch_size(batch_size);
     };
 
-    let mut ctx: Context = match (host, port) {
-        (Some(h), Some(p)) => Context::new_remote(h, p)?,
+    let mut ctx: Context = match (args.host, args.port) {
+        (Some(ref h), Some(p)) => Context::new_remote(h, p)?,
         _ => Context::new_local(&execution_config),
     };
 
-    let format = matches
-        .value_of("format")
-        .expect("No format is specified")
-        .parse::<PrintFormat>()
-        .expect("Invalid format");
-
-    let mut print_options = PrintOptions { format, quiet };
+    let mut print_options = PrintOptions {
+        format: args.format,
+        quiet: args.quiet,
+    };
 
-    if let Some(file_paths) = matches.values_of("file") {
-        let files = file_paths
+    let files = args.file;
+    if !files.is_empty() {
+        let files = files
+            .into_iter()
             .map(|file_path| File::open(file_path).unwrap())
             .collect::<Vec<_>>();
         for file in files {
diff --git a/datafusion-cli/src/print_format.rs b/datafusion-cli/src/print_format.rs
index 0320166..05a1ef7 100644
--- a/datafusion-cli/src/print_format.rs
+++ b/datafusion-cli/src/print_format.rs
@@ -21,11 +21,10 @@ use arrow::json::{ArrayWriter, LineDelimitedWriter};
 use datafusion::arrow::record_batch::RecordBatch;
 use datafusion::arrow::util::pretty;
 use datafusion::error::{DataFusionError, Result};
-use std::fmt;
 use std::str::FromStr;
 
 /// Allow records to be printed in different formats
-#[derive(Debug, PartialEq, Eq, Clone)]
+#[derive(Debug, PartialEq, Eq, clap::ArgEnum, Clone)]
 pub enum PrintFormat {
     Csv,
     Tsv,
@@ -34,40 +33,11 @@ pub enum PrintFormat {
     NdJson,
 }
 
-/// returns all print formats
-pub fn all_print_formats() -> Vec<PrintFormat> {
-    vec![
-        PrintFormat::Csv,
-        PrintFormat::Tsv,
-        PrintFormat::Table,
-        PrintFormat::Json,
-        PrintFormat::NdJson,
-    ]
-}
-
 impl FromStr for PrintFormat {
-    type Err = ();
-    fn from_str(s: &str) -> std::result::Result<Self, ()> {
-        match s.to_lowercase().as_str() {
-            "csv" => Ok(Self::Csv),
-            "tsv" => Ok(Self::Tsv),
-            "table" => Ok(Self::Table),
-            "json" => Ok(Self::Json),
-            "ndjson" => Ok(Self::NdJson),
-            _ => Err(()),
-        }
-    }
-}
+    type Err = String;
 
-impl fmt::Display for PrintFormat {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            Self::Csv => write!(f, "csv"),
-            Self::Tsv => write!(f, "tsv"),
-            Self::Table => write!(f, "table"),
-            Self::Json => write!(f, "json"),
-            Self::NdJson => write!(f, "ndjson"),
-        }
+    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+        clap::ArgEnum::from_str(s, true)
     }
 }
 
@@ -124,38 +94,6 @@ mod tests {
     use std::sync::Arc;
 
     #[test]
-    fn test_from_str() {
-        let format = "csv".parse::<PrintFormat>().unwrap();
-        assert_eq!(PrintFormat::Csv, format);
-
-        let format = "tsv".parse::<PrintFormat>().unwrap();
-        assert_eq!(PrintFormat::Tsv, format);
-
-        let format = "json".parse::<PrintFormat>().unwrap();
-        assert_eq!(PrintFormat::Json, format);
-
-        let format = "ndjson".parse::<PrintFormat>().unwrap();
-        assert_eq!(PrintFormat::NdJson, format);
-
-        let format = "table".parse::<PrintFormat>().unwrap();
-        assert_eq!(PrintFormat::Table, format);
-    }
-
-    #[test]
-    fn test_to_str() {
-        assert_eq!("csv", PrintFormat::Csv.to_string());
-        assert_eq!("table", PrintFormat::Table.to_string());
-        assert_eq!("tsv", PrintFormat::Tsv.to_string());
-        assert_eq!("json", PrintFormat::Json.to_string());
-        assert_eq!("ndjson", PrintFormat::NdJson.to_string());
-    }
-
-    #[test]
-    fn test_from_str_failure() {
-        assert!("pretty".parse::<PrintFormat>().is_err());
-    }
-
-    #[test]
     fn test_print_batches_with_sep() {
         let batches = vec![];
         assert_eq!("", print_batches_with_sep(&batches, b',').unwrap());

Reply via email to