sunchao commented on a change in pull request #9306:
URL: https://github.com/apache/arrow/pull/9306#discussion_r567462432



##########
File path: rust/parquet/README.md
##########
@@ -79,23 +79,23 @@ enabled by adding `RUSTFLAGS="-C target-feature=+sse4.2"` 
before the
 `cargo build` command.
 
 ## Test
-Run `cargo test` for unit tests.
+Run `cargo test` for unit tests. To also run tests related to the binaries, 
use `cargo test --features cli`.
 
 ## Binaries
-The following binaries are provided (use `cargo install` to install them):
+The following binaries are provided (use `cargo install --features cli` to 
install them):
 - **parquet-schema** for printing Parquet file schema and metadata.
-`Usage: parquet-schema <file-path> [verbose]`, where `file-path` is the path 
to a Parquet file,
-and optional `verbose` is the boolean flag that allows to print full metadata 
or schema only
-(when not specified only schema will be printed).
+`Usage: parquet-schema <file-path>`, where `file-path` is the path to a 
Parquet file. Use `-v/--verbose` flag 
+to print full metadata or schema only (when not specified only schema will be 
printed).
 
 - **parquet-read** for reading records from a Parquet file.
 `Usage: parquet-read <file-path> [num-records]`, where `file-path` is the path 
to a Parquet file,
 and `num-records` is the number of records to read from a file (when not 
specified all records will
-be printed).
+be printed). Use `cargo install --features json_output` to enable `-j/--json` 
flag for printing output

Review comment:
       Hmm is this correct? I don't see `json_output` in the Cargo file. The 
flag is associated with the command?

##########
File path: rust/parquet/src/bin/parquet-rowcount.rs
##########
@@ -34,30 +34,44 @@
 //! ```
 //!
 //! # Usage
-//!
 //! ```
-//! parquet-rowcount <file-path> ...
+//! parquet-rowcount <file-paths>...
 //! ```
-//! where `file-path` is the path to a Parquet file and `...` is any 
additional number of
-//! parquet files to count the number of rows from.
+//!
+//! ## Flags
+//!     -h, --help       Prints help information
+//!     -V, --version    Prints version information
+//!
+//! ## Args
+//!     <file-paths>...    List of parquet files to read from
 //!
 //! Note that `parquet-rowcount` reads full file schema, no projection or 
filtering is
 //! applied.
 
 extern crate parquet;
 
-use std::{env, fs::File, path::Path, process};
+use std::{env, fs::File, path::Path};
+
+use clap::{crate_authors, crate_version, App, Arg};
 
 use parquet::file::reader::{FileReader, SerializedFileReader};
 
 fn main() {
-    let args: Vec<String> = env::args().collect();
-    if args.len() < 2 {
-        println!("Usage: parquet-rowcount <file-path> ...");
-        process::exit(1);
-    }
+    let matches = App::new("parquet-rowcount")
+        .version(crate_version!())
+        .author(crate_authors!())
+        .about("Return number of rows in parquet file")
+        .arg(
+            Arg::with_name("file_paths")
+                .value_name("file-paths")
+                .required(true)
+                .multiple(true)
+                .help("List of parquet files to read from"),

Review comment:
       maybe add "separated by space"

##########
File path: rust/parquet/src/bin/parquet-read.rs
##########
@@ -34,39 +34,72 @@
 //! ```
 //!
 //! # Usage
-//!
 //! ```
-//! parquet-read <file-path> [num-records]
+//!  parquet-read <file-path> [num-records]

Review comment:
       nit: remove space?

##########
File path: rust/parquet/src/bin/parquet-read.rs
##########
@@ -34,39 +34,72 @@
 //! ```
 //!
 //! # Usage
-//!
 //! ```
-//! parquet-read <file-path> [num-records]
+//!  parquet-read <file-path> [num-records]
 //! ```
-//! where `file-path` is the path to a Parquet file and `num-records` is the 
optional
-//! numeric option that allows to specify number of records to read from a 
file.
-//! When not provided, all records are read.
+//!
+//! ## Flags
+//!     -h, --help       Prints help information
+//!     -j, --json       Print parquet file in JSON lines Format
+//!     -V, --version    Prints version information
+//!
+//! ## Args
+//!     <file-path>      Path to a parquet file
+//!     <num-records>    Number of records to read. When not provided, all 
records are read.
 //!
 //! Note that `parquet-read` reads full file schema, no projection or 
filtering is
 //! applied.
 
 extern crate parquet;
 
-use std::{env, fs::File, path::Path, process};
+use std::{env, fs::File, path::Path};
+
+use clap::{crate_authors, crate_version, App, Arg};
 
 use parquet::file::reader::{FileReader, SerializedFileReader};
+use parquet::record::Row;
 
 fn main() {
-    let args: Vec<String> = env::args().collect();
-    if args.len() != 2 && args.len() != 3 {
-        println!("Usage: parquet-read <file-path> [num-records]");
-        process::exit(1);
-    }
+    let app = App::new("parquet-read")
+        .version(crate_version!())
+        .author(crate_authors!())
+        .about("Read data from parquet file")

Review comment:
       maybe say "Read data from a Parquet file and print output in console, in 
either built-in or JSON format"?

##########
File path: rust/parquet/src/bin/parquet-rowcount.rs
##########
@@ -34,30 +34,44 @@
 //! ```
 //!
 //! # Usage
-//!
 //! ```
-//! parquet-rowcount <file-path> ...
+//! parquet-rowcount <file-paths>...
 //! ```
-//! where `file-path` is the path to a Parquet file and `...` is any 
additional number of
-//! parquet files to count the number of rows from.
+//!
+//! ## Flags
+//!     -h, --help       Prints help information
+//!     -V, --version    Prints version information
+//!
+//! ## Args
+//!     <file-paths>...    List of parquet files to read from
 //!
 //! Note that `parquet-rowcount` reads full file schema, no projection or 
filtering is
 //! applied.
 
 extern crate parquet;
 
-use std::{env, fs::File, path::Path, process};
+use std::{env, fs::File, path::Path};
+
+use clap::{crate_authors, crate_version, App, Arg};
 
 use parquet::file::reader::{FileReader, SerializedFileReader};
 
 fn main() {
-    let args: Vec<String> = env::args().collect();
-    if args.len() < 2 {
-        println!("Usage: parquet-rowcount <file-path> ...");
-        process::exit(1);
-    }
+    let matches = App::new("parquet-rowcount")
+        .version(crate_version!())
+        .author(crate_authors!())
+        .about("Return number of rows in parquet file")

Review comment:
       nit: let's use Parquet instead of parquet everywhere

##########
File path: rust/parquet/src/bin/parquet-read.rs
##########
@@ -34,39 +34,72 @@
 //! ```
 //!
 //! # Usage
-//!
 //! ```
-//! parquet-read <file-path> [num-records]
+//!  parquet-read <file-path> [num-records]
 //! ```
-//! where `file-path` is the path to a Parquet file and `num-records` is the 
optional
-//! numeric option that allows to specify number of records to read from a 
file.
-//! When not provided, all records are read.
+//!
+//! ## Flags
+//!     -h, --help       Prints help information
+//!     -j, --json       Print parquet file in JSON lines Format
+//!     -V, --version    Prints version information
+//!
+//! ## Args
+//!     <file-path>      Path to a parquet file
+//!     <num-records>    Number of records to read. When not provided, all 
records are read.
 //!
 //! Note that `parquet-read` reads full file schema, no projection or 
filtering is
 //! applied.
 
 extern crate parquet;
 
-use std::{env, fs::File, path::Path, process};
+use std::{env, fs::File, path::Path};
+
+use clap::{crate_authors, crate_version, App, Arg};
 
 use parquet::file::reader::{FileReader, SerializedFileReader};
+use parquet::record::Row;
 
 fn main() {
-    let args: Vec<String> = env::args().collect();
-    if args.len() != 2 && args.len() != 3 {
-        println!("Usage: parquet-read <file-path> [num-records]");
-        process::exit(1);
-    }
+    let app = App::new("parquet-read")
+        .version(crate_version!())
+        .author(crate_authors!())
+        .about("Read data from parquet file")
+        .arg(
+            Arg::with_name("file_path")
+                .value_name("file-path")
+                .required(true)
+                .index(1)
+                .help("Path to a parquet file"),
+        )
+        .arg(
+            Arg::with_name("num_records")
+                .value_name("num-records")
+                .index(2)
+                .help(
+                    "Number of records to read. When not provided, all records 
are read.",
+                ),
+        )
+        .arg(
+            Arg::with_name("json")
+                .short("j")
+                .long("json")
+                .takes_value(false)
+                .help("Print parquet file in JSON lines Format"),

Review comment:
       nit: Format -> format




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to