manojkarthick commented on a change in pull request #9306:
URL: https://github.com/apache/arrow/pull/9306#discussion_r564488177



##########
File path: rust/parquet/src/record/api.rs
##########
@@ -635,6 +648,55 @@ impl Field {
             _ => nyi!(descr, value),
         }
     }
+
+    #[cfg(feature = "json_output")]
+    pub fn to_json_value(&self) -> Value {
+        match &self {
+            Field::Null => Value::Null,
+            Field::Bool(b) => Value::Bool(*b),
+            Field::Byte(n) => Value::Number(serde_json::Number::from(*n)),
+            Field::Short(n) => Value::Number(serde_json::Number::from(*n)),
+            Field::Int(n) => Value::Number(serde_json::Number::from(*n)),
+            Field::Long(n) => Value::Number(serde_json::Number::from(*n)),
+            Field::UByte(n) => Value::Number(serde_json::Number::from(*n)),
+            Field::UShort(n) => Value::Number(serde_json::Number::from(*n)),
+            Field::UInt(n) => Value::Number(serde_json::Number::from(*n)),
+            Field::ULong(n) => Value::Number(serde_json::Number::from(*n)),
+            Field::Float(n) => serde_json::Number::from_f64(*n as f64)
+                .map(Value::Number)
+                .unwrap_or(Value::Null),
+            Field::Double(n) => serde_json::Number::from_f64(*n)
+                .map(Value::Number)
+                .unwrap_or(Value::Null),
+            Field::Decimal(n) => Value::String(convert_decimal_to_string(&n)),
+            Field::Str(s) => Value::String(s.to_owned()),
+            Field::Bytes(b) => Value::String(base64::encode(b.data())),
+            Field::Date(d) => Value::String(convert_date_to_string(*d)),
+            Field::TimestampMillis(ts) => {
+                Value::String(convert_timestamp_millis_to_string(*ts))
+            }
+            Field::TimestampMicros(ts) => {
+                Value::String(convert_timestamp_micros_to_string(*ts))
+            }
+            Field::Group(row) => row.to_json_value(),
+            Field::ListInternal(fields) => {
+                Value::Array(fields.elements.iter().map(|f| 
f.to_json_value()).collect())
+            }
+            Field::MapInternal(map) => Value::Object(
+                map.entries
+                    .iter()
+                    .map(|(key_field, value_field)| {
+                        let key_val = key_field.to_json_value();
+                        let key_str = key_val
+                            .as_str()
+                            .map(|s| s.to_owned())
+                            .unwrap_or_else(|| key_val.to_string());
+                        (key_str, value_field.to_json_value())
+                    })
+                    .collect(),
+            ),
+        }
+    }
 }

Review comment:
       Thank you - I will work on that.

##########
File path: rust/parquet/src/bin/parquet-read.rs
##########
@@ -34,39 +34,98 @@
 //! ```
 //!
 //! # Usage
-//!
 //! ```
-//! parquet-read <file-path> [num-records]
+//!  parquet-read <file-path> [num-records]
 //! ```
-//! where `file-path` is the path to a Parquet file and `num-records` is the 
optional
-//! numeric option that allows to specify number of records to read from a 
file.
-//! When not provided, all records are read.
+//!
+//! ## Flags
+//!     -h, --help       Prints help information
+//!     -j, --json       Print parquet file in JSON lines Format
+//!     -V, --version    Prints version information
+//!
+//! ## Args
+//!     <file-path>      Path to a parquet file
+//!     <num-records>    Number of records to read. When not provided, all 
records are read.
 //!
 //! Note that `parquet-read` reads full file schema, no projection or 
filtering is
 //! applied.
 
 extern crate parquet;
 
-use std::{env, fs::File, path::Path, process};
+use std::{env, fs::File, path::Path};
+
+use clap::{crate_authors, crate_version, App, Arg};
 
 use parquet::file::reader::{FileReader, SerializedFileReader};
+use parquet::record::Row;
 
 fn main() {
-    let args: Vec<String> = env::args().collect();
-    if args.len() != 2 && args.len() != 3 {
-        println!("Usage: parquet-read <file-path> [num-records]");
-        process::exit(1);
-    }
+    #[cfg(not(feature = "json_output"))]
+    let app = App::new("parquet-read")
+        .version(crate_version!())
+        .author(crate_authors!())
+        .about("Read data from parquet file")
+        .arg(
+            Arg::with_name("file_path")
+                .value_name("file-path")
+                .required(true)
+                .index(1)
+                .help("Path to a parquet file"),
+        )
+        .arg(
+            Arg::with_name("num_records")
+                .value_name("num-records")
+                .index(2)
+                .help(
+                    "Number of records to read. When not provided, all records 
are read.",
+                ),
+        );
+
+    #[cfg(feature = "json_output")]
+    let app = App::new("parquet-read")
+        .version(crate_version!())
+        .author(crate_authors!())
+        .about("Read data from parquet file")
+        .arg(
+            Arg::with_name("file_path")
+                .value_name("file-path")
+                .required(true)
+                .index(1)
+                .help("Path to a parquet file"),
+        )
+        .arg(
+            Arg::with_name("num_records")
+                .value_name("num-records")
+                .index(2)
+                .help(
+                    "Number of records to read. When not provided, all records 
are read.",
+                ),
+        )
+        .arg(

Review comment:
       I tried but that didn't work since `clap::App` doesn't implement the 
`Copy` trait. OTOH, if we move this into a CLI tools feature, I don't think we 
need to have a separate feature for json output. Thoughts?

##########
File path: rust/parquet/Cargo.toml
##########
@@ -43,6 +43,8 @@ chrono = "0.4"
 num-bigint = "0.3"
 arrow = { path = "../arrow", version = "4.0.0-SNAPSHOT", optional = true }
 base64 = { version = "0.12", optional = true }
+clap = "2.33.3"

Review comment:
       Thanks @alamb ! Just to confirm: Having one feature for command line 
tools that has both `clap` + `serde_json` as dependencies and when used without 
that feature, no binaries are produced. Is my understanding correct?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to