This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new c546eef  ARROW-8287: [Rust] Add "pretty" util to help with printing 
tabular output of RecordBatches
c546eef is described below

commit c546eef41e6ab20c4ca29a2d836987959843896f
Author: Mark Hildreth <[email protected]>
AuthorDate: Wed Apr 29 16:57:53 2020 -0600

    ARROW-8287: [Rust] Add "pretty" util to help with printing tabular output 
of RecordBatches
    
    Just a simple move of code from DataFusion to Arrow, and using it. I have a 
few comments/questions on this [which I have put on the JIRA 
issue](https://issues.apache.org/jira/browse/ARROW-8287?focusedCommentId=17086534&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-17086534).
    
    Closes #6972 from markhildreth/ARROW-8287
    
    Authored-by: Mark Hildreth <[email protected]>
    Signed-off-by: Andy Grove <[email protected]>
---
 rust/arrow/Cargo.toml                        |   1 +
 rust/arrow/examples/read_csv.rs              |  37 +-----
 rust/arrow/examples/read_csv_infer_schema.rs |  38 +-----
 rust/arrow/src/util/mod.rs                   |   1 +
 rust/arrow/src/util/pretty.rs                | 177 +++++++++++++++++++++++++++
 rust/parquet/src/encodings/rle.rs            |   8 +-
 6 files changed, 189 insertions(+), 73 deletions(-)

diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml
index 95a0dae..479cb6c 100644
--- a/rust/arrow/Cargo.toml
+++ b/rust/arrow/Cargo.toml
@@ -50,6 +50,7 @@ chrono = "0.4"
 flatbuffers = "0.6"
 hex = "0.4"
 arrow-flight = { path = "../arrow-flight", optional = true }
+prettytable-rs = "0.8.0"
 
 [features]
 simd = ["packed_simd"]
diff --git a/rust/arrow/examples/read_csv.rs b/rust/arrow/examples/read_csv.rs
index 6a37b3d..cde59d7 100644
--- a/rust/arrow/examples/read_csv.rs
+++ b/rust/arrow/examples/read_csv.rs
@@ -20,11 +20,12 @@ extern crate arrow;
 use std::fs::File;
 use std::sync::Arc;
 
-use arrow::array::{Float64Array, StringArray};
 use arrow::csv;
 use arrow::datatypes::{DataType, Field, Schema};
+use arrow::error::Result;
+use arrow::util::pretty::print_batches;
 
-fn main() {
+fn main() -> Result<()> {
     let schema = Schema::new(vec![
         Field::new("city", DataType::Utf8, false),
         Field::new("lat", DataType::Float64, false),
@@ -35,35 +36,5 @@ fn main() {
 
     let mut csv = csv::Reader::new(file, Arc::new(schema), false, 1024, None);
     let batch = csv.next().unwrap().unwrap();
-
-    println!(
-        "Loaded {} rows containing {} columns",
-        batch.num_rows(),
-        batch.num_columns()
-    );
-
-    let city = batch
-        .column(0)
-        .as_any()
-        .downcast_ref::<StringArray>()
-        .unwrap();
-    let lat = batch
-        .column(1)
-        .as_any()
-        .downcast_ref::<Float64Array>()
-        .unwrap();
-    let lng = batch
-        .column(2)
-        .as_any()
-        .downcast_ref::<Float64Array>()
-        .unwrap();
-
-    for i in 0..batch.num_rows() {
-        println!(
-            "City: {}, Latitude: {}, Longitude: {}",
-            city.value(i),
-            lat.value(i),
-            lng.value(i)
-        );
-    }
+    print_batches(&vec![batch])
 }
diff --git a/rust/arrow/examples/read_csv_infer_schema.rs 
b/rust/arrow/examples/read_csv_infer_schema.rs
index 6a25d2d..07c28c7 100644
--- a/rust/arrow/examples/read_csv_infer_schema.rs
+++ b/rust/arrow/examples/read_csv_infer_schema.rs
@@ -17,11 +17,12 @@
 
 extern crate arrow;
 
-use arrow::array::{Float64Array, StringArray};
 use arrow::csv;
+use arrow::error::Result;
+use arrow::util::pretty::print_batches;
 use std::fs::File;
 
-fn main() {
+fn main() -> Result<()> {
     let file = File::open("test/data/uk_cities_with_headers.csv").unwrap();
     let builder = csv::ReaderBuilder::new()
         .has_headers(true)
@@ -29,36 +30,5 @@ fn main() {
     let mut csv = builder.build(file).unwrap();
     let batch = csv.next().unwrap().unwrap();
 
-    println!(
-        "Loaded {} rows containing {} columns",
-        batch.num_rows(),
-        batch.num_columns()
-    );
-
-    println!("Inferred schema: {:?}", batch.schema());
-
-    let city = batch
-        .column(0)
-        .as_any()
-        .downcast_ref::<StringArray>()
-        .unwrap();
-    let lat = batch
-        .column(1)
-        .as_any()
-        .downcast_ref::<Float64Array>()
-        .unwrap();
-    let lng = batch
-        .column(2)
-        .as_any()
-        .downcast_ref::<Float64Array>()
-        .unwrap();
-
-    for i in 0..batch.num_rows() {
-        println!(
-            "City: {}, Latitude: {}, Longitude: {}",
-            city.value(i),
-            lat.value(i),
-            lng.value(i)
-        );
-    }
+    print_batches(&vec![batch])
 }
diff --git a/rust/arrow/src/util/mod.rs b/rust/arrow/src/util/mod.rs
index 982d42a..a66b3c3 100644
--- a/rust/arrow/src/util/mod.rs
+++ b/rust/arrow/src/util/mod.rs
@@ -17,5 +17,6 @@
 
 pub mod bit_util;
 pub(crate) mod integration_util;
+pub mod pretty;
 pub mod string_writer;
 pub mod test_util;
diff --git a/rust/arrow/src/util/pretty.rs b/rust/arrow/src/util/pretty.rs
new file mode 100644
index 0000000..7416aaa
--- /dev/null
+++ b/rust/arrow/src/util/pretty.rs
@@ -0,0 +1,177 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Utilities for printing record batches
+
+use crate::array;
+use crate::datatypes::{DataType, TimeUnit};
+use crate::record_batch::RecordBatch;
+
+use prettytable::format;
+use prettytable::{Cell, Row, Table};
+
+use crate::error::{ArrowError, Result};
+
+/// Create a visual representation of record batches.
+///
+/// Builds the table with `create_table` and returns its rendered text;
+/// any error from building the table is returned unchanged.
+pub fn pretty_format_batches(results: &Vec<RecordBatch>) -> Result<String> {
+    create_table(results).map(|table| table.to_string())
+}
+
+/// Prints a visual representation of record batches to stdout.
+///
+/// Returns an error, without printing anything, if the table cannot be
+/// built from the batches.
+pub fn print_batches(results: &Vec<RecordBatch>) -> Result<()> {
+    let table = create_table(results)?;
+    table.printstd();
+    Ok(())
+}
+
+/// Convert a series of record batches into a table.
+///
+/// The title row is taken from the schema of the first batch. An empty
+/// slice of batches produces an empty table with no title row. Fails if
+/// any cell cannot be converted to a string.
+fn create_table(results: &Vec<RecordBatch>) -> Result<Table> {
+    let mut table = Table::new();
+    table.set_format(*format::consts::FORMAT_NO_LINESEP_WITH_TITLE);
+
+    // Without at least one batch there is no schema to read titles from.
+    let first_batch = match results.first() {
+        Some(batch) => batch,
+        None => return Ok(table),
+    };
+
+    let titles: Vec<Cell> = first_batch
+        .schema()
+        .fields()
+        .iter()
+        .map(|field| Cell::new(field.name()))
+        .collect();
+    table.set_titles(Row::new(titles));
+
+    for batch in results {
+        for row in 0..batch.num_rows() {
+            // Collecting into `Result` stops at the first cell that fails.
+            let cells = (0..batch.num_columns())
+                .map(|col| {
+                    array_value_to_string(batch.column(col).clone(), row)
+                        .map(|text| Cell::new(&text))
+                })
+                .collect::<Result<Vec<Cell>>>()?;
+            table.add_row(Row::new(cells));
+        }
+    }
+
+    Ok(table)
+}
+
+// Expands to `Ok(String)` holding the value at `$row` of `$column`, after
+// downcasting `$column` to the concrete `$array_type`.
+// Panics (via `unwrap`) if `$column` is not actually a `$array_type`.
+macro_rules! make_string {
+    ($array_type:ty, $column:ident, $row:ident) => {{
+        let typed_array = $column.as_any().downcast_ref::<$array_type>().unwrap();
+        Ok(typed_array.value($row).to_string())
+    }};
+}
+
+/// Get the value at the given row in an array as a string.
+///
+/// Null slots are rendered as an empty string.
+///
+/// # Errors
+///
+/// Returns `ArrowError::InvalidArgumentError` when the column's data type
+/// has no string conversion here.
+fn array_value_to_string(column: array::ArrayRef, row: usize) -> Result<String> {
+    // A null slot has no meaningful value behind it, so never read it.
+    if column.is_null(row) {
+        return Ok(String::new());
+    }
+
+    match column.data_type() {
+        DataType::Utf8 => make_string!(array::StringArray, column, row),
+        DataType::Boolean => make_string!(array::BooleanArray, column, row),
+        DataType::Int8 => make_string!(array::Int8Array, column, row),
+        DataType::Int16 => make_string!(array::Int16Array, column, row),
+        DataType::Int32 => make_string!(array::Int32Array, column, row),
+        DataType::Int64 => make_string!(array::Int64Array, column, row),
+        DataType::UInt8 => make_string!(array::UInt8Array, column, row),
+        DataType::UInt16 => make_string!(array::UInt16Array, column, row),
+        DataType::UInt32 => make_string!(array::UInt32Array, column, row),
+        DataType::UInt64 => make_string!(array::UInt64Array, column, row),
+        // Float16 is deliberately not handled: downcasting such a column to
+        // `Float32Array` would panic or misread the data, so it falls
+        // through to the unsupported-type error below.
+        DataType::Float32 => make_string!(array::Float32Array, column, row),
+        DataType::Float64 => make_string!(array::Float64Array, column, row),
+        DataType::Timestamp(TimeUnit::Second, _) => {
+            make_string!(array::TimestampSecondArray, column, row)
+        }
+        DataType::Timestamp(TimeUnit::Millisecond, _) => {
+            make_string!(array::TimestampMillisecondArray, column, row)
+        }
+        DataType::Timestamp(TimeUnit::Microsecond, _) => {
+            make_string!(array::TimestampMicrosecondArray, column, row)
+        }
+        DataType::Timestamp(TimeUnit::Nanosecond, _) => {
+            make_string!(array::TimestampNanosecondArray, column, row)
+        }
+        DataType::Date32(_) => make_string!(array::Date32Array, column, row),
+        DataType::Date64(_) => make_string!(array::Date64Array, column, row),
+        DataType::Time32(TimeUnit::Second) => {
+            make_string!(array::Time32SecondArray, column, row)
+        }
+        DataType::Time32(TimeUnit::Millisecond) => {
+            make_string!(array::Time32MillisecondArray, column, row)
+        }
+        // Microsecond and nanosecond times are Time64, matching the
+        // `Time64*Array` types they are downcast to.
+        DataType::Time64(TimeUnit::Microsecond) => {
+            make_string!(array::Time64MicrosecondArray, column, row)
+        }
+        DataType::Time64(TimeUnit::Nanosecond) => {
+            make_string!(array::Time64NanosecondArray, column, row)
+        }
+        _ => Err(ArrowError::InvalidArgumentError(format!(
+            "Unsupported {:?} type for repl.",
+            column.data_type()
+        ))),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::datatypes::{Field, Schema};
+    use std::sync::Arc;
+
+    // Formats a single two-column batch and compares the rendered table
+    // line by line against the expected layout.
+    #[test]
+    fn test_pretty_format_batches() -> Result<()> {
+        // One string column "a" next to one 32-bit integer column "b".
+        let fields = vec![
+            Field::new("a", DataType::Utf8, false),
+            Field::new("b", DataType::Int32, false),
+        ];
+        let schema = Arc::new(Schema::new(fields));
+
+        // Four rows of data, one array per column.
+        let names = array::StringArray::from(vec!["a", "b", "c", "d"]);
+        let numbers = array::Int32Array::from(vec![1, 10, 10, 100]);
+        let columns: Vec<array::ArrayRef> = vec![Arc::new(names), Arc::new(numbers)];
+        let batch = RecordBatch::try_new(schema, columns)?;
+
+        let rendered = pretty_format_batches(&vec![batch])?;
+        let actual: Vec<&str> = rendered.lines().collect();
+
+        let expected = vec![
+            "+---+-----+",
+            "| a | b   |",
+            "+---+-----+",
+            "| a | 1   |",
+            "| b | 10  |",
+            "| c | 10  |",
+            "| d | 100 |",
+            "+---+-----+",
+        ];
+
+        assert_eq!(expected, actual);
+
+        Ok(())
+    }
+}
diff --git a/rust/parquet/src/encodings/rle.rs 
b/rust/parquet/src/encodings/rle.rs
index 26df49f..a965f54 100644
--- a/rust/parquet/src/encodings/rle.rs
+++ b/rust/parquet/src/encodings/rle.rs
@@ -522,11 +522,7 @@ impl RleDecoder {
 mod tests {
     use super::*;
 
-    use rand::{
-        self,
-        distributions::{Distribution, Standard},
-        thread_rng, Rng, SeedableRng,
-    };
+    use rand::{self, distributions::Standard, thread_rng, Rng, SeedableRng};
 
     use crate::util::memory::ByteBufferPtr;
 
@@ -830,7 +826,7 @@ mod tests {
             values.clear();
             let mut rng = thread_rng();
             let seed_vec: Vec<u8> =
-                Standard.sample_iter(&mut rng).take(seed_len).collect();
+                rng.sample_iter::<u8, _>(&Standard).take(seed_len).collect();
             let mut seed = [0u8; 32];
             seed.copy_from_slice(&seed_vec[0..seed_len]);
             let mut gen = rand::rngs::StdRng::from_seed(seed);

Reply via email to