This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 4bbb747  ARROW-10233: [Rust] Make array_value_to_string available in 
all Arrow builds
4bbb747 is described below

commit 4bbb74713c6883e8523eeeb5ac80a1e1f8521674
Author: alamb <[email protected]>
AuthorDate: Thu Oct 8 17:02:24 2020 +0200

    ARROW-10233: [Rust] Make array_value_to_string available in all Arrow builds
    
    This PR makes `array_value_to_string` available to all arrow builds. 
Currently it is only available if the `feature = "prettyprint"` is enabled 
which is not the default. The full `print_batches` and `pretty_format_batches` 
(and the libraries they depend on) are still only available if the feature flag 
is set.
    
    The rationale for making this change is that I want to be able to use 
`array_value_to_string` to write tests (such as on 
https://github.com/apache/arrow/pull/8346) but currently it is only available 
when `feature = "prettyprint"` is enabled.
    
    It appears that @nevi-me  made prettyprint compilation optional so that 
arrow could be compiled for wasm in https://github.com/apache/arrow/pull/7400. 
https://issues.apache.org/jira/browse/ARROW-9088 explains that this is due to 
some dependency of `prettytable`; `array_value_to_string` itself does not need 
any such dependencies.
    
    Note I tried to compile ARROW again using the `wasm32-unknown-unknown` 
target on master and it fails (perhaps due to a new dependency that was added?):
    
    <details>
      <summary>Click to expand!</summary>
    
    ```
    alamb@ip-192-168-0-182 rust % git log | head -n 1
    git log | head -n 1
    commit d4cbc4b7aab5d37262b83e972af4bd7cb44c7a5c
    alamb@ip-192-168-0-182 rust % git status
    git status
    On branch master
    Your branch is up to date with 'upstream/master'.
    
    nothing to commit, working tree clean
    alamb@ip-192-168-0-182 rust %
    
    alamb@ip-192-168-0-182 rust % cargo build --target=wasm32-unknown-unknown
    cargo build --target=wasm32-unknown-unknown
       Compiling cfg-if v0.1.10
       Compiling lazy_static v1.4.0
       Compiling futures-core v0.3.5
       Compiling slab v0.4.2
       Compiling futures-sink v0.3.5
       Compiling once_cell v1.4.0
       Compiling pin-utils v0.1.0
       Compiling futures-io v0.3.5
       Compiling itoa v0.4.5
       Compiling bytes v0.5.4
       Compiling fnv v1.0.7
       Compiling iovec v0.1.4
       Compiling unicode-width v0.1.7
       Compiling pin-project-lite v0.1.7
       Compiling ppv-lite86 v0.2.8
       Compiling atty v0.2.14
       Compiling dirs v1.0.5
       Compiling smallvec v1.4.0
       Compiling regex-syntax v0.6.18
       Compiling encode_unicode v0.3.6
       Compiling hex v0.4.2
       Compiling tower-service v0.3.0
    error[E0433]: failed to resolve: could not find `unix` in `os`
      --> 
/Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/dirs-1.0.5/src/lin.rs:41:18
       |
    41 |     use std::os::unix::ffi::OsStringExt;
       |                  ^^^^ could not find `unix` in `os`
    
    error[E0432]: unresolved import `unix`
     --> 
/Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/dirs-1.0.5/src/lin.rs:6:5
      |
    6 | use unix;
      |     ^^^^ no `unix` in the root
    
       Compiling alloc-no-stdlib v2.0.1
       Compiling adler32 v1.0.4
    error[E0599]: no function or associated item named `from_vec` found for 
struct `std::ffi::OsString` in the current scope
      --> 
/Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/dirs-1.0.5/src/lin.rs:48:34
       |
    48 |     Some(PathBuf::from(OsString::from_vec(out)))
       |                                  ^^^^^^^^ function or associated item 
not found in `std::ffi::OsString`
       |
       = help: items from traits can only be used if the trait is in scope
       = note: the following trait is implemented but not in scope; perhaps add 
a `use` for it:
               `use std::sys_common::os_str_bytes::OsStringExt;`
    
    error: aborting due to 3 previous errors
    
    Some errors have detailed explanations: E0432, E0433, E0599.
    For more information about an error, try `rustc --explain E0432`.
    error: could not compile `dirs`.
    
    To learn more, run the command again with --verbose.
    warning: build failed, waiting for other jobs to finish...
    error: build failed
    alamb@ip-192-168-0-182 rust % ```
    
    </details>
    
    Closes #8397 from alamb/alamb/consolidate-array-value-to-string
    
    Lead-authored-by: alamb <[email protected]>
    Co-authored-by: Andrew Lamb <[email protected]>
    Signed-off-by: Jorge C. Leitao <[email protected]>
---
 rust/arrow/src/util/{pretty.rs => display.rs} | 149 ++------------------------
 rust/arrow/src/util/mod.rs                    |   1 +
 rust/arrow/src/util/pretty.rs                 | 119 ++------------------
 rust/datafusion/tests/sql.rs                  |   2 +-
 4 files changed, 17 insertions(+), 254 deletions(-)

diff --git a/rust/arrow/src/util/pretty.rs b/rust/arrow/src/util/display.rs
similarity index 59%
copy from rust/arrow/src/util/pretty.rs
copy to rust/arrow/src/util/display.rs
index b881c3a..bf0cade 100644
--- a/rust/arrow/src/util/pretty.rs
+++ b/rust/arrow/src/util/display.rs
@@ -15,7 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Utilities for printing record batches
+//! Functions for printing array values, as strings, for debugging
+//! purposes. See the `pretty` crate for additional functions for
+//! record batch pretty printing.
 
 use crate::array;
 use crate::array::{Array, PrimitiveArrayOps};
@@ -23,56 +25,11 @@ use crate::datatypes::{
     ArrowNativeType, ArrowPrimitiveType, DataType, Int16Type, Int32Type, 
Int64Type,
     Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
 };
-use crate::record_batch::RecordBatch;
 
 use array::DictionaryArray;
-use prettytable::format;
-use prettytable::{Cell, Row, Table};
 
 use crate::error::{ArrowError, Result};
 
-///! Create a visual representation of record batches
-pub fn pretty_format_batches(results: &[RecordBatch]) -> Result<String> {
-    Ok(create_table(results)?.to_string())
-}
-
-///! Prints a visual representation of record batches to stdout
-pub fn print_batches(results: &[RecordBatch]) -> Result<()> {
-    create_table(results)?.printstd();
-    Ok(())
-}
-
-///! Convert a series of record batches into a table
-fn create_table(results: &[RecordBatch]) -> Result<Table> {
-    let mut table = Table::new();
-    table.set_format(*format::consts::FORMAT_NO_LINESEP_WITH_TITLE);
-
-    if results.is_empty() {
-        return Ok(table);
-    }
-
-    let schema = results[0].schema();
-
-    let mut header = Vec::new();
-    for field in schema.fields() {
-        header.push(Cell::new(&field.name()));
-    }
-    table.set_titles(Row::new(header));
-
-    for batch in results {
-        for row in 0..batch.num_rows() {
-            let mut cells = Vec::new();
-            for col in 0..batch.num_columns() {
-                let column = batch.column(col);
-                cells.push(Cell::new(&array_value_to_string(&column, row)?));
-            }
-            table.add_row(Row::new(cells));
-        }
-    }
-
-    Ok(table)
-}
-
 macro_rules! make_string {
     ($array_type:ty, $column: ident, $row: ident) => {{
         let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
@@ -87,7 +44,10 @@ macro_rules! make_string {
     }};
 }
 
-/// Get the value at the given row in an array as a String
+/// Get the value at the given row in an array as a String.
+///
+/// Note this function is quite inefficient and is unlikely to be
+/// suitable for converting large arrays or record batches.
 pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> 
Result<String> {
     match column.data_type() {
         DataType::Utf8 => make_string!(array::StringArray, column, row),
@@ -165,7 +125,7 @@ fn dict_array_value_to_string<K: ArrowPrimitiveType>(
 
     let dict_index = keys_array.value(row).to_usize().ok_or_else(|| {
         ArrowError::InvalidArgumentError(format!(
-            "Can not convert value {:?} at index {:?} to usize for repl.",
+            "Can not convert value {:?} at index {:?} to usize for string 
conversion.",
             keys_array.value(row),
             row
         ))
@@ -173,96 +133,3 @@ fn dict_array_value_to_string<K: ArrowPrimitiveType>(
 
     array_value_to_string(&dict_array.values(), dict_index)
 }
-
-#[cfg(test)]
-mod tests {
-    use array::{PrimitiveBuilder, StringBuilder, StringDictionaryBuilder};
-
-    use super::*;
-    use crate::datatypes::{Field, Schema};
-    use std::sync::Arc;
-
-    #[test]
-    fn test_pretty_format_batches() -> Result<()> {
-        // define a schema.
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("a", DataType::Utf8, true),
-            Field::new("b", DataType::Int32, true),
-        ]));
-
-        // define data.
-        let batch = RecordBatch::try_new(
-            schema,
-            vec![
-                Arc::new(array::StringArray::from(vec![
-                    Some("a"),
-                    Some("b"),
-                    None,
-                    Some("d"),
-                ])),
-                Arc::new(array::Int32Array::from(vec![
-                    Some(1),
-                    None,
-                    Some(10),
-                    Some(100),
-                ])),
-            ],
-        )?;
-
-        let table = pretty_format_batches(&[batch])?;
-
-        let expected = vec![
-            "+---+-----+",
-            "| a | b   |",
-            "+---+-----+",
-            "| a | 1   |",
-            "| b |     |",
-            "|   | 10  |",
-            "| d | 100 |",
-            "+---+-----+",
-        ];
-
-        let actual: Vec<&str> = table.lines().collect();
-
-        assert_eq!(expected, actual, "Actual result:\n{}", table);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_pretty_format_dictionary() -> Result<()> {
-        // define a schema.
-        let field_type =
-            DataType::Dictionary(Box::new(DataType::Int32), 
Box::new(DataType::Utf8));
-        let schema = Arc::new(Schema::new(vec![Field::new("d1", field_type, 
true)]));
-
-        let keys_builder = PrimitiveBuilder::<Int32Type>::new(10);
-        let values_builder = StringBuilder::new(10);
-        let mut builder = StringDictionaryBuilder::new(keys_builder, 
values_builder);
-
-        builder.append("one")?;
-        builder.append_null()?;
-        builder.append("three")?;
-        let array = Arc::new(builder.finish());
-
-        let batch = RecordBatch::try_new(schema.clone(), vec![array])?;
-
-        let table = pretty_format_batches(&[batch])?;
-
-        let expected = vec![
-            "+-------+",
-            "| d1    |",
-            "+-------+",
-            "| one   |",
-            "|       |",
-            "| three |",
-            "+-------+",
-        ];
-
-        let actual: Vec<&str> = table.lines().collect();
-
-        assert_eq!(expected, actual, "Actual result:\n{}", table);
-
-        Ok(())
-    }
-}
diff --git a/rust/arrow/src/util/mod.rs b/rust/arrow/src/util/mod.rs
index 30a510f..0f95043 100644
--- a/rust/arrow/src/util/mod.rs
+++ b/rust/arrow/src/util/mod.rs
@@ -17,6 +17,7 @@
 
 pub mod bit_chunk_iterator;
 pub mod bit_util;
+pub mod display;
 pub mod integration_util;
 #[cfg(feature = "prettyprint")]
 pub mod pretty;
diff --git a/rust/arrow/src/util/pretty.rs b/rust/arrow/src/util/pretty.rs
index b881c3a..7eacba3 100644
--- a/rust/arrow/src/util/pretty.rs
+++ b/rust/arrow/src/util/pretty.rs
@@ -15,21 +15,17 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Utilities for printing record batches
+//! Utilities for printing record batches. Note this module is not
+//! available unless `feature = "prettyprint"` is enabled.
 
-use crate::array;
-use crate::array::{Array, PrimitiveArrayOps};
-use crate::datatypes::{
-    ArrowNativeType, ArrowPrimitiveType, DataType, Int16Type, Int32Type, 
Int64Type,
-    Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
-};
 use crate::record_batch::RecordBatch;
 
-use array::DictionaryArray;
 use prettytable::format;
 use prettytable::{Cell, Row, Table};
 
-use crate::error::{ArrowError, Result};
+use crate::error::Result;
+
+use super::display::array_value_to_string;
 
 ///! Create a visual representation of record batches
 pub fn pretty_format_batches(results: &[RecordBatch]) -> Result<String> {
@@ -73,113 +69,12 @@ fn create_table(results: &[RecordBatch]) -> Result<Table> {
     Ok(table)
 }
 
-macro_rules! make_string {
-    ($array_type:ty, $column: ident, $row: ident) => {{
-        let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
-
-        let s = if array.is_null($row) {
-            "".to_string()
-        } else {
-            array.value($row).to_string()
-        };
-
-        Ok(s)
-    }};
-}
-
-/// Get the value at the given row in an array as a String
-pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> 
Result<String> {
-    match column.data_type() {
-        DataType::Utf8 => make_string!(array::StringArray, column, row),
-        DataType::Boolean => make_string!(array::BooleanArray, column, row),
-        DataType::Int8 => make_string!(array::Int8Array, column, row),
-        DataType::Int16 => make_string!(array::Int16Array, column, row),
-        DataType::Int32 => make_string!(array::Int32Array, column, row),
-        DataType::Int64 => make_string!(array::Int64Array, column, row),
-        DataType::UInt8 => make_string!(array::UInt8Array, column, row),
-        DataType::UInt16 => make_string!(array::UInt16Array, column, row),
-        DataType::UInt32 => make_string!(array::UInt32Array, column, row),
-        DataType::UInt64 => make_string!(array::UInt64Array, column, row),
-        DataType::Float16 => make_string!(array::Float32Array, column, row),
-        DataType::Float32 => make_string!(array::Float32Array, column, row),
-        DataType::Float64 => make_string!(array::Float64Array, column, row),
-        DataType::Timestamp(unit, _) if *unit == TimeUnit::Second => {
-            make_string!(array::TimestampSecondArray, column, row)
-        }
-        DataType::Timestamp(unit, _) if *unit == TimeUnit::Millisecond => {
-            make_string!(array::TimestampMillisecondArray, column, row)
-        }
-        DataType::Timestamp(unit, _) if *unit == TimeUnit::Microsecond => {
-            make_string!(array::TimestampMicrosecondArray, column, row)
-        }
-        DataType::Timestamp(unit, _) if *unit == TimeUnit::Nanosecond => {
-            make_string!(array::TimestampNanosecondArray, column, row)
-        }
-        DataType::Date32(_) => make_string!(array::Date32Array, column, row),
-        DataType::Date64(_) => make_string!(array::Date64Array, column, row),
-        DataType::Time32(unit) if *unit == TimeUnit::Second => {
-            make_string!(array::Time32SecondArray, column, row)
-        }
-        DataType::Time32(unit) if *unit == TimeUnit::Millisecond => {
-            make_string!(array::Time32MillisecondArray, column, row)
-        }
-        DataType::Time32(unit) if *unit == TimeUnit::Microsecond => {
-            make_string!(array::Time64MicrosecondArray, column, row)
-        }
-        DataType::Time64(unit) if *unit == TimeUnit::Nanosecond => {
-            make_string!(array::Time64NanosecondArray, column, row)
-        }
-        DataType::Dictionary(index_type, _value_type) => match **index_type {
-            DataType::Int8 => dict_array_value_to_string::<Int8Type>(column, 
row),
-            DataType::Int16 => dict_array_value_to_string::<Int16Type>(column, 
row),
-            DataType::Int32 => dict_array_value_to_string::<Int32Type>(column, 
row),
-            DataType::Int64 => dict_array_value_to_string::<Int64Type>(column, 
row),
-            DataType::UInt8 => dict_array_value_to_string::<UInt8Type>(column, 
row),
-            DataType::UInt16 => 
dict_array_value_to_string::<UInt16Type>(column, row),
-            DataType::UInt32 => 
dict_array_value_to_string::<UInt32Type>(column, row),
-            DataType::UInt64 => 
dict_array_value_to_string::<UInt64Type>(column, row),
-            _ => Err(ArrowError::InvalidArgumentError(format!(
-                "Pretty printing not supported for {:?} due to index type",
-                column.data_type()
-            ))),
-        },
-        _ => Err(ArrowError::InvalidArgumentError(format!(
-            "Pretty printing not implemented for {:?} type",
-            column.data_type()
-        ))),
-    }
-}
-
-/// Converts the value of the dictionary array at `row` to a String
-fn dict_array_value_to_string<K: ArrowPrimitiveType>(
-    colum: &array::ArrayRef,
-    row: usize,
-) -> Result<String> {
-    let dict_array = 
colum.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
-
-    let keys_array = dict_array.keys_array();
-
-    if keys_array.is_null(row) {
-        return Ok(String::from(""));
-    }
-
-    let dict_index = keys_array.value(row).to_usize().ok_or_else(|| {
-        ArrowError::InvalidArgumentError(format!(
-            "Can not convert value {:?} at index {:?} to usize for repl.",
-            keys_array.value(row),
-            row
-        ))
-    })?;
-
-    array_value_to_string(&dict_array.values(), dict_index)
-}
-
 #[cfg(test)]
 mod tests {
-    use array::{PrimitiveBuilder, StringBuilder, StringDictionaryBuilder};
+    use crate::array::{self, PrimitiveBuilder, StringBuilder, 
StringDictionaryBuilder};
 
     use super::*;
-    use crate::datatypes::{Field, Schema};
+    use crate::datatypes::{DataType, Field, Int32Type, Schema};
     use std::sync::Arc;
 
     #[test]
diff --git a/rust/datafusion/tests/sql.rs b/rust/datafusion/tests/sql.rs
index 5640daa..1bc8bd0 100644
--- a/rust/datafusion/tests/sql.rs
+++ b/rust/datafusion/tests/sql.rs
@@ -25,7 +25,7 @@ use arrow::record_batch::RecordBatch;
 use arrow::{array::*, datatypes::TimeUnit};
 use arrow::{
     datatypes::{DataType, Field, Schema, SchemaRef},
-    util::pretty::array_value_to_string,
+    util::display::array_value_to_string,
 };
 
 use datafusion::datasource::{csv::CsvReadOptions, MemTable};

Reply via email to