This is an automated email from the ASF dual-hosted git repository.
jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 4bbb747 ARROW-10233: [Rust] Make array_value_to_string available in
all Arrow builds
4bbb747 is described below
commit 4bbb74713c6883e8523eeeb5ac80a1e1f8521674
Author: alamb <[email protected]>
AuthorDate: Thu Oct 8 17:02:24 2020 +0200
ARROW-10233: [Rust] Make array_value_to_string available in all Arrow builds
This PR makes `array_value_to_string` available to all arrow builds.
Currently it is only available if `feature = "prettyprint"` is enabled,
which is not the default. The full `print_batches` and `pretty_format_batches`
(and the libraries they depend on) are still only available if the feature flag
is set.
The rationale for making this change is that I want to be able to use
`array_value_to_string` to write tests (such as on
https://github.com/apache/arrow/pull/8346) but currently it is only available
when `feature = "prettyprint"` is enabled.
It appears that @nevi-me made prettyprint compilation optional so that
arrow could be compiled for wasm in https://github.com/apache/arrow/pull/7400.
https://issues.apache.org/jira/browse/ARROW-9088 explains that this is due to
some dependency of `prettytable`; `array_value_to_string` does not need any
of those dependencies.
Note I tried to compile ARROW again using the `wasm32-unknown-unknown`
target on master and it fails (perhaps due to a new dependency that was added?):
<details>
<summary>Click to expand!</summary>
```
alamb@ip-192-168-0-182 rust % git log | head -n 1
git log | head -n 1
commit d4cbc4b7aab5d37262b83e972af4bd7cb44c7a5c
alamb@ip-192-168-0-182 rust % git status
git status
On branch master
Your branch is up to date with 'upstream/master'.
nothing to commit, working tree clean
alamb@ip-192-168-0-182 rust %
alamb@ip-192-168-0-182 rust % cargo build --target=wasm32-unknown-unknown
cargo build --target=wasm32-unknown-unknown
Compiling cfg-if v0.1.10
Compiling lazy_static v1.4.0
Compiling futures-core v0.3.5
Compiling slab v0.4.2
Compiling futures-sink v0.3.5
Compiling once_cell v1.4.0
Compiling pin-utils v0.1.0
Compiling futures-io v0.3.5
Compiling itoa v0.4.5
Compiling bytes v0.5.4
Compiling fnv v1.0.7
Compiling iovec v0.1.4
Compiling unicode-width v0.1.7
Compiling pin-project-lite v0.1.7
Compiling ppv-lite86 v0.2.8
Compiling atty v0.2.14
Compiling dirs v1.0.5
Compiling smallvec v1.4.0
Compiling regex-syntax v0.6.18
Compiling encode_unicode v0.3.6
Compiling hex v0.4.2
Compiling tower-service v0.3.0
error[E0433]: failed to resolve: could not find `unix` in `os`
-->
/Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/dirs-1.0.5/src/lin.rs:41:18
|
41 | use std::os::unix::ffi::OsStringExt;
| ^^^^ could not find `unix` in `os`
error[E0432]: unresolved import `unix`
-->
/Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/dirs-1.0.5/src/lin.rs:6:5
|
6 | use unix;
| ^^^^ no `unix` in the root
Compiling alloc-no-stdlib v2.0.1
Compiling adler32 v1.0.4
error[E0599]: no function or associated item named `from_vec` found for
struct `std::ffi::OsString` in the current scope
-->
/Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/dirs-1.0.5/src/lin.rs:48:34
|
48 | Some(PathBuf::from(OsString::from_vec(out)))
| ^^^^^^^^ function or associated item
not found in `std::ffi::OsString`
|
= help: items from traits can only be used if the trait is in scope
= note: the following trait is implemented but not in scope; perhaps add
a `use` for it:
`use std::sys_common::os_str_bytes::OsStringExt;`
error: aborting due to 3 previous errors
Some errors have detailed explanations: E0432, E0433, E0599.
For more information about an error, try `rustc --explain E0432`.
error: could not compile `dirs`.
To learn more, run the command again with --verbose.
warning: build failed, waiting for other jobs to finish...
error: build failed
alamb@ip-192-168-0-182 rust %
```
</details>
Closes #8397 from alamb/alamb/consolidate-array-value-to-string
Lead-authored-by: alamb <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
Signed-off-by: Jorge C. Leitao <[email protected]>
---
rust/arrow/src/util/{pretty.rs => display.rs} | 149 ++------------------------
rust/arrow/src/util/mod.rs | 1 +
rust/arrow/src/util/pretty.rs | 119 ++------------------
rust/datafusion/tests/sql.rs | 2 +-
4 files changed, 17 insertions(+), 254 deletions(-)
diff --git a/rust/arrow/src/util/pretty.rs b/rust/arrow/src/util/display.rs
similarity index 59%
copy from rust/arrow/src/util/pretty.rs
copy to rust/arrow/src/util/display.rs
index b881c3a..bf0cade 100644
--- a/rust/arrow/src/util/pretty.rs
+++ b/rust/arrow/src/util/display.rs
@@ -15,7 +15,9 @@
// specific language governing permissions and limitations
// under the License.
-//! Utilities for printing record batches
+//! Functions for printing array values, as strings, for debugging
+//! purposes. See the `pretty` crate for additional functions for
+//! record batch pretty printing.
use crate::array;
use crate::array::{Array, PrimitiveArrayOps};
@@ -23,56 +25,11 @@ use crate::datatypes::{
ArrowNativeType, ArrowPrimitiveType, DataType, Int16Type, Int32Type,
Int64Type,
Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
};
-use crate::record_batch::RecordBatch;
use array::DictionaryArray;
-use prettytable::format;
-use prettytable::{Cell, Row, Table};
use crate::error::{ArrowError, Result};
-///! Create a visual representation of record batches
-pub fn pretty_format_batches(results: &[RecordBatch]) -> Result<String> {
- Ok(create_table(results)?.to_string())
-}
-
-///! Prints a visual representation of record batches to stdout
-pub fn print_batches(results: &[RecordBatch]) -> Result<()> {
- create_table(results)?.printstd();
- Ok(())
-}
-
-///! Convert a series of record batches into a table
-fn create_table(results: &[RecordBatch]) -> Result<Table> {
- let mut table = Table::new();
- table.set_format(*format::consts::FORMAT_NO_LINESEP_WITH_TITLE);
-
- if results.is_empty() {
- return Ok(table);
- }
-
- let schema = results[0].schema();
-
- let mut header = Vec::new();
- for field in schema.fields() {
- header.push(Cell::new(&field.name()));
- }
- table.set_titles(Row::new(header));
-
- for batch in results {
- for row in 0..batch.num_rows() {
- let mut cells = Vec::new();
- for col in 0..batch.num_columns() {
- let column = batch.column(col);
- cells.push(Cell::new(&array_value_to_string(&column, row)?));
- }
- table.add_row(Row::new(cells));
- }
- }
-
- Ok(table)
-}
-
macro_rules! make_string {
($array_type:ty, $column: ident, $row: ident) => {{
let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
@@ -87,7 +44,10 @@ macro_rules! make_string {
}};
}
-/// Get the value at the given row in an array as a String
+/// Get the value at the given row in an array as a String.
+///
+/// Note this function is quite inefficient and is unlikely to be
+/// suitable for converting large arrays or record batches.
pub fn array_value_to_string(column: &array::ArrayRef, row: usize) ->
Result<String> {
match column.data_type() {
DataType::Utf8 => make_string!(array::StringArray, column, row),
@@ -165,7 +125,7 @@ fn dict_array_value_to_string<K: ArrowPrimitiveType>(
let dict_index = keys_array.value(row).to_usize().ok_or_else(|| {
ArrowError::InvalidArgumentError(format!(
- "Can not convert value {:?} at index {:?} to usize for repl.",
+ "Can not convert value {:?} at index {:?} to usize for string
conversion.",
keys_array.value(row),
row
))
@@ -173,96 +133,3 @@ fn dict_array_value_to_string<K: ArrowPrimitiveType>(
array_value_to_string(&dict_array.values(), dict_index)
}
-
-#[cfg(test)]
-mod tests {
- use array::{PrimitiveBuilder, StringBuilder, StringDictionaryBuilder};
-
- use super::*;
- use crate::datatypes::{Field, Schema};
- use std::sync::Arc;
-
- #[test]
- fn test_pretty_format_batches() -> Result<()> {
- // define a schema.
- let schema = Arc::new(Schema::new(vec![
- Field::new("a", DataType::Utf8, true),
- Field::new("b", DataType::Int32, true),
- ]));
-
- // define data.
- let batch = RecordBatch::try_new(
- schema,
- vec![
- Arc::new(array::StringArray::from(vec![
- Some("a"),
- Some("b"),
- None,
- Some("d"),
- ])),
- Arc::new(array::Int32Array::from(vec![
- Some(1),
- None,
- Some(10),
- Some(100),
- ])),
- ],
- )?;
-
- let table = pretty_format_batches(&[batch])?;
-
- let expected = vec![
- "+---+-----+",
- "| a | b |",
- "+---+-----+",
- "| a | 1 |",
- "| b | |",
- "| | 10 |",
- "| d | 100 |",
- "+---+-----+",
- ];
-
- let actual: Vec<&str> = table.lines().collect();
-
- assert_eq!(expected, actual, "Actual result:\n{}", table);
-
- Ok(())
- }
-
- #[test]
- fn test_pretty_format_dictionary() -> Result<()> {
- // define a schema.
- let field_type =
- DataType::Dictionary(Box::new(DataType::Int32),
Box::new(DataType::Utf8));
- let schema = Arc::new(Schema::new(vec![Field::new("d1", field_type,
true)]));
-
- let keys_builder = PrimitiveBuilder::<Int32Type>::new(10);
- let values_builder = StringBuilder::new(10);
- let mut builder = StringDictionaryBuilder::new(keys_builder,
values_builder);
-
- builder.append("one")?;
- builder.append_null()?;
- builder.append("three")?;
- let array = Arc::new(builder.finish());
-
- let batch = RecordBatch::try_new(schema.clone(), vec![array])?;
-
- let table = pretty_format_batches(&[batch])?;
-
- let expected = vec![
- "+-------+",
- "| d1 |",
- "+-------+",
- "| one |",
- "| |",
- "| three |",
- "+-------+",
- ];
-
- let actual: Vec<&str> = table.lines().collect();
-
- assert_eq!(expected, actual, "Actual result:\n{}", table);
-
- Ok(())
- }
-}
diff --git a/rust/arrow/src/util/mod.rs b/rust/arrow/src/util/mod.rs
index 30a510f..0f95043 100644
--- a/rust/arrow/src/util/mod.rs
+++ b/rust/arrow/src/util/mod.rs
@@ -17,6 +17,7 @@
pub mod bit_chunk_iterator;
pub mod bit_util;
+pub mod display;
pub mod integration_util;
#[cfg(feature = "prettyprint")]
pub mod pretty;
diff --git a/rust/arrow/src/util/pretty.rs b/rust/arrow/src/util/pretty.rs
index b881c3a..7eacba3 100644
--- a/rust/arrow/src/util/pretty.rs
+++ b/rust/arrow/src/util/pretty.rs
@@ -15,21 +15,17 @@
// specific language governing permissions and limitations
// under the License.
-//! Utilities for printing record batches
+//! Utilities for printing record batches. Note this module is not
+//! available unless `feature = "prettyprint"` is enabled.
-use crate::array;
-use crate::array::{Array, PrimitiveArrayOps};
-use crate::datatypes::{
- ArrowNativeType, ArrowPrimitiveType, DataType, Int16Type, Int32Type,
Int64Type,
- Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
-};
use crate::record_batch::RecordBatch;
-use array::DictionaryArray;
use prettytable::format;
use prettytable::{Cell, Row, Table};
-use crate::error::{ArrowError, Result};
+use crate::error::Result;
+
+use super::display::array_value_to_string;
///! Create a visual representation of record batches
pub fn pretty_format_batches(results: &[RecordBatch]) -> Result<String> {
@@ -73,113 +69,12 @@ fn create_table(results: &[RecordBatch]) -> Result<Table> {
Ok(table)
}
-macro_rules! make_string {
- ($array_type:ty, $column: ident, $row: ident) => {{
- let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
-
- let s = if array.is_null($row) {
- "".to_string()
- } else {
- array.value($row).to_string()
- };
-
- Ok(s)
- }};
-}
-
-/// Get the value at the given row in an array as a String
-pub fn array_value_to_string(column: &array::ArrayRef, row: usize) ->
Result<String> {
- match column.data_type() {
- DataType::Utf8 => make_string!(array::StringArray, column, row),
- DataType::Boolean => make_string!(array::BooleanArray, column, row),
- DataType::Int8 => make_string!(array::Int8Array, column, row),
- DataType::Int16 => make_string!(array::Int16Array, column, row),
- DataType::Int32 => make_string!(array::Int32Array, column, row),
- DataType::Int64 => make_string!(array::Int64Array, column, row),
- DataType::UInt8 => make_string!(array::UInt8Array, column, row),
- DataType::UInt16 => make_string!(array::UInt16Array, column, row),
- DataType::UInt32 => make_string!(array::UInt32Array, column, row),
- DataType::UInt64 => make_string!(array::UInt64Array, column, row),
- DataType::Float16 => make_string!(array::Float32Array, column, row),
- DataType::Float32 => make_string!(array::Float32Array, column, row),
- DataType::Float64 => make_string!(array::Float64Array, column, row),
- DataType::Timestamp(unit, _) if *unit == TimeUnit::Second => {
- make_string!(array::TimestampSecondArray, column, row)
- }
- DataType::Timestamp(unit, _) if *unit == TimeUnit::Millisecond => {
- make_string!(array::TimestampMillisecondArray, column, row)
- }
- DataType::Timestamp(unit, _) if *unit == TimeUnit::Microsecond => {
- make_string!(array::TimestampMicrosecondArray, column, row)
- }
- DataType::Timestamp(unit, _) if *unit == TimeUnit::Nanosecond => {
- make_string!(array::TimestampNanosecondArray, column, row)
- }
- DataType::Date32(_) => make_string!(array::Date32Array, column, row),
- DataType::Date64(_) => make_string!(array::Date64Array, column, row),
- DataType::Time32(unit) if *unit == TimeUnit::Second => {
- make_string!(array::Time32SecondArray, column, row)
- }
- DataType::Time32(unit) if *unit == TimeUnit::Millisecond => {
- make_string!(array::Time32MillisecondArray, column, row)
- }
- DataType::Time32(unit) if *unit == TimeUnit::Microsecond => {
- make_string!(array::Time64MicrosecondArray, column, row)
- }
- DataType::Time64(unit) if *unit == TimeUnit::Nanosecond => {
- make_string!(array::Time64NanosecondArray, column, row)
- }
- DataType::Dictionary(index_type, _value_type) => match **index_type {
- DataType::Int8 => dict_array_value_to_string::<Int8Type>(column,
row),
- DataType::Int16 => dict_array_value_to_string::<Int16Type>(column,
row),
- DataType::Int32 => dict_array_value_to_string::<Int32Type>(column,
row),
- DataType::Int64 => dict_array_value_to_string::<Int64Type>(column,
row),
- DataType::UInt8 => dict_array_value_to_string::<UInt8Type>(column,
row),
- DataType::UInt16 =>
dict_array_value_to_string::<UInt16Type>(column, row),
- DataType::UInt32 =>
dict_array_value_to_string::<UInt32Type>(column, row),
- DataType::UInt64 =>
dict_array_value_to_string::<UInt64Type>(column, row),
- _ => Err(ArrowError::InvalidArgumentError(format!(
- "Pretty printing not supported for {:?} due to index type",
- column.data_type()
- ))),
- },
- _ => Err(ArrowError::InvalidArgumentError(format!(
- "Pretty printing not implemented for {:?} type",
- column.data_type()
- ))),
- }
-}
-
-/// Converts the value of the dictionary array at `row` to a String
-fn dict_array_value_to_string<K: ArrowPrimitiveType>(
- colum: &array::ArrayRef,
- row: usize,
-) -> Result<String> {
- let dict_array =
colum.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
-
- let keys_array = dict_array.keys_array();
-
- if keys_array.is_null(row) {
- return Ok(String::from(""));
- }
-
- let dict_index = keys_array.value(row).to_usize().ok_or_else(|| {
- ArrowError::InvalidArgumentError(format!(
- "Can not convert value {:?} at index {:?} to usize for repl.",
- keys_array.value(row),
- row
- ))
- })?;
-
- array_value_to_string(&dict_array.values(), dict_index)
-}
-
#[cfg(test)]
mod tests {
- use array::{PrimitiveBuilder, StringBuilder, StringDictionaryBuilder};
+ use crate::array::{self, PrimitiveBuilder, StringBuilder,
StringDictionaryBuilder};
use super::*;
- use crate::datatypes::{Field, Schema};
+ use crate::datatypes::{DataType, Field, Int32Type, Schema};
use std::sync::Arc;
#[test]
diff --git a/rust/datafusion/tests/sql.rs b/rust/datafusion/tests/sql.rs
index 5640daa..1bc8bd0 100644
--- a/rust/datafusion/tests/sql.rs
+++ b/rust/datafusion/tests/sql.rs
@@ -25,7 +25,7 @@ use arrow::record_batch::RecordBatch;
use arrow::{array::*, datatypes::TimeUnit};
use arrow::{
datatypes::{DataType, Field, Schema, SchemaRef},
- util::pretty::array_value_to_string,
+ util::display::array_value_to_string,
};
use datafusion::datasource::{csv::CsvReadOptions, MemTable};