This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 905c46be1a8 PrettyPrint support for `StringViewArray` and `BinaryViewArray` (#5634)
905c46be1a8 is described below
commit 905c46be1a8b1605c7bbd44392fea2bf4182eb01
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Apr 15 07:18:42 2024 -0400
PrettyPrint support for `StringViewArray` and `BinaryViewArray` (#5634)
---
arrow-cast/src/display.rs | 19 +++++++++
arrow-cast/src/pretty.rs | 98 +++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 109 insertions(+), 8 deletions(-)
diff --git a/arrow-cast/src/display.rs b/arrow-cast/src/display.rs
index 9ec12f6e63d..a5f69b66094 100644
--- a/arrow-cast/src/display.rs
+++ b/arrow-cast/src/display.rs
@@ -282,7 +282,9 @@ fn make_formatter<'a>(
DataType::Boolean => array_format(as_boolean_array(array), options),
DataType::Utf8 => array_format(array.as_string::<i32>(), options),
DataType::LargeUtf8 => array_format(array.as_string::<i64>(), options),
+ DataType::Utf8View => array_format(array.as_string_view(), options),
DataType::Binary => array_format(array.as_binary::<i32>(), options),
+ DataType::BinaryView => array_format(array.as_binary_view(), options),
DataType::LargeBinary => array_format(array.as_binary::<i64>(), options),
DataType::FixedSizeBinary(_) => {
let a = array.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
@@ -733,6 +735,13 @@ impl<'a, O: OffsetSizeTrait> DisplayIndex for &'a GenericStringArray<O> {
}
}
+impl<'a> DisplayIndex for &'a StringViewArray {
+ fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
+ write!(f, "{}", self.value(idx))?;
+ Ok(())
+ }
+}
+
impl<'a, O: OffsetSizeTrait> DisplayIndex for &'a GenericBinaryArray<O> {
fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
let v = self.value(idx);
@@ -743,6 +752,16 @@ impl<'a, O: OffsetSizeTrait> DisplayIndex for &'a GenericBinaryArray<O> {
}
}
+impl<'a> DisplayIndex for &'a BinaryViewArray {
+ fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
+ let v = self.value(idx);
+ for byte in v {
+ write!(f, "{byte:02x}")?;
+ }
+ Ok(())
+ }
+}
+
impl<'a> DisplayIndex for &'a FixedSizeBinaryArray {
fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
let v = self.value(idx);
diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs
index 550afa9f739..da7c5e9bb6b 100644
--- a/arrow-cast/src/pretty.rs
+++ b/arrow-cast/src/pretty.rs
@@ -18,11 +18,14 @@
//! Utilities for pretty printing record batches. Note this module is not
//! available unless `feature = "prettyprint"` is enabled.
-use crate::display::{ArrayFormatter, FormatOptions};
+use std::fmt::Display;
+
+use comfy_table::{Cell, Table};
+
use arrow_array::{Array, ArrayRef, RecordBatch};
use arrow_schema::ArrowError;
-use comfy_table::{Cell, Table};
-use std::fmt::Display;
+
+use crate::display::{ArrayFormatter, FormatOptions};
/// Create a visual representation of record batches
pub fn pretty_format_batches(results: &[RecordBatch]) -> Result<impl Display, ArrowError> {
@@ -131,17 +134,20 @@ fn create_column(
#[cfg(test)]
mod tests {
+ use std::fmt::Write;
+ use std::sync::Arc;
+
+ use half::f16;
- use super::*;
- use crate::display::array_value_to_string;
use arrow_array::builder::*;
use arrow_array::types::*;
use arrow_array::*;
use arrow_buffer::Buffer;
use arrow_schema::*;
- use half::f16;
- use std::fmt::Write;
- use std::sync::Arc;
+
+ use crate::display::array_value_to_string;
+
+ use super::*;
#[test]
fn test_pretty_format_batches() {
@@ -317,6 +323,82 @@ mod tests {
assert_eq!(expected, actual, "Actual result:\n{table}");
}
+ #[test]
+ fn test_pretty_format_string_view() {
+ let schema = Arc::new(Schema::new(vec![Field::new(
+ "d1",
+ DataType::Utf8View,
+ true,
+ )]));
+
+ // Use a small capacity so we end up with multiple views
+ let mut builder = StringViewBuilder::with_capacity(20);
+ builder.append_value("hello");
+ builder.append_null();
+ builder.append_value("longer than 12 bytes");
+ builder.append_value("another than 12 bytes");
+ builder.append_null();
+ builder.append_value("small");
+
+ let array: ArrayRef = Arc::new(builder.finish());
+ let batch = RecordBatch::try_new(schema, vec![array]).unwrap();
+ let table = pretty_format_batches(&[batch]).unwrap().to_string();
+ let expected = vec![
+ "+-----------------------+",
+ "| d1 |",
+ "+-----------------------+",
+ "| hello |",
+ "| |",
+ "| longer than 12 bytes |",
+ "| another than 12 bytes |",
+ "| |",
+ "| small |",
+ "+-----------------------+",
+ ];
+
+ let actual: Vec<&str> = table.lines().collect();
+
+ assert_eq!(expected, actual, "Actual result:\n{table:#?}");
+ }
+
+ #[test]
+ fn test_pretty_format_binary_view() {
+ let schema = Arc::new(Schema::new(vec![Field::new(
+ "d1",
+ DataType::BinaryView,
+ true,
+ )]));
+
+ // Use a small capacity so we end up with multiple views
+ let mut builder = BinaryViewBuilder::with_capacity(20);
+ builder.append_value(b"hello");
+ builder.append_null();
+ builder.append_value(b"longer than 12 bytes");
+ builder.append_value(b"another than 12 bytes");
+ builder.append_null();
+ builder.append_value(b"small");
+
+ let array: ArrayRef = Arc::new(builder.finish());
+ let batch = RecordBatch::try_new(schema, vec![array]).unwrap();
+ let table = pretty_format_batches(&[batch]).unwrap().to_string();
+ let expected = vec![
+ "+--------------------------------------------+",
+ "| d1 |",
+ "+--------------------------------------------+",
+ "| 68656c6c6f |",
+ "| |",
+ "| 6c6f6e676572207468616e203132206279746573 |",
+ "| 616e6f74686572207468616e203132206279746573 |",
+ "| |",
+ "| 736d616c6c |",
+ "+--------------------------------------------+",
+ ];
+
+ let actual: Vec<&str> = table.lines().collect();
+
+ assert_eq!(expected, actual, "Actual result:\n\n{table:#?}");
+ }
+
#[test]
fn test_pretty_format_fixed_size_binary() {
// define a schema.