This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 905c46be1a8 PrettyPrint support for `StringViewArray` and `BinaryViewArray` (#5634)
905c46be1a8 is described below

commit 905c46be1a8b1605c7bbd44392fea2bf4182eb01
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Apr 15 07:18:42 2024 -0400

    PrettyPrint support for `StringViewArray` and `BinaryViewArray` (#5634)
---
 arrow-cast/src/display.rs | 19 +++++++++
 arrow-cast/src/pretty.rs  | 98 +++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 109 insertions(+), 8 deletions(-)

diff --git a/arrow-cast/src/display.rs b/arrow-cast/src/display.rs
index 9ec12f6e63d..a5f69b66094 100644
--- a/arrow-cast/src/display.rs
+++ b/arrow-cast/src/display.rs
@@ -282,7 +282,9 @@ fn make_formatter<'a>(
         DataType::Boolean => array_format(as_boolean_array(array), options),
         DataType::Utf8 => array_format(array.as_string::<i32>(), options),
         DataType::LargeUtf8 => array_format(array.as_string::<i64>(), options),
+        DataType::Utf8View => array_format(array.as_string_view(), options),
         DataType::Binary => array_format(array.as_binary::<i32>(), options),
+        DataType::BinaryView => array_format(array.as_binary_view(), options),
         DataType::LargeBinary => array_format(array.as_binary::<i64>(), options),
         DataType::FixedSizeBinary(_) => {
             let a = array.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
@@ -733,6 +735,13 @@ impl<'a, O: OffsetSizeTrait> DisplayIndex for &'a GenericStringArray<O> {
     }
 }
 
+impl<'a> DisplayIndex for &'a StringViewArray {
+    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
+        write!(f, "{}", self.value(idx))?;
+        Ok(())
+    }
+}
+
 impl<'a, O: OffsetSizeTrait> DisplayIndex for &'a GenericBinaryArray<O> {
     fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
         let v = self.value(idx);
@@ -743,6 +752,16 @@ impl<'a, O: OffsetSizeTrait> DisplayIndex for &'a GenericBinaryArray<O> {
     }
 }
 
+impl<'a> DisplayIndex for &'a BinaryViewArray {
+    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
+        let v = self.value(idx);
+        for byte in v {
+            write!(f, "{byte:02x}")?;
+        }
+        Ok(())
+    }
+}
+
 impl<'a> DisplayIndex for &'a FixedSizeBinaryArray {
     fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
         let v = self.value(idx);
diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs
index 550afa9f739..da7c5e9bb6b 100644
--- a/arrow-cast/src/pretty.rs
+++ b/arrow-cast/src/pretty.rs
@@ -18,11 +18,14 @@
 //! Utilities for pretty printing record batches. Note this module is not
 //! available unless `feature = "prettyprint"` is enabled.
 
-use crate::display::{ArrayFormatter, FormatOptions};
+use std::fmt::Display;
+
+use comfy_table::{Cell, Table};
+
 use arrow_array::{Array, ArrayRef, RecordBatch};
 use arrow_schema::ArrowError;
-use comfy_table::{Cell, Table};
-use std::fmt::Display;
+
+use crate::display::{ArrayFormatter, FormatOptions};
 
 /// Create a visual representation of record batches
 pub fn pretty_format_batches(results: &[RecordBatch]) -> Result<impl Display, ArrowError> {
@@ -131,17 +134,20 @@ fn create_column(
 
 #[cfg(test)]
 mod tests {
+    use std::fmt::Write;
+    use std::sync::Arc;
+
+    use half::f16;
 
-    use super::*;
-    use crate::display::array_value_to_string;
     use arrow_array::builder::*;
     use arrow_array::types::*;
     use arrow_array::*;
     use arrow_buffer::Buffer;
     use arrow_schema::*;
-    use half::f16;
-    use std::fmt::Write;
-    use std::sync::Arc;
+
+    use crate::display::array_value_to_string;
+
+    use super::*;
 
     #[test]
     fn test_pretty_format_batches() {
@@ -317,6 +323,82 @@ mod tests {
         assert_eq!(expected, actual, "Actual result:\n{table}");
     }
 
+    #[test]
+    fn test_pretty_format_string_view() {
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "d1",
+            DataType::Utf8View,
+            true,
+        )]));
+
+        // Use a small capacity so we end up with multiple views
+        let mut builder = StringViewBuilder::with_capacity(20);
+        builder.append_value("hello");
+        builder.append_null();
+        builder.append_value("longer than 12 bytes");
+        builder.append_value("another than 12 bytes");
+        builder.append_null();
+        builder.append_value("small");
+
+        let array: ArrayRef = Arc::new(builder.finish());
+        let batch = RecordBatch::try_new(schema, vec![array]).unwrap();
+        let table = pretty_format_batches(&[batch]).unwrap().to_string();
+        let expected = vec![
+            "+-----------------------+",
+            "| d1                    |",
+            "+-----------------------+",
+            "| hello                 |",
+            "|                       |",
+            "| longer than 12 bytes  |",
+            "| another than 12 bytes |",
+            "|                       |",
+            "| small                 |",
+            "+-----------------------+",
+        ];
+
+        let actual: Vec<&str> = table.lines().collect();
+
+        assert_eq!(expected, actual, "Actual result:\n{table:#?}");
+    }
+
+    #[test]
+    fn test_pretty_format_binary_view() {
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "d1",
+            DataType::BinaryView,
+            true,
+        )]));
+
+        // Use a small capacity so we end up with multiple views
+        let mut builder = BinaryViewBuilder::with_capacity(20);
+        builder.append_value(b"hello");
+        builder.append_null();
+        builder.append_value(b"longer than 12 bytes");
+        builder.append_value(b"another than 12 bytes");
+        builder.append_null();
+        builder.append_value(b"small");
+
+        let array: ArrayRef = Arc::new(builder.finish());
+        let batch = RecordBatch::try_new(schema, vec![array]).unwrap();
+        let table = pretty_format_batches(&[batch]).unwrap().to_string();
+        let expected = vec![
+            "+--------------------------------------------+",
+            "| d1                                         |",
+            "+--------------------------------------------+",
+            "| 68656c6c6f                                 |",
+            "|                                            |",
+            "| 6c6f6e676572207468616e203132206279746573   |",
+            "| 616e6f74686572207468616e203132206279746573 |",
+            "|                                            |",
+            "| 736d616c6c                                 |",
+            "+--------------------------------------------+",
+        ];
+
+        let actual: Vec<&str> = table.lines().collect();
+
+        assert_eq!(expected, actual, "Actual result:\n\n{table:#?}");
+    }
+
     #[test]
     fn test_pretty_format_fixed_size_binary() {
         // define a schema.

Reply via email to