This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 7568178e378 Support casting `StringView`/`BinaryView` --> `StringArray`/`BinaryArray`. (#5704)
7568178e378 is described below

commit 7568178e37812424d9189c625f3958b165ec13cf
Author: RinChanNOW <[email protected]>
AuthorDate: Mon May 6 23:26:06 2024 +0800

    Support casting `StringView`/`BinaryView` --> `StringArray`/`BinaryArray`. (#5704)
---
 arrow-cast/src/cast/mod.rs | 110 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)

diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 36072760ed0..171267f8054 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -218,6 +218,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: 
&DataType) -> bool {
             | Interval(_),
         ) => true,
         (Utf8 | LargeUtf8, Utf8View) => true,
+        (Utf8View, Utf8 | LargeUtf8) => true,
+        (BinaryView, Binary | LargeBinary) => true,
         (Utf8 | LargeUtf8, _) => to_type.is_numeric() && to_type != &Float16,
         (_, Utf8 | LargeUtf8) => from_type.is_primitive(),
 
@@ -1262,6 +1264,12 @@ pub fn cast_with_options(
                 "Casting from {from_type:?} to {to_type:?} not supported",
             ))),
         },
+        (Utf8View, Utf8) => cast_view_to_byte::<StringViewType, 
GenericStringType<i32>>(array),
+        (Utf8View, LargeUtf8) => cast_view_to_byte::<StringViewType, 
GenericStringType<i64>>(array),
+        (BinaryView, Binary) => cast_view_to_byte::<BinaryViewType, 
GenericBinaryType<i32>>(array),
+        (BinaryView, LargeBinary) => {
+            cast_view_to_byte::<BinaryViewType, GenericBinaryType<i64>>(array)
+        }
         (from_type, LargeUtf8) if from_type.is_primitive() => {
             value_to_string::<i64>(array, cast_options)
         }
@@ -2299,6 +2307,32 @@ where
     }))
 }
 
+/// Casts a `ByteViewType` array (`FROM`) into a `ByteArrayType` array (`TO`).
+///
+/// The input is re-wrapped as a `GenericByteViewArray<FROM>` and each slot, null or
+/// not, is appended in order to a `GenericByteBuilder<TO>`. The builder is pre-sized
+/// with the element count and the sum of the lengths recorded in the views.
+/// This function has no error path of its own and always returns `Ok`.
+fn cast_view_to_byte<FROM, TO>(array: &dyn Array) -> Result<ArrayRef, ArrowError>
+where
+    FROM: ByteViewType,
+    TO: ByteArrayType,
+    FROM::Native: AsRef<TO::Native>,
+{
+    let source = GenericByteViewArray::<FROM>::from(array.to_data());
+
+    // Total of the lengths stored in the views; passed to the builder as its byte capacity.
+    let mut value_bytes = 0usize;
+    for raw_view in source.views().iter() {
+        value_bytes += ByteView::from(*raw_view).length as usize;
+    }
+
+    let mut builder = GenericByteBuilder::<TO>::with_capacity(source.len(), value_bytes);
+    source.iter().for_each(|item| builder.append_option(item));
+
+    Ok(Arc::new(builder.finish()))
+}
+
 #[cfg(test)]
 mod tests {
     use arrow_buffer::{Buffer, NullBuffer};
@@ -5169,6 +5203,82 @@ mod tests {
         assert_eq!(binary_view_array.as_ref(), &expect_binary_view_array);
     }
 
+    /// Casting `Utf8View` to `Utf8` and `LargeUtf8` must keep every value and null.
+    #[test]
+    fn test_view_to_string() {
+        _test_view_to_string::<i32>();
+        _test_view_to_string::<i64>();
+    }
+
+    /// Builds a string view array, casts it to `GenericStringArray<O>` and checks
+    /// both the resulting data type and the resulting contents.
+    fn _test_view_to_string<O: OffsetSizeTrait>() {
+        // Short values, a null, and one value longer than 12 bytes.
+        let values: Vec<Option<&str>> = vec![
+            Some("hello"),
+            Some("world"),
+            None,
+            Some("large payload over 12 bytes"),
+            Some("lulu"),
+        ];
+
+        // Small block size, intended to spread the data over multiple buffers.
+        let mut view_builder = StringViewBuilder::new().with_block_size(8);
+        values.iter().for_each(|v| view_builder.append_option(*v));
+        let source = view_builder.finish();
+
+        let expected = GenericStringArray::<O>::from(values);
+        let target_type = expected.data_type();
+
+        // The cast has to be reported as supported before it is attempted.
+        assert!(can_cast_types(source.data_type(), target_type));
+
+        // `unwrap`: a failing cast should fail the test right here.
+        let casted = cast(&source, target_type).unwrap();
+        assert_eq!(casted.data_type(), target_type);
+
+        // Values and nulls must match an array built directly from the same data.
+        assert_eq!(casted.as_ref(), &expected);
+    }
+
+    /// Casting `BinaryView` to `Binary` and `LargeBinary` must keep every value and null.
+    #[test]
+    fn test_view_to_binary() {
+        _test_view_to_binary::<i32>();
+        _test_view_to_binary::<i64>();
+    }
+
+    /// Builds a binary view array, casts it to `GenericBinaryArray<O>` and checks
+    /// both the resulting data type and the resulting contents.
+    fn _test_view_to_binary<O: OffsetSizeTrait>() {
+        // Short values, a null, and one value longer than 12 bytes.
+        let values: Vec<Option<&[u8]>> = vec![
+            Some(b"hello"),
+            Some(b"world"),
+            None,
+            Some(b"large payload over 12 bytes"),
+            Some(b"lulu"),
+        ];
+
+        // Small block size, intended to spread the data over multiple buffers.
+        let mut view_builder = BinaryViewBuilder::new().with_block_size(8);
+        values.iter().for_each(|v| view_builder.append_option(*v));
+        let source = view_builder.finish();
+
+        let expected = GenericBinaryArray::<O>::from(values);
+        let target_type = expected.data_type();
+
+        // The cast has to be reported as supported before it is attempted.
+        assert!(can_cast_types(source.data_type(), target_type));
+
+        // `unwrap`: a failing cast should fail the test right here.
+        let casted = cast(&source, target_type).unwrap();
+        assert_eq!(casted.data_type(), target_type);
+
+        // Values and nulls must match an array built directly from the same data.
+        assert_eq!(casted.as_ref(), &expected);
+    }
+
     #[test]
     fn test_cast_from_f64() {
         let f64_values: Vec<f64> = vec![

Reply via email to