This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 144c9c7f6d5 Implement `take` kernel for byte view array. (#5602)
144c9c7f6d5 is described below

commit 144c9c7f6d593d69ea3ee5148868d68d6f38a435
Author: RinChanNOW <[email protected]>
AuthorDate: Tue Apr 9 18:30:38 2024 +0800

    Implement `take` kernel for byte view array. (#5602)
    
    * impl take kernel for byte view array.
    
    * Add unit tests.
    
    * Use ArrayData equality
    
    * Rename to byte_view
    
    ---------
    
    Co-authored-by: Raphael Taylor-Davies <[email protected]>
---
 arrow-array/src/cast.rs  | 36 ++++++++++++++++++++++++++
 arrow-select/src/take.rs | 67 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+)

diff --git a/arrow-array/src/cast.rs b/arrow-array/src/cast.rs
index 2e21f3e7e64..7b4b1d6eca4 100644
--- a/arrow-array/src/cast.rs
+++ b/arrow-array/src/cast.rs
@@ -779,6 +779,34 @@ pub trait AsArray: private::Sealed {
         self.as_bytes_opt().expect("binary array")
     }
 
+    /// Downcast this to a [`StringViewArray`] panicking if not possible
+    fn as_string_view(&self) -> &StringViewArray {
+        self.as_byte_view_opt().expect("string view array")
+    }
+
+    /// Downcast this to a [`StringViewArray`] returning `None` if not possible
+    fn as_string_view_opt(&self) -> Option<&StringViewArray> {
+        self.as_byte_view_opt()
+    }
+
+    /// Downcast this to a [`BinaryViewArray`] panicking if not possible
+    fn as_binary_view(&self) -> &BinaryViewArray {
+        self.as_byte_view_opt().expect("binary view array")
+    }
+
+    /// Downcast this to a [`BinaryViewArray`] returning `None` if not possible
+    fn as_binary_view_opt(&self) -> Option<&BinaryViewArray> {
+        self.as_byte_view_opt()
+    }
+
+    /// Downcast this to a [`GenericByteViewArray`] panicking if not possible
+    fn as_byte_view<T: ByteViewType>(&self) -> &GenericByteViewArray<T> {
+        self.as_byte_view_opt().expect("byte view array")
+    }
+
+    /// Downcast this to a [`GenericByteViewArray`] returning `None` if not possible
+    fn as_byte_view_opt<T: ByteViewType>(&self) -> 
Option<&GenericByteViewArray<T>>;
+
     /// Downcast this to a [`StructArray`] returning `None` if not possible
     fn as_struct_opt(&self) -> Option<&StructArray>;
 
@@ -852,6 +880,10 @@ impl AsArray for dyn Array + '_ {
         self.as_any().downcast_ref()
     }
 
+    fn as_byte_view_opt<T: ByteViewType>(&self) -> 
Option<&GenericByteViewArray<T>> {
+        self.as_any().downcast_ref()
+    }
+
     fn as_struct_opt(&self) -> Option<&StructArray> {
         self.as_any().downcast_ref()
     }
@@ -899,6 +931,10 @@ impl AsArray for ArrayRef {
         self.as_ref().as_bytes_opt()
     }
 
+    fn as_byte_view_opt<T: ByteViewType>(&self) -> 
Option<&GenericByteViewArray<T>> {
+        self.as_ref().as_byte_view_opt()
+    }
+
     fn as_struct_opt(&self) -> Option<&StructArray> {
         self.as_ref().as_struct_opt()
     }
diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs
index d9a639da806..dc9e13040c8 100644
--- a/arrow-select/src/take.rs
+++ b/arrow-select/src/take.rs
@@ -143,6 +143,9 @@ fn take_impl<IndexType: ArrowPrimitiveType>(
         DataType::LargeUtf8 => {
             Ok(Arc::new(take_bytes(values.as_string::<i64>(), indices)?))
         }
+        DataType::Utf8View => {
+            Ok(Arc::new(take_byte_view(values.as_string_view(), indices)?))
+        }
         DataType::List(_) => {
             Ok(Arc::new(take_list::<_, Int32Type>(values.as_list(), indices)?))
         }
@@ -204,6 +207,9 @@ fn take_impl<IndexType: ArrowPrimitiveType>(
         DataType::LargeBinary => {
             Ok(Arc::new(take_bytes(values.as_binary::<i64>(), indices)?))
         }
+        DataType::BinaryView => {
+            Ok(Arc::new(take_byte_view(values.as_binary_view(), indices)?))
+        }
         DataType::FixedSizeBinary(size) => {
             let values = values
                 .as_any()
@@ -437,6 +443,20 @@ fn take_bytes<T: ByteArrayType, IndexType: 
ArrowPrimitiveType>(
     Ok(GenericByteArray::from(array_data))
 }
 
+/// `take` implementation for byte view arrays
+fn take_byte_view<T: ByteViewType, IndexType: ArrowPrimitiveType>(
+    array: &GenericByteViewArray<T>,
+    indices: &PrimitiveArray<IndexType>,
+) -> Result<GenericByteViewArray<T>, ArrowError> {
+    let new_views = take_native(array.views(), indices);
+    let new_nulls = take_nulls(array.nulls(), indices);
+    Ok(GenericByteViewArray::new(
+        new_views,
+        array.data_buffers().to_vec(),
+        new_nulls,
+    ))
+}
+
 /// `take` implementation for list arrays
 ///
 /// Calculates the index and indexed offset for the inner array,
@@ -1424,6 +1444,53 @@ mod tests {
         assert_eq!(result.as_ref(), &expected);
     }
 
+    fn _test_byte_view<T>()
+    where
+        T: ByteViewType,
+        str: AsRef<T::Native>,
+        T::Native: PartialEq,
+    {
+        let index = UInt32Array::from(vec![Some(3), None, Some(1), Some(3), 
Some(4), Some(2)]);
+        let array = {
+            // ["hello", "world", null, "large payload over 12 bytes", "lulu"]
+            let mut builder = GenericByteViewBuilder::<T>::new();
+            builder.append_value("hello");
+            builder.append_value("world");
+            builder.append_null();
+            builder.append_value("large payload over 12 bytes");
+            builder.append_value("lulu");
+            builder.finish()
+        };
+
+        let actual = take(&array, &index, None).unwrap();
+
+        assert_eq!(actual.len(), index.len());
+
+        let expected = {
+            // ["large payload over 12 bytes", null, "world", "large payload over 12 bytes", "lulu", null]
+            let mut builder = GenericByteViewBuilder::<T>::new();
+            builder.append_value("large payload over 12 bytes");
+            builder.append_null();
+            builder.append_value("world");
+            builder.append_value("large payload over 12 bytes");
+            builder.append_value("lulu");
+            builder.append_null();
+            builder.finish()
+        };
+
+        assert_eq!(actual.as_ref(), &expected);
+    }
+
+    #[test]
+    fn test_take_string_view() {
+        _test_byte_view::<StringViewType>()
+    }
+
+    #[test]
+    fn test_take_binary_view() {
+        _test_byte_view::<BinaryViewType>()
+    }
+
     macro_rules! test_take_list {
         ($offset_type:ty, $list_data_type:ident, $list_array_type:ident) => {{
             // Construct a value array, [[0,0,0], [-1,-2,-1], [], [2,3]]

Reply via email to