This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 144c9c7f6d5 Implement `take` kernel for byte view array. (#5602)
144c9c7f6d5 is described below
commit 144c9c7f6d593d69ea3ee5148868d68d6f38a435
Author: RinChanNOW <[email protected]>
AuthorDate: Tue Apr 9 18:30:38 2024 +0800
Implement `take` kernel for byte view array. (#5602)
* impl take kernel for byte view array.
* Add unit tests.
* Use ArrayData equality
* Rename to byte_view
---------
Co-authored-by: Raphael Taylor-Davies <[email protected]>
---
arrow-array/src/cast.rs | 36 ++++++++++++++++++++++++++
arrow-select/src/take.rs | 67 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 103 insertions(+)
diff --git a/arrow-array/src/cast.rs b/arrow-array/src/cast.rs
index 2e21f3e7e64..7b4b1d6eca4 100644
--- a/arrow-array/src/cast.rs
+++ b/arrow-array/src/cast.rs
@@ -779,6 +779,34 @@ pub trait AsArray: private::Sealed {
self.as_bytes_opt().expect("binary array")
}
+ /// Downcast this to a [`StringViewArray`] panicking if not possible
+ fn as_string_view(&self) -> &StringViewArray {
+ self.as_byte_view_opt().expect("string view array")
+ }
+
+ /// Downcast this to a [`StringViewArray`] returning `None` if not possible
+ fn as_string_view_opt(&self) -> Option<&StringViewArray> {
+ self.as_byte_view_opt() // generic `T` is inferred from the declared return type
+ }
+
+ /// Downcast this to a [`BinaryViewArray`] panicking if not possible
+ fn as_binary_view(&self) -> &BinaryViewArray {
+ self.as_byte_view_opt().expect("binary view array")
+ }
+
+ /// Downcast this to a [`BinaryViewArray`] returning `None` if not possible
+ fn as_binary_view_opt(&self) -> Option<&BinaryViewArray> {
+ self.as_byte_view_opt() // generic `T` is inferred from the declared return type
+ }
+
+ /// Downcast this to a [`GenericByteViewArray`] panicking if not
possible
+ fn as_byte_view<T: ByteViewType>(&self) -> &GenericByteViewArray<T> {
+ self.as_byte_view_opt().expect("byte view array")
+ }
+
+ /// Downcast this to a [`GenericByteViewArray`] returning `None` if not
possible
+ fn as_byte_view_opt<T: ByteViewType>(&self) ->
Option<&GenericByteViewArray<T>>; // no default body; the other `as_*_view*` helpers in this hunk call it
+
/// Downcast this to a [`StructArray`] returning `None` if not possible
fn as_struct_opt(&self) -> Option<&StructArray>;
@@ -852,6 +880,10 @@ impl AsArray for dyn Array + '_ {
self.as_any().downcast_ref()
}
+ fn as_byte_view_opt<T: ByteViewType>(&self) ->
Option<&GenericByteViewArray<T>> {
+ self.as_any().downcast_ref() // yields `None` unless the value downcasts to `GenericByteViewArray<T>`
+ }
+
fn as_struct_opt(&self) -> Option<&StructArray> {
self.as_any().downcast_ref()
}
@@ -899,6 +931,10 @@ impl AsArray for ArrayRef {
self.as_ref().as_bytes_opt()
}
+ fn as_byte_view_opt<T: ByteViewType>(&self) ->
Option<&GenericByteViewArray<T>> {
+ self.as_ref().as_byte_view_opt() // delegates to the referenced array (presumably the `dyn Array` impl; confirm)
+ }
+
fn as_struct_opt(&self) -> Option<&StructArray> {
self.as_ref().as_struct_opt()
}
diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs
index d9a639da806..dc9e13040c8 100644
--- a/arrow-select/src/take.rs
+++ b/arrow-select/src/take.rs
@@ -143,6 +143,9 @@ fn take_impl<IndexType: ArrowPrimitiveType>(
DataType::LargeUtf8 => {
Ok(Arc::new(take_bytes(values.as_string::<i64>(), indices)?))
}
+ DataType::Utf8View => {
+ Ok(Arc::new(take_byte_view(values.as_string_view(), indices)?))
+ }
DataType::List(_) => {
Ok(Arc::new(take_list::<_, Int32Type>(values.as_list(), indices)?))
}
@@ -204,6 +207,9 @@ fn take_impl<IndexType: ArrowPrimitiveType>(
DataType::LargeBinary => {
Ok(Arc::new(take_bytes(values.as_binary::<i64>(), indices)?))
}
+ DataType::BinaryView => {
+ Ok(Arc::new(take_byte_view(values.as_binary_view(), indices)?))
+ }
DataType::FixedSizeBinary(size) => {
let values = values
.as_any()
@@ -437,6 +443,20 @@ fn take_bytes<T: ByteArrayType, IndexType:
ArrowPrimitiveType>(
Ok(GenericByteArray::from(array_data))
}
+/// `take` implementation for byte view arrays: gathers the views and nulls selected by `indices`
+fn take_byte_view<T: ByteViewType, IndexType: ArrowPrimitiveType>(
+ array: &GenericByteViewArray<T>,
+ indices: &PrimitiveArray<IndexType>,
+) -> Result<GenericByteViewArray<T>, ArrowError> {
+ let new_views = take_native(array.views(), indices);
+ let new_nulls = take_nulls(array.nulls(), indices);
+ Ok(GenericByteViewArray::new(
+ new_views,
+ array.data_buffers().to_vec(), // data buffers are passed through whole, not filtered by `indices`
+ new_nulls,
+ ))
+}
+
/// `take` implementation for list arrays
///
/// Calculates the index and indexed offset for the inner array,
@@ -1424,6 +1444,53 @@ mod tests {
assert_eq!(result.as_ref(), &expected);
}
+ fn _test_byte_view<T>() // shared body for the string-view and binary-view `take` tests
+ where
+ T: ByteViewType,
+ str: AsRef<T::Native>,
+ T::Native: PartialEq,
+ {
+ let index = UInt32Array::from(vec![Some(3), None, Some(1), Some(3),
Some(4), Some(2)]); // index 2 points at the null source row; the `None` index has no source row
+ let array = {
+ // source values: ["hello", "world", null, "large payload over 12 bytes", "lulu"]
+ let mut builder = GenericByteViewBuilder::<T>::new();
+ builder.append_value("hello");
+ builder.append_value("world");
+ builder.append_null();
+ builder.append_value("large payload over 12 bytes");
+ builder.append_value("lulu");
+ builder.finish()
+ };
+
+ let actual = take(&array, &index, None).unwrap();
+
+ assert_eq!(actual.len(), index.len()); // one output row per index
+
+ let expected = {
+ // ["large payload over 12 bytes", null, "world", "large payload
over 12 bytes", "lulu", null]
+ let mut builder = GenericByteViewBuilder::<T>::new();
+ builder.append_value("large payload over 12 bytes");
+ builder.append_null();
+ builder.append_value("world");
+ builder.append_value("large payload over 12 bytes");
+ builder.append_value("lulu");
+ builder.append_null();
+ builder.finish()
+ };
+
+ assert_eq!(actual.as_ref(), &expected);
+ }
+
+ #[test]
+ fn test_take_string_view() {
+ _test_byte_view::<StringViewType>() // run the shared byte-view test with `StringViewType`
+ }
+
+ #[test]
+ fn test_take_binary_view() {
+ _test_byte_view::<BinaryViewType>() // run the shared byte-view test with `BinaryViewType`
+ }
+
macro_rules! test_take_list {
($offset_type:ty, $list_data_type:ident, $list_array_type:ident) => {{
// Construct a value array, [[0,0,0], [-1,-2,-1], [], [2,3]]