This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 69c99a7e78 Add native stringview support for RIGHT (#11955)
69c99a7e78 is described below
commit 69c99a7e78b0d9e2ac3881200a58f184c0023d15
Author: kf zheng <[email protected]>
AuthorDate: Wed Aug 14 03:33:40 2024 +0800
Add native stringview support for RIGHT (#11955)
* add stringview support for RIGHT
* add tests of stringview support for RIGHT
* combine functions by ArrayAccessor and ArrayIter
* fix fmt
* fix clippy
* fix fmt
---
datafusion/functions/src/unicode/right.rs | 48 +++++++++++++++++-----
datafusion/sqllogictest/test_files/string_view.slt | 16 +++++++-
2 files changed, 52 insertions(+), 12 deletions(-)
diff --git a/datafusion/functions/src/unicode/right.rs
b/datafusion/functions/src/unicode/right.rs
index 20cbbe020f..9d542bb2c0 100644
--- a/datafusion/functions/src/unicode/right.rs
+++ b/datafusion/functions/src/unicode/right.rs
@@ -19,17 +19,21 @@ use std::any::Any;
use std::cmp::{max, Ordering};
use std::sync::Arc;
-use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
+use arrow::array::{
+ Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array,
+ OffsetSizeTrait,
+};
use arrow::datatypes::DataType;
-use datafusion_common::cast::{as_generic_string_array, as_int64_array};
+use crate::utils::{make_scalar_function, utf8_to_str_type};
+use datafusion_common::cast::{
+ as_generic_string_array, as_int64_array, as_string_view_array,
+};
use datafusion_common::exec_err;
use datafusion_common::Result;
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
-use crate::utils::{make_scalar_function, utf8_to_str_type};
-
#[derive(Debug)]
pub struct RightFunc {
signature: Signature,
@@ -46,7 +50,11 @@ impl RightFunc {
use DataType::*;
Self {
signature: Signature::one_of(
- vec![Exact(vec![Utf8, Int64]), Exact(vec![LargeUtf8, Int64])],
+ vec![
+ Exact(vec![Utf8View, Int64]),
+ Exact(vec![Utf8, Int64]),
+ Exact(vec![LargeUtf8, Int64]),
+ ],
Volatility::Immutable,
),
}
@@ -72,9 +80,14 @@ impl ScalarUDFImpl for RightFunc {
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
match args[0].data_type() {
- DataType::Utf8 => make_scalar_function(right::<i32>, vec![])(args),
+ DataType::Utf8 | DataType::Utf8View => {
+ make_scalar_function(right::<i32>, vec![])(args)
+ }
DataType::LargeUtf8 => make_scalar_function(right::<i64>,
vec![])(args),
- other => exec_err!("Unsupported data type {other:?} for function
right"),
+ other => exec_err!(
+ "Unsupported data type {other:?} for function right,\
+ expected Utf8View, Utf8 or LargeUtf8."
+ ),
}
}
}
@@ -83,11 +96,26 @@ impl ScalarUDFImpl for RightFunc {
/// right('abcde', 2) = 'de'
/// The implementation uses UTF-8 code points as characters
pub fn right<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
- let string_array = as_generic_string_array::<T>(&args[0])?;
let n_array = as_int64_array(&args[1])?;
+ if args[0].data_type() == &DataType::Utf8View {
+ // string_view_right(args)
+ let string_array = as_string_view_array(&args[0])?;
+ right_impl::<T, _>(&mut string_array.iter(), n_array)
+ } else {
+ // string_right::<T>(args)
+ let string_array = &as_generic_string_array::<T>(&args[0])?;
+ right_impl::<T, _>(&mut string_array.iter(), n_array)
+ }
+}
- let result = string_array
- .iter()
+// Currently the return type can only be Utf8 or LargeUtf8, to reach fully
support, we need
+// to edit the `get_optimal_return_type` in utils.rs to make the udfs be able
to return Utf8View
+// See
https://github.com/apache/datafusion/issues/11790#issuecomment-2283777166
+fn right_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor<Item = &'a str>>(
+ string_array_iter: &mut ArrayIter<V>,
+ n_array: &Int64Array,
+) -> Result<ArrayRef> {
+ let result = string_array_iter
.zip(n_array.iter())
.map(|(string, n)| match (string, n) {
(Some(string), Some(n)) => match n.cmp(&0) {
diff --git a/datafusion/sqllogictest/test_files/string_view.slt
b/datafusion/sqllogictest/test_files/string_view.slt
index 2381bd122b..0a9b73babb 100644
--- a/datafusion/sqllogictest/test_files/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string_view.slt
@@ -896,16 +896,28 @@ logical_plan
## Ensure no casts for RIGHT
-## TODO file ticket
query TT
EXPLAIN SELECT
RIGHT(column1_utf8view, 3) as c2
FROM test;
----
logical_plan
-01)Projection: right(CAST(test.column1_utf8view AS Utf8), Int64(3)) AS c2
+01)Projection: right(test.column1_utf8view, Int64(3)) AS c2
02)--TableScan: test projection=[column1_utf8view]
+# Test outputs of RIGHT
+query TTT
+SELECT
+ RIGHT(column1_utf8view, 3) as c1,
+ RIGHT(column1_utf8view, 0) as c2,
+ RIGHT(column1_utf8view, -3) as c3
+FROM test;
+----
+rew (empty) rew
+eng (empty) ngpeng
+ael (empty) hael
+NULL NULL NULL
+
## Ensure no casts for RPAD
## TODO file ticket
query TT
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]