This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 69c99a7e78 Add native stringview support for RIGHT (#11955)
69c99a7e78 is described below

commit 69c99a7e78b0d9e2ac3881200a58f184c0023d15
Author: kf zheng <[email protected]>
AuthorDate: Wed Aug 14 03:33:40 2024 +0800

    Add native stringview support for RIGHT (#11955)
    
    * add stringview support for RIGHT
    
    * add tests of stringview support for RIGHT
    
    * combine functions by ArrayAccessor and ArrayIter
    
    * fix fmt
    
    * fix clippy
    
    * fix fmt
---
 datafusion/functions/src/unicode/right.rs          | 48 +++++++++++++++++-----
 datafusion/sqllogictest/test_files/string_view.slt | 16 +++++++-
 2 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/datafusion/functions/src/unicode/right.rs b/datafusion/functions/src/unicode/right.rs
index 20cbbe020f..9d542bb2c0 100644
--- a/datafusion/functions/src/unicode/right.rs
+++ b/datafusion/functions/src/unicode/right.rs
@@ -19,17 +19,21 @@ use std::any::Any;
 use std::cmp::{max, Ordering};
 use std::sync::Arc;
 
-use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
+use arrow::array::{
+    Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array,
+    OffsetSizeTrait,
+};
 use arrow::datatypes::DataType;
 
-use datafusion_common::cast::{as_generic_string_array, as_int64_array};
+use crate::utils::{make_scalar_function, utf8_to_str_type};
+use datafusion_common::cast::{
+    as_generic_string_array, as_int64_array, as_string_view_array,
+};
 use datafusion_common::exec_err;
 use datafusion_common::Result;
 use datafusion_expr::TypeSignature::Exact;
 use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
 
-use crate::utils::{make_scalar_function, utf8_to_str_type};
-
 #[derive(Debug)]
 pub struct RightFunc {
     signature: Signature,
@@ -46,7 +50,11 @@ impl RightFunc {
         use DataType::*;
         Self {
             signature: Signature::one_of(
-                vec![Exact(vec![Utf8, Int64]), Exact(vec![LargeUtf8, Int64])],
+                vec![
+                    Exact(vec![Utf8View, Int64]),
+                    Exact(vec![Utf8, Int64]),
+                    Exact(vec![LargeUtf8, Int64]),
+                ],
                 Volatility::Immutable,
             ),
         }
@@ -72,9 +80,14 @@ impl ScalarUDFImpl for RightFunc {
 
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
         match args[0].data_type() {
-            DataType::Utf8 => make_scalar_function(right::<i32>, vec![])(args),
+            DataType::Utf8 | DataType::Utf8View => {
+                make_scalar_function(right::<i32>, vec![])(args)
+            }
             DataType::LargeUtf8 => make_scalar_function(right::<i64>, vec![])(args),
-            other => exec_err!("Unsupported data type {other:?} for function right"),
+            other => exec_err!(
+                "Unsupported data type {other:?} for function right,\
+            expected Utf8View, Utf8 or LargeUtf8."
+            ),
         }
     }
 }
@@ -83,11 +96,26 @@ impl ScalarUDFImpl for RightFunc {
 /// right('abcde', 2) = 'de'
 /// The implementation uses UTF-8 code points as characters
 pub fn right<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let string_array = as_generic_string_array::<T>(&args[0])?;
     let n_array = as_int64_array(&args[1])?;
+    if args[0].data_type() == &DataType::Utf8View {
+        // string_view_right(args)
+        let string_array = as_string_view_array(&args[0])?;
+        right_impl::<T, _>(&mut string_array.iter(), n_array)
+    } else {
+        // string_right::<T>(args)
+        let string_array = &as_generic_string_array::<T>(&args[0])?;
+        right_impl::<T, _>(&mut string_array.iter(), n_array)
+    }
+}
 
-    let result = string_array
-        .iter()
+// Currently the return type can only be Utf8 or LargeUtf8, to reach fully support, we need
+// to edit the `get_optimal_return_type` in utils.rs to make the udfs be able to return Utf8View
+// See https://github.com/apache/datafusion/issues/11790#issuecomment-2283777166
+fn right_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor<Item = &'a str>>(
+    string_array_iter: &mut ArrayIter<V>,
+    n_array: &Int64Array,
+) -> Result<ArrayRef> {
+    let result = string_array_iter
         .zip(n_array.iter())
         .map(|(string, n)| match (string, n) {
             (Some(string), Some(n)) => match n.cmp(&0) {
diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt
index 2381bd122b..0a9b73babb 100644
--- a/datafusion/sqllogictest/test_files/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string_view.slt
@@ -896,16 +896,28 @@ logical_plan
 
 
 ## Ensure no casts for RIGHT
-## TODO file ticket
 query TT
 EXPLAIN SELECT
   RIGHT(column1_utf8view, 3) as c2
 FROM test;
 ----
 logical_plan
-01)Projection: right(CAST(test.column1_utf8view AS Utf8), Int64(3)) AS c2
+01)Projection: right(test.column1_utf8view, Int64(3)) AS c2
 02)--TableScan: test projection=[column1_utf8view]
 
+# Test outputs of RIGHT
+query TTT
+SELECT
+  RIGHT(column1_utf8view, 3) as c1,
+  RIGHT(column1_utf8view, 0) as c2,
+  RIGHT(column1_utf8view, -3) as c3
+FROM test;
+----
+rew  (empty) rew
+eng  (empty) ngpeng
+ael  (empty) hael
+NULL NULL    NULL
+
 ## Ensure no casts for RPAD
 ## TODO file ticket
 query TT


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to