This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 508da801b6 Update LPAD scalar function to support Utf8View (#11941)
508da801b6 is described below

commit 508da801b633dfafe969f7e8f8598bc79a878d9d
Author: Bruce Ritchie <[email protected]>
AuthorDate: Tue Aug 13 14:25:43 2024 -0400

    Update LPAD scalar function to support Utf8View (#11941)
---
 datafusion/functions/src/unicode/lpad.rs           | 677 +++++++++++++--------
 datafusion/sqllogictest/test_files/functions.slt   |  26 +
 datafusion/sqllogictest/test_files/string_view.slt |  20 +-
 3 files changed, 463 insertions(+), 260 deletions(-)

diff --git a/datafusion/functions/src/unicode/lpad.rs 
b/datafusion/functions/src/unicode/lpad.rs
index ce5e006436..5caa6acd67 100644
--- a/datafusion/functions/src/unicode/lpad.rs
+++ b/datafusion/functions/src/unicode/lpad.rs
@@ -18,16 +18,21 @@
 use std::any::Any;
 use std::sync::Arc;
 
-use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
+use arrow::array::{
+    Array, ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray, 
Int64Array,
+    OffsetSizeTrait, StringViewArray,
+};
 use arrow::datatypes::DataType;
-use datafusion_common::cast::{as_generic_string_array, as_int64_array};
 use unicode_segmentation::UnicodeSegmentation;
+use DataType::{LargeUtf8, Utf8, Utf8View};
 
-use crate::utils::{make_scalar_function, utf8_to_str_type};
+use datafusion_common::cast::as_int64_array;
 use datafusion_common::{exec_err, Result};
 use datafusion_expr::TypeSignature::Exact;
 use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
 
+use crate::utils::{make_scalar_function, utf8_to_str_type};
+
 #[derive(Debug)]
 pub struct LPadFunc {
     signature: Signature,
@@ -45,11 +50,17 @@ impl LPadFunc {
         Self {
             signature: Signature::one_of(
                 vec![
+                    Exact(vec![Utf8View, Int64]),
+                    Exact(vec![Utf8View, Int64, Utf8View]),
+                    Exact(vec![Utf8View, Int64, Utf8]),
+                    Exact(vec![Utf8View, Int64, LargeUtf8]),
                     Exact(vec![Utf8, Int64]),
-                    Exact(vec![LargeUtf8, Int64]),
+                    Exact(vec![Utf8, Int64, Utf8View]),
                     Exact(vec![Utf8, Int64, Utf8]),
-                    Exact(vec![LargeUtf8, Int64, Utf8]),
                     Exact(vec![Utf8, Int64, LargeUtf8]),
+                    Exact(vec![LargeUtf8, Int64]),
+                    Exact(vec![LargeUtf8, Int64, Utf8View]),
+                    Exact(vec![LargeUtf8, Int64, Utf8]),
                     Exact(vec![LargeUtf8, Int64, LargeUtf8]),
                 ],
                 Volatility::Immutable,
@@ -76,300 +87,450 @@ impl ScalarUDFImpl for LPadFunc {
     }
 
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
-        match args[0].data_type() {
-            DataType::Utf8 => make_scalar_function(lpad::<i32>, vec![])(args),
-            DataType::LargeUtf8 => make_scalar_function(lpad::<i64>, 
vec![])(args),
-            other => exec_err!("Unsupported data type {other:?} for function 
lpad"),
-        }
+        make_scalar_function(lpad, vec![])(args)
     }
 }
 
-/// Extends the string to length 'length' by prepending the characters fill (a 
space by default). If the string is already longer than length then it is 
truncated (on the right).
+/// Extends the string to length 'length' by prepending the characters fill (a 
space by default).
+/// If the string is already longer than length then it is truncated (on the 
right).
 /// lpad('hi', 5, 'xy') = 'xyxhi'
-pub fn lpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    match args.len() {
-        2 => {
-            let string_array = as_generic_string_array::<T>(&args[0])?;
-            let length_array = as_int64_array(&args[1])?;
-
-            let result = string_array
-                .iter()
-                .zip(length_array.iter())
-                .map(|(string, length)| match (string, length) {
-                    (Some(string), Some(length)) => {
-                        if length > i32::MAX as i64 {
-                            return exec_err!(
-                                "lpad requested length {length} too large"
-                            );
-                        }
+pub fn lpad(args: &[ArrayRef]) -> Result<ArrayRef> {
+    if args.len() <= 1 || args.len() > 3 {
+        return exec_err!(
+            "lpad was called with {} arguments. It requires at least 2 and at 
most 3.",
+            args.len()
+        );
+    }
+
+    let length_array = as_int64_array(&args[1])?;
+
+    match args[0].data_type() {
+        Utf8 => match args.len() {
+            2 => lpad_impl::<&GenericStringArray<i32>, 
&GenericStringArray<i32>, i32>(
+                args[0].as_string::<i32>(),
+                length_array,
+                None,
+            ),
+            3 => lpad_with_replace::<&GenericStringArray<i32>, i32>(
+                args[0].as_string::<i32>(),
+                length_array,
+                &args[2],
+            ),
+            _ => unreachable!(),
+        },
+        LargeUtf8 => match args.len() {
+            2 => lpad_impl::<&GenericStringArray<i64>, 
&GenericStringArray<i64>, i64>(
+                args[0].as_string::<i64>(),
+                length_array,
+                None,
+            ),
+            3 => lpad_with_replace::<&GenericStringArray<i64>, i64>(
+                args[0].as_string::<i64>(),
+                length_array,
+                &args[2],
+            ),
+            _ => unreachable!(),
+        },
+        Utf8View => match args.len() {
+            2 => lpad_impl::<&StringViewArray, &GenericStringArray<i32>, i32>(
+                args[0].as_string_view(),
+                length_array,
+                None,
+            ),
+            3 => lpad_with_replace::<&StringViewArray, i32>(
+                args[0].as_string_view(),
+                length_array,
+                &args[2],
+            ),
+            _ => unreachable!(),
+        },
+        other => {
+            exec_err!("Unsupported data type {other:?} for function lpad")
+        }
+    }
+}
 
-                        let length = if length < 0 { 0 } else { length as 
usize };
-                        if length == 0 {
-                            Ok(Some("".to_string()))
+fn lpad_with_replace<'a, V, T: OffsetSizeTrait>(
+    string_array: V,
+    length_array: &Int64Array,
+    fill_array: &'a ArrayRef,
+) -> Result<ArrayRef>
+where
+    V: StringArrayType<'a>,
+{
+    match fill_array.data_type() {
+        Utf8 => lpad_impl::<V, &GenericStringArray<i32>, T>(
+            string_array,
+            length_array,
+            Some(fill_array.as_string::<i32>()),
+        ),
+        LargeUtf8 => lpad_impl::<V, &GenericStringArray<i64>, T>(
+            string_array,
+            length_array,
+            Some(fill_array.as_string::<i64>()),
+        ),
+        Utf8View => lpad_impl::<V, &StringViewArray, T>(
+            string_array,
+            length_array,
+            Some(fill_array.as_string_view()),
+        ),
+        other => {
+            exec_err!("Unsupported data type {other:?} for function lpad")
+        }
+    }
+}
+
+fn lpad_impl<'a, V, V2, T>(
+    string_array: V,
+    length_array: &Int64Array,
+    fill_array: Option<V2>,
+) -> Result<ArrayRef>
+where
+    V: StringArrayType<'a>,
+    V2: StringArrayType<'a>,
+    T: OffsetSizeTrait,
+{
+    if fill_array.is_none() {
+        let result = string_array
+            .iter()
+            .zip(length_array.iter())
+            .map(|(string, length)| match (string, length) {
+                (Some(string), Some(length)) => {
+                    if length > i32::MAX as i64 {
+                        return exec_err!("lpad requested length {length} too 
large");
+                    }
+
+                    let length = if length < 0 { 0 } else { length as usize };
+                    if length == 0 {
+                        Ok(Some("".to_string()))
+                    } else {
+                        let graphemes = 
string.graphemes(true).collect::<Vec<&str>>();
+                        if length < graphemes.len() {
+                            Ok(Some(graphemes[..length].concat()))
                         } else {
-                            let graphemes = 
string.graphemes(true).collect::<Vec<&str>>();
-                            if length < graphemes.len() {
-                                Ok(Some(graphemes[..length].concat()))
-                            } else {
-                                let mut s: String = " ".repeat(length - 
graphemes.len());
-                                s.push_str(string);
-                                Ok(Some(s))
-                            }
+                            let mut s: String = " ".repeat(length - 
graphemes.len());
+                            s.push_str(string);
+                            Ok(Some(s))
                         }
                     }
-                    _ => Ok(None),
-                })
-                .collect::<Result<GenericStringArray<T>>>()?;
+                }
+                _ => Ok(None),
+            })
+            .collect::<Result<GenericStringArray<T>>>()?;
 
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        3 => {
-            let string_array = as_generic_string_array::<T>(&args[0])?;
-            let length_array = as_int64_array(&args[1])?;
-            let fill_array = as_generic_string_array::<T>(&args[2])?;
-
-            let result = string_array
-                .iter()
-                .zip(length_array.iter())
-                .zip(fill_array.iter())
-                .map(|((string, length), fill)| match (string, length, fill) {
-                    (Some(string), Some(length), Some(fill)) => {
-                        if length > i32::MAX as i64 {
-                            return exec_err!(
-                                "lpad requested length {length} too large"
-                            );
-                        }
+        Ok(Arc::new(result) as ArrayRef)
+    } else {
+        let result = string_array
+            .iter()
+            .zip(length_array.iter())
+            .zip(fill_array.unwrap().iter())
+            .map(|((string, length), fill)| match (string, length, fill) {
+                (Some(string), Some(length), Some(fill)) => {
+                    if length > i32::MAX as i64 {
+                        return exec_err!("lpad requested length {length} too 
large");
+                    }
+
+                    let length = if length < 0 { 0 } else { length as usize };
+                    if length == 0 {
+                        Ok(Some("".to_string()))
+                    } else {
+                        let graphemes = 
string.graphemes(true).collect::<Vec<&str>>();
+                        let fill_chars = fill.chars().collect::<Vec<char>>();
 
-                        let length = if length < 0 { 0 } else { length as 
usize };
-                        if length == 0 {
-                            Ok(Some("".to_string()))
+                        if length < graphemes.len() {
+                            Ok(Some(graphemes[..length].concat()))
+                        } else if fill_chars.is_empty() {
+                            Ok(Some(string.to_string()))
                         } else {
-                            let graphemes = 
string.graphemes(true).collect::<Vec<&str>>();
-                            let fill_chars = 
fill.chars().collect::<Vec<char>>();
-
-                            if length < graphemes.len() {
-                                Ok(Some(graphemes[..length].concat()))
-                            } else if fill_chars.is_empty() {
-                                Ok(Some(string.to_string()))
-                            } else {
-                                let mut s = string.to_string();
-                                let mut char_vector =
-                                    Vec::<char>::with_capacity(length - 
graphemes.len());
-                                for l in 0..length - graphemes.len() {
-                                    char_vector.push(
-                                        *fill_chars.get(l % 
fill_chars.len()).unwrap(),
-                                    );
-                                }
-                                s.insert_str(
-                                    0,
-                                    
char_vector.iter().collect::<String>().as_str(),
-                                );
-                                Ok(Some(s))
+                            let mut s = string.to_string();
+                            let mut char_vector =
+                                Vec::<char>::with_capacity(length - 
graphemes.len());
+                            for l in 0..length - graphemes.len() {
+                                char_vector
+                                    .push(*fill_chars.get(l % 
fill_chars.len()).unwrap());
                             }
+                            s.insert_str(
+                                0,
+                                
char_vector.iter().collect::<String>().as_str(),
+                            );
+                            Ok(Some(s))
                         }
                     }
-                    _ => Ok(None),
-                })
-                .collect::<Result<GenericStringArray<T>>>()?;
+                }
+                _ => Ok(None),
+            })
+            .collect::<Result<GenericStringArray<T>>>()?;
 
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        other => exec_err!(
-            "lpad was called with {other} arguments. It requires at least 2 
and at most 3."
-        ),
+        Ok(Arc::new(result) as ArrayRef)
+    }
+}
+
+trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized {
+    fn iter(&self) -> ArrayIter<Self>;
+}
+impl<'a, O: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<O> 
{
+    fn iter(&self) -> ArrayIter<Self> {
+        GenericStringArray::<O>::iter(self)
+    }
+}
+impl<'a> StringArrayType<'a> for &'a StringViewArray {
+    fn iter(&self) -> ArrayIter<Self> {
+        StringViewArray::iter(self)
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use arrow::array::{Array, StringArray};
-    use arrow::datatypes::DataType::Utf8;
+    use crate::unicode::lpad::LPadFunc;
+    use crate::utils::test::test_function;
+
+    use arrow::array::{Array, LargeStringArray, StringArray};
+    use arrow::datatypes::DataType::{LargeUtf8, Utf8};
 
     use datafusion_common::{Result, ScalarValue};
     use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
 
-    use crate::unicode::lpad::LPadFunc;
-    use crate::utils::test::test_function;
+    macro_rules! test_lpad {
+        ($INPUT:expr, $LENGTH:expr, $EXPECTED:expr) => {
+            test_function!(
+                LPadFunc::new(),
+                &[
+                    ColumnarValue::Scalar(ScalarValue::Utf8($INPUT)),
+                    ColumnarValue::Scalar($LENGTH)
+                ],
+                $EXPECTED,
+                &str,
+                Utf8,
+                StringArray
+            );
+
+            test_function!(
+                LPadFunc::new(),
+                &[
+                    ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT)),
+                    ColumnarValue::Scalar($LENGTH)
+                ],
+                $EXPECTED,
+                &str,
+                LargeUtf8,
+                LargeStringArray
+            );
+
+            test_function!(
+                LPadFunc::new(),
+                &[
+                    ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT)),
+                    ColumnarValue::Scalar($LENGTH)
+                ],
+                $EXPECTED,
+                &str,
+                Utf8,
+                StringArray
+            );
+        };
+
+        ($INPUT:expr, $LENGTH:expr, $REPLACE:expr, $EXPECTED:expr) => {
+            // utf8, utf8
+            test_function!(
+                LPadFunc::new(),
+                &[
+                    ColumnarValue::Scalar(ScalarValue::Utf8($INPUT)),
+                    ColumnarValue::Scalar($LENGTH),
+                    ColumnarValue::Scalar(ScalarValue::Utf8($REPLACE))
+                ],
+                $EXPECTED,
+                &str,
+                Utf8,
+                StringArray
+            );
+            // utf8, largeutf8
+            test_function!(
+                LPadFunc::new(),
+                &[
+                    ColumnarValue::Scalar(ScalarValue::Utf8($INPUT)),
+                    ColumnarValue::Scalar($LENGTH),
+                    ColumnarValue::Scalar(ScalarValue::LargeUtf8($REPLACE))
+                ],
+                $EXPECTED,
+                &str,
+                Utf8,
+                StringArray
+            );
+            // utf8, utf8view
+            test_function!(
+                LPadFunc::new(),
+                &[
+                    ColumnarValue::Scalar(ScalarValue::Utf8($INPUT)),
+                    ColumnarValue::Scalar($LENGTH),
+                    ColumnarValue::Scalar(ScalarValue::Utf8View($REPLACE))
+                ],
+                $EXPECTED,
+                &str,
+                Utf8,
+                StringArray
+            );
+
+            // largeutf8, utf8
+            test_function!(
+                LPadFunc::new(),
+                &[
+                    ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT)),
+                    ColumnarValue::Scalar($LENGTH),
+                    ColumnarValue::Scalar(ScalarValue::Utf8($REPLACE))
+                ],
+                $EXPECTED,
+                &str,
+                LargeUtf8,
+                LargeStringArray
+            );
+            // largeutf8, largeutf8
+            test_function!(
+                LPadFunc::new(),
+                &[
+                    ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT)),
+                    ColumnarValue::Scalar($LENGTH),
+                    ColumnarValue::Scalar(ScalarValue::LargeUtf8($REPLACE))
+                ],
+                $EXPECTED,
+                &str,
+                LargeUtf8,
+                LargeStringArray
+            );
+            // largeutf8, utf8view
+            test_function!(
+                LPadFunc::new(),
+                &[
+                    ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT)),
+                    ColumnarValue::Scalar($LENGTH),
+                    ColumnarValue::Scalar(ScalarValue::Utf8View($REPLACE))
+                ],
+                $EXPECTED,
+                &str,
+                LargeUtf8,
+                LargeStringArray
+            );
+
+            // utf8view, utf8
+            test_function!(
+                LPadFunc::new(),
+                &[
+                    ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT)),
+                    ColumnarValue::Scalar($LENGTH),
+                    ColumnarValue::Scalar(ScalarValue::Utf8($REPLACE))
+                ],
+                $EXPECTED,
+                &str,
+                Utf8,
+                StringArray
+            );
+            // utf8view, largeutf8
+            test_function!(
+                LPadFunc::new(),
+                &[
+                    ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT)),
+                    ColumnarValue::Scalar($LENGTH),
+                    ColumnarValue::Scalar(ScalarValue::LargeUtf8($REPLACE))
+                ],
+                $EXPECTED,
+                &str,
+                Utf8,
+                StringArray
+            );
+            // utf8view, utf8view
+            test_function!(
+                LPadFunc::new(),
+                &[
+                    ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT)),
+                    ColumnarValue::Scalar($LENGTH),
+                    ColumnarValue::Scalar(ScalarValue::Utf8View($REPLACE))
+                ],
+                $EXPECTED,
+                &str,
+                Utf8,
+                StringArray
+            );
+        };
+    }
 
     #[test]
     fn test_functions() -> Result<()> {
-        test_function!(
-            LPadFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::from("josé")),
-                ColumnarValue::Scalar(ScalarValue::from(5i64)),
-            ],
-            Ok(Some(" josé")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            LPadFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::from("hi")),
-                ColumnarValue::Scalar(ScalarValue::from(5i64)),
-            ],
-            Ok(Some("   hi")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            LPadFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::from("hi")),
-                ColumnarValue::Scalar(ScalarValue::from(0i64)),
-            ],
-            Ok(Some("")),
-            &str,
-            Utf8,
-            StringArray
+        test_lpad!(
+            Some("josé".into()),
+            ScalarValue::Int64(Some(5i64)),
+            Ok(Some(" josé"))
         );
-        test_function!(
-            LPadFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::from("hi")),
-                ColumnarValue::Scalar(ScalarValue::Int64(None)),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
+        test_lpad!(
+            Some("hi".into()),
+            ScalarValue::Int64(Some(5i64)),
+            Ok(Some("   hi"))
         );
-        test_function!(
-            LPadFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::Utf8(None)),
-                ColumnarValue::Scalar(ScalarValue::from(5i64)),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
+        test_lpad!(
+            Some("hi".into()),
+            ScalarValue::Int64(Some(0i64)),
+            Ok(Some(""))
         );
-        test_function!(
-            LPadFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::from("hi")),
-                ColumnarValue::Scalar(ScalarValue::from(5i64)),
-                ColumnarValue::Scalar(ScalarValue::from("xy")),
-            ],
-            Ok(Some("xyxhi")),
-            &str,
-            Utf8,
-            StringArray
+        test_lpad!(Some("hi".into()), ScalarValue::Int64(None), Ok(None));
+        test_lpad!(None, ScalarValue::Int64(Some(5i64)), Ok(None));
+        test_lpad!(
+            Some("hi".into()),
+            ScalarValue::Int64(Some(5i64)),
+            Some("xy".into()),
+            Ok(Some("xyxhi"))
         );
-        test_function!(
-            LPadFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::from("hi")),
-                ColumnarValue::Scalar(ScalarValue::from(21i64)),
-                ColumnarValue::Scalar(ScalarValue::from("abcdef")),
-            ],
-            Ok(Some("abcdefabcdefabcdefahi")),
-            &str,
-            Utf8,
-            StringArray
+        test_lpad!(
+            Some("hi".into()),
+            ScalarValue::Int64(Some(21i64)),
+            Some("abcdef".into()),
+            Ok(Some("abcdefabcdefabcdefahi"))
         );
-        test_function!(
-            LPadFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::from("hi")),
-                ColumnarValue::Scalar(ScalarValue::from(5i64)),
-                ColumnarValue::Scalar(ScalarValue::from(" ")),
-            ],
-            Ok(Some("   hi")),
-            &str,
-            Utf8,
-            StringArray
+        test_lpad!(
+            Some("hi".into()),
+            ScalarValue::Int64(Some(5i64)),
+            Some(" ".into()),
+            Ok(Some("   hi"))
         );
-        test_function!(
-            LPadFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::from("hi")),
-                ColumnarValue::Scalar(ScalarValue::from(5i64)),
-                ColumnarValue::Scalar(ScalarValue::from("")),
-            ],
-            Ok(Some("hi")),
-            &str,
-            Utf8,
-            StringArray
+        test_lpad!(
+            Some("hi".into()),
+            ScalarValue::Int64(Some(5i64)),
+            Some("".into()),
+            Ok(Some("hi"))
         );
-        test_function!(
-            LPadFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::Utf8(None)),
-                ColumnarValue::Scalar(ScalarValue::from(5i64)),
-                ColumnarValue::Scalar(ScalarValue::from("xy")),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
+        test_lpad!(
+            None,
+            ScalarValue::Int64(Some(5i64)),
+            Some("xy".into()),
+            Ok(None)
         );
-        test_function!(
-            LPadFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::from("hi")),
-                ColumnarValue::Scalar(ScalarValue::Int64(None)),
-                ColumnarValue::Scalar(ScalarValue::from("xy")),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
+        test_lpad!(
+            Some("hi".into()),
+            ScalarValue::Int64(None),
+            Some("xy".into()),
+            Ok(None)
         );
-        test_function!(
-            LPadFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::from("hi")),
-                ColumnarValue::Scalar(ScalarValue::from(5i64)),
-                ColumnarValue::Scalar(ScalarValue::Utf8(None)),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
+        test_lpad!(
+            Some("hi".into()),
+            ScalarValue::Int64(Some(5i64)),
+            None,
+            Ok(None)
         );
-        test_function!(
-            LPadFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::from("josé")),
-                ColumnarValue::Scalar(ScalarValue::from(10i64)),
-                ColumnarValue::Scalar(ScalarValue::from("xy")),
-            ],
-            Ok(Some("xyxyxyjosé")),
-            &str,
-            Utf8,
-            StringArray
+        test_lpad!(
+            Some("josé".into()),
+            ScalarValue::Int64(Some(10i64)),
+            Some("xy".into()),
+            Ok(Some("xyxyxyjosé"))
         );
-        test_function!(
-            LPadFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::from("josé")),
-                ColumnarValue::Scalar(ScalarValue::from(10i64)),
-                ColumnarValue::Scalar(ScalarValue::from("éñ")),
-            ],
-            Ok(Some("éñéñéñjosé")),
-            &str,
-            Utf8,
-            StringArray
+        test_lpad!(
+            Some("josé".into()),
+            ScalarValue::Int64(Some(10i64)),
+            Some("éñ".into()),
+            Ok(Some("éñéñéñjosé"))
         );
+
         #[cfg(not(feature = "unicode_expressions"))]
-        test_function!(
-            LPadFunc::new(),
-            &[
-                ColumnarValue::Scalar(ScalarValue::from("josé")),
-                ColumnarValue::Scalar(ScalarValue::from(5i64)),
-            ],
-            internal_err!(
+        test_lpad!(Some("josé".into()), ScalarValue::Int64(Some(5i64)), 
internal_err!(
                 "function lpad requires compilation with feature flag: 
unicode_expressions."
-            ),
-            &str,
-            Utf8,
-            StringArray
-        );
+        ));
+
         Ok(())
     }
 }
diff --git a/datafusion/sqllogictest/test_files/functions.slt 
b/datafusion/sqllogictest/test_files/functions.slt
index 3255ddccdb..bea3016a21 100644
--- a/datafusion/sqllogictest/test_files/functions.slt
+++ b/datafusion/sqllogictest/test_files/functions.slt
@@ -203,6 +203,32 @@ SELECT lpad(NULL, 5, 'xy')
 ----
 NULL
 
+# test largeutf8, utf8view for lpad
+query T
+SELECT lpad(arrow_cast('hi', 'LargeUtf8'), 5, 'xy')
+----
+xyxhi
+
+query T
+SELECT lpad(arrow_cast('hi', 'Utf8View'), 5, 'xy')
+----
+xyxhi
+
+query T
+SELECT lpad(arrow_cast('hi', 'LargeUtf8'), 5, arrow_cast('xy', 'LargeUtf8'))
+----
+xyxhi
+
+query T
+SELECT lpad(arrow_cast('hi', 'Utf8View'), 5, arrow_cast('xy', 'Utf8View'))
+----
+xyxhi
+
+query T
+SELECT lpad(arrow_cast(NULL, 'Utf8View'), 5, 'xy')
+----
+NULL
+
 query T
 SELECT reverse('abcde')
 ----
diff --git a/datafusion/sqllogictest/test_files/string_view.slt 
b/datafusion/sqllogictest/test_files/string_view.slt
index fcd71b7f7e..0088b035e7 100644
--- a/datafusion/sqllogictest/test_files/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string_view.slt
@@ -697,16 +697,32 @@ logical_plan
 02)--TableScan: test projection=[column1_utf8view]
 
 ## Ensure no casts for LPAD
-## TODO https://github.com/apache/datafusion/issues/11857
 query TT
 EXPLAIN SELECT
   LPAD(column1_utf8view, 12, ' ') as c1
 FROM test;
 ----
 logical_plan
-01)Projection: lpad(CAST(test.column1_utf8view AS Utf8), Int64(12), Utf8(" ")) 
AS c1
+01)Projection: lpad(test.column1_utf8view, Int64(12), Utf8(" ")) AS c1
 02)--TableScan: test projection=[column1_utf8view]
 
+query TT
+EXPLAIN SELECT
+  LPAD(column1_utf8view, 12, column2_large_utf8) as c1
+FROM test;
+----
+logical_plan
+01)Projection: lpad(test.column1_utf8view, Int64(12), test.column2_large_utf8) 
AS c1
+02)--TableScan: test projection=[column2_large_utf8, column1_utf8view]
+
+query TT
+EXPLAIN SELECT
+  LPAD(column1_utf8view, 12, column2_utf8view) as c1
+FROM test;
+----
+logical_plan
+01)Projection: lpad(test.column1_utf8view, Int64(12), test.column2_utf8view) 
AS c1
+02)--TableScan: test projection=[column1_utf8view, column2_utf8view]
 
 ## Ensure no casts for OCTET_LENGTH
 query TT


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to