This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 1d8a41bc8e Move `starts_with`, `to_hex`, `trim`, `upper` to 
datafusion-functions (and add string_expressions) (#9541)
1d8a41bc8e is described below

commit 1d8a41bc8e08b56e90d6f8e6ef20e39a126987e4
Author: Reilly.tang <[email protected]>
AuthorDate: Thu Mar 21 07:57:05 2024 +0800

    Move `starts_with`, `to_hex`, `trim`, `upper` to datafusion-functions (and 
add string_expressions) (#9541)
    
    * [task #9539] Move starts_with, to_hex, trim, upper to datafusion-functions
    
    Signed-off-by: tangruilin <[email protected]>
    
    * Export expr_fn, restore tests
    
    * fix comments
    
    ---------
    
    Signed-off-by: tangruilin <[email protected]>
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/expr/src/built_in_function.rs           |  57 +---
 datafusion/expr/src/expr_fn.rs                     |  18 --
 datafusion/functions/Cargo.toml                    |   3 +
 datafusion/functions/src/lib.rs                    |   9 +-
 datafusion/functions/src/string/mod.rs             | 292 +++++++++++++++++++++
 datafusion/functions/src/string/starts_with.rs     |  89 +++++++
 datafusion/functions/src/string/to_hex.rs          | 155 +++++++++++
 datafusion/functions/src/string/trim.rs            |  78 ++++++
 datafusion/functions/src/string/upper.rs           |  66 +++++
 datafusion/physical-expr/src/functions.rs          | 118 ---------
 datafusion/physical-expr/src/string_expressions.rs |  77 +-----
 datafusion/proto/proto/datafusion.proto            |   8 +-
 datafusion/proto/src/generated/pbjson.rs           |  12 -
 datafusion/proto/src/generated/prost.rs            |  16 +-
 datafusion/proto/src/logical_plan/from_proto.rs    |  22 +-
 datafusion/proto/src/logical_plan/to_proto.rs      |   4 -
 datafusion/sql/src/expr/mod.rs                     |   2 +-
 17 files changed, 720 insertions(+), 306 deletions(-)

diff --git a/datafusion/expr/src/built_in_function.rs 
b/datafusion/expr/src/built_in_function.rs
index 79cd6a24ce..fffe2cf4c9 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -147,20 +147,12 @@ pub enum BuiltinScalarFunction {
     Rtrim,
     /// split_part
     SplitPart,
-    /// starts_with
-    StartsWith,
     /// strpos
     Strpos,
     /// substr
     Substr,
-    /// to_hex
-    ToHex,
     /// translate
     Translate,
-    /// trim
-    Trim,
-    /// upper
-    Upper,
     /// uuid
     Uuid,
     /// overlay
@@ -276,13 +268,9 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::Rpad => Volatility::Immutable,
             BuiltinScalarFunction::Rtrim => Volatility::Immutable,
             BuiltinScalarFunction::SplitPart => Volatility::Immutable,
-            BuiltinScalarFunction::StartsWith => Volatility::Immutable,
             BuiltinScalarFunction::Strpos => Volatility::Immutable,
             BuiltinScalarFunction::Substr => Volatility::Immutable,
-            BuiltinScalarFunction::ToHex => Volatility::Immutable,
             BuiltinScalarFunction::Translate => Volatility::Immutable,
-            BuiltinScalarFunction::Trim => Volatility::Immutable,
-            BuiltinScalarFunction::Upper => Volatility::Immutable,
             BuiltinScalarFunction::OverLay => Volatility::Immutable,
             BuiltinScalarFunction::Levenshtein => Volatility::Immutable,
             BuiltinScalarFunction::SubstrIndex => Volatility::Immutable,
@@ -365,7 +353,6 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::SplitPart => {
                 utf8_to_str_type(&input_expr_types[0], "split_part")
             }
-            BuiltinScalarFunction::StartsWith => Ok(Boolean),
             BuiltinScalarFunction::EndsWith => Ok(Boolean),
             BuiltinScalarFunction::Strpos => {
                 utf8_to_int_type(&input_expr_types[0], "strpos/instr/position")
@@ -373,12 +360,6 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::Substr => {
                 utf8_to_str_type(&input_expr_types[0], "substr")
             }
-            BuiltinScalarFunction::ToHex => Ok(match input_expr_types[0] {
-                Int8 | Int16 | Int32 | Int64 => Utf8,
-                _ => {
-                    return plan_err!("The to_hex function can only accept 
integers.");
-                }
-            }),
             BuiltinScalarFunction::SubstrIndex => {
                 utf8_to_str_type(&input_expr_types[0], "substr_index")
             }
@@ -388,10 +369,6 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::Translate => {
                 utf8_to_str_type(&input_expr_types[0], "translate")
             }
-            BuiltinScalarFunction::Trim => 
utf8_to_str_type(&input_expr_types[0], "trim"),
-            BuiltinScalarFunction::Upper => {
-                utf8_to_str_type(&input_expr_types[0], "upper")
-            }
 
             BuiltinScalarFunction::Factorial
             | BuiltinScalarFunction::Gcd
@@ -476,18 +453,16 @@ impl BuiltinScalarFunction {
             | BuiltinScalarFunction::InitCap
             | BuiltinScalarFunction::Lower
             | BuiltinScalarFunction::OctetLength
-            | BuiltinScalarFunction::Reverse
-            | BuiltinScalarFunction::Upper => {
+            | BuiltinScalarFunction::Reverse => {
                 Signature::uniform(1, vec![Utf8, LargeUtf8], self.volatility())
             }
             BuiltinScalarFunction::Btrim
             | BuiltinScalarFunction::Ltrim
-            | BuiltinScalarFunction::Rtrim
-            | BuiltinScalarFunction::Trim => Signature::one_of(
+            | BuiltinScalarFunction::Rtrim => Signature::one_of(
                 vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])],
                 self.volatility(),
             ),
-            BuiltinScalarFunction::Chr | BuiltinScalarFunction::ToHex => {
+            BuiltinScalarFunction::Chr => {
                 Signature::uniform(1, vec![Int64], self.volatility())
             }
             BuiltinScalarFunction::Lpad | BuiltinScalarFunction::Rpad => {
@@ -519,17 +494,17 @@ impl BuiltinScalarFunction {
                 self.volatility(),
             ),
 
-            BuiltinScalarFunction::EndsWith
-            | BuiltinScalarFunction::Strpos
-            | BuiltinScalarFunction::StartsWith => Signature::one_of(
-                vec![
-                    Exact(vec![Utf8, Utf8]),
-                    Exact(vec![Utf8, LargeUtf8]),
-                    Exact(vec![LargeUtf8, Utf8]),
-                    Exact(vec![LargeUtf8, LargeUtf8]),
-                ],
-                self.volatility(),
-            ),
+            BuiltinScalarFunction::EndsWith | BuiltinScalarFunction::Strpos => 
{
+                Signature::one_of(
+                    vec![
+                        Exact(vec![Utf8, Utf8]),
+                        Exact(vec![Utf8, LargeUtf8]),
+                        Exact(vec![LargeUtf8, Utf8]),
+                        Exact(vec![LargeUtf8, LargeUtf8]),
+                    ],
+                    self.volatility(),
+                )
+            }
 
             BuiltinScalarFunction::Substr => Signature::one_of(
                 vec![
@@ -749,13 +724,9 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::Rpad => &["rpad"],
             BuiltinScalarFunction::Rtrim => &["rtrim"],
             BuiltinScalarFunction::SplitPart => &["split_part"],
-            BuiltinScalarFunction::StartsWith => &["starts_with"],
             BuiltinScalarFunction::Strpos => &["strpos", "instr", "position"],
             BuiltinScalarFunction::Substr => &["substr"],
-            BuiltinScalarFunction::ToHex => &["to_hex"],
             BuiltinScalarFunction::Translate => &["translate"],
-            BuiltinScalarFunction::Trim => &["trim"],
-            BuiltinScalarFunction::Upper => &["upper"],
             BuiltinScalarFunction::Uuid => &["uuid"],
             BuiltinScalarFunction::Levenshtein => &["levenshtein"],
             BuiltinScalarFunction::SubstrIndex => &["substr_index", 
"substring_index"],
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index b76164a1c8..8667f631c5 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -575,12 +575,6 @@ scalar_expr!(Log10, log10, num, "base 10 logarithm of 
number");
 scalar_expr!(Ln, ln, num, "natural logarithm (base e) of number");
 scalar_expr!(Power, power, base exponent, "`base` raised to the power of 
`exponent`");
 scalar_expr!(Atan2, atan2, y x, "inverse tangent of a division given in the 
argument");
-scalar_expr!(
-    ToHex,
-    to_hex,
-    num,
-    "returns the hexdecimal representation of an integer"
-);
 scalar_expr!(Uuid, uuid, , "returns uuid v4 as a string value");
 scalar_expr!(Log, log, base x, "logarithm of a `x` for a particular `base`");
 
@@ -630,19 +624,11 @@ scalar_expr!(
     "removes all characters, spaces by default, from the end of a string"
 );
 scalar_expr!(SplitPart, split_part, string delimiter index, "splits a string 
based on a delimiter and picks out the desired field based on the index.");
-scalar_expr!(StartsWith, starts_with, string prefix, "whether the `string` 
starts with the `prefix`");
 scalar_expr!(EndsWith, ends_with, string suffix, "whether the `string` ends 
with the `suffix`");
 scalar_expr!(Strpos, strpos, string substring, "finds the position from where 
the `substring` matches the `string`");
 scalar_expr!(Substr, substr, string position, "substring from the `position` 
to the end");
 scalar_expr!(Substr, substring, string position length, "substring from the 
`position` with `length` characters");
 scalar_expr!(Translate, translate, string from to, "replaces the characters in 
`from` with the counterpart in `to`");
-scalar_expr!(
-    Trim,
-    trim,
-    string,
-    "removes all characters, space by default from the string"
-);
-scalar_expr!(Upper, upper, string, "converts the string to upper case");
 //use vec as parameter
 nary_scalar_expr!(
     Lpad,
@@ -1117,15 +1103,11 @@ mod test {
         test_nary_scalar_expr!(Rpad, rpad, string, count, characters);
         test_scalar_expr!(Rtrim, rtrim, string);
         test_scalar_expr!(SplitPart, split_part, expr, delimiter, index);
-        test_scalar_expr!(StartsWith, starts_with, string, characters);
         test_scalar_expr!(EndsWith, ends_with, string, characters);
         test_scalar_expr!(Strpos, strpos, string, substring);
         test_scalar_expr!(Substr, substr, string, position);
         test_scalar_expr!(Substr, substring, string, position, count);
-        test_scalar_expr!(ToHex, to_hex, string);
         test_scalar_expr!(Translate, translate, string, from, to);
-        test_scalar_expr!(Trim, trim, string);
-        test_scalar_expr!(Upper, upper, string);
         test_nary_scalar_expr!(OverLay, overlay, string, characters, position, 
len);
         test_nary_scalar_expr!(OverLay, overlay, string, characters, position);
         test_scalar_expr!(Levenshtein, levenshtein, string1, string2);
diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
index 5a6da5345d..b12c99e84a 100644
--- a/datafusion/functions/Cargo.toml
+++ b/datafusion/functions/Cargo.toml
@@ -29,6 +29,8 @@ authors = { workspace = true }
 rust-version = { workspace = true }
 
 [features]
+# enable string functions
+string_expressions = []
 # enable core functions
 core_expressions = []
 # enable datetime functions
@@ -41,6 +43,7 @@ default = [
     "math_expressions",
     "regex_expressions",
     "crypto_expressions",
+    "string_expressions",
 ]
 # enable encode/decode functions
 encoding_expressions = ["base64", "hex"]
diff --git a/datafusion/functions/src/lib.rs b/datafusion/functions/src/lib.rs
index 3a2eab8e5f..f469b343e1 100644
--- a/datafusion/functions/src/lib.rs
+++ b/datafusion/functions/src/lib.rs
@@ -84,6 +84,10 @@ use log::debug;
 #[macro_use]
 pub mod macros;
 
+#[cfg(feature = "string_expressions")]
+pub mod string;
+make_stub_package!(string, "string_expressions");
+
 /// Core datafusion expressions
 /// Enabled via feature flag `core_expressions`
 #[cfg(feature = "core_expressions")]
@@ -134,6 +138,8 @@ pub mod expr_fn {
     pub use super::math::expr_fn::*;
     #[cfg(feature = "regex_expressions")]
     pub use super::regex::expr_fn::*;
+    #[cfg(feature = "string_expressions")]
+    pub use super::string::expr_fn::*;
 }
 
 /// Registers all enabled packages with a [`FunctionRegistry`]
@@ -144,7 +150,8 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> 
Result<()> {
         .chain(encoding::functions())
         .chain(math::functions())
         .chain(regex::functions())
-        .chain(crypto::functions());
+        .chain(crypto::functions())
+        .chain(string::functions());
 
     all_functions.try_for_each(|udf| {
         let existing_udf = registry.register_udf(udf)?;
diff --git a/datafusion/functions/src/string/mod.rs 
b/datafusion/functions/src/string/mod.rs
new file mode 100644
index 0000000000..08fcbb363b
--- /dev/null
+++ b/datafusion/functions/src/string/mod.rs
@@ -0,0 +1,292 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::{
+    array::{Array, ArrayRef, GenericStringArray, OffsetSizeTrait},
+    datatypes::DataType,
+};
+use datafusion_common::{
+    cast::as_generic_string_array, exec_err, plan_err, Result, ScalarValue,
+};
+use datafusion_expr::{ColumnarValue, ScalarFunctionImplementation};
+use datafusion_physical_expr::functions::Hint;
+use std::{
+    fmt::{Display, Formatter},
+    sync::Arc,
+};
+
+/// Creates a function to identify the optimal return type of a string 
function given
+/// the type of its first argument.
+///
+/// If the input type is `LargeUtf8` or `LargeBinary` the return type is
+/// `$largeUtf8Type`.
+///
+/// If the input type is `Utf8` or `Binary` the return type is `$utf8Type`.
+macro_rules! get_optimal_return_type {
+    ($FUNC:ident, $largeUtf8Type:expr, $utf8Type:expr) => {
+        fn $FUNC(arg_type: &DataType, name: &str) -> Result<DataType> {
+            Ok(match arg_type {
+                // LargeBinary inputs are automatically coerced to Utf8
+                DataType::LargeUtf8 | DataType::LargeBinary => $largeUtf8Type,
+                // Binary inputs are automatically coerced to Utf8
+                DataType::Utf8 | DataType::Binary => $utf8Type,
+                DataType::Null => DataType::Null,
+                DataType::Dictionary(_, value_type) => match **value_type {
+                    DataType::LargeUtf8 | DataType::LargeBinary => 
$largeUtf8Type,
+                    DataType::Utf8 | DataType::Binary => $utf8Type,
+                    DataType::Null => DataType::Null,
+                    _ => {
+                        return plan_err!(
+                            "The {} function can only accept strings, but got 
{:?}.",
+                            name.to_uppercase(),
+                            **value_type
+                        );
+                    }
+                },
+                data_type => {
+                    return plan_err!(
+                        "The {} function can only accept strings, but got 
{:?}.",
+                        name.to_uppercase(),
+                        data_type
+                    );
+                }
+            })
+        }
+    };
+}
+
+// `utf8_to_str_type`: returns either a Utf8 or LargeUtf8 based on the input 
type size.
+get_optimal_return_type!(utf8_to_str_type, DataType::LargeUtf8, 
DataType::Utf8);
+
+/// applies a unary expression to `args[0]` that is expected to be 
downcastable to
+/// a `GenericStringArray` and returns a `GenericStringArray` (which may have 
a different offset)
+/// # Errors
+/// This function errors when:
+/// * the number of arguments is not 1
+/// * the first argument is not castable to a `GenericStringArray`
+pub(crate) fn unary_string_function<'a, T, O, F, R>(
+    args: &[&'a dyn Array],
+    op: F,
+    name: &str,
+) -> Result<GenericStringArray<O>>
+where
+    R: AsRef<str>,
+    O: OffsetSizeTrait,
+    T: OffsetSizeTrait,
+    F: Fn(&'a str) -> R,
+{
+    if args.len() != 1 {
+        return exec_err!(
+            "{:?} args were supplied but {} takes exactly one argument",
+            args.len(),
+            name
+        );
+    }
+
+    let string_array = as_generic_string_array::<T>(args[0])?;
+
+    // first map is the iterator, second is for the `Option<_>`
+    Ok(string_array.iter().map(|string| string.map(&op)).collect())
+}
+
+fn handle<'a, F, R>(args: &'a [ColumnarValue], op: F, name: &str) -> 
Result<ColumnarValue>
+where
+    R: AsRef<str>,
+    F: Fn(&'a str) -> R,
+{
+    match &args[0] {
+        ColumnarValue::Array(a) => match a.data_type() {
+            DataType::Utf8 => {
+                Ok(ColumnarValue::Array(Arc::new(unary_string_function::<
+                    i32,
+                    i32,
+                    _,
+                    _,
+                >(
+                    &[a.as_ref()], op, name
+                )?)))
+            }
+            DataType::LargeUtf8 => {
+                Ok(ColumnarValue::Array(Arc::new(unary_string_function::<
+                    i64,
+                    i64,
+                    _,
+                    _,
+                >(
+                    &[a.as_ref()], op, name
+                )?)))
+            }
+            other => exec_err!("Unsupported data type {other:?} for function 
{name}"),
+        },
+        ColumnarValue::Scalar(scalar) => match scalar {
+            ScalarValue::Utf8(a) => {
+                let result = a.as_ref().map(|x| (op)(x).as_ref().to_string());
+                Ok(ColumnarValue::Scalar(ScalarValue::Utf8(result)))
+            }
+            ScalarValue::LargeUtf8(a) => {
+                let result = a.as_ref().map(|x| (op)(x).as_ref().to_string());
+                Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(result)))
+            }
+            other => exec_err!("Unsupported data type {other:?} for function 
{name}"),
+        },
+    }
+}
+
+// TODO: remove #[allow(dead_code)] after moving ltrim and rtrim
+enum TrimType {
+    #[allow(dead_code)]
+    Left,
+    #[allow(dead_code)]
+    Right,
+    Both,
+}
+
+impl Display for TrimType {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            TrimType::Left => write!(f, "ltrim"),
+            TrimType::Right => write!(f, "rtrim"),
+            TrimType::Both => write!(f, "btrim"),
+        }
+    }
+}
+
+fn general_trim<T: OffsetSizeTrait>(
+    args: &[ArrayRef],
+    trim_type: TrimType,
+) -> Result<ArrayRef> {
+    let func = match trim_type {
+        TrimType::Left => |input, pattern: &str| {
+            let pattern = pattern.chars().collect::<Vec<char>>();
+            str::trim_start_matches::<&[char]>(input, pattern.as_ref())
+        },
+        TrimType::Right => |input, pattern: &str| {
+            let pattern = pattern.chars().collect::<Vec<char>>();
+            str::trim_end_matches::<&[char]>(input, pattern.as_ref())
+        },
+        TrimType::Both => |input, pattern: &str| {
+            let pattern = pattern.chars().collect::<Vec<char>>();
+            str::trim_end_matches::<&[char]>(
+                str::trim_start_matches::<&[char]>(input, pattern.as_ref()),
+                pattern.as_ref(),
+            )
+        },
+    };
+
+    let string_array = as_generic_string_array::<T>(&args[0])?;
+
+    match args.len() {
+        1 => {
+            let result = string_array
+                .iter()
+                .map(|string| string.map(|string: &str| func(string, " ")))
+                .collect::<GenericStringArray<T>>();
+
+            Ok(Arc::new(result) as ArrayRef)
+        }
+        2 => {
+            let characters_array = as_generic_string_array::<T>(&args[1])?;
+
+            let result = string_array
+                .iter()
+                .zip(characters_array.iter())
+                .map(|(string, characters)| match (string, characters) {
+                    (Some(string), Some(characters)) => Some(func(string, 
characters)),
+                    _ => None,
+                })
+                .collect::<GenericStringArray<T>>();
+
+            Ok(Arc::new(result) as ArrayRef)
+        }
+        other => {
+            exec_err!(
+            "{trim_type} was called with {other} arguments. It requires at 
least 1 and at most 2."
+        )
+        }
+    }
+}
+
+pub(super) fn make_scalar_function<F>(
+    inner: F,
+    hints: Vec<Hint>,
+) -> ScalarFunctionImplementation
+where
+    F: Fn(&[ArrayRef]) -> Result<ArrayRef> + Sync + Send + 'static,
+{
+    Arc::new(move |args: &[ColumnarValue]| {
+        // first, identify if any of the arguments is an Array. If yes, store 
its `len`,
+        // as any scalar will need to be converted to an array of len `len`.
+        let len = args
+            .iter()
+            .fold(Option::<usize>::None, |acc, arg| match arg {
+                ColumnarValue::Scalar(_) => acc,
+                ColumnarValue::Array(a) => Some(a.len()),
+            });
+
+        let is_scalar = len.is_none();
+
+        let inferred_length = len.unwrap_or(1);
+        let args = args
+            .iter()
+            .zip(hints.iter().chain(std::iter::repeat(&Hint::Pad)))
+            .map(|(arg, hint)| {
+                // Decide on the length to expand this scalar to depending
+                // on the given hints.
+                let expansion_len = match hint {
+                    Hint::AcceptsSingular => 1,
+                    Hint::Pad => inferred_length,
+                };
+                arg.clone().into_array(expansion_len)
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        let result = (inner)(&args);
+        if is_scalar {
+            // If all inputs are scalar, keeps output as scalar
+            let result = result.and_then(|arr| 
ScalarValue::try_from_array(&arr, 0));
+            result.map(ColumnarValue::Scalar)
+        } else {
+            result.map(ColumnarValue::Array)
+        }
+    })
+}
+
+mod starts_with;
+mod to_hex;
+mod trim;
+mod upper;
+// create UDFs
+make_udf_function!(starts_with::StartsWithFunc, STARTS_WITH, starts_with);
+make_udf_function!(to_hex::ToHexFunc, TO_HEX, to_hex);
+make_udf_function!(trim::TrimFunc, TRIM, trim);
+make_udf_function!(upper::UpperFunc, UPPER, upper);
+
+export_functions!(
+    (
+    starts_with,
+    arg1 arg2,
+    "Returns true if string starts with prefix."),
+    (
+    to_hex,
+    arg1,
+    "Converts an integer to a hexadecimal string."),
+    (trim,
+    arg1,
+    "removes all characters, space by default from the string"),
+    (upper,
+    arg1,
+    "Converts a string to uppercase."));
diff --git a/datafusion/functions/src/string/starts_with.rs 
b/datafusion/functions/src/string/starts_with.rs
new file mode 100644
index 0000000000..1fce399d1e
--- /dev/null
+++ b/datafusion/functions/src/string/starts_with.rs
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{ArrayRef, OffsetSizeTrait};
+use arrow::datatypes::DataType;
+use datafusion_common::{cast::as_generic_string_array, internal_err, Result};
+use datafusion_expr::ColumnarValue;
+use datafusion_expr::TypeSignature::*;
+use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+use std::sync::Arc;
+
+use crate::string::make_scalar_function;
+
+/// Returns true if string starts with prefix.
+/// starts_with('alphabet', 'alph') = 't'
+pub fn starts_with<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+    let left = as_generic_string_array::<T>(&args[0])?;
+    let right = as_generic_string_array::<T>(&args[1])?;
+
+    let result = arrow::compute::kernels::comparison::starts_with(left, 
right)?;
+
+    Ok(Arc::new(result) as ArrayRef)
+}
+
+#[derive(Debug)]
+pub(super) struct StartsWithFunc {
+    signature: Signature,
+}
+impl StartsWithFunc {
+    pub fn new() -> Self {
+        use DataType::*;
+        Self {
+            signature: Signature::one_of(
+                vec![
+                    Exact(vec![Utf8, Utf8]),
+                    Exact(vec![Utf8, LargeUtf8]),
+                    Exact(vec![LargeUtf8, Utf8]),
+                    Exact(vec![LargeUtf8, LargeUtf8]),
+                ],
+                Volatility::Immutable,
+            ),
+        }
+    }
+}
+
+impl ScalarUDFImpl for StartsWithFunc {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "starts_with"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        use DataType::*;
+
+        Ok(Boolean)
+    }
+
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        match args[0].data_type() {
+            DataType::Utf8 => make_scalar_function(starts_with::<i32>, 
vec![])(args),
+            DataType::LargeUtf8 => {
+                return make_scalar_function(starts_with::<i64>, vec![])(args);
+            }
+            _ => internal_err!("Unsupported data type"),
+        }
+    }
+}
diff --git a/datafusion/functions/src/string/to_hex.rs 
b/datafusion/functions/src/string/to_hex.rs
new file mode 100644
index 0000000000..4dfc84887d
--- /dev/null
+++ b/datafusion/functions/src/string/to_hex.rs
@@ -0,0 +1,155 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
+use arrow::datatypes::{
+    ArrowNativeType, ArrowPrimitiveType, DataType, Int32Type, Int64Type,
+};
+use datafusion_common::cast::as_primitive_array;
+use datafusion_common::Result;
+use datafusion_common::{exec_err, plan_err};
+use datafusion_expr::ColumnarValue;
+use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+use std::sync::Arc;
+
+use super::make_scalar_function;
+
+/// Converts the number to its equivalent hexadecimal representation.
+/// to_hex(2147483647) = '7fffffff'
+pub fn to_hex<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
+where
+    T::Native: OffsetSizeTrait,
+{
+    let integer_array = as_primitive_array::<T>(&args[0])?;
+
+    let result = integer_array
+        .iter()
+        .map(|integer| {
+            if let Some(value) = integer {
+                if let Some(value_usize) = value.to_usize() {
+                    Ok(Some(format!("{value_usize:x}")))
+                } else if let Some(value_isize) = value.to_isize() {
+                    Ok(Some(format!("{value_isize:x}")))
+                } else {
+                    exec_err!("Unsupported data type {integer:?} for function 
to_hex")
+                }
+            } else {
+                Ok(None)
+            }
+        })
+        .collect::<Result<GenericStringArray<i32>>>()?;
+
+    Ok(Arc::new(result) as ArrayRef)
+}
+
+#[derive(Debug)]
+pub(super) struct ToHexFunc {
+    signature: Signature,
+}
+impl ToHexFunc {
+    pub fn new() -> Self {
+        use DataType::*;
+        Self {
+            signature: Signature::uniform(1, vec![Int64], 
Volatility::Immutable),
+        }
+    }
+}
+
+impl ScalarUDFImpl for ToHexFunc {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "to_hex"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+        use DataType::*;
+
+        Ok(match arg_types[0] {
+            Int8 | Int16 | Int32 | Int64 => Utf8,
+            _ => {
+                return plan_err!("The to_hex function can only accept 
integers.");
+            }
+        })
+    }
+
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        match args[0].data_type() {
+            DataType::Int32 => make_scalar_function(to_hex::<Int32Type>, 
vec![])(args),
+            DataType::Int64 => make_scalar_function(to_hex::<Int64Type>, 
vec![])(args),
+            other => exec_err!("Unsupported data type {other:?} for function 
to_hex"),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use arrow::{
+        array::{Int32Array, StringArray},
+        datatypes::Int32Type,
+    };
+
+    use datafusion_common::cast::as_string_array;
+
+    use super::*;
+
+    #[test]
+    // Test to_hex function for zero
+    fn to_hex_zero() -> Result<()> {
+        let array = vec![0].into_iter().collect::<Int32Array>();
+        let array_ref = Arc::new(array);
+        let hex_value_arc = to_hex::<Int32Type>(&[array_ref])?;
+        let hex_value = as_string_array(&hex_value_arc)?;
+        let expected = StringArray::from(vec![Some("0")]);
+        assert_eq!(&expected, hex_value);
+
+        Ok(())
+    }
+
+    #[test]
+    // Test to_hex function for positive number
+    fn to_hex_positive_number() -> Result<()> {
+        let array = vec![100].into_iter().collect::<Int32Array>();
+        let array_ref = Arc::new(array);
+        let hex_value_arc = to_hex::<Int32Type>(&[array_ref])?;
+        let hex_value = as_string_array(&hex_value_arc)?;
+        let expected = StringArray::from(vec![Some("64")]);
+        assert_eq!(&expected, hex_value);
+
+        Ok(())
+    }
+
+    #[test]
+    // Test to_hex function for negative number
+    fn to_hex_negative_number() -> Result<()> {
+        let array = vec![-1].into_iter().collect::<Int32Array>();
+        let array_ref = Arc::new(array);
+        let hex_value_arc = to_hex::<Int32Type>(&[array_ref])?;
+        let hex_value = as_string_array(&hex_value_arc)?;
+        let expected = StringArray::from(vec![Some("ffffffffffffffff")]);
+        assert_eq!(&expected, hex_value);
+
+        Ok(())
+    }
+}
diff --git a/datafusion/functions/src/string/trim.rs 
b/datafusion/functions/src/string/trim.rs
new file mode 100644
index 0000000000..e04a171722
--- /dev/null
+++ b/datafusion/functions/src/string/trim.rs
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{ArrayRef, OffsetSizeTrait};
+use arrow::datatypes::DataType;
+use datafusion_common::exec_err;
+use datafusion_common::Result;
+use datafusion_expr::ColumnarValue;
+use datafusion_expr::TypeSignature::*;
+use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+
+use crate::string::{make_scalar_function, utf8_to_str_type};
+
+use super::{general_trim, TrimType};
+
+/// Returns the longest string with leading and trailing characters removed. 
If the characters are not specified, whitespace is removed.
+/// btrim('xyxtrimyyx', 'xyz') = 'trim'
+pub fn btrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+    general_trim::<T>(args, TrimType::Both)
+}
+
+#[derive(Debug)]
+pub(super) struct TrimFunc {
+    signature: Signature,
+}
+
+impl TrimFunc {
+    pub fn new() -> Self {
+        use DataType::*;
+        Self {
+            signature: Signature::one_of(
+                vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])],
+                Volatility::Immutable,
+            ),
+        }
+    }
+}
+
+impl ScalarUDFImpl for TrimFunc {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "trim"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+        utf8_to_str_type(&arg_types[0], "trim")
+    }
+
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        match args[0].data_type() {
+            DataType::Utf8 => make_scalar_function(btrim::<i32>, vec![])(args),
+            DataType::LargeUtf8 => make_scalar_function(btrim::<i64>, 
vec![])(args),
+            other => exec_err!("Unsupported data type {other:?} for function 
trim"),
+        }
+    }
+}
diff --git a/datafusion/functions/src/string/upper.rs 
b/datafusion/functions/src/string/upper.rs
new file mode 100644
index 0000000000..ed41487699
--- /dev/null
+++ b/datafusion/functions/src/string/upper.rs
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::datatypes::DataType;
+use datafusion_common::Result;
+use datafusion_expr::ColumnarValue;
+use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+
+use crate::string::utf8_to_str_type;
+
+use super::handle;
+
+#[derive(Debug)]
+pub(super) struct UpperFunc {
+    signature: Signature,
+}
+
+impl UpperFunc {
+    pub fn new() -> Self {
+        use DataType::*;
+        Self {
+            signature: Signature::uniform(
+                1,
+                vec![Utf8, LargeUtf8],
+                Volatility::Immutable,
+            ),
+        }
+    }
+}
+
+impl ScalarUDFImpl for UpperFunc {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "upper"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+        utf8_to_str_type(&arg_types[0], "upper")
+    }
+
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        handle(args, |string| string.to_uppercase(), "upper")
+    }
+}
diff --git a/datafusion/physical-expr/src/functions.rs 
b/datafusion/physical-expr/src/functions.rs
index e76e7f56dc..f2c93c3ec1 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -447,17 +447,6 @@ pub fn create_physical_fun(
                 exec_err!("Unsupported data type {other:?} for function 
split_part")
             }
         }),
-        BuiltinScalarFunction::StartsWith => Arc::new(|args| match 
args[0].data_type() {
-            DataType::Utf8 => {
-                
make_scalar_function_inner(string_expressions::starts_with::<i32>)(args)
-            }
-            DataType::LargeUtf8 => {
-                
make_scalar_function_inner(string_expressions::starts_with::<i64>)(args)
-            }
-            other => {
-                exec_err!("Unsupported data type {other:?} for function 
starts_with")
-            }
-        }),
         BuiltinScalarFunction::EndsWith => Arc::new(|args| match 
args[0].data_type() {
             DataType::Utf8 => {
                 
make_scalar_function_inner(string_expressions::ends_with::<i32>)(args)
@@ -497,15 +486,6 @@ pub fn create_physical_fun(
             }
             other => exec_err!("Unsupported data type {other:?} for function 
substr"),
         }),
-        BuiltinScalarFunction::ToHex => Arc::new(|args| match 
args[0].data_type() {
-            DataType::Int32 => {
-                
make_scalar_function_inner(string_expressions::to_hex::<Int32Type>)(args)
-            }
-            DataType::Int64 => {
-                
make_scalar_function_inner(string_expressions::to_hex::<Int64Type>)(args)
-            }
-            other => exec_err!("Unsupported data type {other:?} for function 
to_hex"),
-        }),
         BuiltinScalarFunction::Translate => Arc::new(|args| match 
args[0].data_type() {
             DataType::Utf8 => {
                 let func = invoke_if_unicode_expressions_feature_flag!(
@@ -527,16 +507,6 @@ pub fn create_physical_fun(
                 exec_err!("Unsupported data type {other:?} for function 
translate")
             }
         }),
-        BuiltinScalarFunction::Trim => Arc::new(|args| match 
args[0].data_type() {
-            DataType::Utf8 => {
-                
make_scalar_function_inner(string_expressions::btrim::<i32>)(args)
-            }
-            DataType::LargeUtf8 => {
-                
make_scalar_function_inner(string_expressions::btrim::<i64>)(args)
-            }
-            other => exec_err!("Unsupported data type {other:?} for function 
trim"),
-        }),
-        BuiltinScalarFunction::Upper => Arc::new(string_expressions::upper),
         BuiltinScalarFunction::Uuid => Arc::new(string_expressions::uuid),
         BuiltinScalarFunction::OverLay => Arc::new(|args| match 
args[0].data_type() {
             DataType::Utf8 => {
@@ -1797,38 +1767,6 @@ mod tests {
             Utf8,
             StringArray
         );
-        test_function!(
-            StartsWith,
-            &[lit("alphabet"), lit("alph"),],
-            Ok(Some(true)),
-            bool,
-            Boolean,
-            BooleanArray
-        );
-        test_function!(
-            StartsWith,
-            &[lit("alphabet"), lit("blph"),],
-            Ok(Some(false)),
-            bool,
-            Boolean,
-            BooleanArray
-        );
-        test_function!(
-            StartsWith,
-            &[lit(ScalarValue::Utf8(None)), lit("alph"),],
-            Ok(None),
-            bool,
-            Boolean,
-            BooleanArray
-        );
-        test_function!(
-            StartsWith,
-            &[lit("alphabet"), lit(ScalarValue::Utf8(None)),],
-            Ok(None),
-            bool,
-            Boolean,
-            BooleanArray
-        );
         test_function!(
             EndsWith,
             &[lit("alphabet"), lit("alph"),],
@@ -2149,62 +2087,6 @@ mod tests {
             Utf8,
             StringArray
         );
-        test_function!(
-            Trim,
-            &[lit(" trim ")],
-            Ok(Some("trim")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Trim,
-            &[lit("trim ")],
-            Ok(Some("trim")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Trim,
-            &[lit(" trim")],
-            Ok(Some("trim")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Trim,
-            &[lit(ScalarValue::Utf8(None))],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Upper,
-            &[lit("upper")],
-            Ok(Some("UPPER")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Upper,
-            &[lit("UPPER")],
-            Ok(Some("UPPER")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Upper,
-            &[lit(ScalarValue::Utf8(None))],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
         Ok(())
     }
 
diff --git a/datafusion/physical-expr/src/string_expressions.rs 
b/datafusion/physical-expr/src/string_expressions.rs
index ace7ef2888..86c0092a22 100644
--- a/datafusion/physical-expr/src/string_expressions.rs
+++ b/datafusion/physical-expr/src/string_expressions.rs
@@ -32,16 +32,14 @@ use arrow::{
         Array, ArrayRef, GenericStringArray, Int32Array, Int64Array, 
OffsetSizeTrait,
         StringArray,
     },
-    datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType},
+    datatypes::DataType,
 };
 use uuid::Uuid;
 
 use datafusion_common::utils::datafusion_strsim;
 use datafusion_common::Result;
 use datafusion_common::{
-    cast::{
-        as_generic_string_array, as_int64_array, as_primitive_array, 
as_string_array,
-    },
+    cast::{as_generic_string_array, as_int64_array, as_string_array},
     exec_err, ScalarValue,
 };
 use datafusion_expr::ColumnarValue;
@@ -526,34 +524,6 @@ pub fn ends_with<T: OffsetSizeTrait>(args: &[ArrayRef]) -> 
Result<ArrayRef> {
     Ok(Arc::new(result) as ArrayRef)
 }
 
-/// Converts the number to its equivalent hexadecimal representation.
-/// to_hex(2147483647) = '7fffffff'
-pub fn to_hex<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
-where
-    T::Native: OffsetSizeTrait,
-{
-    let integer_array = as_primitive_array::<T>(&args[0])?;
-
-    let result = integer_array
-        .iter()
-        .map(|integer| {
-            if let Some(value) = integer {
-                if let Some(value_usize) = value.to_usize() {
-                    Ok(Some(format!("{value_usize:x}")))
-                } else if let Some(value_isize) = value.to_isize() {
-                    Ok(Some(format!("{value_isize:x}")))
-                } else {
-                    exec_err!("Unsupported data type {integer:?} for function 
to_hex")
-                }
-            } else {
-                Ok(None)
-            }
-        })
-        .collect::<Result<GenericStringArray<i32>>>()?;
-
-    Ok(Arc::new(result) as ArrayRef)
-}
-
 /// Converts the string to all upper case.
 /// upper('tom') = 'TOM'
 pub fn upper(args: &[ColumnarValue]) -> Result<ColumnarValue> {
@@ -709,54 +679,13 @@ pub fn levenshtein<T: OffsetSizeTrait>(args: &[ArrayRef]) 
-> Result<ArrayRef> {
 
 #[cfg(test)]
 mod tests {
-    use arrow::{array::Int32Array, datatypes::Int32Type};
+    use arrow::array::Int32Array;
     use arrow_array::Int64Array;
 
     use datafusion_common::cast::as_int32_array;
 
-    use crate::string_expressions;
-
     use super::*;
 
-    #[test]
-    // Test to_hex function for zero
-    fn to_hex_zero() -> Result<()> {
-        let array = vec![0].into_iter().collect::<Int32Array>();
-        let array_ref = Arc::new(array);
-        let hex_value_arc = 
string_expressions::to_hex::<Int32Type>(&[array_ref])?;
-        let hex_value = as_string_array(&hex_value_arc)?;
-        let expected = StringArray::from(vec![Some("0")]);
-        assert_eq!(&expected, hex_value);
-
-        Ok(())
-    }
-
-    #[test]
-    // Test to_hex function for positive number
-    fn to_hex_positive_number() -> Result<()> {
-        let array = vec![100].into_iter().collect::<Int32Array>();
-        let array_ref = Arc::new(array);
-        let hex_value_arc = 
string_expressions::to_hex::<Int32Type>(&[array_ref])?;
-        let hex_value = as_string_array(&hex_value_arc)?;
-        let expected = StringArray::from(vec![Some("64")]);
-        assert_eq!(&expected, hex_value);
-
-        Ok(())
-    }
-
-    #[test]
-    // Test to_hex function for negative number
-    fn to_hex_negative_number() -> Result<()> {
-        let array = vec![-1].into_iter().collect::<Int32Array>();
-        let array_ref = Arc::new(array);
-        let hex_value_arc = 
string_expressions::to_hex::<Int32Type>(&[array_ref])?;
-        let hex_value = as_string_array(&hex_value_arc)?;
-        let expected = StringArray::from(vec![Some("ffffffffffffffff")]);
-        assert_eq!(&expected, hex_value);
-
-        Ok(())
-    }
-
     #[test]
     fn to_overlay() -> Result<()> {
         let string =
diff --git a/datafusion/proto/proto/datafusion.proto 
b/datafusion/proto/proto/datafusion.proto
index 10f79a2b8c..c009682d5a 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -592,18 +592,18 @@ enum ScalarFunction {
   // 48 was SHA384
   // 49 was SHA512
   SplitPart = 50;
-  StartsWith = 51;
+  // StartsWith = 51;
   Strpos = 52;
   Substr = 53;
-  ToHex = 54;
+  // ToHex = 54;
   // 55 was ToTimestamp
   // 56 was ToTimestampMillis
   // 57 was ToTimestampMicros
   // 58 was ToTimestampSeconds
   // 59 was Now
   Translate = 60;
-  Trim = 61;
-  Upper = 62;
+  // Trim = 61;
+  // Upper = 62;
   Coalesce = 63;
   Power = 64;
   // 65 was StructFun
diff --git a/datafusion/proto/src/generated/pbjson.rs 
b/datafusion/proto/src/generated/pbjson.rs
index 7757a64ef3..58683dba6d 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -22949,13 +22949,9 @@ impl serde::Serialize for ScalarFunction {
             Self::Rpad => "Rpad",
             Self::Rtrim => "Rtrim",
             Self::SplitPart => "SplitPart",
-            Self::StartsWith => "StartsWith",
             Self::Strpos => "Strpos",
             Self::Substr => "Substr",
-            Self::ToHex => "ToHex",
             Self::Translate => "Translate",
-            Self::Trim => "Trim",
-            Self::Upper => "Upper",
             Self::Coalesce => "Coalesce",
             Self::Power => "Power",
             Self::Atan2 => "Atan2",
@@ -23027,13 +23023,9 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
             "Rpad",
             "Rtrim",
             "SplitPart",
-            "StartsWith",
             "Strpos",
             "Substr",
-            "ToHex",
             "Translate",
-            "Trim",
-            "Upper",
             "Coalesce",
             "Power",
             "Atan2",
@@ -23134,13 +23126,9 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
                     "Rpad" => Ok(ScalarFunction::Rpad),
                     "Rtrim" => Ok(ScalarFunction::Rtrim),
                     "SplitPart" => Ok(ScalarFunction::SplitPart),
-                    "StartsWith" => Ok(ScalarFunction::StartsWith),
                     "Strpos" => Ok(ScalarFunction::Strpos),
                     "Substr" => Ok(ScalarFunction::Substr),
-                    "ToHex" => Ok(ScalarFunction::ToHex),
                     "Translate" => Ok(ScalarFunction::Translate),
-                    "Trim" => Ok(ScalarFunction::Trim),
-                    "Upper" => Ok(ScalarFunction::Upper),
                     "Coalesce" => Ok(ScalarFunction::Coalesce),
                     "Power" => Ok(ScalarFunction::Power),
                     "Atan2" => Ok(ScalarFunction::Atan2),
diff --git a/datafusion/proto/src/generated/prost.rs 
b/datafusion/proto/src/generated/prost.rs
index ab0ddb14eb..8eabb3b186 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2891,18 +2891,18 @@ pub enum ScalarFunction {
     /// 48 was SHA384
     /// 49 was SHA512
     SplitPart = 50,
-    StartsWith = 51,
+    /// StartsWith = 51;
     Strpos = 52,
     Substr = 53,
-    ToHex = 54,
+    /// ToHex = 54;
     /// 55 was ToTimestamp
     /// 56 was ToTimestampMillis
     /// 57 was ToTimestampMicros
     /// 58 was ToTimestampSeconds
     /// 59 was Now
     Translate = 60,
-    Trim = 61,
-    Upper = 62,
+    /// Trim = 61;
+    /// Upper = 62;
     Coalesce = 63,
     Power = 64,
     /// 65 was StructFun
@@ -3022,13 +3022,9 @@ impl ScalarFunction {
             ScalarFunction::Rpad => "Rpad",
             ScalarFunction::Rtrim => "Rtrim",
             ScalarFunction::SplitPart => "SplitPart",
-            ScalarFunction::StartsWith => "StartsWith",
             ScalarFunction::Strpos => "Strpos",
             ScalarFunction::Substr => "Substr",
-            ScalarFunction::ToHex => "ToHex",
             ScalarFunction::Translate => "Translate",
-            ScalarFunction::Trim => "Trim",
-            ScalarFunction::Upper => "Upper",
             ScalarFunction::Coalesce => "Coalesce",
             ScalarFunction::Power => "Power",
             ScalarFunction::Atan2 => "Atan2",
@@ -3094,13 +3090,9 @@ impl ScalarFunction {
             "Rpad" => Some(Self::Rpad),
             "Rtrim" => Some(Self::Rtrim),
             "SplitPart" => Some(Self::SplitPart),
-            "StartsWith" => Some(Self::StartsWith),
             "Strpos" => Some(Self::Strpos),
             "Substr" => Some(Self::Substr),
-            "ToHex" => Some(Self::ToHex),
             "Translate" => Some(Self::Translate),
-            "Trim" => Some(Self::Trim),
-            "Upper" => Some(Self::Upper),
             "Coalesce" => Some(Self::Coalesce),
             "Power" => Some(Self::Power),
             "Atan2" => Some(Self::Atan2),
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs 
b/datafusion/proto/src/logical_plan/from_proto.rs
index 8581156e2b..64ceb37d29 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -57,10 +57,9 @@ use datafusion_expr::{
     logical_plan::{PlanType, StringifiedPlan},
     lower, lpad, ltrim, nanvl, octet_length, overlay, pi, power, radians, 
random, repeat,
     replace, reverse, right, round, rpad, rtrim, signum, sin, sinh, 
split_part, sqrt,
-    starts_with, strpos, substr, substr_index, substring, to_hex, translate, 
trim, trunc,
-    upper, uuid, AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction,
-    BuiltinScalarFunction, Case, Cast, Expr, GetFieldAccess, GetIndexedField,
-    GroupingSet,
+    strpos, substr, substr_index, substring, translate, trunc, uuid, 
AggregateFunction,
+    Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, Case, 
Cast, Expr,
+    GetFieldAccess, GetIndexedField, GroupingSet,
     GroupingSet::GroupingSets,
     JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, 
WindowFrameBound,
     WindowFrameUnits,
@@ -462,8 +461,6 @@ impl From<&protobuf::ScalarFunction> for 
BuiltinScalarFunction {
             ScalarFunction::OctetLength => Self::OctetLength,
             ScalarFunction::Concat => Self::Concat,
             ScalarFunction::Lower => Self::Lower,
-            ScalarFunction::Upper => Self::Upper,
-            ScalarFunction::Trim => Self::Trim,
             ScalarFunction::Ltrim => Self::Ltrim,
             ScalarFunction::Rtrim => Self::Rtrim,
             ScalarFunction::Log2 => Self::Log2,
@@ -485,10 +482,8 @@ impl From<&protobuf::ScalarFunction> for 
BuiltinScalarFunction {
             ScalarFunction::Right => Self::Right,
             ScalarFunction::Rpad => Self::Rpad,
             ScalarFunction::SplitPart => Self::SplitPart,
-            ScalarFunction::StartsWith => Self::StartsWith,
             ScalarFunction::Strpos => Self::Strpos,
             ScalarFunction::Substr => Self::Substr,
-            ScalarFunction::ToHex => Self::ToHex,
             ScalarFunction::Uuid => Self::Uuid,
             ScalarFunction::Translate => Self::Translate,
             ScalarFunction::Coalesce => Self::Coalesce,
@@ -1444,10 +1439,6 @@ pub fn parse_expr(
                 ScalarFunction::Lower => {
                     Ok(lower(parse_expr(&args[0], registry, codec)?))
                 }
-                ScalarFunction::Upper => {
-                    Ok(upper(parse_expr(&args[0], registry, codec)?))
-                }
-                ScalarFunction::Trim => Ok(trim(parse_expr(&args[0], registry, 
codec)?)),
                 ScalarFunction::Ltrim => {
                     Ok(ltrim(parse_expr(&args[0], registry, codec)?))
                 }
@@ -1532,10 +1523,6 @@ pub fn parse_expr(
                     parse_expr(&args[1], registry, codec)?,
                     parse_expr(&args[2], registry, codec)?,
                 )),
-                ScalarFunction::StartsWith => Ok(starts_with(
-                    parse_expr(&args[0], registry, codec)?,
-                    parse_expr(&args[1], registry, codec)?,
-                )),
                 ScalarFunction::EndsWith => Ok(ends_with(
                     parse_expr(&args[0], registry, codec)?,
                     parse_expr(&args[1], registry, codec)?,
@@ -1563,9 +1550,6 @@ pub fn parse_expr(
                     parse_expr(&args[0], registry, codec)?,
                     parse_expr(&args[1], registry, codec)?,
                 )),
-                ScalarFunction::ToHex => {
-                    Ok(to_hex(parse_expr(&args[0], registry, codec)?))
-                }
                 ScalarFunction::Translate => Ok(translate(
                     parse_expr(&args[0], registry, codec)?,
                     parse_expr(&args[1], registry, codec)?,
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs 
b/datafusion/proto/src/logical_plan/to_proto.rs
index 05a29ff6d4..89bd93550a 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1481,8 +1481,6 @@ impl TryFrom<&BuiltinScalarFunction> for 
protobuf::ScalarFunction {
             BuiltinScalarFunction::OctetLength => Self::OctetLength,
             BuiltinScalarFunction::Concat => Self::Concat,
             BuiltinScalarFunction::Lower => Self::Lower,
-            BuiltinScalarFunction::Upper => Self::Upper,
-            BuiltinScalarFunction::Trim => Self::Trim,
             BuiltinScalarFunction::Ltrim => Self::Ltrim,
             BuiltinScalarFunction::Rtrim => Self::Rtrim,
             BuiltinScalarFunction::Log2 => Self::Log2,
@@ -1505,10 +1503,8 @@ impl TryFrom<&BuiltinScalarFunction> for 
protobuf::ScalarFunction {
             BuiltinScalarFunction::Right => Self::Right,
             BuiltinScalarFunction::Rpad => Self::Rpad,
             BuiltinScalarFunction::SplitPart => Self::SplitPart,
-            BuiltinScalarFunction::StartsWith => Self::StartsWith,
             BuiltinScalarFunction::Strpos => Self::Strpos,
             BuiltinScalarFunction::Substr => Self::Substr,
-            BuiltinScalarFunction::ToHex => Self::ToHex,
             BuiltinScalarFunction::Translate => Self::Translate,
             BuiltinScalarFunction::Coalesce => Self::Coalesce,
             BuiltinScalarFunction::Pi => Self::Pi,
diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
index 5e9c0623a2..c34b42193c 100644
--- a/datafusion/sql/src/expr/mod.rs
+++ b/datafusion/sql/src/expr/mod.rs
@@ -747,7 +747,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             Some(TrimWhereField::Leading) => BuiltinScalarFunction::Ltrim,
             Some(TrimWhereField::Trailing) => BuiltinScalarFunction::Rtrim,
             Some(TrimWhereField::Both) => BuiltinScalarFunction::Btrim,
-            None => BuiltinScalarFunction::Trim,
+            None => BuiltinScalarFunction::Btrim,
         };
 
         let arg = self.sql_expr_to_logical_expr(expr, schema, 
planner_context)?;

Reply via email to