This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 1d8a41bc8e Move `starts_with`, `to_hex`, `trim`, `upper` to
datafusion-functions (and add string_expressions) (#9541)
1d8a41bc8e is described below
commit 1d8a41bc8e08b56e90d6f8e6ef20e39a126987e4
Author: Reilly.tang <[email protected]>
AuthorDate: Thu Mar 21 07:57:05 2024 +0800
Move `starts_with`, `to_hex`, `trim`, `upper` to datafusion-functions (and
add string_expressions) (#9541)
* [task #9539] Move starts_with, to_hex, trim, upper to datafusion-functions
Signed-off-by: tangruilin <[email protected]>
* Export expr_fn, restore tests
* fix comments
---------
Signed-off-by: tangruilin <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/expr/src/built_in_function.rs | 57 +---
datafusion/expr/src/expr_fn.rs | 18 --
datafusion/functions/Cargo.toml | 3 +
datafusion/functions/src/lib.rs | 9 +-
datafusion/functions/src/string/mod.rs | 292 +++++++++++++++++++++
datafusion/functions/src/string/starts_with.rs | 89 +++++++
datafusion/functions/src/string/to_hex.rs | 155 +++++++++++
datafusion/functions/src/string/trim.rs | 78 ++++++
datafusion/functions/src/string/upper.rs | 66 +++++
datafusion/physical-expr/src/functions.rs | 118 ---------
datafusion/physical-expr/src/string_expressions.rs | 77 +-----
datafusion/proto/proto/datafusion.proto | 8 +-
datafusion/proto/src/generated/pbjson.rs | 12 -
datafusion/proto/src/generated/prost.rs | 16 +-
datafusion/proto/src/logical_plan/from_proto.rs | 22 +-
datafusion/proto/src/logical_plan/to_proto.rs | 4 -
datafusion/sql/src/expr/mod.rs | 2 +-
17 files changed, 720 insertions(+), 306 deletions(-)
diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index 79cd6a24ce..fffe2cf4c9 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -147,20 +147,12 @@ pub enum BuiltinScalarFunction {
Rtrim,
/// split_part
SplitPart,
- /// starts_with
- StartsWith,
/// strpos
Strpos,
/// substr
Substr,
- /// to_hex
- ToHex,
/// translate
Translate,
- /// trim
- Trim,
- /// upper
- Upper,
/// uuid
Uuid,
/// overlay
@@ -276,13 +268,9 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Rpad => Volatility::Immutable,
BuiltinScalarFunction::Rtrim => Volatility::Immutable,
BuiltinScalarFunction::SplitPart => Volatility::Immutable,
- BuiltinScalarFunction::StartsWith => Volatility::Immutable,
BuiltinScalarFunction::Strpos => Volatility::Immutable,
BuiltinScalarFunction::Substr => Volatility::Immutable,
- BuiltinScalarFunction::ToHex => Volatility::Immutable,
BuiltinScalarFunction::Translate => Volatility::Immutable,
- BuiltinScalarFunction::Trim => Volatility::Immutable,
- BuiltinScalarFunction::Upper => Volatility::Immutable,
BuiltinScalarFunction::OverLay => Volatility::Immutable,
BuiltinScalarFunction::Levenshtein => Volatility::Immutable,
BuiltinScalarFunction::SubstrIndex => Volatility::Immutable,
@@ -365,7 +353,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::SplitPart => {
utf8_to_str_type(&input_expr_types[0], "split_part")
}
- BuiltinScalarFunction::StartsWith => Ok(Boolean),
BuiltinScalarFunction::EndsWith => Ok(Boolean),
BuiltinScalarFunction::Strpos => {
utf8_to_int_type(&input_expr_types[0], "strpos/instr/position")
@@ -373,12 +360,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Substr => {
utf8_to_str_type(&input_expr_types[0], "substr")
}
- BuiltinScalarFunction::ToHex => Ok(match input_expr_types[0] {
- Int8 | Int16 | Int32 | Int64 => Utf8,
- _ => {
- return plan_err!("The to_hex function can only accept
integers.");
- }
- }),
BuiltinScalarFunction::SubstrIndex => {
utf8_to_str_type(&input_expr_types[0], "substr_index")
}
@@ -388,10 +369,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Translate => {
utf8_to_str_type(&input_expr_types[0], "translate")
}
- BuiltinScalarFunction::Trim =>
utf8_to_str_type(&input_expr_types[0], "trim"),
- BuiltinScalarFunction::Upper => {
- utf8_to_str_type(&input_expr_types[0], "upper")
- }
BuiltinScalarFunction::Factorial
| BuiltinScalarFunction::Gcd
@@ -476,18 +453,16 @@ impl BuiltinScalarFunction {
| BuiltinScalarFunction::InitCap
| BuiltinScalarFunction::Lower
| BuiltinScalarFunction::OctetLength
- | BuiltinScalarFunction::Reverse
- | BuiltinScalarFunction::Upper => {
+ | BuiltinScalarFunction::Reverse => {
Signature::uniform(1, vec![Utf8, LargeUtf8], self.volatility())
}
BuiltinScalarFunction::Btrim
| BuiltinScalarFunction::Ltrim
- | BuiltinScalarFunction::Rtrim
- | BuiltinScalarFunction::Trim => Signature::one_of(
+ | BuiltinScalarFunction::Rtrim => Signature::one_of(
vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])],
self.volatility(),
),
- BuiltinScalarFunction::Chr | BuiltinScalarFunction::ToHex => {
+ BuiltinScalarFunction::Chr => {
Signature::uniform(1, vec![Int64], self.volatility())
}
BuiltinScalarFunction::Lpad | BuiltinScalarFunction::Rpad => {
@@ -519,17 +494,17 @@ impl BuiltinScalarFunction {
self.volatility(),
),
- BuiltinScalarFunction::EndsWith
- | BuiltinScalarFunction::Strpos
- | BuiltinScalarFunction::StartsWith => Signature::one_of(
- vec![
- Exact(vec![Utf8, Utf8]),
- Exact(vec![Utf8, LargeUtf8]),
- Exact(vec![LargeUtf8, Utf8]),
- Exact(vec![LargeUtf8, LargeUtf8]),
- ],
- self.volatility(),
- ),
+ BuiltinScalarFunction::EndsWith | BuiltinScalarFunction::Strpos =>
{
+ Signature::one_of(
+ vec![
+ Exact(vec![Utf8, Utf8]),
+ Exact(vec![Utf8, LargeUtf8]),
+ Exact(vec![LargeUtf8, Utf8]),
+ Exact(vec![LargeUtf8, LargeUtf8]),
+ ],
+ self.volatility(),
+ )
+ }
BuiltinScalarFunction::Substr => Signature::one_of(
vec![
@@ -749,13 +724,9 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Rpad => &["rpad"],
BuiltinScalarFunction::Rtrim => &["rtrim"],
BuiltinScalarFunction::SplitPart => &["split_part"],
- BuiltinScalarFunction::StartsWith => &["starts_with"],
BuiltinScalarFunction::Strpos => &["strpos", "instr", "position"],
BuiltinScalarFunction::Substr => &["substr"],
- BuiltinScalarFunction::ToHex => &["to_hex"],
BuiltinScalarFunction::Translate => &["translate"],
- BuiltinScalarFunction::Trim => &["trim"],
- BuiltinScalarFunction::Upper => &["upper"],
BuiltinScalarFunction::Uuid => &["uuid"],
BuiltinScalarFunction::Levenshtein => &["levenshtein"],
BuiltinScalarFunction::SubstrIndex => &["substr_index",
"substring_index"],
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index b76164a1c8..8667f631c5 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -575,12 +575,6 @@ scalar_expr!(Log10, log10, num, "base 10 logarithm of
number");
scalar_expr!(Ln, ln, num, "natural logarithm (base e) of number");
scalar_expr!(Power, power, base exponent, "`base` raised to the power of
`exponent`");
scalar_expr!(Atan2, atan2, y x, "inverse tangent of a division given in the
argument");
-scalar_expr!(
- ToHex,
- to_hex,
- num,
- "returns the hexdecimal representation of an integer"
-);
scalar_expr!(Uuid, uuid, , "returns uuid v4 as a string value");
scalar_expr!(Log, log, base x, "logarithm of a `x` for a particular `base`");
@@ -630,19 +624,11 @@ scalar_expr!(
"removes all characters, spaces by default, from the end of a string"
);
scalar_expr!(SplitPart, split_part, string delimiter index, "splits a string
based on a delimiter and picks out the desired field based on the index.");
-scalar_expr!(StartsWith, starts_with, string prefix, "whether the `string`
starts with the `prefix`");
scalar_expr!(EndsWith, ends_with, string suffix, "whether the `string` ends
with the `suffix`");
scalar_expr!(Strpos, strpos, string substring, "finds the position from where
the `substring` matches the `string`");
scalar_expr!(Substr, substr, string position, "substring from the `position`
to the end");
scalar_expr!(Substr, substring, string position length, "substring from the
`position` with `length` characters");
scalar_expr!(Translate, translate, string from to, "replaces the characters in
`from` with the counterpart in `to`");
-scalar_expr!(
- Trim,
- trim,
- string,
- "removes all characters, space by default from the string"
-);
-scalar_expr!(Upper, upper, string, "converts the string to upper case");
//use vec as parameter
nary_scalar_expr!(
Lpad,
@@ -1117,15 +1103,11 @@ mod test {
test_nary_scalar_expr!(Rpad, rpad, string, count, characters);
test_scalar_expr!(Rtrim, rtrim, string);
test_scalar_expr!(SplitPart, split_part, expr, delimiter, index);
- test_scalar_expr!(StartsWith, starts_with, string, characters);
test_scalar_expr!(EndsWith, ends_with, string, characters);
test_scalar_expr!(Strpos, strpos, string, substring);
test_scalar_expr!(Substr, substr, string, position);
test_scalar_expr!(Substr, substring, string, position, count);
- test_scalar_expr!(ToHex, to_hex, string);
test_scalar_expr!(Translate, translate, string, from, to);
- test_scalar_expr!(Trim, trim, string);
- test_scalar_expr!(Upper, upper, string);
test_nary_scalar_expr!(OverLay, overlay, string, characters, position,
len);
test_nary_scalar_expr!(OverLay, overlay, string, characters, position);
test_scalar_expr!(Levenshtein, levenshtein, string1, string2);
diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
index 5a6da5345d..b12c99e84a 100644
--- a/datafusion/functions/Cargo.toml
+++ b/datafusion/functions/Cargo.toml
@@ -29,6 +29,8 @@ authors = { workspace = true }
rust-version = { workspace = true }
[features]
+# enable string functions
+string_expressions = []
# enable core functions
core_expressions = []
# enable datetime functions
@@ -41,6 +43,7 @@ default = [
"math_expressions",
"regex_expressions",
"crypto_expressions",
+ "string_expressions",
]
# enable encode/decode functions
encoding_expressions = ["base64", "hex"]
diff --git a/datafusion/functions/src/lib.rs b/datafusion/functions/src/lib.rs
index 3a2eab8e5f..f469b343e1 100644
--- a/datafusion/functions/src/lib.rs
+++ b/datafusion/functions/src/lib.rs
@@ -84,6 +84,10 @@ use log::debug;
#[macro_use]
pub mod macros;
+#[cfg(feature = "string_expressions")]
+pub mod string;
+make_stub_package!(string, "string_expressions");
+
/// Core datafusion expressions
/// Enabled via feature flag `core_expressions`
#[cfg(feature = "core_expressions")]
@@ -134,6 +138,8 @@ pub mod expr_fn {
pub use super::math::expr_fn::*;
#[cfg(feature = "regex_expressions")]
pub use super::regex::expr_fn::*;
+ #[cfg(feature = "string_expressions")]
+ pub use super::string::expr_fn::*;
}
/// Registers all enabled packages with a [`FunctionRegistry`]
@@ -144,7 +150,8 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) ->
Result<()> {
.chain(encoding::functions())
.chain(math::functions())
.chain(regex::functions())
- .chain(crypto::functions());
+ .chain(crypto::functions())
+ .chain(string::functions());
all_functions.try_for_each(|udf| {
let existing_udf = registry.register_udf(udf)?;
diff --git a/datafusion/functions/src/string/mod.rs b/datafusion/functions/src/string/mod.rs
new file mode 100644
index 0000000000..08fcbb363b
--- /dev/null
+++ b/datafusion/functions/src/string/mod.rs
@@ -0,0 +1,292 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::{
+ array::{Array, ArrayRef, GenericStringArray, OffsetSizeTrait},
+ datatypes::DataType,
+};
+use datafusion_common::{
+ cast::as_generic_string_array, exec_err, plan_err, Result, ScalarValue,
+};
+use datafusion_expr::{ColumnarValue, ScalarFunctionImplementation};
+use datafusion_physical_expr::functions::Hint;
+use std::{
+ fmt::{Display, Formatter},
+ sync::Arc,
+};
+
+/// Creates a function to identify the optimal return type of a string function given
+/// the type of its first argument.
+///
+/// If the input type is `LargeUtf8` or `LargeBinary` the return type is
+/// `$largeUtf8Type`,
+///
+/// If the input type is `Utf8` or `Binary` the return type is `$utf8Type`,
+macro_rules! get_optimal_return_type {
+ ($FUNC:ident, $largeUtf8Type:expr, $utf8Type:expr) => {
+ fn $FUNC(arg_type: &DataType, name: &str) -> Result<DataType> {
+ Ok(match arg_type {
+ // LargeBinary inputs are automatically coerced to Utf8
+ DataType::LargeUtf8 | DataType::LargeBinary => $largeUtf8Type,
+ // Binary inputs are automatically coerced to Utf8
+ DataType::Utf8 | DataType::Binary => $utf8Type,
+ DataType::Null => DataType::Null,
+ DataType::Dictionary(_, value_type) => match **value_type {
+ DataType::LargeUtf8 | DataType::LargeBinary =>
$largeUtf8Type,
+ DataType::Utf8 | DataType::Binary => $utf8Type,
+ DataType::Null => DataType::Null,
+ _ => {
+ return plan_err!(
+ "The {} function can only accept strings, but got
{:?}.",
+ name.to_uppercase(),
+ **value_type
+ );
+ }
+ },
+ data_type => {
+ return plan_err!(
+ "The {} function can only accept strings, but got
{:?}.",
+ name.to_uppercase(),
+ data_type
+ );
+ }
+ })
+ }
+ };
+}
+
+// `utf8_to_str_type`: returns either a Utf8 or LargeUtf8 based on the input type size.
+get_optimal_return_type!(utf8_to_str_type, DataType::LargeUtf8,
DataType::Utf8);
+
+/// applies a unary expression to `args[0]` that is expected to be downcastable to
+/// a `GenericStringArray` and returns a `GenericStringArray` (which may have a different offset)
+/// # Errors
+/// This function errors when:
+/// * the number of arguments is not 1
+/// * the first argument is not castable to a `GenericStringArray`
+pub(crate) fn unary_string_function<'a, T, O, F, R>(
+ args: &[&'a dyn Array],
+ op: F,
+ name: &str,
+) -> Result<GenericStringArray<O>>
+where
+ R: AsRef<str>,
+ O: OffsetSizeTrait,
+ T: OffsetSizeTrait,
+ F: Fn(&'a str) -> R,
+{
+ if args.len() != 1 {
+ return exec_err!(
+ "{:?} args were supplied but {} takes exactly one argument",
+ args.len(),
+ name
+ );
+ }
+
+ let string_array = as_generic_string_array::<T>(args[0])?;
+
+ // first map is the iterator, second is for the `Option<_>`
+ Ok(string_array.iter().map(|string| string.map(&op)).collect())
+}
+
+fn handle<'a, F, R>(args: &'a [ColumnarValue], op: F, name: &str) ->
Result<ColumnarValue>
+where
+ R: AsRef<str>,
+ F: Fn(&'a str) -> R,
+{
+ match &args[0] {
+ ColumnarValue::Array(a) => match a.data_type() {
+ DataType::Utf8 => {
+ Ok(ColumnarValue::Array(Arc::new(unary_string_function::<
+ i32,
+ i32,
+ _,
+ _,
+ >(
+ &[a.as_ref()], op, name
+ )?)))
+ }
+ DataType::LargeUtf8 => {
+ Ok(ColumnarValue::Array(Arc::new(unary_string_function::<
+ i64,
+ i64,
+ _,
+ _,
+ >(
+ &[a.as_ref()], op, name
+ )?)))
+ }
+ other => exec_err!("Unsupported data type {other:?} for function
{name}"),
+ },
+ ColumnarValue::Scalar(scalar) => match scalar {
+ ScalarValue::Utf8(a) => {
+ let result = a.as_ref().map(|x| (op)(x).as_ref().to_string());
+ Ok(ColumnarValue::Scalar(ScalarValue::Utf8(result)))
+ }
+ ScalarValue::LargeUtf8(a) => {
+ let result = a.as_ref().map(|x| (op)(x).as_ref().to_string());
+ Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(result)))
+ }
+ other => exec_err!("Unsupported data type {other:?} for function
{name}"),
+ },
+ }
+}
+
+// TODO: remove #[allow(dead_code)] after moving ltrim and rtrim
+enum TrimType {
+ #[allow(dead_code)]
+ Left,
+ #[allow(dead_code)]
+ Right,
+ Both,
+}
+
+impl Display for TrimType {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ match self {
+ TrimType::Left => write!(f, "ltrim"),
+ TrimType::Right => write!(f, "rtrim"),
+ TrimType::Both => write!(f, "btrim"),
+ }
+ }
+}
+
+fn general_trim<T: OffsetSizeTrait>(
+ args: &[ArrayRef],
+ trim_type: TrimType,
+) -> Result<ArrayRef> {
+ let func = match trim_type {
+ TrimType::Left => |input, pattern: &str| {
+ let pattern = pattern.chars().collect::<Vec<char>>();
+ str::trim_start_matches::<&[char]>(input, pattern.as_ref())
+ },
+ TrimType::Right => |input, pattern: &str| {
+ let pattern = pattern.chars().collect::<Vec<char>>();
+ str::trim_end_matches::<&[char]>(input, pattern.as_ref())
+ },
+ TrimType::Both => |input, pattern: &str| {
+ let pattern = pattern.chars().collect::<Vec<char>>();
+ str::trim_end_matches::<&[char]>(
+ str::trim_start_matches::<&[char]>(input, pattern.as_ref()),
+ pattern.as_ref(),
+ )
+ },
+ };
+
+ let string_array = as_generic_string_array::<T>(&args[0])?;
+
+ match args.len() {
+ 1 => {
+ let result = string_array
+ .iter()
+ .map(|string| string.map(|string: &str| func(string, " ")))
+ .collect::<GenericStringArray<T>>();
+
+ Ok(Arc::new(result) as ArrayRef)
+ }
+ 2 => {
+ let characters_array = as_generic_string_array::<T>(&args[1])?;
+
+ let result = string_array
+ .iter()
+ .zip(characters_array.iter())
+ .map(|(string, characters)| match (string, characters) {
+ (Some(string), Some(characters)) => Some(func(string,
characters)),
+ _ => None,
+ })
+ .collect::<GenericStringArray<T>>();
+
+ Ok(Arc::new(result) as ArrayRef)
+ }
+ other => {
+ exec_err!(
+ "{trim_type} was called with {other} arguments. It requires at
least 1 and at most 2."
+ )
+ }
+ }
+}
+
+pub(super) fn make_scalar_function<F>(
+ inner: F,
+ hints: Vec<Hint>,
+) -> ScalarFunctionImplementation
+where
+ F: Fn(&[ArrayRef]) -> Result<ArrayRef> + Sync + Send + 'static,
+{
+ Arc::new(move |args: &[ColumnarValue]| {
+ // first, identify if any of the arguments is an Array. If yes, store its `len`,
+ // as any scalar will need to be converted to an array of len `len`.
+ let len = args
+ .iter()
+ .fold(Option::<usize>::None, |acc, arg| match arg {
+ ColumnarValue::Scalar(_) => acc,
+ ColumnarValue::Array(a) => Some(a.len()),
+ });
+
+ let is_scalar = len.is_none();
+
+ let inferred_length = len.unwrap_or(1);
+ let args = args
+ .iter()
+ .zip(hints.iter().chain(std::iter::repeat(&Hint::Pad)))
+ .map(|(arg, hint)| {
+ // Decide on the length to expand this scalar to depending
+ // on the given hints.
+ let expansion_len = match hint {
+ Hint::AcceptsSingular => 1,
+ Hint::Pad => inferred_length,
+ };
+ arg.clone().into_array(expansion_len)
+ })
+ .collect::<Result<Vec<_>>>()?;
+
+ let result = (inner)(&args);
+ if is_scalar {
+ // If all inputs are scalar, keeps output as scalar
+ let result = result.and_then(|arr|
ScalarValue::try_from_array(&arr, 0));
+ result.map(ColumnarValue::Scalar)
+ } else {
+ result.map(ColumnarValue::Array)
+ }
+ })
+}
+
+mod starts_with;
+mod to_hex;
+mod trim;
+mod upper;
+// create UDFs
+make_udf_function!(starts_with::StartsWithFunc, STARTS_WITH, starts_with);
+make_udf_function!(to_hex::ToHexFunc, TO_HEX, to_hex);
+make_udf_function!(trim::TrimFunc, TRIM, trim);
+make_udf_function!(upper::UpperFunc, UPPER, upper);
+
+export_functions!(
+ (
+ starts_with,
+ arg1 arg2,
+ "Returns true if string starts with prefix."),
+ (
+ to_hex,
+ arg1,
+ "Converts an integer to a hexadecimal string."),
+ (trim,
+ arg1,
+ "removes all characters, space by default from the string"),
+ (upper,
+ arg1,
+ "Converts a string to uppercase."));
diff --git a/datafusion/functions/src/string/starts_with.rs b/datafusion/functions/src/string/starts_with.rs
new file mode 100644
index 0000000000..1fce399d1e
--- /dev/null
+++ b/datafusion/functions/src/string/starts_with.rs
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{ArrayRef, OffsetSizeTrait};
+use arrow::datatypes::DataType;
+use datafusion_common::{cast::as_generic_string_array, internal_err, Result};
+use datafusion_expr::ColumnarValue;
+use datafusion_expr::TypeSignature::*;
+use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+use std::sync::Arc;
+
+use crate::string::make_scalar_function;
+
+/// Returns true if string starts with prefix.
+/// starts_with('alphabet', 'alph') = 't'
+pub fn starts_with<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+ let left = as_generic_string_array::<T>(&args[0])?;
+ let right = as_generic_string_array::<T>(&args[1])?;
+
+ let result = arrow::compute::kernels::comparison::starts_with(left,
right)?;
+
+ Ok(Arc::new(result) as ArrayRef)
+}
+
+#[derive(Debug)]
+pub(super) struct StartsWithFunc {
+ signature: Signature,
+}
+impl StartsWithFunc {
+ pub fn new() -> Self {
+ use DataType::*;
+ Self {
+ signature: Signature::one_of(
+ vec![
+ Exact(vec![Utf8, Utf8]),
+ Exact(vec![Utf8, LargeUtf8]),
+ Exact(vec![LargeUtf8, Utf8]),
+ Exact(vec![LargeUtf8, LargeUtf8]),
+ ],
+ Volatility::Immutable,
+ ),
+ }
+ }
+}
+
+impl ScalarUDFImpl for StartsWithFunc {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ fn name(&self) -> &str {
+ "starts_with"
+ }
+
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+
+ fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+ use DataType::*;
+
+ Ok(Boolean)
+ }
+
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ match args[0].data_type() {
+ DataType::Utf8 => make_scalar_function(starts_with::<i32>,
vec![])(args),
+ DataType::LargeUtf8 => {
+ return make_scalar_function(starts_with::<i64>, vec![])(args);
+ }
+ _ => internal_err!("Unsupported data type"),
+ }
+ }
+}
diff --git a/datafusion/functions/src/string/to_hex.rs b/datafusion/functions/src/string/to_hex.rs
new file mode 100644
index 0000000000..4dfc84887d
--- /dev/null
+++ b/datafusion/functions/src/string/to_hex.rs
@@ -0,0 +1,155 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
+use arrow::datatypes::{
+ ArrowNativeType, ArrowPrimitiveType, DataType, Int32Type, Int64Type,
+};
+use datafusion_common::cast::as_primitive_array;
+use datafusion_common::Result;
+use datafusion_common::{exec_err, plan_err};
+use datafusion_expr::ColumnarValue;
+use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+use std::sync::Arc;
+
+use super::make_scalar_function;
+
+/// Converts the number to its equivalent hexadecimal representation.
+/// to_hex(2147483647) = '7fffffff'
+pub fn to_hex<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
+where
+ T::Native: OffsetSizeTrait,
+{
+ let integer_array = as_primitive_array::<T>(&args[0])?;
+
+ let result = integer_array
+ .iter()
+ .map(|integer| {
+ if let Some(value) = integer {
+ if let Some(value_usize) = value.to_usize() {
+ Ok(Some(format!("{value_usize:x}")))
+ } else if let Some(value_isize) = value.to_isize() {
+ Ok(Some(format!("{value_isize:x}")))
+ } else {
+ exec_err!("Unsupported data type {integer:?} for function
to_hex")
+ }
+ } else {
+ Ok(None)
+ }
+ })
+ .collect::<Result<GenericStringArray<i32>>>()?;
+
+ Ok(Arc::new(result) as ArrayRef)
+}
+
+#[derive(Debug)]
+pub(super) struct ToHexFunc {
+ signature: Signature,
+}
+impl ToHexFunc {
+ pub fn new() -> Self {
+ use DataType::*;
+ Self {
+ signature: Signature::uniform(1, vec![Int64],
Volatility::Immutable),
+ }
+ }
+}
+
+impl ScalarUDFImpl for ToHexFunc {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ fn name(&self) -> &str {
+ "to_hex"
+ }
+
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+ use DataType::*;
+
+ Ok(match arg_types[0] {
+ Int8 | Int16 | Int32 | Int64 => Utf8,
+ _ => {
+ return plan_err!("The to_hex function can only accept
integers.");
+ }
+ })
+ }
+
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ match args[0].data_type() {
+ DataType::Int32 => make_scalar_function(to_hex::<Int32Type>,
vec![])(args),
+ DataType::Int64 => make_scalar_function(to_hex::<Int64Type>,
vec![])(args),
+ other => exec_err!("Unsupported data type {other:?} for function
to_hex"),
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use arrow::{
+ array::{Int32Array, StringArray},
+ datatypes::Int32Type,
+ };
+
+ use datafusion_common::cast::as_string_array;
+
+ use super::*;
+
+ #[test]
+ // Test to_hex function for zero
+ fn to_hex_zero() -> Result<()> {
+ let array = vec![0].into_iter().collect::<Int32Array>();
+ let array_ref = Arc::new(array);
+ let hex_value_arc = to_hex::<Int32Type>(&[array_ref])?;
+ let hex_value = as_string_array(&hex_value_arc)?;
+ let expected = StringArray::from(vec![Some("0")]);
+ assert_eq!(&expected, hex_value);
+
+ Ok(())
+ }
+
+ #[test]
+ // Test to_hex function for positive number
+ fn to_hex_positive_number() -> Result<()> {
+ let array = vec![100].into_iter().collect::<Int32Array>();
+ let array_ref = Arc::new(array);
+ let hex_value_arc = to_hex::<Int32Type>(&[array_ref])?;
+ let hex_value = as_string_array(&hex_value_arc)?;
+ let expected = StringArray::from(vec![Some("64")]);
+ assert_eq!(&expected, hex_value);
+
+ Ok(())
+ }
+
+ #[test]
+ // Test to_hex function for negative number
+ fn to_hex_negative_number() -> Result<()> {
+ let array = vec![-1].into_iter().collect::<Int32Array>();
+ let array_ref = Arc::new(array);
+ let hex_value_arc = to_hex::<Int32Type>(&[array_ref])?;
+ let hex_value = as_string_array(&hex_value_arc)?;
+ let expected = StringArray::from(vec![Some("ffffffffffffffff")]);
+ assert_eq!(&expected, hex_value);
+
+ Ok(())
+ }
+}
diff --git a/datafusion/functions/src/string/trim.rs b/datafusion/functions/src/string/trim.rs
new file mode 100644
index 0000000000..e04a171722
--- /dev/null
+++ b/datafusion/functions/src/string/trim.rs
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{ArrayRef, OffsetSizeTrait};
+use arrow::datatypes::DataType;
+use datafusion_common::exec_err;
+use datafusion_common::Result;
+use datafusion_expr::ColumnarValue;
+use datafusion_expr::TypeSignature::*;
+use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+
+use crate::string::{make_scalar_function, utf8_to_str_type};
+
+use super::{general_trim, TrimType};
+
+/// Returns the longest string with leading and trailing characters removed. If the characters are not specified, whitespace is removed.
+/// btrim('xyxtrimyyx', 'xyz') = 'trim'
+pub fn btrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+ general_trim::<T>(args, TrimType::Both)
+}
+
+#[derive(Debug)]
+pub(super) struct TrimFunc {
+ signature: Signature,
+}
+
+impl TrimFunc {
+ pub fn new() -> Self {
+ use DataType::*;
+ Self {
+ signature: Signature::one_of(
+ vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])],
+ Volatility::Immutable,
+ ),
+ }
+ }
+}
+
+impl ScalarUDFImpl for TrimFunc {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ fn name(&self) -> &str {
+ "trim"
+ }
+
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+ utf8_to_str_type(&arg_types[0], "trim")
+ }
+
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ match args[0].data_type() {
+ DataType::Utf8 => make_scalar_function(btrim::<i32>, vec![])(args),
+ DataType::LargeUtf8 => make_scalar_function(btrim::<i64>,
vec![])(args),
+ other => exec_err!("Unsupported data type {other:?} for function
trim"),
+ }
+ }
+}
diff --git a/datafusion/functions/src/string/upper.rs b/datafusion/functions/src/string/upper.rs
new file mode 100644
index 0000000000..ed41487699
--- /dev/null
+++ b/datafusion/functions/src/string/upper.rs
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::datatypes::DataType;
+use datafusion_common::Result;
+use datafusion_expr::ColumnarValue;
+use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+
+use crate::string::utf8_to_str_type;
+
+use super::handle;
+
+#[derive(Debug)]
+pub(super) struct UpperFunc {
+ signature: Signature,
+}
+
+impl UpperFunc {
+ pub fn new() -> Self {
+ use DataType::*;
+ Self {
+ signature: Signature::uniform(
+ 1,
+ vec![Utf8, LargeUtf8],
+ Volatility::Immutable,
+ ),
+ }
+ }
+}
+
+impl ScalarUDFImpl for UpperFunc {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ fn name(&self) -> &str {
+ "upper"
+ }
+
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+ utf8_to_str_type(&arg_types[0], "upper")
+ }
+
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ handle(args, |string| string.to_uppercase(), "upper")
+ }
+}
diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs
index e76e7f56dc..f2c93c3ec1 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -447,17 +447,6 @@ pub fn create_physical_fun(
exec_err!("Unsupported data type {other:?} for function
split_part")
}
}),
- BuiltinScalarFunction::StartsWith => Arc::new(|args| match
args[0].data_type() {
- DataType::Utf8 => {
-
make_scalar_function_inner(string_expressions::starts_with::<i32>)(args)
- }
- DataType::LargeUtf8 => {
-
make_scalar_function_inner(string_expressions::starts_with::<i64>)(args)
- }
- other => {
- exec_err!("Unsupported data type {other:?} for function
starts_with")
- }
- }),
BuiltinScalarFunction::EndsWith => Arc::new(|args| match
args[0].data_type() {
DataType::Utf8 => {
make_scalar_function_inner(string_expressions::ends_with::<i32>)(args)
@@ -497,15 +486,6 @@ pub fn create_physical_fun(
}
other => exec_err!("Unsupported data type {other:?} for function
substr"),
}),
- BuiltinScalarFunction::ToHex => Arc::new(|args| match
args[0].data_type() {
- DataType::Int32 => {
-
make_scalar_function_inner(string_expressions::to_hex::<Int32Type>)(args)
- }
- DataType::Int64 => {
-
make_scalar_function_inner(string_expressions::to_hex::<Int64Type>)(args)
- }
- other => exec_err!("Unsupported data type {other:?} for function
to_hex"),
- }),
BuiltinScalarFunction::Translate => Arc::new(|args| match
args[0].data_type() {
DataType::Utf8 => {
let func = invoke_if_unicode_expressions_feature_flag!(
@@ -527,16 +507,6 @@ pub fn create_physical_fun(
exec_err!("Unsupported data type {other:?} for function
translate")
}
}),
- BuiltinScalarFunction::Trim => Arc::new(|args| match
args[0].data_type() {
- DataType::Utf8 => {
-
make_scalar_function_inner(string_expressions::btrim::<i32>)(args)
- }
- DataType::LargeUtf8 => {
-
make_scalar_function_inner(string_expressions::btrim::<i64>)(args)
- }
- other => exec_err!("Unsupported data type {other:?} for function
trim"),
- }),
- BuiltinScalarFunction::Upper => Arc::new(string_expressions::upper),
BuiltinScalarFunction::Uuid => Arc::new(string_expressions::uuid),
BuiltinScalarFunction::OverLay => Arc::new(|args| match
args[0].data_type() {
DataType::Utf8 => {
@@ -1797,38 +1767,6 @@ mod tests {
Utf8,
StringArray
);
- test_function!(
- StartsWith,
- &[lit("alphabet"), lit("alph"),],
- Ok(Some(true)),
- bool,
- Boolean,
- BooleanArray
- );
- test_function!(
- StartsWith,
- &[lit("alphabet"), lit("blph"),],
- Ok(Some(false)),
- bool,
- Boolean,
- BooleanArray
- );
- test_function!(
- StartsWith,
- &[lit(ScalarValue::Utf8(None)), lit("alph"),],
- Ok(None),
- bool,
- Boolean,
- BooleanArray
- );
- test_function!(
- StartsWith,
- &[lit("alphabet"), lit(ScalarValue::Utf8(None)),],
- Ok(None),
- bool,
- Boolean,
- BooleanArray
- );
test_function!(
EndsWith,
&[lit("alphabet"), lit("alph"),],
@@ -2149,62 +2087,6 @@ mod tests {
Utf8,
StringArray
);
- test_function!(
- Trim,
- &[lit(" trim ")],
- Ok(Some("trim")),
- &str,
- Utf8,
- StringArray
- );
- test_function!(
- Trim,
- &[lit("trim ")],
- Ok(Some("trim")),
- &str,
- Utf8,
- StringArray
- );
- test_function!(
- Trim,
- &[lit(" trim")],
- Ok(Some("trim")),
- &str,
- Utf8,
- StringArray
- );
- test_function!(
- Trim,
- &[lit(ScalarValue::Utf8(None))],
- Ok(None),
- &str,
- Utf8,
- StringArray
- );
- test_function!(
- Upper,
- &[lit("upper")],
- Ok(Some("UPPER")),
- &str,
- Utf8,
- StringArray
- );
- test_function!(
- Upper,
- &[lit("UPPER")],
- Ok(Some("UPPER")),
- &str,
- Utf8,
- StringArray
- );
- test_function!(
- Upper,
- &[lit(ScalarValue::Utf8(None))],
- Ok(None),
- &str,
- Utf8,
- StringArray
- );
Ok(())
}
diff --git a/datafusion/physical-expr/src/string_expressions.rs
b/datafusion/physical-expr/src/string_expressions.rs
index ace7ef2888..86c0092a22 100644
--- a/datafusion/physical-expr/src/string_expressions.rs
+++ b/datafusion/physical-expr/src/string_expressions.rs
@@ -32,16 +32,14 @@ use arrow::{
Array, ArrayRef, GenericStringArray, Int32Array, Int64Array,
OffsetSizeTrait,
StringArray,
},
- datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType},
+ datatypes::DataType,
};
use uuid::Uuid;
use datafusion_common::utils::datafusion_strsim;
use datafusion_common::Result;
use datafusion_common::{
- cast::{
- as_generic_string_array, as_int64_array, as_primitive_array,
as_string_array,
- },
+ cast::{as_generic_string_array, as_int64_array, as_string_array},
exec_err, ScalarValue,
};
use datafusion_expr::ColumnarValue;
@@ -526,34 +524,6 @@ pub fn ends_with<T: OffsetSizeTrait>(args: &[ArrayRef]) ->
Result<ArrayRef> {
Ok(Arc::new(result) as ArrayRef)
}
-/// Converts the number to its equivalent hexadecimal representation.
-/// to_hex(2147483647) = '7fffffff'
-pub fn to_hex<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
-where
- T::Native: OffsetSizeTrait,
-{
- let integer_array = as_primitive_array::<T>(&args[0])?;
-
- let result = integer_array
- .iter()
- .map(|integer| {
- if let Some(value) = integer {
- if let Some(value_usize) = value.to_usize() {
- Ok(Some(format!("{value_usize:x}")))
- } else if let Some(value_isize) = value.to_isize() {
- Ok(Some(format!("{value_isize:x}")))
- } else {
- exec_err!("Unsupported data type {integer:?} for function
to_hex")
- }
- } else {
- Ok(None)
- }
- })
- .collect::<Result<GenericStringArray<i32>>>()?;
-
- Ok(Arc::new(result) as ArrayRef)
-}
-
/// Converts the string to all upper case.
/// upper('tom') = 'TOM'
pub fn upper(args: &[ColumnarValue]) -> Result<ColumnarValue> {
@@ -709,54 +679,13 @@ pub fn levenshtein<T: OffsetSizeTrait>(args: &[ArrayRef])
-> Result<ArrayRef> {
#[cfg(test)]
mod tests {
- use arrow::{array::Int32Array, datatypes::Int32Type};
+ use arrow::array::Int32Array;
use arrow_array::Int64Array;
use datafusion_common::cast::as_int32_array;
- use crate::string_expressions;
-
use super::*;
- #[test]
- // Test to_hex function for zero
- fn to_hex_zero() -> Result<()> {
- let array = vec![0].into_iter().collect::<Int32Array>();
- let array_ref = Arc::new(array);
- let hex_value_arc =
string_expressions::to_hex::<Int32Type>(&[array_ref])?;
- let hex_value = as_string_array(&hex_value_arc)?;
- let expected = StringArray::from(vec![Some("0")]);
- assert_eq!(&expected, hex_value);
-
- Ok(())
- }
-
- #[test]
- // Test to_hex function for positive number
- fn to_hex_positive_number() -> Result<()> {
- let array = vec![100].into_iter().collect::<Int32Array>();
- let array_ref = Arc::new(array);
- let hex_value_arc =
string_expressions::to_hex::<Int32Type>(&[array_ref])?;
- let hex_value = as_string_array(&hex_value_arc)?;
- let expected = StringArray::from(vec![Some("64")]);
- assert_eq!(&expected, hex_value);
-
- Ok(())
- }
-
- #[test]
- // Test to_hex function for negative number
- fn to_hex_negative_number() -> Result<()> {
- let array = vec![-1].into_iter().collect::<Int32Array>();
- let array_ref = Arc::new(array);
- let hex_value_arc =
string_expressions::to_hex::<Int32Type>(&[array_ref])?;
- let hex_value = as_string_array(&hex_value_arc)?;
- let expected = StringArray::from(vec![Some("ffffffffffffffff")]);
- assert_eq!(&expected, hex_value);
-
- Ok(())
- }
-
#[test]
fn to_overlay() -> Result<()> {
let string =
diff --git a/datafusion/proto/proto/datafusion.proto
b/datafusion/proto/proto/datafusion.proto
index 10f79a2b8c..c009682d5a 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -592,18 +592,18 @@ enum ScalarFunction {
// 48 was SHA384
// 49 was SHA512
SplitPart = 50;
- StartsWith = 51;
+ // StartsWith = 51;
Strpos = 52;
Substr = 53;
- ToHex = 54;
+ // ToHex = 54;
// 55 was ToTimestamp
// 56 was ToTimestampMillis
// 57 was ToTimestampMicros
// 58 was ToTimestampSeconds
// 59 was Now
Translate = 60;
- Trim = 61;
- Upper = 62;
+ // Trim = 61;
+ // Upper = 62;
Coalesce = 63;
Power = 64;
// 65 was StructFun
diff --git a/datafusion/proto/src/generated/pbjson.rs
b/datafusion/proto/src/generated/pbjson.rs
index 7757a64ef3..58683dba6d 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -22949,13 +22949,9 @@ impl serde::Serialize for ScalarFunction {
Self::Rpad => "Rpad",
Self::Rtrim => "Rtrim",
Self::SplitPart => "SplitPart",
- Self::StartsWith => "StartsWith",
Self::Strpos => "Strpos",
Self::Substr => "Substr",
- Self::ToHex => "ToHex",
Self::Translate => "Translate",
- Self::Trim => "Trim",
- Self::Upper => "Upper",
Self::Coalesce => "Coalesce",
Self::Power => "Power",
Self::Atan2 => "Atan2",
@@ -23027,13 +23023,9 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Rpad",
"Rtrim",
"SplitPart",
- "StartsWith",
"Strpos",
"Substr",
- "ToHex",
"Translate",
- "Trim",
- "Upper",
"Coalesce",
"Power",
"Atan2",
@@ -23134,13 +23126,9 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Rpad" => Ok(ScalarFunction::Rpad),
"Rtrim" => Ok(ScalarFunction::Rtrim),
"SplitPart" => Ok(ScalarFunction::SplitPart),
- "StartsWith" => Ok(ScalarFunction::StartsWith),
"Strpos" => Ok(ScalarFunction::Strpos),
"Substr" => Ok(ScalarFunction::Substr),
- "ToHex" => Ok(ScalarFunction::ToHex),
"Translate" => Ok(ScalarFunction::Translate),
- "Trim" => Ok(ScalarFunction::Trim),
- "Upper" => Ok(ScalarFunction::Upper),
"Coalesce" => Ok(ScalarFunction::Coalesce),
"Power" => Ok(ScalarFunction::Power),
"Atan2" => Ok(ScalarFunction::Atan2),
diff --git a/datafusion/proto/src/generated/prost.rs
b/datafusion/proto/src/generated/prost.rs
index ab0ddb14eb..8eabb3b186 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2891,18 +2891,18 @@ pub enum ScalarFunction {
/// 48 was SHA384
/// 49 was SHA512
SplitPart = 50,
- StartsWith = 51,
+ /// StartsWith = 51;
Strpos = 52,
Substr = 53,
- ToHex = 54,
+ /// ToHex = 54;
/// 55 was ToTimestamp
/// 56 was ToTimestampMillis
/// 57 was ToTimestampMicros
/// 58 was ToTimestampSeconds
/// 59 was Now
Translate = 60,
- Trim = 61,
- Upper = 62,
+ /// Trim = 61;
+ /// Upper = 62;
Coalesce = 63,
Power = 64,
/// 65 was StructFun
@@ -3022,13 +3022,9 @@ impl ScalarFunction {
ScalarFunction::Rpad => "Rpad",
ScalarFunction::Rtrim => "Rtrim",
ScalarFunction::SplitPart => "SplitPart",
- ScalarFunction::StartsWith => "StartsWith",
ScalarFunction::Strpos => "Strpos",
ScalarFunction::Substr => "Substr",
- ScalarFunction::ToHex => "ToHex",
ScalarFunction::Translate => "Translate",
- ScalarFunction::Trim => "Trim",
- ScalarFunction::Upper => "Upper",
ScalarFunction::Coalesce => "Coalesce",
ScalarFunction::Power => "Power",
ScalarFunction::Atan2 => "Atan2",
@@ -3094,13 +3090,9 @@ impl ScalarFunction {
"Rpad" => Some(Self::Rpad),
"Rtrim" => Some(Self::Rtrim),
"SplitPart" => Some(Self::SplitPart),
- "StartsWith" => Some(Self::StartsWith),
"Strpos" => Some(Self::Strpos),
"Substr" => Some(Self::Substr),
- "ToHex" => Some(Self::ToHex),
"Translate" => Some(Self::Translate),
- "Trim" => Some(Self::Trim),
- "Upper" => Some(Self::Upper),
"Coalesce" => Some(Self::Coalesce),
"Power" => Some(Self::Power),
"Atan2" => Some(Self::Atan2),
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs
b/datafusion/proto/src/logical_plan/from_proto.rs
index 8581156e2b..64ceb37d29 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -57,10 +57,9 @@ use datafusion_expr::{
logical_plan::{PlanType, StringifiedPlan},
lower, lpad, ltrim, nanvl, octet_length, overlay, pi, power, radians,
random, repeat,
replace, reverse, right, round, rpad, rtrim, signum, sin, sinh,
split_part, sqrt,
- starts_with, strpos, substr, substr_index, substring, to_hex, translate,
trim, trunc,
- upper, uuid, AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction,
- BuiltinScalarFunction, Case, Cast, Expr, GetFieldAccess, GetIndexedField,
- GroupingSet,
+ strpos, substr, substr_index, substring, translate, trunc, uuid,
AggregateFunction,
+ Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, Case,
Cast, Expr,
+ GetFieldAccess, GetIndexedField, GroupingSet,
GroupingSet::GroupingSets,
JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame,
WindowFrameBound,
WindowFrameUnits,
@@ -462,8 +461,6 @@ impl From<&protobuf::ScalarFunction> for
BuiltinScalarFunction {
ScalarFunction::OctetLength => Self::OctetLength,
ScalarFunction::Concat => Self::Concat,
ScalarFunction::Lower => Self::Lower,
- ScalarFunction::Upper => Self::Upper,
- ScalarFunction::Trim => Self::Trim,
ScalarFunction::Ltrim => Self::Ltrim,
ScalarFunction::Rtrim => Self::Rtrim,
ScalarFunction::Log2 => Self::Log2,
@@ -485,10 +482,8 @@ impl From<&protobuf::ScalarFunction> for
BuiltinScalarFunction {
ScalarFunction::Right => Self::Right,
ScalarFunction::Rpad => Self::Rpad,
ScalarFunction::SplitPart => Self::SplitPart,
- ScalarFunction::StartsWith => Self::StartsWith,
ScalarFunction::Strpos => Self::Strpos,
ScalarFunction::Substr => Self::Substr,
- ScalarFunction::ToHex => Self::ToHex,
ScalarFunction::Uuid => Self::Uuid,
ScalarFunction::Translate => Self::Translate,
ScalarFunction::Coalesce => Self::Coalesce,
@@ -1444,10 +1439,6 @@ pub fn parse_expr(
ScalarFunction::Lower => {
Ok(lower(parse_expr(&args[0], registry, codec)?))
}
- ScalarFunction::Upper => {
- Ok(upper(parse_expr(&args[0], registry, codec)?))
- }
- ScalarFunction::Trim => Ok(trim(parse_expr(&args[0], registry,
codec)?)),
ScalarFunction::Ltrim => {
Ok(ltrim(parse_expr(&args[0], registry, codec)?))
}
@@ -1532,10 +1523,6 @@ pub fn parse_expr(
parse_expr(&args[1], registry, codec)?,
parse_expr(&args[2], registry, codec)?,
)),
- ScalarFunction::StartsWith => Ok(starts_with(
- parse_expr(&args[0], registry, codec)?,
- parse_expr(&args[1], registry, codec)?,
- )),
ScalarFunction::EndsWith => Ok(ends_with(
parse_expr(&args[0], registry, codec)?,
parse_expr(&args[1], registry, codec)?,
@@ -1563,9 +1550,6 @@ pub fn parse_expr(
parse_expr(&args[0], registry, codec)?,
parse_expr(&args[1], registry, codec)?,
)),
- ScalarFunction::ToHex => {
- Ok(to_hex(parse_expr(&args[0], registry, codec)?))
- }
ScalarFunction::Translate => Ok(translate(
parse_expr(&args[0], registry, codec)?,
parse_expr(&args[1], registry, codec)?,
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs
b/datafusion/proto/src/logical_plan/to_proto.rs
index 05a29ff6d4..89bd93550a 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1481,8 +1481,6 @@ impl TryFrom<&BuiltinScalarFunction> for
protobuf::ScalarFunction {
BuiltinScalarFunction::OctetLength => Self::OctetLength,
BuiltinScalarFunction::Concat => Self::Concat,
BuiltinScalarFunction::Lower => Self::Lower,
- BuiltinScalarFunction::Upper => Self::Upper,
- BuiltinScalarFunction::Trim => Self::Trim,
BuiltinScalarFunction::Ltrim => Self::Ltrim,
BuiltinScalarFunction::Rtrim => Self::Rtrim,
BuiltinScalarFunction::Log2 => Self::Log2,
@@ -1505,10 +1503,8 @@ impl TryFrom<&BuiltinScalarFunction> for
protobuf::ScalarFunction {
BuiltinScalarFunction::Right => Self::Right,
BuiltinScalarFunction::Rpad => Self::Rpad,
BuiltinScalarFunction::SplitPart => Self::SplitPart,
- BuiltinScalarFunction::StartsWith => Self::StartsWith,
BuiltinScalarFunction::Strpos => Self::Strpos,
BuiltinScalarFunction::Substr => Self::Substr,
- BuiltinScalarFunction::ToHex => Self::ToHex,
BuiltinScalarFunction::Translate => Self::Translate,
BuiltinScalarFunction::Coalesce => Self::Coalesce,
BuiltinScalarFunction::Pi => Self::Pi,
diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
index 5e9c0623a2..c34b42193c 100644
--- a/datafusion/sql/src/expr/mod.rs
+++ b/datafusion/sql/src/expr/mod.rs
@@ -747,7 +747,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
Some(TrimWhereField::Leading) => BuiltinScalarFunction::Ltrim,
Some(TrimWhereField::Trailing) => BuiltinScalarFunction::Rtrim,
Some(TrimWhereField::Both) => BuiltinScalarFunction::Btrim,
- None => BuiltinScalarFunction::Trim,
+ None => BuiltinScalarFunction::Btrim,
};
let arg = self.sql_expr_to_logical_expr(expr, schema,
planner_context)?;