This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 1e4ddb6d86 Move repeat, replace, split_part to datafusion_functions (#9784)
1e4ddb6d86 is described below

commit 1e4ddb6d86328cb6596bb50da9ccc654f19a83ea
Author: Bruce Ritchie <[email protected]>
AuthorDate: Sun Mar 24 14:28:53 2024 -0400

    Move repeat, replace, split_part to datafusion_functions (#9784)
    
    * Fix to_timestamp benchmark
    
    * Remove reference to simd and nightly build, as simd is no longer an
      available feature in DataFusion and building with nightly may not be a
      good recommendation when getting started.
    
    * Fixed missing trim() function.
    
    * Move repeat, replace, split_part to datafusion_functions
---
 datafusion/expr/src/built_in_function.rs           |  44 +-----
 datafusion/expr/src/expr_fn.rs                     |   6 -
 datafusion/functions/src/string/mod.rs             |  24 +++
 datafusion/functions/src/string/repeat.rs          | 144 +++++++++++++++++
 datafusion/functions/src/string/replace.rs         |  97 ++++++++++++
 datafusion/functions/src/string/split_part.rs      | 170 +++++++++++++++++++++
 datafusion/physical-expr/src/functions.rs          | 122 +++------------
 datafusion/physical-expr/src/string_expressions.rs |  67 --------
 datafusion/proto/proto/datafusion.proto            |   6 +-
 datafusion/proto/src/generated/pbjson.rs           |   9 --
 datafusion/proto/src/generated/prost.rs            |  12 +-
 datafusion/proto/src/logical_plan/from_proto.rs    |  26 +---
 datafusion/proto/src/logical_plan/to_proto.rs      |   3 -
 13 files changed, 469 insertions(+), 261 deletions(-)

diff --git a/datafusion/expr/src/built_in_function.rs 
b/datafusion/expr/src/built_in_function.rs
index 1904d58cfc..b3f17ae3c2 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -123,18 +123,12 @@ pub enum BuiltinScalarFunction {
     Lpad,
     /// random
     Random,
-    /// repeat
-    Repeat,
-    /// replace
-    Replace,
     /// reverse
     Reverse,
     /// right
     Right,
     /// rpad
     Rpad,
-    /// split_part
-    SplitPart,
     /// strpos
     Strpos,
     /// substr
@@ -238,12 +232,9 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::Left => Volatility::Immutable,
             BuiltinScalarFunction::Lpad => Volatility::Immutable,
             BuiltinScalarFunction::Radians => Volatility::Immutable,
-            BuiltinScalarFunction::Repeat => Volatility::Immutable,
-            BuiltinScalarFunction::Replace => Volatility::Immutable,
             BuiltinScalarFunction::Reverse => Volatility::Immutable,
             BuiltinScalarFunction::Right => Volatility::Immutable,
             BuiltinScalarFunction::Rpad => Volatility::Immutable,
-            BuiltinScalarFunction::SplitPart => Volatility::Immutable,
             BuiltinScalarFunction::Strpos => Volatility::Immutable,
             BuiltinScalarFunction::Substr => Volatility::Immutable,
             BuiltinScalarFunction::Translate => Volatility::Immutable,
@@ -293,12 +284,6 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::Lpad => 
utf8_to_str_type(&input_expr_types[0], "lpad"),
             BuiltinScalarFunction::Pi => Ok(Float64),
             BuiltinScalarFunction::Random => Ok(Float64),
-            BuiltinScalarFunction::Repeat => {
-                utf8_to_str_type(&input_expr_types[0], "repeat")
-            }
-            BuiltinScalarFunction::Replace => {
-                utf8_to_str_type(&input_expr_types[0], "replace")
-            }
             BuiltinScalarFunction::Reverse => {
                 utf8_to_str_type(&input_expr_types[0], "reverse")
             }
@@ -306,9 +291,6 @@ impl BuiltinScalarFunction {
                 utf8_to_str_type(&input_expr_types[0], "right")
             }
             BuiltinScalarFunction::Rpad => 
utf8_to_str_type(&input_expr_types[0], "rpad"),
-            BuiltinScalarFunction::SplitPart => {
-                utf8_to_str_type(&input_expr_types[0], "split_part")
-            }
             BuiltinScalarFunction::EndsWith => Ok(Boolean),
             BuiltinScalarFunction::Strpos => {
                 utf8_to_int_type(&input_expr_types[0], "strpos/instr/position")
@@ -417,21 +399,12 @@ impl BuiltinScalarFunction {
                     self.volatility(),
                 )
             }
-            BuiltinScalarFunction::Left
-            | BuiltinScalarFunction::Repeat
-            | BuiltinScalarFunction::Right => Signature::one_of(
-                vec![Exact(vec![Utf8, Int64]), Exact(vec![LargeUtf8, Int64])],
-                self.volatility(),
-            ),
-            BuiltinScalarFunction::SplitPart => Signature::one_of(
-                vec![
-                    Exact(vec![Utf8, Utf8, Int64]),
-                    Exact(vec![LargeUtf8, Utf8, Int64]),
-                    Exact(vec![Utf8, LargeUtf8, Int64]),
-                    Exact(vec![LargeUtf8, LargeUtf8, Int64]),
-                ],
-                self.volatility(),
-            ),
+            BuiltinScalarFunction::Left | BuiltinScalarFunction::Right => {
+                Signature::one_of(
+                    vec![Exact(vec![Utf8, Int64]), Exact(vec![LargeUtf8, 
Int64])],
+                    self.volatility(),
+                )
+            }
 
             BuiltinScalarFunction::EndsWith | BuiltinScalarFunction::Strpos => 
{
                 Signature::one_of(
@@ -467,7 +440,7 @@ impl BuiltinScalarFunction {
                 self.volatility(),
             ),
 
-            BuiltinScalarFunction::Replace | BuiltinScalarFunction::Translate 
=> {
+            BuiltinScalarFunction::Translate => {
                 Signature::one_of(vec![Exact(vec![Utf8, Utf8, Utf8])], 
self.volatility())
             }
             BuiltinScalarFunction::Pi => Signature::exact(vec![], 
self.volatility()),
@@ -637,12 +610,9 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::InitCap => &["initcap"],
             BuiltinScalarFunction::Left => &["left"],
             BuiltinScalarFunction::Lpad => &["lpad"],
-            BuiltinScalarFunction::Repeat => &["repeat"],
-            BuiltinScalarFunction::Replace => &["replace"],
             BuiltinScalarFunction::Reverse => &["reverse"],
             BuiltinScalarFunction::Right => &["right"],
             BuiltinScalarFunction::Rpad => &["rpad"],
-            BuiltinScalarFunction::SplitPart => &["split_part"],
             BuiltinScalarFunction::Strpos => &["strpos", "instr", "position"],
             BuiltinScalarFunction::Substr => &["substr"],
             BuiltinScalarFunction::Translate => &["translate"],
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 60db21e5f5..f75d886967 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -598,11 +598,8 @@ scalar_expr!(
 );
 scalar_expr!(InitCap, initcap, string, "converts the first letter of each word 
in `string` in uppercase and the remaining characters in lowercase");
 scalar_expr!(Left, left, string n, "returns the first `n` characters in the 
`string`");
-scalar_expr!(Replace, replace, string from to, "replaces all occurrences of 
`from` with `to` in the `string`");
-scalar_expr!(Repeat, repeat, string n, "repeats the `string` to `n` times");
 scalar_expr!(Reverse, reverse, string, "reverses the `string`");
 scalar_expr!(Right, right, string n, "returns the last `n` characters in the 
`string`");
-scalar_expr!(SplitPart, split_part, string delimiter index, "splits a string 
based on a delimiter and picks out the desired field based on the index.");
 scalar_expr!(EndsWith, ends_with, string suffix, "whether the `string` ends 
with the `suffix`");
 scalar_expr!(Strpos, strpos, string substring, "finds the position from where 
the `substring` matches the `string`");
 scalar_expr!(Substr, substr, string position, "substring from the `position` 
to the end");
@@ -1056,13 +1053,10 @@ mod test {
         test_scalar_expr!(Left, left, string, count);
         test_nary_scalar_expr!(Lpad, lpad, string, count);
         test_nary_scalar_expr!(Lpad, lpad, string, count, characters);
-        test_scalar_expr!(Replace, replace, string, from, to);
-        test_scalar_expr!(Repeat, repeat, string, count);
         test_scalar_expr!(Reverse, reverse, string);
         test_scalar_expr!(Right, right, string, count);
         test_nary_scalar_expr!(Rpad, rpad, string, count);
         test_nary_scalar_expr!(Rpad, rpad, string, count, characters);
-        test_scalar_expr!(SplitPart, split_part, expr, delimiter, index);
         test_scalar_expr!(EndsWith, ends_with, string, characters);
         test_scalar_expr!(Strpos, strpos, string, substring);
         test_scalar_expr!(Substr, substr, string, position);
diff --git a/datafusion/functions/src/string/mod.rs 
b/datafusion/functions/src/string/mod.rs
index 165a7c6604..d2b9fb2da8 100644
--- a/datafusion/functions/src/string/mod.rs
+++ b/datafusion/functions/src/string/mod.rs
@@ -29,7 +29,10 @@ mod lower;
 mod ltrim;
 mod octet_length;
 mod overlay;
+mod repeat;
+mod replace;
 mod rtrim;
+mod split_part;
 mod starts_with;
 mod to_hex;
 mod upper;
@@ -43,8 +46,11 @@ make_udf_function!(ltrim::LtrimFunc, LTRIM, ltrim);
 make_udf_function!(lower::LowerFunc, LOWER, lower);
 make_udf_function!(octet_length::OctetLengthFunc, OCTET_LENGTH, octet_length);
 make_udf_function!(overlay::OverlayFunc, OVERLAY, overlay);
+make_udf_function!(repeat::RepeatFunc, REPEAT, repeat);
+make_udf_function!(replace::ReplaceFunc, REPLACE, replace);
 make_udf_function!(rtrim::RtrimFunc, RTRIM, rtrim);
 make_udf_function!(starts_with::StartsWithFunc, STARTS_WITH, starts_with);
+make_udf_function!(split_part::SplitPartFunc, SPLIT_PART, split_part);
 make_udf_function!(to_hex::ToHexFunc, TO_HEX, to_hex);
 make_udf_function!(upper::UpperFunc, UPPER, upper);
 make_udf_function!(uuid::UuidFunc, UUID, uuid);
@@ -87,11 +93,26 @@ pub mod expr_fn {
         super::overlay().call(args)
     }
 
+    #[doc = "Repeats the `string` to `n` times"]
+    pub fn repeat(string: Expr, n: Expr) -> Expr {
+        super::repeat().call(vec![string, n])
+    }
+
+    #[doc = "Replaces all occurrences of `from` with `to` in the `string`"]
+    pub fn replace(string: Expr, from: Expr, to: Expr) -> Expr {
+        super::replace().call(vec![string, from, to])
+    }
+
     #[doc = "Removes all characters, spaces by default, from the end of a 
string"]
     pub fn rtrim(args: Vec<Expr>) -> Expr {
         super::rtrim().call(args)
     }
 
+    #[doc = "Splits a string based on a delimiter and picks out the desired 
field based on the index."]
+    pub fn split_part(string: Expr, delimiter: Expr, index: Expr) -> Expr {
+        super::split_part().call(vec![string, delimiter, index])
+    }
+
     #[doc = "Returns true if string starts with prefix."]
     pub fn starts_with(arg1: Expr, arg2: Expr) -> Expr {
         super::starts_with().call(vec![arg1, arg2])
@@ -128,7 +149,10 @@ pub fn functions() -> Vec<Arc<ScalarUDF>> {
         ltrim(),
         octet_length(),
         overlay(),
+        repeat(),
+        replace(),
         rtrim(),
+        split_part(),
         starts_with(),
         to_hex(),
         upper(),
diff --git a/datafusion/functions/src/string/repeat.rs 
b/datafusion/functions/src/string/repeat.rs
new file mode 100644
index 0000000000..83bc929cb9
--- /dev/null
+++ b/datafusion/functions/src/string/repeat.rs
@@ -0,0 +1,144 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+use std::sync::Arc;
+
+use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
+use arrow::datatypes::DataType;
+
+use datafusion_common::cast::{as_generic_string_array, as_int64_array};
+use datafusion_common::{exec_err, Result};
+use datafusion_expr::TypeSignature::*;
+use datafusion_expr::{ColumnarValue, Volatility};
+use datafusion_expr::{ScalarUDFImpl, Signature};
+
+use crate::string::common::*;
+
+#[derive(Debug)]
+pub(super) struct RepeatFunc {
+    signature: Signature,
+}
+
+impl RepeatFunc {
+    pub fn new() -> Self {
+        use DataType::*;
+        Self {
+            signature: Signature::one_of(
+                vec![Exact(vec![Utf8, Int64]), Exact(vec![LargeUtf8, Int64])],
+                Volatility::Immutable,
+            ),
+        }
+    }
+}
+
+impl ScalarUDFImpl for RepeatFunc {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "repeat"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+        utf8_to_str_type(&arg_types[0], "repeat")
+    }
+
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        match args[0].data_type() {
+            DataType::Utf8 => make_scalar_function(repeat::<i32>, 
vec![])(args),
+            DataType::LargeUtf8 => make_scalar_function(repeat::<i64>, 
vec![])(args),
+            other => exec_err!("Unsupported data type {other:?} for function 
repeat"),
+        }
+    }
+}
+
+/// Repeats string the specified number of times.
+/// repeat('Pg', 4) = 'PgPgPgPg'
+fn repeat<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+    let string_array = as_generic_string_array::<T>(&args[0])?;
+    let number_array = as_int64_array(&args[1])?;
+
+    let result = string_array
+        .iter()
+        .zip(number_array.iter())
+        .map(|(string, number)| match (string, number) {
+            (Some(string), Some(number)) => Some(string.repeat(number as 
usize)),
+            _ => None,
+        })
+        .collect::<GenericStringArray<T>>();
+
+    Ok(Arc::new(result) as ArrayRef)
+}
+
+#[cfg(test)]
+mod tests {
+    use arrow::array::{Array, StringArray};
+    use arrow::datatypes::DataType::Utf8;
+
+    use datafusion_common::Result;
+    use datafusion_common::ScalarValue;
+    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
+
+    use crate::string::common::test::test_function;
+    use crate::string::repeat::RepeatFunc;
+
+    #[test]
+    fn test_functions() -> Result<()> {
+        test_function!(
+            RepeatFunc::new(),
+            &[
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("Pg")))),
+                ColumnarValue::Scalar(ScalarValue::Int64(Some(4))),
+            ],
+            Ok(Some("PgPgPgPg")),
+            &str,
+            Utf8,
+            StringArray
+        );
+
+        test_function!(
+            RepeatFunc::new(),
+            &[
+                ColumnarValue::Scalar(ScalarValue::Utf8(None)),
+                ColumnarValue::Scalar(ScalarValue::Int64(Some(4))),
+            ],
+            Ok(None),
+            &str,
+            Utf8,
+            StringArray
+        );
+        test_function!(
+            RepeatFunc::new(),
+            &[
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("Pg")))),
+                ColumnarValue::Scalar(ScalarValue::Int64(None)),
+            ],
+            Ok(None),
+            &str,
+            Utf8,
+            StringArray
+        );
+
+        Ok(())
+    }
+}
diff --git a/datafusion/functions/src/string/replace.rs 
b/datafusion/functions/src/string/replace.rs
new file mode 100644
index 0000000000..e352442960
--- /dev/null
+++ b/datafusion/functions/src/string/replace.rs
@@ -0,0 +1,97 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+use std::sync::Arc;
+
+use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
+use arrow::datatypes::DataType;
+
+use datafusion_common::cast::as_generic_string_array;
+use datafusion_common::{exec_err, Result};
+use datafusion_expr::TypeSignature::*;
+use datafusion_expr::{ColumnarValue, Volatility};
+use datafusion_expr::{ScalarUDFImpl, Signature};
+
+use crate::string::common::*;
+
+#[derive(Debug)]
+pub(super) struct ReplaceFunc {
+    signature: Signature,
+}
+
+impl ReplaceFunc {
+    pub fn new() -> Self {
+        use DataType::*;
+        Self {
+            signature: Signature::one_of(
+                vec![Exact(vec![Utf8, Utf8, Utf8])],
+                Volatility::Immutable,
+            ),
+        }
+    }
+}
+
+impl ScalarUDFImpl for ReplaceFunc {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "replace"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+        utf8_to_str_type(&arg_types[0], "replace")
+    }
+
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        match args[0].data_type() {
+            DataType::Utf8 => make_scalar_function(replace::<i32>, 
vec![])(args),
+            DataType::LargeUtf8 => make_scalar_function(replace::<i64>, 
vec![])(args),
+            other => {
+                exec_err!("Unsupported data type {other:?} for function 
replace")
+            }
+        }
+    }
+}
+
+/// Replaces all occurrences in string of substring from with substring to.
+/// replace('abcdefabcdef', 'cd', 'XX') = 'abXXefabXXef'
+fn replace<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+    let string_array = as_generic_string_array::<T>(&args[0])?;
+    let from_array = as_generic_string_array::<T>(&args[1])?;
+    let to_array = as_generic_string_array::<T>(&args[2])?;
+
+    let result = string_array
+        .iter()
+        .zip(from_array.iter())
+        .zip(to_array.iter())
+        .map(|((string, from), to)| match (string, from, to) {
+            (Some(string), Some(from), Some(to)) => Some(string.replace(from, 
to)),
+            _ => None,
+        })
+        .collect::<GenericStringArray<T>>();
+
+    Ok(Arc::new(result) as ArrayRef)
+}
+
+mod test {}
diff --git a/datafusion/functions/src/string/split_part.rs 
b/datafusion/functions/src/string/split_part.rs
new file mode 100644
index 0000000000..af201e90fc
--- /dev/null
+++ b/datafusion/functions/src/string/split_part.rs
@@ -0,0 +1,170 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+use std::sync::Arc;
+
+use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
+use arrow::datatypes::DataType;
+
+use datafusion_common::cast::{as_generic_string_array, as_int64_array};
+use datafusion_common::{exec_err, Result};
+use datafusion_expr::TypeSignature::*;
+use datafusion_expr::{ColumnarValue, Volatility};
+use datafusion_expr::{ScalarUDFImpl, Signature};
+
+use crate::string::common::*;
+
+#[derive(Debug)]
+pub(super) struct SplitPartFunc {
+    signature: Signature,
+}
+
+impl SplitPartFunc {
+    pub fn new() -> Self {
+        use DataType::*;
+        Self {
+            signature: Signature::one_of(
+                vec![
+                    Exact(vec![Utf8, Utf8, Int64]),
+                    Exact(vec![LargeUtf8, Utf8, Int64]),
+                    Exact(vec![Utf8, LargeUtf8, Int64]),
+                    Exact(vec![LargeUtf8, LargeUtf8, Int64]),
+                ],
+                Volatility::Immutable,
+            ),
+        }
+    }
+}
+
+impl ScalarUDFImpl for SplitPartFunc {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "split_part"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+        utf8_to_str_type(&arg_types[0], "split_part")
+    }
+
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        match args[0].data_type() {
+            DataType::Utf8 => make_scalar_function(split_part::<i32>, 
vec![])(args),
+            DataType::LargeUtf8 => make_scalar_function(split_part::<i64>, 
vec![])(args),
+            other => {
+                exec_err!("Unsupported data type {other:?} for function 
split_part")
+            }
+        }
+    }
+}
+
+/// Splits string at occurrences of delimiter and returns the n'th field 
(counting from one).
+/// split_part('abc~@~def~@~ghi', '~@~', 2) = 'def'
+fn split_part<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+    let string_array = as_generic_string_array::<T>(&args[0])?;
+    let delimiter_array = as_generic_string_array::<T>(&args[1])?;
+    let n_array = as_int64_array(&args[2])?;
+    let result = string_array
+        .iter()
+        .zip(delimiter_array.iter())
+        .zip(n_array.iter())
+        .map(|((string, delimiter), n)| match (string, delimiter, n) {
+            (Some(string), Some(delimiter), Some(n)) => {
+                if n <= 0 {
+                    exec_err!("field position must be greater than zero")
+                } else {
+                    let split_string: Vec<&str> = 
string.split(delimiter).collect();
+                    match split_string.get(n as usize - 1) {
+                        Some(s) => Ok(Some(*s)),
+                        None => Ok(Some("")),
+                    }
+                }
+            }
+            _ => Ok(None),
+        })
+        .collect::<Result<GenericStringArray<T>>>()?;
+
+    Ok(Arc::new(result) as ArrayRef)
+}
+
+#[cfg(test)]
+mod tests {
+    use arrow::array::{Array, StringArray};
+    use arrow::datatypes::DataType::Utf8;
+
+    use datafusion_common::ScalarValue;
+    use datafusion_common::{exec_err, Result};
+    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
+
+    use crate::string::common::test::test_function;
+    use crate::string::split_part::SplitPartFunc;
+
+    #[test]
+    fn test_functions() -> Result<()> {
+        test_function!(
+            SplitPartFunc::new(),
+            &[
+                ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from(
+                    "abc~@~def~@~ghi"
+                )))),
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("~@~")))),
+                ColumnarValue::Scalar(ScalarValue::Int64(Some(2))),
+            ],
+            Ok(Some("def")),
+            &str,
+            Utf8,
+            StringArray
+        );
+        test_function!(
+            SplitPartFunc::new(),
+            &[
+                ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from(
+                    "abc~@~def~@~ghi"
+                )))),
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("~@~")))),
+                ColumnarValue::Scalar(ScalarValue::Int64(Some(20))),
+            ],
+            Ok(Some("")),
+            &str,
+            Utf8,
+            StringArray
+        );
+        test_function!(
+            SplitPartFunc::new(),
+            &[
+                ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from(
+                    "abc~@~def~@~ghi"
+                )))),
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("~@~")))),
+                ColumnarValue::Scalar(ScalarValue::Int64(Some(-1))),
+            ],
+            exec_err!("field position must be greater than zero"),
+            &str,
+            Utf8,
+            StringArray
+        );
+
+        Ok(())
+    }
+}
diff --git a/datafusion/physical-expr/src/functions.rs 
b/datafusion/physical-expr/src/functions.rs
index 8759adc89b..163598c2df 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -30,17 +30,16 @@
 //! an argument i32 is passed to a function that supports f64, the
 //! argument is automatically is coerced to f64.
 
-use crate::sort_properties::SortProperties;
-use crate::{
-    conditional_expressions, math_expressions, string_expressions, 
PhysicalExpr,
-    ScalarFunctionExpr,
-};
+use std::ops::Neg;
+use std::sync::Arc;
+
 use arrow::{
     array::ArrayRef,
     compute::kernels::length::bit_length,
     datatypes::{DataType, Int32Type, Int64Type, Schema},
 };
 use arrow_array::Array;
+
 use datafusion_common::{exec_err, Result, ScalarValue};
 use datafusion_expr::execution_props::ExecutionProps;
 pub use datafusion_expr::FuncMonotonicity;
@@ -49,8 +48,12 @@ use datafusion_expr::{
     type_coercion::functions::data_types, BuiltinScalarFunction, ColumnarValue,
     ScalarFunctionImplementation,
 };
-use std::ops::Neg;
-use std::sync::Arc;
+
+use crate::sort_properties::SortProperties;
+use crate::{
+    conditional_expressions, math_expressions, string_expressions, 
PhysicalExpr,
+    ScalarFunctionExpr,
+};
 
 /// Create a physical (function) expression.
 /// This function errors when `args`' can't be coerced to a valid argument 
type of the function.
@@ -328,26 +331,6 @@ pub fn create_physical_fun(
             }
             other => exec_err!("Unsupported data type {other:?} for function 
lpad"),
         }),
-        BuiltinScalarFunction::Repeat => Arc::new(|args| match 
args[0].data_type() {
-            DataType::Utf8 => {
-                
make_scalar_function_inner(string_expressions::repeat::<i32>)(args)
-            }
-            DataType::LargeUtf8 => {
-                
make_scalar_function_inner(string_expressions::repeat::<i64>)(args)
-            }
-            other => exec_err!("Unsupported data type {other:?} for function 
repeat"),
-        }),
-        BuiltinScalarFunction::Replace => Arc::new(|args| match 
args[0].data_type() {
-            DataType::Utf8 => {
-                
make_scalar_function_inner(string_expressions::replace::<i32>)(args)
-            }
-            DataType::LargeUtf8 => {
-                
make_scalar_function_inner(string_expressions::replace::<i64>)(args)
-            }
-            other => {
-                exec_err!("Unsupported data type {other:?} for function 
replace")
-            }
-        }),
         BuiltinScalarFunction::Reverse => Arc::new(|args| match 
args[0].data_type() {
             DataType::Utf8 => {
                 let func =
@@ -387,17 +370,6 @@ pub fn create_physical_fun(
             }
             other => exec_err!("Unsupported data type {other:?} for function 
rpad"),
         }),
-        BuiltinScalarFunction::SplitPart => Arc::new(|args| match 
args[0].data_type() {
-            DataType::Utf8 => {
-                
make_scalar_function_inner(string_expressions::split_part::<i32>)(args)
-            }
-            DataType::LargeUtf8 => {
-                
make_scalar_function_inner(string_expressions::split_part::<i64>)(args)
-            }
-            other => {
-                exec_err!("Unsupported data type {other:?} for function 
split_part")
-            }
-        }),
         BuiltinScalarFunction::EndsWith => Arc::new(|args| match 
args[0].data_type() {
             DataType::Utf8 => {
                 
make_scalar_function_inner(string_expressions::ends_with::<i32>)(args)
@@ -568,9 +540,6 @@ fn func_order_in_one_dimension(
 
 #[cfg(test)]
 mod tests {
-    use super::*;
-    use crate::expressions::lit;
-    use crate::expressions::try_cast;
     use arrow::{
         array::{
             Array, ArrayRef, BooleanArray, Float32Array, Float64Array, 
Int32Array,
@@ -579,12 +548,18 @@ mod tests {
         datatypes::Field,
         record_batch::RecordBatch,
     };
+
     use datafusion_common::cast::as_uint64_array;
     use datafusion_common::{exec_err, internal_err, plan_err};
     use datafusion_common::{DataFusionError, Result, ScalarValue};
     use datafusion_expr::type_coercion::functions::data_types;
     use datafusion_expr::Signature;
 
+    use crate::expressions::lit;
+    use crate::expressions::try_cast;
+
+    use super::*;
+
     /// $FUNC function to test
     /// $ARGS arguments (vec) to pass to function
     /// $EXPECTED a Result<Option<$EXPECTED_TYPE>> where Result allows testing 
errors and Option allows testing Null
@@ -1124,33 +1099,6 @@ mod tests {
             Utf8,
             StringArray
         );
-        test_function!(
-            Repeat,
-            &[lit("Pg"), lit(ScalarValue::Int64(Some(4))),],
-            Ok(Some("PgPgPgPg")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Repeat,
-            &[
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Int64(Some(4))),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Repeat,
-            &[lit("Pg"), lit(ScalarValue::Int64(None)),],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
         #[cfg(feature = "unicode_expressions")]
         test_function!(
             Reverse,
@@ -1447,42 +1395,6 @@ mod tests {
             Utf8,
             StringArray
         );
-        test_function!(
-            SplitPart,
-            &[
-                lit("abc~@~def~@~ghi"),
-                lit("~@~"),
-                lit(ScalarValue::Int64(Some(2))),
-            ],
-            Ok(Some("def")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            SplitPart,
-            &[
-                lit("abc~@~def~@~ghi"),
-                lit("~@~"),
-                lit(ScalarValue::Int64(Some(20))),
-            ],
-            Ok(Some("")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            SplitPart,
-            &[
-                lit("abc~@~def~@~ghi"),
-                lit("~@~"),
-                lit(ScalarValue::Int64(Some(-1))),
-            ],
-            exec_err!("field position must be greater than zero"),
-            &str,
-            Utf8,
-            StringArray
-        );
         test_function!(
             EndsWith,
             &[lit("alphabet"), lit("alph"),],
@@ -1812,7 +1724,7 @@ mod tests {
         let schema = Schema::new(vec![Field::new("a", DataType::Int32, 
false)]);
 
         // pick some arbitrary functions to test
-        let funs = [BuiltinScalarFunction::Concat, 
BuiltinScalarFunction::Repeat];
+        let funs = [BuiltinScalarFunction::Concat];
 
         for fun in funs.iter() {
             let expr = create_physical_expr_with_type_coercion(
diff --git a/datafusion/physical-expr/src/string_expressions.rs 
b/datafusion/physical-expr/src/string_expressions.rs
index 766e167a94..812b746354 100644
--- a/datafusion/physical-expr/src/string_expressions.rs
+++ b/datafusion/physical-expr/src/string_expressions.rs
@@ -242,73 +242,6 @@ pub fn instr<T: OffsetSizeTrait>(args: &[ArrayRef]) -> 
Result<ArrayRef> {
     }
 }
 
-/// Repeats string the specified number of times.
-/// repeat('Pg', 4) = 'PgPgPgPg'
-pub fn repeat<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let string_array = as_generic_string_array::<T>(&args[0])?;
-    let number_array = as_int64_array(&args[1])?;
-
-    let result = string_array
-        .iter()
-        .zip(number_array.iter())
-        .map(|(string, number)| match (string, number) {
-            (Some(string), Some(number)) => Some(string.repeat(number as 
usize)),
-            _ => None,
-        })
-        .collect::<GenericStringArray<T>>();
-
-    Ok(Arc::new(result) as ArrayRef)
-}
-
-/// Replaces all occurrences in string of substring from with substring to.
-/// replace('abcdefabcdef', 'cd', 'XX') = 'abXXefabXXef'
-pub fn replace<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let string_array = as_generic_string_array::<T>(&args[0])?;
-    let from_array = as_generic_string_array::<T>(&args[1])?;
-    let to_array = as_generic_string_array::<T>(&args[2])?;
-
-    let result = string_array
-        .iter()
-        .zip(from_array.iter())
-        .zip(to_array.iter())
-        .map(|((string, from), to)| match (string, from, to) {
-            (Some(string), Some(from), Some(to)) => Some(string.replace(from, 
to)),
-            _ => None,
-        })
-        .collect::<GenericStringArray<T>>();
-
-    Ok(Arc::new(result) as ArrayRef)
-}
-
-/// Splits string at occurrences of delimiter and returns the n'th field 
(counting from one).
-/// split_part('abc~@~def~@~ghi', '~@~', 2) = 'def'
-pub fn split_part<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let string_array = as_generic_string_array::<T>(&args[0])?;
-    let delimiter_array = as_generic_string_array::<T>(&args[1])?;
-    let n_array = as_int64_array(&args[2])?;
-    let result = string_array
-        .iter()
-        .zip(delimiter_array.iter())
-        .zip(n_array.iter())
-        .map(|((string, delimiter), n)| match (string, delimiter, n) {
-            (Some(string), Some(delimiter), Some(n)) => {
-                if n <= 0 {
-                    exec_err!("field position must be greater than zero")
-                } else {
-                    let split_string: Vec<&str> = 
string.split(delimiter).collect();
-                    match split_string.get(n as usize - 1) {
-                        Some(s) => Ok(Some(*s)),
-                        None => Ok(Some("")),
-                    }
-                }
-            }
-            _ => Ok(None),
-        })
-        .collect::<Result<GenericStringArray<T>>>()?;
-
-    Ok(Arc::new(result) as ArrayRef)
-}
-
 /// Returns true if string starts with prefix.
 /// starts_with('alphabet', 'alph') = 't'
 pub fn starts_with<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
diff --git a/datafusion/proto/proto/datafusion.proto 
b/datafusion/proto/proto/datafusion.proto
index 795995ce2c..297e355dd7 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -581,8 +581,8 @@ enum ScalarFunction {
   // 37 was OctetLength 
   Random = 38;
   // 39 was RegexpReplace
-  Repeat = 40;
-  Replace = 41;
+  // 40 was Repeat
+  // 41 was Replace
   Reverse = 42;
   Right = 43;
   Rpad = 44;
@@ -591,7 +591,7 @@ enum ScalarFunction {
   // 47 was SHA256
   // 48 was SHA384
   // 49 was SHA512
-  SplitPart = 50;
+  // 50 was SplitPart
   // StartsWith = 51;
   Strpos = 52;
   Substr = 53;
diff --git a/datafusion/proto/src/generated/pbjson.rs 
b/datafusion/proto/src/generated/pbjson.rs
index 3941171e4f..dce815f0f2 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -22937,12 +22937,9 @@ impl serde::Serialize for ScalarFunction {
             Self::Left => "Left",
             Self::Lpad => "Lpad",
             Self::Random => "Random",
-            Self::Repeat => "Repeat",
-            Self::Replace => "Replace",
             Self::Reverse => "Reverse",
             Self::Right => "Right",
             Self::Rpad => "Rpad",
-            Self::SplitPart => "SplitPart",
             Self::Strpos => "Strpos",
             Self::Substr => "Substr",
             Self::Translate => "Translate",
@@ -23002,12 +22999,9 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
             "Left",
             "Lpad",
             "Random",
-            "Repeat",
-            "Replace",
             "Reverse",
             "Right",
             "Rpad",
-            "SplitPart",
             "Strpos",
             "Substr",
             "Translate",
@@ -23096,12 +23090,9 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
                     "Left" => Ok(ScalarFunction::Left),
                     "Lpad" => Ok(ScalarFunction::Lpad),
                     "Random" => Ok(ScalarFunction::Random),
-                    "Repeat" => Ok(ScalarFunction::Repeat),
-                    "Replace" => Ok(ScalarFunction::Replace),
                     "Reverse" => Ok(ScalarFunction::Reverse),
                     "Right" => Ok(ScalarFunction::Right),
                     "Rpad" => Ok(ScalarFunction::Rpad),
-                    "SplitPart" => Ok(ScalarFunction::SplitPart),
                     "Strpos" => Ok(ScalarFunction::Strpos),
                     "Substr" => Ok(ScalarFunction::Substr),
                     "Translate" => Ok(ScalarFunction::Translate),
diff --git a/datafusion/proto/src/generated/prost.rs 
b/datafusion/proto/src/generated/prost.rs
index 58fda7fcb5..2292687b45 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2880,8 +2880,8 @@ pub enum ScalarFunction {
     /// 37 was OctetLength
     Random = 38,
     /// 39 was RegexpReplace
-    Repeat = 40,
-    Replace = 41,
+    /// 40 was Repeat
+    /// 41 was Replace
     Reverse = 42,
     Right = 43,
     Rpad = 44,
@@ -2890,7 +2890,7 @@ pub enum ScalarFunction {
     /// 47 was SHA256
     /// 48 was SHA384
     /// 49 was SHA512
-    SplitPart = 50,
+    /// 50 was SplitPart
     /// StartsWith = 51;
     Strpos = 52,
     Substr = 53,
@@ -3010,12 +3010,9 @@ impl ScalarFunction {
             ScalarFunction::Left => "Left",
             ScalarFunction::Lpad => "Lpad",
             ScalarFunction::Random => "Random",
-            ScalarFunction::Repeat => "Repeat",
-            ScalarFunction::Replace => "Replace",
             ScalarFunction::Reverse => "Reverse",
             ScalarFunction::Right => "Right",
             ScalarFunction::Rpad => "Rpad",
-            ScalarFunction::SplitPart => "SplitPart",
             ScalarFunction::Strpos => "Strpos",
             ScalarFunction::Substr => "Substr",
             ScalarFunction::Translate => "Translate",
@@ -3069,12 +3066,9 @@ impl ScalarFunction {
             "Left" => Some(Self::Left),
             "Lpad" => Some(Self::Lpad),
             "Random" => Some(Self::Random),
-            "Repeat" => Some(Self::Repeat),
-            "Replace" => Some(Self::Replace),
             "Reverse" => Some(Self::Reverse),
             "Right" => Some(Self::Right),
             "Rpad" => Some(Self::Rpad),
-            "SplitPart" => Some(Self::SplitPart),
             "Strpos" => Some(Self::Strpos),
             "Substr" => Some(Self::Substr),
             "Translate" => Some(Self::Translate),
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs 
b/datafusion/proto/src/logical_plan/from_proto.rs
index 3b44c1cb27..b78e3ae6dc 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -53,11 +53,10 @@ use datafusion_expr::{
     expr::{self, InList, Sort, WindowFunction},
     factorial, find_in_set, floor, gcd, initcap, iszero, lcm, left, ln, log, 
log10, log2,
     logical_plan::{PlanType, StringifiedPlan},
-    lpad, nanvl, pi, power, radians, random, repeat, replace, reverse, right, 
round,
-    rpad, signum, sin, sinh, split_part, sqrt, strpos, substr, substr_index, 
substring,
-    translate, trunc, AggregateFunction, Between, BinaryExpr, 
BuiltInWindowFunction,
-    BuiltinScalarFunction, Case, Cast, Expr, GetFieldAccess, GetIndexedField,
-    GroupingSet,
+    lpad, nanvl, pi, power, radians, random, reverse, right, round, rpad, 
signum, sin,
+    sinh, sqrt, strpos, substr, substr_index, substring, translate, trunc,
+    AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, 
BuiltinScalarFunction,
+    Case, Cast, Expr, GetFieldAccess, GetIndexedField, GroupingSet,
     GroupingSet::GroupingSets,
     JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, 
WindowFrameBound,
     WindowFrameUnits,
@@ -468,12 +467,9 @@ impl From<&protobuf::ScalarFunction> for 
BuiltinScalarFunction {
             ScalarFunction::Left => Self::Left,
             ScalarFunction::Lpad => Self::Lpad,
             ScalarFunction::Random => Self::Random,
-            ScalarFunction::Repeat => Self::Repeat,
-            ScalarFunction::Replace => Self::Replace,
             ScalarFunction::Reverse => Self::Reverse,
             ScalarFunction::Right => Self::Right,
             ScalarFunction::Rpad => Self::Rpad,
-            ScalarFunction::SplitPart => Self::SplitPart,
             ScalarFunction::Strpos => Self::Strpos,
             ScalarFunction::Substr => Self::Substr,
             ScalarFunction::Translate => Self::Translate,
@@ -1445,15 +1441,6 @@ pub fn parse_expr(
                     parse_expr(&args[1], registry, codec)?,
                 )),
                 ScalarFunction::Random => Ok(random()),
-                ScalarFunction::Repeat => Ok(repeat(
-                    parse_expr(&args[0], registry, codec)?,
-                    parse_expr(&args[1], registry, codec)?,
-                )),
-                ScalarFunction::Replace => Ok(replace(
-                    parse_expr(&args[0], registry, codec)?,
-                    parse_expr(&args[1], registry, codec)?,
-                    parse_expr(&args[2], registry, codec)?,
-                )),
                 ScalarFunction::Reverse => {
                     Ok(reverse(parse_expr(&args[0], registry, codec)?))
                 }
@@ -1485,11 +1472,6 @@ pub fn parse_expr(
                         .map(|expr| parse_expr(expr, registry, codec))
                         .collect::<Result<Vec<_>, _>>()?,
                 )),
-                ScalarFunction::SplitPart => Ok(split_part(
-                    parse_expr(&args[0], registry, codec)?,
-                    parse_expr(&args[1], registry, codec)?,
-                    parse_expr(&args[2], registry, codec)?,
-                )),
                 ScalarFunction::EndsWith => Ok(ends_with(
                     parse_expr(&args[0], registry, codec)?,
                     parse_expr(&args[1], registry, codec)?,
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs 
b/datafusion/proto/src/logical_plan/to_proto.rs
index 446a91a39a..0c0f0c6e0a 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1490,12 +1490,9 @@ impl TryFrom<&BuiltinScalarFunction> for 
protobuf::ScalarFunction {
             BuiltinScalarFunction::Left => Self::Left,
             BuiltinScalarFunction::Lpad => Self::Lpad,
             BuiltinScalarFunction::Random => Self::Random,
-            BuiltinScalarFunction::Repeat => Self::Repeat,
-            BuiltinScalarFunction::Replace => Self::Replace,
             BuiltinScalarFunction::Reverse => Self::Reverse,
             BuiltinScalarFunction::Right => Self::Right,
             BuiltinScalarFunction::Rpad => Self::Rpad,
-            BuiltinScalarFunction::SplitPart => Self::SplitPart,
             BuiltinScalarFunction::Strpos => Self::Strpos,
             BuiltinScalarFunction::Substr => Self::Substr,
             BuiltinScalarFunction::Translate => Self::Translate,


Reply via email to