This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 084fdfbeb7 feat:implement sql style 'ends_with' and 'instr' string 
function (#8862)
084fdfbeb7 is described below

commit 084fdfbeb7c1174b2ccee400a80bcddd6bac22dd
Author: zy-kkk <[email protected]>
AuthorDate: Wed Jan 24 00:05:42 2024 +0800

    feat:implement sql style 'ends_with' and 'instr' string function (#8862)
    
    * feat:implement sql style 'ends_with' and 'instr' string function
    
    * Use Arrow comparison functions for ends_with and starts_with 
implementations and extend tests for instr function
---
 .../core/tests/dataframe/dataframe_functions.rs    |  40 ++++++
 datafusion/expr/src/built_in_function.rs           |  36 ++++--
 datafusion/expr/src/expr_fn.rs                     |   4 +
 datafusion/physical-expr/src/functions.rs          | 143 ++++++++++++++++++++-
 datafusion/physical-expr/src/string_expressions.rs |  72 +++++++++--
 datafusion/proto/proto/datafusion.proto            |   2 +
 datafusion/proto/src/generated/pbjson.rs           |   6 +
 datafusion/proto/src/generated/prost.rs            |   6 +
 datafusion/proto/src/logical_plan/from_proto.rs    |  14 +-
 datafusion/proto/src/logical_plan/to_proto.rs      |   2 +
 datafusion/sqllogictest/test_files/functions.slt   |  35 +++++
 docs/source/user-guide/sql/scalar_functions.md     |  32 +++++
 12 files changed, 366 insertions(+), 26 deletions(-)

diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs 
b/datafusion/core/tests/dataframe/dataframe_functions.rs
index fe56fc22ea..2d42034643 100644
--- a/datafusion/core/tests/dataframe/dataframe_functions.rs
+++ b/datafusion/core/tests/dataframe/dataframe_functions.rs
@@ -267,6 +267,26 @@ async fn test_fn_initcap() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn test_fn_instr() -> Result<()> {
+    let expr = instr(col("a"), lit("b"));
+
+    let expected = [
+        "+-------------------------+",
+        "| instr(test.a,Utf8(\"b\")) |",
+        "+-------------------------+",
+        "| 2                       |",
+        "| 2                       |",
+        "| 0                       |",
+        "| 5                       |",
+        "+-------------------------+",
+    ];
+
+    assert_fn_batches!(expr, expected);
+
+    Ok(())
+}
+
 #[tokio::test]
 #[cfg(feature = "unicode_expressions")]
 async fn test_fn_left() -> Result<()> {
@@ -634,6 +654,26 @@ async fn test_fn_starts_with() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn test_fn_ends_with() -> Result<()> {
+    let expr = ends_with(col("a"), lit("DEF"));
+
+    let expected = [
+        "+-------------------------------+",
+        "| ends_with(test.a,Utf8(\"DEF\")) |",
+        "+-------------------------------+",
+        "| true                          |",
+        "| false                         |",
+        "| false                         |",
+        "| false                         |",
+        "+-------------------------------+",
+    ];
+
+    assert_fn_batches!(expr, expected);
+
+    Ok(())
+}
+
 #[tokio::test]
 #[cfg(feature = "unicode_expressions")]
 async fn test_fn_strpos() -> Result<()> {
diff --git a/datafusion/expr/src/built_in_function.rs 
b/datafusion/expr/src/built_in_function.rs
index 81c8f67cc6..e86d6172ce 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -221,8 +221,12 @@ pub enum BuiltinScalarFunction {
     DateTrunc,
     /// date_bin
     DateBin,
+    /// ends_with
+    EndsWith,
     /// initcap
     InitCap,
+    /// InStr
+    InStr,
     /// left
     Left,
     /// lpad
@@ -446,7 +450,9 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::DatePart => Volatility::Immutable,
             BuiltinScalarFunction::DateTrunc => Volatility::Immutable,
             BuiltinScalarFunction::DateBin => Volatility::Immutable,
+            BuiltinScalarFunction::EndsWith => Volatility::Immutable,
             BuiltinScalarFunction::InitCap => Volatility::Immutable,
+            BuiltinScalarFunction::InStr => Volatility::Immutable,
             BuiltinScalarFunction::Left => Volatility::Immutable,
             BuiltinScalarFunction::Lpad => Volatility::Immutable,
             BuiltinScalarFunction::Lower => Volatility::Immutable,
@@ -708,6 +714,9 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::InitCap => {
                 utf8_to_str_type(&input_expr_types[0], "initcap")
             }
+            BuiltinScalarFunction::InStr => {
+                utf8_to_int_type(&input_expr_types[0], "instr")
+            }
             BuiltinScalarFunction::Left => 
utf8_to_str_type(&input_expr_types[0], "left"),
             BuiltinScalarFunction::Lower => {
                 utf8_to_str_type(&input_expr_types[0], "lower")
@@ -795,6 +804,7 @@ impl BuiltinScalarFunction {
                 true,
             )))),
             BuiltinScalarFunction::StartsWith => Ok(Boolean),
+            BuiltinScalarFunction::EndsWith => Ok(Boolean),
             BuiltinScalarFunction::Strpos => {
                 utf8_to_int_type(&input_expr_types[0], "strpos")
             }
@@ -1211,17 +1221,19 @@ impl BuiltinScalarFunction {
                 ],
                 self.volatility(),
             ),
-            BuiltinScalarFunction::Strpos | BuiltinScalarFunction::StartsWith 
=> {
-                Signature::one_of(
-                    vec![
-                        Exact(vec![Utf8, Utf8]),
-                        Exact(vec![Utf8, LargeUtf8]),
-                        Exact(vec![LargeUtf8, Utf8]),
-                        Exact(vec![LargeUtf8, LargeUtf8]),
-                    ],
-                    self.volatility(),
-                )
-            }
+
+            BuiltinScalarFunction::EndsWith
+            | BuiltinScalarFunction::InStr
+            | BuiltinScalarFunction::Strpos
+            | BuiltinScalarFunction::StartsWith => Signature::one_of(
+                vec![
+                    Exact(vec![Utf8, Utf8]),
+                    Exact(vec![Utf8, LargeUtf8]),
+                    Exact(vec![LargeUtf8, Utf8]),
+                    Exact(vec![LargeUtf8, LargeUtf8]),
+                ],
+                self.volatility(),
+            ),
 
             BuiltinScalarFunction::Substr => Signature::one_of(
                 vec![
@@ -1473,7 +1485,9 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::Concat => &["concat"],
             BuiltinScalarFunction::ConcatWithSeparator => &["concat_ws"],
             BuiltinScalarFunction::Chr => &["chr"],
+            BuiltinScalarFunction::EndsWith => &["ends_with"],
             BuiltinScalarFunction::InitCap => &["initcap"],
+            BuiltinScalarFunction::InStr => &["instr"],
             BuiltinScalarFunction::Left => &["left"],
             BuiltinScalarFunction::Lower => &["lower"],
             BuiltinScalarFunction::Lpad => &["lpad"],
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 1d45fa4fac..006b5f10f1 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -798,6 +798,7 @@ scalar_expr!(Digest, digest, input algorithm, "compute the 
binary hash of `input
 scalar_expr!(Encode, encode, input encoding, "encode the `input`, using the 
`encoding`. encoding can be base64 or hex");
 scalar_expr!(Decode, decode, input encoding, "decode the`input`, using the 
`encoding`. encoding can be base64 or hex");
 scalar_expr!(InitCap, initcap, string, "converts the first letter of each word 
in `string` in uppercase and the remaining characters in lowercase");
+scalar_expr!(InStr, instr, string substring, "returns the position of the 
first occurrence of `substring` in `string`");
 scalar_expr!(Left, left, string n, "returns the first `n` characters in the 
`string`");
 scalar_expr!(Lower, lower, string, "convert the string to lower case");
 scalar_expr!(
@@ -830,6 +831,7 @@ scalar_expr!(SHA512, sha512, string, "SHA-512 hash");
 scalar_expr!(SplitPart, split_part, string delimiter index, "splits a string 
based on a delimiter and picks out the desired field based on the index.");
 scalar_expr!(StringToArray, string_to_array, string delimiter null_string, 
"splits a `string` based on a `delimiter` and returns an array of parts. Any 
parts matching the optional `null_string` will be replaced with `NULL`");
 scalar_expr!(StartsWith, starts_with, string prefix, "whether the `string` 
starts with the `prefix`");
+scalar_expr!(EndsWith, ends_with, string suffix, "whether the `string` ends 
with the `suffix`");
 scalar_expr!(Strpos, strpos, string substring, "finds the position from where 
the `substring` matches the `string`");
 scalar_expr!(Substr, substr, string position, "substring from the `position` 
to the end");
 scalar_expr!(Substr, substring, string position length, "substring from the 
`position` with `length` characters");
@@ -1372,6 +1374,7 @@ mod test {
         test_scalar_expr!(Gcd, gcd, arg_1, arg_2);
         test_scalar_expr!(Lcm, lcm, arg_1, arg_2);
         test_scalar_expr!(InitCap, initcap, string);
+        test_scalar_expr!(InStr, instr, string, substring);
         test_scalar_expr!(Left, left, string, count);
         test_scalar_expr!(Lower, lower, string);
         test_nary_scalar_expr!(Lpad, lpad, string, count);
@@ -1410,6 +1413,7 @@ mod test {
         test_scalar_expr!(SplitPart, split_part, expr, delimiter, index);
         test_scalar_expr!(StringToArray, string_to_array, expr, delimiter, 
null_value);
         test_scalar_expr!(StartsWith, starts_with, string, characters);
+        test_scalar_expr!(EndsWith, ends_with, string, characters);
         test_scalar_expr!(Strpos, strpos, string, substring);
         test_scalar_expr!(Substr, substr, string, position);
         test_scalar_expr!(Substr, substring, string, position, count);
diff --git a/datafusion/physical-expr/src/functions.rs 
b/datafusion/physical-expr/src/functions.rs
index d1e75bfe4f..57e04c6ecf 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -557,6 +557,15 @@ pub fn create_physical_fun(
                 internal_err!("Unsupported data type {other:?} for function 
initcap")
             }
         }),
+        BuiltinScalarFunction::InStr => Arc::new(|args| match 
args[0].data_type() {
+            DataType::Utf8 => {
+                make_scalar_function(string_expressions::instr::<i32>)(args)
+            }
+            DataType::LargeUtf8 => {
+                make_scalar_function(string_expressions::instr::<i64>)(args)
+            }
+            other => internal_err!("Unsupported data type {other:?} for 
function instr"),
+        }),
         BuiltinScalarFunction::Left => Arc::new(|args| match 
args[0].data_type() {
             DataType::Utf8 => {
                 let func = invoke_if_unicode_expressions_feature_flag!(left, 
i32, "left");
@@ -779,6 +788,17 @@ pub fn create_physical_fun(
                 internal_err!("Unsupported data type {other:?} for function 
starts_with")
             }
         }),
+        BuiltinScalarFunction::EndsWith => Arc::new(|args| match 
args[0].data_type() {
+            DataType::Utf8 => {
+                
make_scalar_function(string_expressions::ends_with::<i32>)(args)
+            }
+            DataType::LargeUtf8 => {
+                
make_scalar_function(string_expressions::ends_with::<i64>)(args)
+            }
+            other => {
+                internal_err!("Unsupported data type {other:?} for function 
ends_with")
+            }
+        }),
         BuiltinScalarFunction::Strpos => Arc::new(|args| match 
args[0].data_type() {
             DataType::Utf8 => {
                 let func = invoke_if_unicode_expressions_feature_flag!(
@@ -1001,7 +1021,7 @@ mod tests {
     use arrow::{
         array::{
             Array, ArrayRef, BinaryArray, BooleanArray, Float32Array, 
Float64Array,
-            Int32Array, StringArray, UInt64Array,
+            Int32Array, Int64Array, StringArray, UInt64Array,
         },
         datatypes::Field,
         record_batch::RecordBatch,
@@ -1393,6 +1413,95 @@ mod tests {
             Utf8,
             StringArray
         );
+        test_function!(
+            InStr,
+            &[lit("abc"), lit("b")],
+            Ok(Some(2)),
+            i32,
+            Int32,
+            Int32Array
+        );
+        test_function!(
+            InStr,
+            &[lit("abc"), lit("c")],
+            Ok(Some(3)),
+            i32,
+            Int32,
+            Int32Array
+        );
+        test_function!(
+            InStr,
+            &[lit("abc"), lit("d")],
+            Ok(Some(0)),
+            i32,
+            Int32,
+            Int32Array
+        );
+        test_function!(
+            InStr,
+            &[lit("abc"), lit("")],
+            Ok(Some(1)),
+            i32,
+            Int32,
+            Int32Array
+        );
+        test_function!(
+            InStr,
+            &[lit("Helloworld"), lit("world")],
+            Ok(Some(6)),
+            i32,
+            Int32,
+            Int32Array
+        );
+        test_function!(
+            InStr,
+            &[lit("Helloworld"), lit(ScalarValue::Utf8(None))],
+            Ok(None),
+            i32,
+            Int32,
+            Int32Array
+        );
+        test_function!(
+            InStr,
+            &[lit(ScalarValue::Utf8(None)), lit("Hello")],
+            Ok(None),
+            i32,
+            Int32,
+            Int32Array
+        );
+        test_function!(
+            InStr,
+            &[
+                lit(ScalarValue::LargeUtf8(Some("Helloworld".to_string()))),
+                lit(ScalarValue::LargeUtf8(Some("world".to_string())))
+            ],
+            Ok(Some(6)),
+            i64,
+            Int64,
+            Int64Array
+        );
+        test_function!(
+            InStr,
+            &[
+                lit(ScalarValue::LargeUtf8(None)),
+                lit(ScalarValue::LargeUtf8(Some("world".to_string())))
+            ],
+            Ok(None),
+            i64,
+            Int64,
+            Int64Array
+        );
+        test_function!(
+            InStr,
+            &[
+                lit(ScalarValue::LargeUtf8(Some("Helloworld".to_string()))),
+                lit(ScalarValue::LargeUtf8(None))
+            ],
+            Ok(None),
+            i64,
+            Int64,
+            Int64Array
+        );
         #[cfg(feature = "unicode_expressions")]
         test_function!(
             Left,
@@ -2511,6 +2620,38 @@ mod tests {
             Boolean,
             BooleanArray
         );
+        test_function!(
+            EndsWith,
+            &[lit("alphabet"), lit("alph"),],
+            Ok(Some(false)),
+            bool,
+            Boolean,
+            BooleanArray
+        );
+        test_function!(
+            EndsWith,
+            &[lit("alphabet"), lit("bet"),],
+            Ok(Some(true)),
+            bool,
+            Boolean,
+            BooleanArray
+        );
+        test_function!(
+            EndsWith,
+            &[lit(ScalarValue::Utf8(None)), lit("alph"),],
+            Ok(None),
+            bool,
+            Boolean,
+            BooleanArray
+        );
+        test_function!(
+            EndsWith,
+            &[lit("alphabet"), lit(ScalarValue::Utf8(None)),],
+            Ok(None),
+            bool,
+            Boolean,
+            BooleanArray
+        );
         #[cfg(feature = "unicode_expressions")]
         test_function!(
             Strpos,
diff --git a/datafusion/physical-expr/src/string_expressions.rs 
b/datafusion/physical-expr/src/string_expressions.rs
index 7d9fecf614..d5344773cf 100644
--- a/datafusion/physical-expr/src/string_expressions.rs
+++ b/datafusion/physical-expr/src/string_expressions.rs
@@ -23,8 +23,8 @@
 
 use arrow::{
     array::{
-        Array, ArrayRef, BooleanArray, GenericStringArray, Int32Array, 
Int64Array,
-        OffsetSizeTrait, StringArray,
+        Array, ArrayRef, GenericStringArray, Int32Array, Int64Array, 
OffsetSizeTrait,
+        StringArray,
     },
     datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType},
 };
@@ -296,6 +296,50 @@ pub fn initcap<T: OffsetSizeTrait>(args: &[ArrayRef]) -> 
Result<ArrayRef> {
     Ok(Arc::new(result) as ArrayRef)
 }
 
+/// Returns the position of the first occurrence of substring in string.
+/// The position is counted from 1. If the substring is not found, returns 0.
+/// For example, instr('Helloworld', 'world') = 6.
+pub fn instr<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+    let string_array = as_generic_string_array::<T>(&args[0])?;
+    let substr_array = as_generic_string_array::<T>(&args[1])?;
+
+    match args[0].data_type() {
+        DataType::Utf8 => {
+            let result = string_array
+                .iter()
+                .zip(substr_array.iter())
+                .map(|(string, substr)| match (string, substr) {
+                    (Some(string), Some(substr)) => string
+                        .find(substr)
+                        .map_or(Some(0), |index| Some((index + 1) as i32)),
+                    _ => None,
+                })
+                .collect::<Int32Array>();
+
+            Ok(Arc::new(result) as ArrayRef)
+        }
+        DataType::LargeUtf8 => {
+            let result = string_array
+                .iter()
+                .zip(substr_array.iter())
+                .map(|(string, substr)| match (string, substr) {
+                    (Some(string), Some(substr)) => string
+                        .find(substr)
+                        .map_or(Some(0), |index| Some((index + 1) as i64)),
+                    _ => None,
+                })
+                .collect::<Int64Array>();
+
+            Ok(Arc::new(result) as ArrayRef)
+        }
+        other => {
+            internal_err!(
+                "instr was called with {other} datatype arguments. It requires 
Utf8 or LargeUtf8."
+            )
+        }
+    }
+}
+
 /// Converts the string to all lower case.
 /// lower('TOM') = 'tom'
 pub fn lower(args: &[ColumnarValue]) -> Result<ColumnarValue> {
@@ -461,17 +505,21 @@ pub fn split_part<T: OffsetSizeTrait>(args: &[ArrayRef]) 
-> Result<ArrayRef> {
 /// Returns true if string starts with prefix.
 /// starts_with('alphabet', 'alph') = 't'
 pub fn starts_with<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let string_array = as_generic_string_array::<T>(&args[0])?;
-    let prefix_array = as_generic_string_array::<T>(&args[1])?;
+    let left = as_generic_string_array::<T>(&args[0])?;
+    let right = as_generic_string_array::<T>(&args[1])?;
 
-    let result = string_array
-        .iter()
-        .zip(prefix_array.iter())
-        .map(|(string, prefix)| match (string, prefix) {
-            (Some(string), Some(prefix)) => Some(string.starts_with(prefix)),
-            _ => None,
-        })
-        .collect::<BooleanArray>();
+    let result = arrow::compute::kernels::comparison::starts_with(left, 
right)?;
+
+    Ok(Arc::new(result) as ArrayRef)
+}
+
+/// Returns true if string ends with suffix.
+/// ends_with('alphabet', 'abet') = 't'
+pub fn ends_with<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+    let left = as_generic_string_array::<T>(&args[0])?;
+    let right = as_generic_string_array::<T>(&args[1])?;
+
+    let result = arrow::compute::kernels::comparison::ends_with(left, right)?;
 
     Ok(Arc::new(result) as ArrayRef)
 }
diff --git a/datafusion/proto/proto/datafusion.proto 
b/datafusion/proto/proto/datafusion.proto
index d79879e57a..66c1271e65 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -669,6 +669,8 @@ enum ScalarFunction {
   ArraySort = 128;
   ArrayDistinct = 129;
   ArrayResize = 130;
+  EndsWith = 131;
+  InStr = 132;
 }
 
 message ScalarFunctionNode {
diff --git a/datafusion/proto/src/generated/pbjson.rs 
b/datafusion/proto/src/generated/pbjson.rs
index d7ad6fb03c..39a8678ef2 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -22423,6 +22423,8 @@ impl serde::Serialize for ScalarFunction {
             Self::ArraySort => "ArraySort",
             Self::ArrayDistinct => "ArrayDistinct",
             Self::ArrayResize => "ArrayResize",
+            Self::EndsWith => "EndsWith",
+            Self::InStr => "InStr",
         };
         serializer.serialize_str(variant)
     }
@@ -22565,6 +22567,8 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
             "ArraySort",
             "ArrayDistinct",
             "ArrayResize",
+            "EndsWith",
+            "InStr",
         ];
 
         struct GeneratedVisitor;
@@ -22736,6 +22740,8 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
                     "ArraySort" => Ok(ScalarFunction::ArraySort),
                     "ArrayDistinct" => Ok(ScalarFunction::ArrayDistinct),
                     "ArrayResize" => Ok(ScalarFunction::ArrayResize),
+                    "EndsWith" => Ok(ScalarFunction::EndsWith),
+                    "InStr" => Ok(ScalarFunction::InStr),
                     _ => Err(serde::de::Error::unknown_variant(value, FIELDS)),
                 }
             }
diff --git a/datafusion/proto/src/generated/prost.rs 
b/datafusion/proto/src/generated/prost.rs
index d594da9087..7bf1d8ed04 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2764,6 +2764,8 @@ pub enum ScalarFunction {
     ArraySort = 128,
     ArrayDistinct = 129,
     ArrayResize = 130,
+    EndsWith = 131,
+    InStr = 132,
 }
 impl ScalarFunction {
     /// String value of the enum field names used in the ProtoBuf definition.
@@ -2903,6 +2905,8 @@ impl ScalarFunction {
             ScalarFunction::ArraySort => "ArraySort",
             ScalarFunction::ArrayDistinct => "ArrayDistinct",
             ScalarFunction::ArrayResize => "ArrayResize",
+            ScalarFunction::EndsWith => "EndsWith",
+            ScalarFunction::InStr => "InStr",
         }
     }
     /// Creates an enum from field names used in the ProtoBuf definition.
@@ -3039,6 +3043,8 @@ impl ScalarFunction {
             "ArraySort" => Some(Self::ArraySort),
             "ArrayDistinct" => Some(Self::ArrayDistinct),
             "ArrayResize" => Some(Self::ArrayResize),
+            "EndsWith" => Some(Self::EndsWith),
+            "InStr" => Some(Self::InStr),
             _ => None,
         }
     }
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs 
b/datafusion/proto/src/logical_plan/from_proto.rs
index 8db5ccdfd6..42d39b5c51 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -56,10 +56,10 @@ use datafusion_expr::{
     ascii, asin, asinh, atan, atan2, atanh, bit_length, btrim, cardinality, 
cbrt, ceil,
     character_length, chr, coalesce, concat_expr, concat_ws_expr, cos, cosh, 
cot,
     current_date, current_time, date_bin, date_part, date_trunc, decode, 
degrees, digest,
-    encode, exp,
+    encode, ends_with, exp,
     expr::{self, InList, Sort, WindowFunction},
     factorial, find_in_set, flatten, floor, from_unixtime, gcd, gen_range, 
initcap,
-    isnan, iszero, lcm, left, levenshtein, ln, log, log10, log2,
+    instr, isnan, iszero, lcm, left, levenshtein, ln, log, log10, log2,
     logical_plan::{PlanType, StringifiedPlan},
     lower, lpad, ltrim, md5, nanvl, now, nullif, octet_length, overlay, pi, 
power,
     radians, random, regexp_match, regexp_replace, repeat, replace, reverse, 
right,
@@ -529,7 +529,9 @@ impl From<&protobuf::ScalarFunction> for 
BuiltinScalarFunction {
             ScalarFunction::CharacterLength => Self::CharacterLength,
             ScalarFunction::Chr => Self::Chr,
             ScalarFunction::ConcatWithSeparator => Self::ConcatWithSeparator,
+            ScalarFunction::EndsWith => Self::EndsWith,
             ScalarFunction::InitCap => Self::InitCap,
+            ScalarFunction::InStr => Self::InStr,
             ScalarFunction::Left => Self::Left,
             ScalarFunction::Lpad => Self::Lpad,
             ScalarFunction::Random => Self::Random,
@@ -1586,6 +1588,10 @@ pub fn parse_expr(
                 }
                 ScalarFunction::Chr => Ok(chr(parse_expr(&args[0], 
registry)?)),
                 ScalarFunction::InitCap => Ok(initcap(parse_expr(&args[0], 
registry)?)),
+                ScalarFunction::InStr => Ok(instr(
+                    parse_expr(&args[0], registry)?,
+                    parse_expr(&args[1], registry)?,
+                )),
                 ScalarFunction::Gcd => Ok(gcd(
                     parse_expr(&args[0], registry)?,
                     parse_expr(&args[1], registry)?,
@@ -1665,6 +1671,10 @@ pub fn parse_expr(
                     parse_expr(&args[0], registry)?,
                     parse_expr(&args[1], registry)?,
                 )),
+                ScalarFunction::EndsWith => Ok(ends_with(
+                    parse_expr(&args[0], registry)?,
+                    parse_expr(&args[1], registry)?,
+                )),
                 ScalarFunction::Strpos => Ok(strpos(
                     parse_expr(&args[0], registry)?,
                     parse_expr(&args[1], registry)?,
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs 
b/datafusion/proto/src/logical_plan/to_proto.rs
index 7eef3da951..dbb52eced3 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1525,7 +1525,9 @@ impl TryFrom<&BuiltinScalarFunction> for 
protobuf::ScalarFunction {
             BuiltinScalarFunction::CharacterLength => Self::CharacterLength,
             BuiltinScalarFunction::Chr => Self::Chr,
             BuiltinScalarFunction::ConcatWithSeparator => 
Self::ConcatWithSeparator,
+            BuiltinScalarFunction::EndsWith => Self::EndsWith,
             BuiltinScalarFunction::InitCap => Self::InitCap,
+            BuiltinScalarFunction::InStr => Self::InStr,
             BuiltinScalarFunction::Left => Self::Left,
             BuiltinScalarFunction::Lpad => Self::Lpad,
             BuiltinScalarFunction::Random => Self::Random,
diff --git a/datafusion/sqllogictest/test_files/functions.slt 
b/datafusion/sqllogictest/test_files/functions.slt
index 7bd60a3a15..d3f81cc61e 100644
--- a/datafusion/sqllogictest/test_files/functions.slt
+++ b/datafusion/sqllogictest/test_files/functions.slt
@@ -647,6 +647,21 @@ SELECT initcap(arrow_cast('foo', 'Dictionary(Int32, 
Utf8)'))
 ----
 Foo
 
+query I
+SELECT instr('foobarbar', 'bar')
+----
+4
+
+query I
+SELECT instr('foobarbar', 'aa')
+----
+0
+
+query I
+SELECT instr('foobarbar', '')
+----
+1
+
 query T
 SELECT lower('FOObar')
 ----
@@ -727,6 +742,26 @@ SELECT split_part(arrow_cast('foo_bar', 'Dictionary(Int32, 
Utf8)'), '_', 2)
 ----
 bar
 
+query B
+SELECT starts_with('foobar', 'foo')
+----
+true
+
+query B
+SELECT starts_with('foobar', 'bar')
+----
+false
+
+query B
+SELECT ends_with('foobar', 'bar')
+----
+true
+
+query B
+SELECT ends_with('foobar', 'foo')
+----
+false
+
 query T
 SELECT trim('  foo  ')
 ----
diff --git a/docs/source/user-guide/sql/scalar_functions.md 
b/docs/source/user-guide/sql/scalar_functions.md
index 50e1cbc3d6..c3def3f89b 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -613,7 +613,9 @@ nullif(expression1, expression2)
 - [concat](#concat)
 - [concat_ws](#concat_ws)
 - [chr](#chr)
+- [ends_with](#ends_with)
 - [initcap](#initcap)
+- [instr](#instr)
 - [left](#left)
 - [length](#length)
 - [lower](#lower)
@@ -756,6 +758,20 @@ chr(expression)
 **Related functions**:
 [ascii](#ascii)
 
+### `ends_with`
+
+Tests if a string ends with a substring.
+
+```
+ends_with(str, substr)
+```
+
+#### Arguments
+
+- **str**: String expression to test.
+  Can be a constant, column, or function, and any combination of string 
operators.
+- **substr**: Substring to test for.
+
 ### `initcap`
 
 Capitalizes the first character in each word in the input string.
@@ -774,6 +790,22 @@ initcap(str)
 [lower](#lower),
 [upper](#upper)
 
+### `instr`
+
+Returns the location where substr first appeared in str (counting from 1).
+If substr does not appear in str, return 0.
+
+```
+instr(str, substr)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on.
+  Can be a constant, column, or function, and any combination of string 
operators.
+- **substr**: Substring expression to search for.
+  Can be a constant, column, or function, and any combination of string 
operators.
+
 ### `left`
 
 Returns a specified number of characters from the left side of a string.

Reply via email to