This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 084fdfbeb7 feat:implement sql style 'ends_with' and 'instr' string
function (#8862)
084fdfbeb7 is described below
commit 084fdfbeb7c1174b2ccee400a80bcddd6bac22dd
Author: zy-kkk <[email protected]>
AuthorDate: Wed Jan 24 00:05:42 2024 +0800
feat:implement sql style 'ends_with' and 'instr' string function (#8862)
* feat:implement sql style 'ends_with' and 'instr' string function
* Use Arrow comparison functions for ends_with and starts_with
implementations and extend tests for instr function
---
.../core/tests/dataframe/dataframe_functions.rs | 40 ++++++
datafusion/expr/src/built_in_function.rs | 36 ++++--
datafusion/expr/src/expr_fn.rs | 4 +
datafusion/physical-expr/src/functions.rs | 143 ++++++++++++++++++++-
datafusion/physical-expr/src/string_expressions.rs | 72 +++++++++--
datafusion/proto/proto/datafusion.proto | 2 +
datafusion/proto/src/generated/pbjson.rs | 6 +
datafusion/proto/src/generated/prost.rs | 6 +
datafusion/proto/src/logical_plan/from_proto.rs | 14 +-
datafusion/proto/src/logical_plan/to_proto.rs | 2 +
datafusion/sqllogictest/test_files/functions.slt | 35 +++++
docs/source/user-guide/sql/scalar_functions.md | 32 +++++
12 files changed, 366 insertions(+), 26 deletions(-)
diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs
b/datafusion/core/tests/dataframe/dataframe_functions.rs
index fe56fc22ea..2d42034643 100644
--- a/datafusion/core/tests/dataframe/dataframe_functions.rs
+++ b/datafusion/core/tests/dataframe/dataframe_functions.rs
@@ -267,6 +267,26 @@ async fn test_fn_initcap() -> Result<()> {
Ok(())
}
+#[tokio::test]
+async fn test_fn_instr() -> Result<()> {
+ let expr = instr(col("a"), lit("b"));
+
+ let expected = [
+ "+-------------------------+",
+ "| instr(test.a,Utf8(\"b\")) |",
+ "+-------------------------+",
+ "| 2 |",
+ "| 2 |",
+ "| 0 |",
+ "| 5 |",
+ "+-------------------------+",
+ ];
+
+ assert_fn_batches!(expr, expected);
+
+ Ok(())
+}
+
#[tokio::test]
#[cfg(feature = "unicode_expressions")]
async fn test_fn_left() -> Result<()> {
@@ -634,6 +654,26 @@ async fn test_fn_starts_with() -> Result<()> {
Ok(())
}
+#[tokio::test]
+async fn test_fn_ends_with() -> Result<()> {
+ let expr = ends_with(col("a"), lit("DEF"));
+
+ let expected = [
+ "+-------------------------------+",
+ "| ends_with(test.a,Utf8(\"DEF\")) |",
+ "+-------------------------------+",
+ "| true |",
+ "| false |",
+ "| false |",
+ "| false |",
+ "+-------------------------------+",
+ ];
+
+ assert_fn_batches!(expr, expected);
+
+ Ok(())
+}
+
#[tokio::test]
#[cfg(feature = "unicode_expressions")]
async fn test_fn_strpos() -> Result<()> {
diff --git a/datafusion/expr/src/built_in_function.rs
b/datafusion/expr/src/built_in_function.rs
index 81c8f67cc6..e86d6172ce 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -221,8 +221,12 @@ pub enum BuiltinScalarFunction {
DateTrunc,
/// date_bin
DateBin,
+ /// ends_with
+ EndsWith,
/// initcap
InitCap,
+ /// InStr
+ InStr,
/// left
Left,
/// lpad
@@ -446,7 +450,9 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::DatePart => Volatility::Immutable,
BuiltinScalarFunction::DateTrunc => Volatility::Immutable,
BuiltinScalarFunction::DateBin => Volatility::Immutable,
+ BuiltinScalarFunction::EndsWith => Volatility::Immutable,
BuiltinScalarFunction::InitCap => Volatility::Immutable,
+ BuiltinScalarFunction::InStr => Volatility::Immutable,
BuiltinScalarFunction::Left => Volatility::Immutable,
BuiltinScalarFunction::Lpad => Volatility::Immutable,
BuiltinScalarFunction::Lower => Volatility::Immutable,
@@ -708,6 +714,9 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::InitCap => {
utf8_to_str_type(&input_expr_types[0], "initcap")
}
+ BuiltinScalarFunction::InStr => {
+ utf8_to_int_type(&input_expr_types[0], "instr")
+ }
BuiltinScalarFunction::Left =>
utf8_to_str_type(&input_expr_types[0], "left"),
BuiltinScalarFunction::Lower => {
utf8_to_str_type(&input_expr_types[0], "lower")
@@ -795,6 +804,7 @@ impl BuiltinScalarFunction {
true,
)))),
BuiltinScalarFunction::StartsWith => Ok(Boolean),
+ BuiltinScalarFunction::EndsWith => Ok(Boolean),
BuiltinScalarFunction::Strpos => {
utf8_to_int_type(&input_expr_types[0], "strpos")
}
@@ -1211,17 +1221,19 @@ impl BuiltinScalarFunction {
],
self.volatility(),
),
- BuiltinScalarFunction::Strpos | BuiltinScalarFunction::StartsWith
=> {
- Signature::one_of(
- vec![
- Exact(vec![Utf8, Utf8]),
- Exact(vec![Utf8, LargeUtf8]),
- Exact(vec![LargeUtf8, Utf8]),
- Exact(vec![LargeUtf8, LargeUtf8]),
- ],
- self.volatility(),
- )
- }
+
+ BuiltinScalarFunction::EndsWith
+ | BuiltinScalarFunction::InStr
+ | BuiltinScalarFunction::Strpos
+ | BuiltinScalarFunction::StartsWith => Signature::one_of(
+ vec![
+ Exact(vec![Utf8, Utf8]),
+ Exact(vec![Utf8, LargeUtf8]),
+ Exact(vec![LargeUtf8, Utf8]),
+ Exact(vec![LargeUtf8, LargeUtf8]),
+ ],
+ self.volatility(),
+ ),
BuiltinScalarFunction::Substr => Signature::one_of(
vec![
@@ -1473,7 +1485,9 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Concat => &["concat"],
BuiltinScalarFunction::ConcatWithSeparator => &["concat_ws"],
BuiltinScalarFunction::Chr => &["chr"],
+ BuiltinScalarFunction::EndsWith => &["ends_with"],
BuiltinScalarFunction::InitCap => &["initcap"],
+ BuiltinScalarFunction::InStr => &["instr"],
BuiltinScalarFunction::Left => &["left"],
BuiltinScalarFunction::Lower => &["lower"],
BuiltinScalarFunction::Lpad => &["lpad"],
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 1d45fa4fac..006b5f10f1 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -798,6 +798,7 @@ scalar_expr!(Digest, digest, input algorithm, "compute the
binary hash of `input
scalar_expr!(Encode, encode, input encoding, "encode the `input`, using the
`encoding`. encoding can be base64 or hex");
scalar_expr!(Decode, decode, input encoding, "decode the`input`, using the
`encoding`. encoding can be base64 or hex");
scalar_expr!(InitCap, initcap, string, "converts the first letter of each word
in `string` in uppercase and the remaining characters in lowercase");
+scalar_expr!(InStr, instr, string substring, "returns the position of the
first occurrence of `substring` in `string`");
scalar_expr!(Left, left, string n, "returns the first `n` characters in the
`string`");
scalar_expr!(Lower, lower, string, "convert the string to lower case");
scalar_expr!(
@@ -830,6 +831,7 @@ scalar_expr!(SHA512, sha512, string, "SHA-512 hash");
scalar_expr!(SplitPart, split_part, string delimiter index, "splits a string
based on a delimiter and picks out the desired field based on the index.");
scalar_expr!(StringToArray, string_to_array, string delimiter null_string,
"splits a `string` based on a `delimiter` and returns an array of parts. Any
parts matching the optional `null_string` will be replaced with `NULL`");
scalar_expr!(StartsWith, starts_with, string prefix, "whether the `string`
starts with the `prefix`");
+scalar_expr!(EndsWith, ends_with, string suffix, "whether the `string` ends
with the `suffix`");
scalar_expr!(Strpos, strpos, string substring, "finds the position from where
the `substring` matches the `string`");
scalar_expr!(Substr, substr, string position, "substring from the `position`
to the end");
scalar_expr!(Substr, substring, string position length, "substring from the
`position` with `length` characters");
@@ -1372,6 +1374,7 @@ mod test {
test_scalar_expr!(Gcd, gcd, arg_1, arg_2);
test_scalar_expr!(Lcm, lcm, arg_1, arg_2);
test_scalar_expr!(InitCap, initcap, string);
+ test_scalar_expr!(InStr, instr, string, substring);
test_scalar_expr!(Left, left, string, count);
test_scalar_expr!(Lower, lower, string);
test_nary_scalar_expr!(Lpad, lpad, string, count);
@@ -1410,6 +1413,7 @@ mod test {
test_scalar_expr!(SplitPart, split_part, expr, delimiter, index);
test_scalar_expr!(StringToArray, string_to_array, expr, delimiter,
null_value);
test_scalar_expr!(StartsWith, starts_with, string, characters);
+ test_scalar_expr!(EndsWith, ends_with, string, characters);
test_scalar_expr!(Strpos, strpos, string, substring);
test_scalar_expr!(Substr, substr, string, position);
test_scalar_expr!(Substr, substring, string, position, count);
diff --git a/datafusion/physical-expr/src/functions.rs
b/datafusion/physical-expr/src/functions.rs
index d1e75bfe4f..57e04c6ecf 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -557,6 +557,15 @@ pub fn create_physical_fun(
internal_err!("Unsupported data type {other:?} for function
initcap")
}
}),
+ BuiltinScalarFunction::InStr => Arc::new(|args| match
args[0].data_type() {
+ DataType::Utf8 => {
+ make_scalar_function(string_expressions::instr::<i32>)(args)
+ }
+ DataType::LargeUtf8 => {
+ make_scalar_function(string_expressions::instr::<i64>)(args)
+ }
+ other => internal_err!("Unsupported data type {other:?} for
function instr"),
+ }),
BuiltinScalarFunction::Left => Arc::new(|args| match
args[0].data_type() {
DataType::Utf8 => {
let func = invoke_if_unicode_expressions_feature_flag!(left,
i32, "left");
@@ -779,6 +788,17 @@ pub fn create_physical_fun(
internal_err!("Unsupported data type {other:?} for function
starts_with")
}
}),
+ BuiltinScalarFunction::EndsWith => Arc::new(|args| match
args[0].data_type() {
+ DataType::Utf8 => {
+
make_scalar_function(string_expressions::ends_with::<i32>)(args)
+ }
+ DataType::LargeUtf8 => {
+
make_scalar_function(string_expressions::ends_with::<i64>)(args)
+ }
+ other => {
+ internal_err!("Unsupported data type {other:?} for function
ends_with")
+ }
+ }),
BuiltinScalarFunction::Strpos => Arc::new(|args| match
args[0].data_type() {
DataType::Utf8 => {
let func = invoke_if_unicode_expressions_feature_flag!(
@@ -1001,7 +1021,7 @@ mod tests {
use arrow::{
array::{
Array, ArrayRef, BinaryArray, BooleanArray, Float32Array,
Float64Array,
- Int32Array, StringArray, UInt64Array,
+ Int32Array, Int64Array, StringArray, UInt64Array,
},
datatypes::Field,
record_batch::RecordBatch,
@@ -1393,6 +1413,95 @@ mod tests {
Utf8,
StringArray
);
+ test_function!(
+ InStr,
+ &[lit("abc"), lit("b")],
+ Ok(Some(2)),
+ i32,
+ Int32,
+ Int32Array
+ );
+ test_function!(
+ InStr,
+ &[lit("abc"), lit("c")],
+ Ok(Some(3)),
+ i32,
+ Int32,
+ Int32Array
+ );
+ test_function!(
+ InStr,
+ &[lit("abc"), lit("d")],
+ Ok(Some(0)),
+ i32,
+ Int32,
+ Int32Array
+ );
+ test_function!(
+ InStr,
+ &[lit("abc"), lit("")],
+ Ok(Some(1)),
+ i32,
+ Int32,
+ Int32Array
+ );
+ test_function!(
+ InStr,
+ &[lit("Helloworld"), lit("world")],
+ Ok(Some(6)),
+ i32,
+ Int32,
+ Int32Array
+ );
+ test_function!(
+ InStr,
+ &[lit("Helloworld"), lit(ScalarValue::Utf8(None))],
+ Ok(None),
+ i32,
+ Int32,
+ Int32Array
+ );
+ test_function!(
+ InStr,
+ &[lit(ScalarValue::Utf8(None)), lit("Hello")],
+ Ok(None),
+ i32,
+ Int32,
+ Int32Array
+ );
+ test_function!(
+ InStr,
+ &[
+ lit(ScalarValue::LargeUtf8(Some("Helloworld".to_string()))),
+ lit(ScalarValue::LargeUtf8(Some("world".to_string())))
+ ],
+ Ok(Some(6)),
+ i64,
+ Int64,
+ Int64Array
+ );
+ test_function!(
+ InStr,
+ &[
+ lit(ScalarValue::LargeUtf8(None)),
+ lit(ScalarValue::LargeUtf8(Some("world".to_string())))
+ ],
+ Ok(None),
+ i64,
+ Int64,
+ Int64Array
+ );
+ test_function!(
+ InStr,
+ &[
+ lit(ScalarValue::LargeUtf8(Some("Helloworld".to_string()))),
+ lit(ScalarValue::LargeUtf8(None))
+ ],
+ Ok(None),
+ i64,
+ Int64,
+ Int64Array
+ );
#[cfg(feature = "unicode_expressions")]
test_function!(
Left,
@@ -2511,6 +2620,38 @@ mod tests {
Boolean,
BooleanArray
);
+ test_function!(
+ EndsWith,
+ &[lit("alphabet"), lit("alph"),],
+ Ok(Some(false)),
+ bool,
+ Boolean,
+ BooleanArray
+ );
+ test_function!(
+ EndsWith,
+ &[lit("alphabet"), lit("bet"),],
+ Ok(Some(true)),
+ bool,
+ Boolean,
+ BooleanArray
+ );
+ test_function!(
+ EndsWith,
+ &[lit(ScalarValue::Utf8(None)), lit("alph"),],
+ Ok(None),
+ bool,
+ Boolean,
+ BooleanArray
+ );
+ test_function!(
+ EndsWith,
+ &[lit("alphabet"), lit(ScalarValue::Utf8(None)),],
+ Ok(None),
+ bool,
+ Boolean,
+ BooleanArray
+ );
#[cfg(feature = "unicode_expressions")]
test_function!(
Strpos,
diff --git a/datafusion/physical-expr/src/string_expressions.rs
b/datafusion/physical-expr/src/string_expressions.rs
index 7d9fecf614..d5344773cf 100644
--- a/datafusion/physical-expr/src/string_expressions.rs
+++ b/datafusion/physical-expr/src/string_expressions.rs
@@ -23,8 +23,8 @@
use arrow::{
array::{
- Array, ArrayRef, BooleanArray, GenericStringArray, Int32Array,
Int64Array,
- OffsetSizeTrait, StringArray,
+ Array, ArrayRef, GenericStringArray, Int32Array, Int64Array,
OffsetSizeTrait,
+ StringArray,
},
datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType},
};
@@ -296,6 +296,50 @@ pub fn initcap<T: OffsetSizeTrait>(args: &[ArrayRef]) ->
Result<ArrayRef> {
Ok(Arc::new(result) as ArrayRef)
}
+/// Returns the position of the first occurrence of substring in string.
+/// The position is counted from 1. If the substring is not found, returns 0.
+/// For example, instr('Helloworld', 'world') = 6.
+pub fn instr<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+ let string_array = as_generic_string_array::<T>(&args[0])?;
+ let substr_array = as_generic_string_array::<T>(&args[1])?;
+
+ match args[0].data_type() {
+ DataType::Utf8 => {
+ let result = string_array
+ .iter()
+ .zip(substr_array.iter())
+ .map(|(string, substr)| match (string, substr) {
+ (Some(string), Some(substr)) => string
+ .find(substr)
+ .map_or(Some(0), |index| Some((index + 1) as i32)),
+ _ => None,
+ })
+ .collect::<Int32Array>();
+
+ Ok(Arc::new(result) as ArrayRef)
+ }
+ DataType::LargeUtf8 => {
+ let result = string_array
+ .iter()
+ .zip(substr_array.iter())
+ .map(|(string, substr)| match (string, substr) {
+ (Some(string), Some(substr)) => string
+ .find(substr)
+ .map_or(Some(0), |index| Some((index + 1) as i64)),
+ _ => None,
+ })
+ .collect::<Int64Array>();
+
+ Ok(Arc::new(result) as ArrayRef)
+ }
+ other => {
+ internal_err!(
+ "instr was called with {other} datatype arguments. It requires
Utf8 or LargeUtf8."
+ )
+ }
+ }
+}
+
/// Converts the string to all lower case.
/// lower('TOM') = 'tom'
pub fn lower(args: &[ColumnarValue]) -> Result<ColumnarValue> {
@@ -461,17 +505,21 @@ pub fn split_part<T: OffsetSizeTrait>(args: &[ArrayRef])
-> Result<ArrayRef> {
/// Returns true if string starts with prefix.
/// starts_with('alphabet', 'alph') = 't'
pub fn starts_with<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
- let string_array = as_generic_string_array::<T>(&args[0])?;
- let prefix_array = as_generic_string_array::<T>(&args[1])?;
+ let left = as_generic_string_array::<T>(&args[0])?;
+ let right = as_generic_string_array::<T>(&args[1])?;
- let result = string_array
- .iter()
- .zip(prefix_array.iter())
- .map(|(string, prefix)| match (string, prefix) {
- (Some(string), Some(prefix)) => Some(string.starts_with(prefix)),
- _ => None,
- })
- .collect::<BooleanArray>();
+ let result = arrow::compute::kernels::comparison::starts_with(left,
right)?;
+
+ Ok(Arc::new(result) as ArrayRef)
+}
+
+/// Returns true if string ends with suffix.
+/// ends_with('alphabet', 'abet') = 't'
+pub fn ends_with<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+ let left = as_generic_string_array::<T>(&args[0])?;
+ let right = as_generic_string_array::<T>(&args[1])?;
+
+ let result = arrow::compute::kernels::comparison::ends_with(left, right)?;
Ok(Arc::new(result) as ArrayRef)
}
diff --git a/datafusion/proto/proto/datafusion.proto
b/datafusion/proto/proto/datafusion.proto
index d79879e57a..66c1271e65 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -669,6 +669,8 @@ enum ScalarFunction {
ArraySort = 128;
ArrayDistinct = 129;
ArrayResize = 130;
+ EndsWith = 131;
+ InStr = 132;
}
message ScalarFunctionNode {
diff --git a/datafusion/proto/src/generated/pbjson.rs
b/datafusion/proto/src/generated/pbjson.rs
index d7ad6fb03c..39a8678ef2 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -22423,6 +22423,8 @@ impl serde::Serialize for ScalarFunction {
Self::ArraySort => "ArraySort",
Self::ArrayDistinct => "ArrayDistinct",
Self::ArrayResize => "ArrayResize",
+ Self::EndsWith => "EndsWith",
+ Self::InStr => "InStr",
};
serializer.serialize_str(variant)
}
@@ -22565,6 +22567,8 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"ArraySort",
"ArrayDistinct",
"ArrayResize",
+ "EndsWith",
+ "InStr",
];
struct GeneratedVisitor;
@@ -22736,6 +22740,8 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"ArraySort" => Ok(ScalarFunction::ArraySort),
"ArrayDistinct" => Ok(ScalarFunction::ArrayDistinct),
"ArrayResize" => Ok(ScalarFunction::ArrayResize),
+ "EndsWith" => Ok(ScalarFunction::EndsWith),
+ "InStr" => Ok(ScalarFunction::InStr),
_ => Err(serde::de::Error::unknown_variant(value, FIELDS)),
}
}
diff --git a/datafusion/proto/src/generated/prost.rs
b/datafusion/proto/src/generated/prost.rs
index d594da9087..7bf1d8ed04 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2764,6 +2764,8 @@ pub enum ScalarFunction {
ArraySort = 128,
ArrayDistinct = 129,
ArrayResize = 130,
+ EndsWith = 131,
+ InStr = 132,
}
impl ScalarFunction {
/// String value of the enum field names used in the ProtoBuf definition.
@@ -2903,6 +2905,8 @@ impl ScalarFunction {
ScalarFunction::ArraySort => "ArraySort",
ScalarFunction::ArrayDistinct => "ArrayDistinct",
ScalarFunction::ArrayResize => "ArrayResize",
+ ScalarFunction::EndsWith => "EndsWith",
+ ScalarFunction::InStr => "InStr",
}
}
/// Creates an enum from field names used in the ProtoBuf definition.
@@ -3039,6 +3043,8 @@ impl ScalarFunction {
"ArraySort" => Some(Self::ArraySort),
"ArrayDistinct" => Some(Self::ArrayDistinct),
"ArrayResize" => Some(Self::ArrayResize),
+ "EndsWith" => Some(Self::EndsWith),
+ "InStr" => Some(Self::InStr),
_ => None,
}
}
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs
b/datafusion/proto/src/logical_plan/from_proto.rs
index 8db5ccdfd6..42d39b5c51 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -56,10 +56,10 @@ use datafusion_expr::{
ascii, asin, asinh, atan, atan2, atanh, bit_length, btrim, cardinality,
cbrt, ceil,
character_length, chr, coalesce, concat_expr, concat_ws_expr, cos, cosh,
cot,
current_date, current_time, date_bin, date_part, date_trunc, decode,
degrees, digest,
- encode, exp,
+ encode, ends_with, exp,
expr::{self, InList, Sort, WindowFunction},
factorial, find_in_set, flatten, floor, from_unixtime, gcd, gen_range,
initcap,
- isnan, iszero, lcm, left, levenshtein, ln, log, log10, log2,
+ instr, isnan, iszero, lcm, left, levenshtein, ln, log, log10, log2,
logical_plan::{PlanType, StringifiedPlan},
lower, lpad, ltrim, md5, nanvl, now, nullif, octet_length, overlay, pi,
power,
radians, random, regexp_match, regexp_replace, repeat, replace, reverse,
right,
@@ -529,7 +529,9 @@ impl From<&protobuf::ScalarFunction> for
BuiltinScalarFunction {
ScalarFunction::CharacterLength => Self::CharacterLength,
ScalarFunction::Chr => Self::Chr,
ScalarFunction::ConcatWithSeparator => Self::ConcatWithSeparator,
+ ScalarFunction::EndsWith => Self::EndsWith,
ScalarFunction::InitCap => Self::InitCap,
+ ScalarFunction::InStr => Self::InStr,
ScalarFunction::Left => Self::Left,
ScalarFunction::Lpad => Self::Lpad,
ScalarFunction::Random => Self::Random,
@@ -1586,6 +1588,10 @@ pub fn parse_expr(
}
ScalarFunction::Chr => Ok(chr(parse_expr(&args[0],
registry)?)),
ScalarFunction::InitCap => Ok(initcap(parse_expr(&args[0],
registry)?)),
+ ScalarFunction::InStr => Ok(instr(
+ parse_expr(&args[0], registry)?,
+ parse_expr(&args[1], registry)?,
+ )),
ScalarFunction::Gcd => Ok(gcd(
parse_expr(&args[0], registry)?,
parse_expr(&args[1], registry)?,
@@ -1665,6 +1671,10 @@ pub fn parse_expr(
parse_expr(&args[0], registry)?,
parse_expr(&args[1], registry)?,
)),
+ ScalarFunction::EndsWith => Ok(ends_with(
+ parse_expr(&args[0], registry)?,
+ parse_expr(&args[1], registry)?,
+ )),
ScalarFunction::Strpos => Ok(strpos(
parse_expr(&args[0], registry)?,
parse_expr(&args[1], registry)?,
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs
b/datafusion/proto/src/logical_plan/to_proto.rs
index 7eef3da951..dbb52eced3 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1525,7 +1525,9 @@ impl TryFrom<&BuiltinScalarFunction> for
protobuf::ScalarFunction {
BuiltinScalarFunction::CharacterLength => Self::CharacterLength,
BuiltinScalarFunction::Chr => Self::Chr,
BuiltinScalarFunction::ConcatWithSeparator =>
Self::ConcatWithSeparator,
+ BuiltinScalarFunction::EndsWith => Self::EndsWith,
BuiltinScalarFunction::InitCap => Self::InitCap,
+ BuiltinScalarFunction::InStr => Self::InStr,
BuiltinScalarFunction::Left => Self::Left,
BuiltinScalarFunction::Lpad => Self::Lpad,
BuiltinScalarFunction::Random => Self::Random,
diff --git a/datafusion/sqllogictest/test_files/functions.slt
b/datafusion/sqllogictest/test_files/functions.slt
index 7bd60a3a15..d3f81cc61e 100644
--- a/datafusion/sqllogictest/test_files/functions.slt
+++ b/datafusion/sqllogictest/test_files/functions.slt
@@ -647,6 +647,21 @@ SELECT initcap(arrow_cast('foo', 'Dictionary(Int32,
Utf8)'))
----
Foo
+query I
+SELECT instr('foobarbar', 'bar')
+----
+4
+
+query I
+SELECT instr('foobarbar', 'aa')
+----
+0
+
+query I
+SELECT instr('foobarbar', '')
+----
+1
+
query T
SELECT lower('FOObar')
----
@@ -727,6 +742,26 @@ SELECT split_part(arrow_cast('foo_bar', 'Dictionary(Int32,
Utf8)'), '_', 2)
----
bar
+query B
+SELECT starts_with('foobar', 'foo')
+----
+true
+
+query B
+SELECT starts_with('foobar', 'bar')
+----
+false
+
+query B
+SELECT ends_with('foobar', 'bar')
+----
+true
+
+query B
+SELECT ends_with('foobar', 'foo')
+----
+false
+
query T
SELECT trim(' foo ')
----
diff --git a/docs/source/user-guide/sql/scalar_functions.md
b/docs/source/user-guide/sql/scalar_functions.md
index 50e1cbc3d6..c3def3f89b 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -613,7 +613,9 @@ nullif(expression1, expression2)
- [concat](#concat)
- [concat_ws](#concat_ws)
- [chr](#chr)
+- [ends_with](#ends_with)
- [initcap](#initcap)
+- [instr](#instr)
- [left](#left)
- [length](#length)
- [lower](#lower)
@@ -756,6 +758,20 @@ chr(expression)
**Related functions**:
[ascii](#ascii)
+### `ends_with`
+
+Tests if a string ends with a substring.
+
+```
+ends_with(str, substr)
+```
+
+#### Arguments
+
+- **str**: String expression to test.
+ Can be a constant, column, or function, and any combination of string
operators.
+- **substr**: Substring to test for.
+
### `initcap`
Capitalizes the first character in each word in the input string.
@@ -774,6 +790,22 @@ initcap(str)
[lower](#lower),
[upper](#upper)
+### `instr`
+
+Returns the location where substr first appeared in str (counting from 1).
+If substr does not appear in str, return 0.
+
+```
+instr(str, substr)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on.
+ Can be a constant, column, or function, and any combination of string
operators.
+- **substr**: Substring expression to search for.
+ Can be a constant, column, or function, and any combination of string
operators.
+
### `left`
Returns a specified number of characters from the left side of a string.