This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new e622409691 feat: issue_9285: port builtin reg function into
datafusion-function-* (1/3 regexpmatch) (#9329)
e622409691 is described below
commit e62240969135e2236d100c8c0c01546a87950a80
Author: Lordworms <[email protected]>
AuthorDate: Wed Feb 28 04:55:03 2024 -0600
feat: issue_9285: port builtin reg function into datafusion-function-* (1/3
regexpmatch) (#9329)
* feat: issue_9285: port builtin reg function into datafusion-function-*
crate (1/3: RegexpMatch part)
* fix fmt
* refact
* modify test
* fix msrv verify problem
* port test and delete useless lines
---
.../core/tests/dataframe/dataframe_functions.rs | 2 +-
datafusion/expr/src/built_in_function.rs | 22 ----
datafusion/expr/src/expr_fn.rs | 7 -
datafusion/functions/Cargo.toml | 3 +-
datafusion/functions/src/lib.rs | 7 +-
datafusion/functions/src/regex/mod.rs | 29 +++++
datafusion/functions/src/regex/regexpmatch.rs | 145 +++++++++++++++++++++
datafusion/physical-expr/src/functions.rs | 108 ---------------
datafusion/proto/proto/datafusion.proto | 2 +-
datafusion/proto/src/generated/pbjson.rs | 3 -
datafusion/proto/src/generated/prost.rs | 4 +-
datafusion/proto/src/logical_plan/from_proto.rs | 17 +--
datafusion/proto/src/logical_plan/to_proto.rs | 1 -
datafusion/sqllogictest/test_files/regexp.slt | 8 +-
14 files changed, 196 insertions(+), 162 deletions(-)
diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs
b/datafusion/core/tests/dataframe/dataframe_functions.rs
index 95c13fc17c..ff553a4888 100644
--- a/datafusion/core/tests/dataframe/dataframe_functions.rs
+++ b/datafusion/core/tests/dataframe/dataframe_functions.rs
@@ -467,7 +467,7 @@ async fn test_fn_regexp_like() -> Result<()> {
#[tokio::test]
#[cfg(feature = "unicode_expressions")]
async fn test_fn_regexp_match() -> Result<()> {
- let expr = regexp_match(vec![col("a"), lit("[a-z]")]);
+ let expr = regexp_match(col("a"), lit("[a-z]"));
let expected = [
"+------------------------------------+",
diff --git a/datafusion/expr/src/built_in_function.rs
b/datafusion/expr/src/built_in_function.rs
index e041065958..8df2f4e88d 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -233,7 +233,6 @@ pub enum BuiltinScalarFunction {
/// regexp_like
RegexpLike,
/// regexp_match
- RegexpMatch,
/// regexp_replace
RegexpReplace,
/// repeat
@@ -449,7 +448,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::OctetLength => Volatility::Immutable,
BuiltinScalarFunction::Radians => Volatility::Immutable,
BuiltinScalarFunction::RegexpLike => Volatility::Immutable,
- BuiltinScalarFunction::RegexpMatch => Volatility::Immutable,
BuiltinScalarFunction::RegexpReplace => Volatility::Immutable,
BuiltinScalarFunction::Repeat => Volatility::Immutable,
BuiltinScalarFunction::Replace => Volatility::Immutable,
@@ -814,16 +812,6 @@ impl BuiltinScalarFunction {
);
}
}),
- BuiltinScalarFunction::RegexpMatch => Ok(match
&input_expr_types[0] {
- LargeUtf8 => List(Arc::new(Field::new("item", LargeUtf8,
true))),
- Utf8 => List(Arc::new(Field::new("item", Utf8, true))),
- Null => Null,
- other => {
- return plan_err!(
- "The regexp_match function can only accept strings.
Got {other}"
- );
- }
- }),
BuiltinScalarFunction::Factorial
| BuiltinScalarFunction::Gcd
@@ -1263,15 +1251,6 @@ impl BuiltinScalarFunction {
],
self.volatility(),
),
- BuiltinScalarFunction::RegexpMatch => Signature::one_of(
- vec![
- Exact(vec![Utf8, Utf8]),
- Exact(vec![LargeUtf8, Utf8]),
- Exact(vec![Utf8, Utf8, Utf8]),
- Exact(vec![LargeUtf8, Utf8, Utf8]),
- ],
- self.volatility(),
- ),
BuiltinScalarFunction::RegexpReplace => Signature::one_of(
vec![
Exact(vec![Utf8, Utf8, Utf8]),
@@ -1514,7 +1493,6 @@ impl BuiltinScalarFunction {
// regex functions
BuiltinScalarFunction::RegexpLike => &["regexp_like"],
- BuiltinScalarFunction::RegexpMatch => &["regexp_match"],
BuiltinScalarFunction::RegexpReplace => &["regexp_replace"],
// time/date functions
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 67bf39050d..7ffd2f76e7 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -854,11 +854,6 @@ nary_scalar_expr!(
regexp_like,
"matches a regular expression against a string and returns true or false
if there was at least one match or not"
);
-nary_scalar_expr!(
- RegexpMatch,
- regexp_match,
- "matches a regular expression against a string and returns matched
substrings."
-);
nary_scalar_expr!(
RegexpReplace,
regexp_replace,
@@ -1380,8 +1375,6 @@ mod test {
test_scalar_expr!(OctetLength, octet_length, string);
test_nary_scalar_expr!(RegexpLike, regexp_like, string, pattern);
test_nary_scalar_expr!(RegexpLike, regexp_like, string, pattern,
flags);
- test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern);
- test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern,
flags);
test_nary_scalar_expr!(
RegexpReplace,
regexp_replace,
diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
index f63f18f955..89b7de9ee1 100644
--- a/datafusion/functions/Cargo.toml
+++ b/datafusion/functions/Cargo.toml
@@ -32,11 +32,12 @@ rust-version = { workspace = true }
# enable core functions
core_expressions = []
# Enable encoding by default so the doctests work. In general don't
automatically enable all packages.
-default = ["core_expressions", "encoding_expressions", "math_expressions"]
+default = ["core_expressions", "encoding_expressions", "math_expressions",
"regex_expressions"]
# enable encode/decode functions
encoding_expressions = ["base64", "hex"]
# enable math functions
math_expressions = []
+regex_expressions = []
[lib]
name = "datafusion_functions"
diff --git a/datafusion/functions/src/lib.rs b/datafusion/functions/src/lib.rs
index 981174c141..d2f0270959 100644
--- a/datafusion/functions/src/lib.rs
+++ b/datafusion/functions/src/lib.rs
@@ -93,7 +93,7 @@ make_package!(
);
make_package!(math, "math_expressions", "Mathematical functions.");
-
+make_package!(regex, "regex_expressions", "Regex functions");
/// Fluent-style API for creating `Expr`s
pub mod expr_fn {
#[cfg(feature = "core_expressions")]
@@ -102,6 +102,8 @@ pub mod expr_fn {
pub use super::encoding::expr_fn::*;
#[cfg(feature = "math_expressions")]
pub use super::math::expr_fn::*;
+ #[cfg(feature = "regex_expressions")]
+ pub use super::regex::expr_fn::*;
}
/// Registers all enabled packages with a [`FunctionRegistry`]
@@ -109,7 +111,8 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) ->
Result<()> {
let mut all_functions = core::functions()
.into_iter()
.chain(encoding::functions())
- .chain(math::functions());
+ .chain(math::functions())
+ .chain(regex::functions());
all_functions.try_for_each(|udf| {
let existing_udf = registry.register_udf(udf)?;
diff --git a/datafusion/functions/src/regex/mod.rs
b/datafusion/functions/src/regex/mod.rs
new file mode 100644
index 0000000000..862e8b77a2
--- /dev/null
+++ b/datafusion/functions/src/regex/mod.rs
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! "regex" DataFusion functions
+
+mod regexpmatch;
+// create UDFs
+make_udf_function!(regexpmatch::RegexpMatchFunc, REGEXP_MATCH, regexp_match);
+
+export_functions!((
+ regexp_match,
+ input_arg1
+ input_arg2,
+ "returns a list of regular expression matches in a string. "
+));
diff --git a/datafusion/functions/src/regex/regexpmatch.rs
b/datafusion/functions/src/regex/regexpmatch.rs
new file mode 100644
index 0000000000..7ab99f96b1
--- /dev/null
+++ b/datafusion/functions/src/regex/regexpmatch.rs
@@ -0,0 +1,145 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Regex expressions: the `regexp_match` function
+use arrow::array::{Array, ArrayRef, OffsetSizeTrait};
+use arrow::compute::kernels::regexp;
+use arrow::datatypes::DataType;
+use arrow::datatypes::Field;
+use datafusion_common::ScalarValue;
+use datafusion_expr::TypeSignature::*;
+use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+use datafusion_common::exec_err;
+use datafusion_common::{arrow_datafusion_err, plan_err};
+use datafusion_common::{
+ cast::as_generic_string_array, internal_err, DataFusionError, Result,
+};
+use datafusion_expr::ColumnarValue;
+use std::sync::Arc;
+
+#[derive(Debug)]
+pub(super) struct RegexpMatchFunc {
+ signature: Signature,
+}
+impl RegexpMatchFunc {
+ pub fn new() -> Self {
+ use DataType::*;
+ Self {
+ signature: Signature::one_of(
+ vec![
+ Exact(vec![Utf8, Utf8]),
+ Exact(vec![LargeUtf8, Utf8]),
+ Exact(vec![Utf8, Utf8, Utf8]),
+ Exact(vec![LargeUtf8, Utf8, Utf8]),
+ ],
+ Volatility::Immutable,
+ ),
+ }
+ }
+}
+
+impl ScalarUDFImpl for RegexpMatchFunc {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ fn name(&self) -> &str {
+ "regexp_match"
+ }
+
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+ use DataType::*;
+
+ Ok(match &arg_types[0] {
+ LargeUtf8 => List(Arc::new(Field::new("item", LargeUtf8, true))),
+ Utf8 => List(Arc::new(Field::new("item", Utf8, true))),
+ Null => Null,
+ other => {
+ return plan_err!(
+ "The regexp_match function can only accept strings. Got
{other}"
+ );
+ }
+ })
+ }
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ let len = args
+ .iter()
+ .fold(Option::<usize>::None, |acc, arg| match arg {
+ ColumnarValue::Scalar(_) => acc,
+ ColumnarValue::Array(a) => Some(a.len()),
+ });
+
+ let is_scalar = len.is_none();
+ let inferred_length = len.unwrap_or(1);
+ let args = args
+ .iter()
+ .map(|arg| arg.clone().into_array(inferred_length))
+ .collect::<Result<Vec<_>>>()?;
+
+ let result = regexp_match_func(&args);
+ if is_scalar {
+ // If all inputs are scalar, keeps output as scalar
+ let result = result.and_then(|arr|
ScalarValue::try_from_array(&arr, 0));
+ result.map(ColumnarValue::Scalar)
+ } else {
+ result.map(ColumnarValue::Array)
+ }
+ }
+}
+fn regexp_match_func(args: &[ArrayRef]) -> Result<ArrayRef> {
+ match args[0].data_type() {
+ DataType::Utf8 => {
+ regexp_match::<i32>(args)
+ }
+ DataType::LargeUtf8 => {
+ regexp_match::<i64>(args)
+ }
+ other => {
+ internal_err!("Unsupported data type {other:?} for function
regexp_match")
+ }
+ }
+}
+pub fn regexp_match<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef>
{
+ match args.len() {
+ 2 => {
+ let values = as_generic_string_array::<T>(&args[0])?;
+ let regex = as_generic_string_array::<T>(&args[1])?;
+ regexp::regexp_match(values, regex, None)
+ .map_err(|e| arrow_datafusion_err!(e))
+ }
+ 3 => {
+ let values = as_generic_string_array::<T>(&args[0])?;
+ let regex = as_generic_string_array::<T>(&args[1])?;
+ let flags = as_generic_string_array::<T>(&args[2])?;
+
+ if flags.iter().any(|s| s == Some("g")) {
+ return plan_err!("regexp_match() does not support the
\"global\" option")
+ }
+
+ regexp::regexp_match(values, regex, Some(flags))
+ .map_err(|e| arrow_datafusion_err!(e))
+ }
+ other => exec_err!(
+ "regexp_match was called with {other} arguments. It requires at
least 2 and at most 3."
+ ),
+ }
+}
diff --git a/datafusion/physical-expr/src/functions.rs
b/datafusion/physical-expr/src/functions.rs
index af079dbd2d..186de0609b 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -243,7 +243,6 @@ where
.collect::<Result<Vec<_>>>()?;
let result = (inner)(&args);
-
if is_scalar {
// If all inputs are scalar, keeps output as scalar
let result = result.and_then(|arr|
ScalarValue::try_from_array(&arr, 0));
@@ -619,29 +618,6 @@ pub fn create_physical_fun(
exec_err!("Unsupported data type {other:?} for function
regexp_like")
}
}),
- BuiltinScalarFunction::RegexpMatch => {
- Arc::new(|args| match args[0].data_type() {
- DataType::Utf8 => {
- let func =
invoke_on_array_if_regex_expressions_feature_flag!(
- regexp_match,
- i32,
- "regexp_match"
- );
- make_scalar_function_inner(func)(args)
- }
- DataType::LargeUtf8 => {
- let func =
invoke_on_array_if_regex_expressions_feature_flag!(
- regexp_match,
- i64,
- "regexp_match"
- );
- make_scalar_function_inner(func)(args)
- }
- other => {
- exec_err!("Unsupported data type {other:?} for function
regexp_match")
- }
- })
- }
BuiltinScalarFunction::RegexpReplace => {
Arc::new(|args| match args[0].data_type() {
DataType::Utf8 => {
@@ -3185,90 +3161,6 @@ mod tests {
Ok(())
}
- #[test]
- #[cfg(feature = "regex_expressions")]
- fn test_regexp_match() -> Result<()> {
- use datafusion_common::cast::{as_list_array, as_string_array};
- let schema = Schema::new(vec![Field::new("a", DataType::Utf8, false)]);
- let execution_props = ExecutionProps::new();
-
- let col_value: ArrayRef = Arc::new(StringArray::from(vec!["aaa-555"]));
- let pattern = lit(r".*-(\d*)");
- let columns: Vec<ArrayRef> = vec![col_value];
- let expr = create_physical_expr_with_type_coercion(
- &BuiltinScalarFunction::RegexpMatch,
- &[col("a", &schema)?, pattern],
- &schema,
- &execution_props,
- )?;
-
- // type is correct
- assert_eq!(
- expr.data_type(&schema)?,
- DataType::List(Arc::new(Field::new("item", DataType::Utf8, true)))
- );
-
- // evaluate works
- let batch = RecordBatch::try_new(Arc::new(schema.clone()), columns)?;
- let result = expr
- .evaluate(&batch)?
- .into_array(batch.num_rows())
- .expect("Failed to convert to array");
-
- // downcast works
- let result = as_list_array(&result)?;
- let first_row = result.value(0);
- let first_row = as_string_array(&first_row)?;
-
- // value is correct
- let expected = "555".to_string();
- assert_eq!(first_row.value(0), expected);
-
- Ok(())
- }
-
- #[test]
- #[cfg(feature = "regex_expressions")]
- fn test_regexp_match_all_literals() -> Result<()> {
- use datafusion_common::cast::{as_list_array, as_string_array};
- let schema = Schema::new(vec![Field::new("a", DataType::Int32,
false)]);
- let execution_props = ExecutionProps::new();
-
- let col_value = lit("aaa-555");
- let pattern = lit(r".*-(\d*)");
- let columns: Vec<ArrayRef> = vec![Arc::new(Int32Array::from(vec![1]))];
- let expr = create_physical_expr_with_type_coercion(
- &BuiltinScalarFunction::RegexpMatch,
- &[col_value, pattern],
- &schema,
- &execution_props,
- )?;
-
- // type is correct
- assert_eq!(
- expr.data_type(&schema)?,
- DataType::List(Arc::new(Field::new("item", DataType::Utf8, true)))
- );
-
- // evaluate works
- let batch = RecordBatch::try_new(Arc::new(schema.clone()), columns)?;
- let result = expr
- .evaluate(&batch)?
- .into_array(batch.num_rows())
- .expect("Failed to convert to array");
-
- // downcast works
- let result = as_list_array(&result)?;
- let first_row = result.value(0);
- let first_row = as_string_array(&first_row)?;
-
- // value is correct
- let expected = "555".to_string();
- assert_eq!(first_row.value(0), expected);
-
- Ok(())
- }
-
// Helper function just for testing.
// Returns `expressions` coerced to types compatible with
// `signature`, if possible.
diff --git a/datafusion/proto/proto/datafusion.proto
b/datafusion/proto/proto/datafusion.proto
index 2d729ffc5b..1f659469aa 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -568,7 +568,7 @@ enum ScalarFunction {
Tan = 18;
Trunc = 19;
Array = 20;
- RegexpMatch = 21;
+ // RegexpMatch = 21;
BitLength = 22;
Btrim = 23;
CharacterLength = 24;
diff --git a/datafusion/proto/src/generated/pbjson.rs
b/datafusion/proto/src/generated/pbjson.rs
index 5f05b8546f..8959dd37cf 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -22341,7 +22341,6 @@ impl serde::Serialize for ScalarFunction {
Self::Tan => "Tan",
Self::Trunc => "Trunc",
Self::Array => "Array",
- Self::RegexpMatch => "RegexpMatch",
Self::BitLength => "BitLength",
Self::Btrim => "Btrim",
Self::CharacterLength => "CharacterLength",
@@ -22483,7 +22482,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Tan",
"Trunc",
"Array",
- "RegexpMatch",
"BitLength",
"Btrim",
"CharacterLength",
@@ -22654,7 +22652,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Tan" => Ok(ScalarFunction::Tan),
"Trunc" => Ok(ScalarFunction::Trunc),
"Array" => Ok(ScalarFunction::Array),
- "RegexpMatch" => Ok(ScalarFunction::RegexpMatch),
"BitLength" => Ok(ScalarFunction::BitLength),
"Btrim" => Ok(ScalarFunction::Btrim),
"CharacterLength" => Ok(ScalarFunction::CharacterLength),
diff --git a/datafusion/proto/src/generated/prost.rs
b/datafusion/proto/src/generated/prost.rs
index 252089d5c1..09152d99c1 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2656,7 +2656,7 @@ pub enum ScalarFunction {
Tan = 18,
Trunc = 19,
Array = 20,
- RegexpMatch = 21,
+ /// RegexpMatch = 21;
BitLength = 22,
Btrim = 23,
CharacterLength = 24,
@@ -2798,7 +2798,6 @@ impl ScalarFunction {
ScalarFunction::Tan => "Tan",
ScalarFunction::Trunc => "Trunc",
ScalarFunction::Array => "Array",
- ScalarFunction::RegexpMatch => "RegexpMatch",
ScalarFunction::BitLength => "BitLength",
ScalarFunction::Btrim => "Btrim",
ScalarFunction::CharacterLength => "CharacterLength",
@@ -2934,7 +2933,6 @@ impl ScalarFunction {
"Tan" => Some(Self::Tan),
"Trunc" => Some(Self::Trunc),
"Array" => Some(Self::Array),
- "RegexpMatch" => Some(Self::RegexpMatch),
"BitLength" => Some(Self::BitLength),
"Btrim" => Some(Self::Btrim),
"CharacterLength" => Some(Self::CharacterLength),
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs
b/datafusion/proto/src/logical_plan/from_proto.rs
index acfa043b88..e8059482b1 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -61,11 +61,11 @@ use datafusion_expr::{
instr, iszero, lcm, left, levenshtein, ln, log, log10, log2,
logical_plan::{PlanType, StringifiedPlan},
lower, lpad, ltrim, md5, nanvl, now, octet_length, overlay, pi, power,
radians,
- random, regexp_like, regexp_match, regexp_replace, repeat, replace,
reverse, right,
- round, rpad, rtrim, sha224, sha256, sha384, sha512, signum, sin, sinh,
split_part,
- sqrt, starts_with, string_to_array, strpos, struct_fun, substr,
substr_index,
- substring, tan, tanh, to_hex, translate, trim, trunc, upper, uuid,
AggregateFunction,
- Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, Case,
Cast, Expr,
+ random, regexp_like, regexp_replace, repeat, replace, reverse, right,
round, rpad,
+ rtrim, sha224, sha256, sha384, sha512, signum, sin, sinh, split_part, sqrt,
+ starts_with, string_to_array, strpos, struct_fun, substr, substr_index,
substring,
+ tan, tanh, to_hex, translate, trim, trunc, upper, uuid, AggregateFunction,
Between,
+ BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, Case, Cast, Expr,
GetFieldAccess, GetIndexedField, GroupingSet,
GroupingSet::GroupingSets,
JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame,
WindowFrameBound,
@@ -535,7 +535,6 @@ impl From<&protobuf::ScalarFunction> for
BuiltinScalarFunction {
ScalarFunction::Lpad => Self::Lpad,
ScalarFunction::Random => Self::Random,
ScalarFunction::RegexpLike => Self::RegexpLike,
- ScalarFunction::RegexpMatch => Self::RegexpMatch,
ScalarFunction::RegexpReplace => Self::RegexpReplace,
ScalarFunction::Repeat => Self::Repeat,
ScalarFunction::Replace => Self::Replace,
@@ -1638,12 +1637,6 @@ pub fn parse_expr(
.map(|expr| parse_expr(expr, registry))
.collect::<Result<Vec<_>, _>>()?,
)),
- ScalarFunction::RegexpMatch => Ok(regexp_match(
- args.to_owned()
- .iter()
- .map(|expr| parse_expr(expr, registry))
- .collect::<Result<Vec<_>, _>>()?,
- )),
ScalarFunction::RegexpReplace => Ok(regexp_replace(
args.to_owned()
.iter()
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs
b/datafusion/proto/src/logical_plan/to_proto.rs
index d19830db98..6f126729cb 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1518,7 +1518,6 @@ impl TryFrom<&BuiltinScalarFunction> for
protobuf::ScalarFunction {
BuiltinScalarFunction::Random => Self::Random,
BuiltinScalarFunction::Uuid => Self::Uuid,
BuiltinScalarFunction::RegexpLike => Self::RegexpLike,
- BuiltinScalarFunction::RegexpMatch => Self::RegexpMatch,
BuiltinScalarFunction::RegexpReplace => Self::RegexpReplace,
BuiltinScalarFunction::Repeat => Self::Repeat,
BuiltinScalarFunction::Replace => Self::Replace,
diff --git a/datafusion/sqllogictest/test_files/regexp.slt
b/datafusion/sqllogictest/test_files/regexp.slt
index 1e951e2962..a80b08c41e 100644
--- a/datafusion/sqllogictest/test_files/regexp.slt
+++ b/datafusion/sqllogictest/test_files/regexp.slt
@@ -220,6 +220,12 @@ SELECT regexp_match('(?<=[A-Z]\w )Smith', 'John Smith',
'i');
----
NULL
+# ported test
+query ?
+SELECT regexp_match('aaa-555', '.*-(\d*)');
+----
+[555]
+
#
# regexp_replace tests
#
@@ -300,4 +306,4 @@ SELECT regexp_replace(arrow_cast('foobar',
'Dictionary(Int32, Utf8)'), 'bar', 'x
fooxx
statement ok
-drop table t;
\ No newline at end of file
+drop table t;