This is an automated email from the ASF dual-hosted git repository.
jonah pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 3ef90dd4ca feat: optional args for regexp_* UDFs (#10514)
3ef90dd4ca is described below
commit 3ef90dd4ca77a9f8d43911f38dcb6d97bb62176c
Author: Michael J Ward <[email protected]>
AuthorDate: Tue May 14 22:20:29 2024 -0500
feat: optional args for regexp_* UDFs (#10514)
* refactor: declare `regex::expr_fn`s explicitly instead of using the macro
This does not change any functionality but enables adding optional
arguments to each fn.
* feat: make `flag` argument optional for expr_fn::regexp_replace
This also updates the argument names to match what is used in the inner
impl.
Ref #10512
* feat: add optional `flags` argument to expr_fn::regexp_match
This also updates the argument names to match the inner impl.
Ref #10512
* feat: add optional `flags` argument to expr_fn::regexp_like
This also updates the argument names to match the inner impl.
Ref #10512
* docs: clean up doc comments for expr_fn::regexp_*
* refactor: use datafusion_expr::Expr declaration
---
.../core/tests/dataframe/dataframe_functions.rs | 51 ++++++++++++++++++++--
datafusion/functions/src/regex/mod.rs | 50 +++++++++++++++++----
2 files changed, 89 insertions(+), 12 deletions(-)
diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs
index 2ffac6a775..7d155bb16c 100644
--- a/datafusion/core/tests/dataframe/dataframe_functions.rs
+++ b/datafusion/core/tests/dataframe/dataframe_functions.rs
@@ -597,7 +597,7 @@ async fn test_fn_md5() -> Result<()> {
#[tokio::test]
#[cfg(feature = "unicode_expressions")]
async fn test_fn_regexp_like() -> Result<()> {
- let expr = regexp_like(col("a"), lit("[a-z]"));
+ let expr = regexp_like(col("a"), lit("[a-z]"), None);
let expected = [
"+-----------------------------------+",
@@ -612,13 +612,28 @@ async fn test_fn_regexp_like() -> Result<()> {
assert_fn_batches!(expr, expected);
+ let expr = regexp_like(col("a"), lit("abc"), Some(lit("i")));
+
+ let expected = [
+ "+-------------------------------------------+",
+ "| regexp_like(test.a,Utf8(\"abc\"),Utf8(\"i\")) |",
+ "+-------------------------------------------+",
+ "| true |",
+ "| true |",
+ "| false |",
+ "| true |",
+ "+-------------------------------------------+",
+ ];
+
+ assert_fn_batches!(expr, expected);
+
Ok(())
}
#[tokio::test]
#[cfg(feature = "unicode_expressions")]
async fn test_fn_regexp_match() -> Result<()> {
- let expr = regexp_match(col("a"), lit("[a-z]"));
+ let expr = regexp_match(col("a"), lit("[a-z]"), None);
let expected = [
"+------------------------------------+",
@@ -633,13 +648,28 @@ async fn test_fn_regexp_match() -> Result<()> {
assert_fn_batches!(expr, expected);
+ let expr = regexp_match(col("a"), lit("[A-Z]"), Some(lit("i")));
+
+ let expected = [
+ "+----------------------------------------------+",
+ "| regexp_match(test.a,Utf8(\"[A-Z]\"),Utf8(\"i\")) |",
+ "+----------------------------------------------+",
+ "| [a] |",
+ "| [a] |",
+ "| [C] |",
+ "| [A] |",
+ "+----------------------------------------------+",
+ ];
+
+ assert_fn_batches!(expr, expected);
+
Ok(())
}
#[tokio::test]
#[cfg(feature = "unicode_expressions")]
async fn test_fn_regexp_replace() -> Result<()> {
- let expr = regexp_replace(col("a"), lit("[a-z]"), lit("x"), lit("g"));
+ let expr = regexp_replace(col("a"), lit("[a-z]"), lit("x"), Some(lit("g")));
let expected = [
"+----------------------------------------------------------+",
@@ -654,6 +684,21 @@ async fn test_fn_regexp_replace() -> Result<()> {
assert_fn_batches!(expr, expected);
+ let expr = regexp_replace(col("a"), lit("[a-z]"), lit("x"), None);
+
+ let expected = [
+ "+------------------------------------------------+",
+ "| regexp_replace(test.a,Utf8(\"[a-z]\"),Utf8(\"x\")) |",
+ "+------------------------------------------------+",
+ "| xbcDEF |",
+ "| xbc123 |",
+ "| CBAxef |",
+ "| 123AxcDef |",
+ "+------------------------------------------------+",
+ ];
+
+ assert_fn_batches!(expr, expected);
+
Ok(())
}
diff --git a/datafusion/functions/src/regex/mod.rs b/datafusion/functions/src/regex/mod.rs
index 5c12d4559e..884db24d9e 100644
--- a/datafusion/functions/src/regex/mod.rs
+++ b/datafusion/functions/src/regex/mod.rs
@@ -28,12 +28,44 @@ make_udf_function!(
REGEXP_REPLACE,
regexp_replace
);
-export_functions!((
- regexp_match,
- input_arg1 input_arg2,
- "returns a list of regular expression matches in a string. "
-),(
- regexp_like,
- input_arg1 input_arg2,
- "Returns true if a has at least one match in a string,false otherwise."
-),(regexp_replace, arg1 arg2 arg3 arg4, "Replaces substrings in a string that match"));
+
+pub mod expr_fn {
+ use datafusion_expr::Expr;
+
+ /// Returns a list of regular expression matches in a string.
+ pub fn regexp_match(values: Expr, regex: Expr, flags: Option<Expr>) -> Expr {
+ let mut args = vec![values, regex];
+ if let Some(flags) = flags {
+ args.push(flags);
+ };
+ super::regexp_match().call(args)
+ }
+
+ /// Returns true if a has at least one match in a string, false otherwise.
+ pub fn regexp_like(values: Expr, regex: Expr, flags: Option<Expr>) -> Expr {
+ let mut args = vec![values, regex];
+ if let Some(flags) = flags {
+ args.push(flags);
+ };
+ super::regexp_like().call(args)
+ }
+
+ /// Replaces substrings in a string that match.
+ pub fn regexp_replace(
+ string: Expr,
+ pattern: Expr,
+ replacement: Expr,
+ flags: Option<Expr>,
+ ) -> Expr {
+ let mut args = vec![string, pattern, replacement];
+ if let Some(flags) = flags {
+ args.push(flags);
+ };
+ super::regexp_replace().call(args)
+ }
+}
+
+#[doc = r" Return a list of all functions in this package"]
+pub fn functions() -> Vec<std::sync::Arc<datafusion_expr::ScalarUDF>> {
+ vec![regexp_match(), regexp_like(), regexp_replace()]
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]