This is an automated email from the ASF dual-hosted git repository.

jonah pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 3ef90dd4ca feat: optional args for regexp_* UDFs (#10514)
3ef90dd4ca is described below

commit 3ef90dd4ca77a9f8d43911f38dcb6d97bb62176c
Author: Michael J Ward <[email protected]>
AuthorDate: Tue May 14 22:20:29 2024 -0500

    feat: optional args for regexp_* UDFs (#10514)
    
    * refactor: declare `regex::expr_fn`s explicitly instead of using the macro
    
    This does not change any functionality but enables adding optional arguments to each fn.
    
    * feat: make `flag` argument optional for expr_fn::regexp_replace
    
    This also updates the argument names to match what is used in the inner impl.
    
    Ref #10512
    
    * feat: add optional `flags` argument to expr_fn::regexp_match
    
    This also updates the argument names to match the inner impl.
    
    Ref #10512
    
    * feat: add optional `flags` argument to expr_fn::regexp_like
    
    This also updates the argument names to match the inner impl.
    
    Ref #10512
    
    * docs: clean up doc comments for expr_fn::regexp_*
    
    * refactor: use datafusion_expr::Expr declaration
---
 .../core/tests/dataframe/dataframe_functions.rs    | 51 ++++++++++++++++++++--
 datafusion/functions/src/regex/mod.rs              | 50 +++++++++++++++++----
 2 files changed, 89 insertions(+), 12 deletions(-)

diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs
index 2ffac6a775..7d155bb16c 100644
--- a/datafusion/core/tests/dataframe/dataframe_functions.rs
+++ b/datafusion/core/tests/dataframe/dataframe_functions.rs
@@ -597,7 +597,7 @@ async fn test_fn_md5() -> Result<()> {
 #[tokio::test]
 #[cfg(feature = "unicode_expressions")]
 async fn test_fn_regexp_like() -> Result<()> {
-    let expr = regexp_like(col("a"), lit("[a-z]"));
+    let expr = regexp_like(col("a"), lit("[a-z]"), None);
 
     let expected = [
         "+-----------------------------------+",
@@ -612,13 +612,28 @@ async fn test_fn_regexp_like() -> Result<()> {
 
     assert_fn_batches!(expr, expected);
 
+    let expr = regexp_like(col("a"), lit("abc"), Some(lit("i")));
+
+    let expected = [
+        "+-------------------------------------------+",
+        "| regexp_like(test.a,Utf8(\"abc\"),Utf8(\"i\")) |",
+        "+-------------------------------------------+",
+        "| true                                      |",
+        "| true                                      |",
+        "| false                                     |",
+        "| true                                      |",
+        "+-------------------------------------------+",
+    ];
+
+    assert_fn_batches!(expr, expected);
+
     Ok(())
 }
 
 #[tokio::test]
 #[cfg(feature = "unicode_expressions")]
 async fn test_fn_regexp_match() -> Result<()> {
-    let expr = regexp_match(col("a"), lit("[a-z]"));
+    let expr = regexp_match(col("a"), lit("[a-z]"), None);
 
     let expected = [
         "+------------------------------------+",
@@ -633,13 +648,28 @@ async fn test_fn_regexp_match() -> Result<()> {
 
     assert_fn_batches!(expr, expected);
 
+    let expr = regexp_match(col("a"), lit("[A-Z]"), Some(lit("i")));
+
+    let expected = [
+        "+----------------------------------------------+",
+        "| regexp_match(test.a,Utf8(\"[A-Z]\"),Utf8(\"i\")) |",
+        "+----------------------------------------------+",
+        "| [a]                                          |",
+        "| [a]                                          |",
+        "| [C]                                          |",
+        "| [A]                                          |",
+        "+----------------------------------------------+",
+    ];
+
+    assert_fn_batches!(expr, expected);
+
     Ok(())
 }
 
 #[tokio::test]
 #[cfg(feature = "unicode_expressions")]
 async fn test_fn_regexp_replace() -> Result<()> {
-    let expr = regexp_replace(col("a"), lit("[a-z]"), lit("x"), lit("g"));
+    let expr = regexp_replace(col("a"), lit("[a-z]"), lit("x"), Some(lit("g")));
 
     let expected = [
         "+----------------------------------------------------------+",
@@ -654,6 +684,21 @@ async fn test_fn_regexp_replace() -> Result<()> {
 
     assert_fn_batches!(expr, expected);
 
+    let expr = regexp_replace(col("a"), lit("[a-z]"), lit("x"), None);
+
+    let expected = [
+        "+------------------------------------------------+",
+        "| regexp_replace(test.a,Utf8(\"[a-z]\"),Utf8(\"x\")) |",
+        "+------------------------------------------------+",
+        "| xbcDEF                                         |",
+        "| xbc123                                         |",
+        "| CBAxef                                         |",
+        "| 123AxcDef                                      |",
+        "+------------------------------------------------+",
+    ];
+
+    assert_fn_batches!(expr, expected);
+
     Ok(())
 }
 
diff --git a/datafusion/functions/src/regex/mod.rs b/datafusion/functions/src/regex/mod.rs
index 5c12d4559e..884db24d9e 100644
--- a/datafusion/functions/src/regex/mod.rs
+++ b/datafusion/functions/src/regex/mod.rs
@@ -28,12 +28,44 @@ make_udf_function!(
     REGEXP_REPLACE,
     regexp_replace
 );
-export_functions!((
-    regexp_match,
-    input_arg1 input_arg2,
-    "returns a list of regular expression matches in a string. "
-),(
-    regexp_like,
-    input_arg1 input_arg2,
-    "Returns true if a has at least one match in a string,false otherwise."
-),(regexp_replace, arg1 arg2 arg3 arg4, "Replaces substrings in a string that match"));
+
+pub mod expr_fn {
+    use datafusion_expr::Expr;
+
+    /// Returns a list of regular expression matches in a string.
+    pub fn regexp_match(values: Expr, regex: Expr, flags: Option<Expr>) -> Expr {
+        let mut args = vec![values, regex];
+        if let Some(flags) = flags {
+            args.push(flags);
+        };
+        super::regexp_match().call(args)
+    }
+
+    /// Returns true if a has at least one match in a string, false otherwise.
+    pub fn regexp_like(values: Expr, regex: Expr, flags: Option<Expr>) -> Expr {
+        let mut args = vec![values, regex];
+        if let Some(flags) = flags {
+            args.push(flags);
+        };
+        super::regexp_like().call(args)
+    }
+
+    /// Replaces substrings in a string that match.
+    pub fn regexp_replace(
+        string: Expr,
+        pattern: Expr,
+        replacement: Expr,
+        flags: Option<Expr>,
+    ) -> Expr {
+        let mut args = vec![string, pattern, replacement];
+        if let Some(flags) = flags {
+            args.push(flags);
+        };
+        super::regexp_replace().call(args)
+    }
+}
+
+#[doc = r" Return a list of all functions in this package"]
+pub fn functions() -> Vec<std::sync::Arc<datafusion_expr::ScalarUDF>> {
+    vec![regexp_match(), regexp_like(), regexp_replace()]
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to