This is an automated email from the ASF dual-hosted git repository.

timsaucer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 7c1c08f8 feat: expose regex_count function (#1066)
7c1c08f8 is described below

commit 7c1c08f8617ac97a2568eb0664e9d4ee30fceba9
Author: Nirnay Roy <[email protected]>
AuthorDate: Sat Mar 15 17:05:05 2025 +0530

    feat: expose regex_count function (#1066)
    
    * Added wrapper for regex_count function
    
    * fix comment
    
    ---------
    
    Co-authored-by: Nirnay Roy <[email protected]>
---
 python/datafusion/functions.py | 18 ++++++++++++++++++
 python/tests/test_functions.py |  4 ++++
 src/functions.rs               | 20 ++++++++++++++++++++
 3 files changed, 42 insertions(+)

diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index 0cc7434c..26bac149 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -217,6 +217,7 @@ __all__ = [
     "random",
     "range",
     "rank",
+    "regexp_count",
     "regexp_like",
     "regexp_match",
     "regexp_replace",
@@ -779,6 +780,23 @@ def regexp_replace(
     return Expr(f.regexp_replace(string.expr, pattern.expr, replacement.expr, flags))
 
 
+def regexp_count(
+    string: Expr, pattern: Expr, start: Expr | None = None, flags: Expr | None = None
+) -> Expr:
+    """Returns the number of matches of ``pattern`` in ``string``.
+
+    ``start`` is the optional position (the first position is 1) at which the
+    search begins. Omitted ``start``/``flags`` are forwarded as ``None``.
+    """
+    # Unwrap each optional argument to its inner expression; ``None`` passes
+    # through unchanged so the binding sees the argument as absent.
+    start_expr = start.expr if start is not None else None
+    flags_expr = flags.expr if flags is not None else None
+    return Expr(
+        f.regexp_count(string.expr, pattern.expr, start_expr, flags_expr)
+    )
+
+
 def repeat(string: Expr, n: Expr) -> Expr:
     """Repeats the ``string`` to ``n`` times."""
     return Expr(f.repeat(string.expr, n.expr))
diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py
index ed88a16e..161e1e3b 100644
--- a/python/tests/test_functions.py
+++ b/python/tests/test_functions.py
@@ -740,6 +740,10 @@ def test_array_function_obj_tests(stmt, py_expr):
             f.regexp_replace(column("a"), literal("(ell|orl)"), literal("-")),
             pa.array(["H-o", "W-d", "!"]),
         ),
+        (
+            f.regexp_count(column("a"), literal("(ell|orl)"), literal(1)),
+            pa.array([1, 1, 0], type=pa.int64()),
+        ),
     ],
 )
 def test_string_functions(df, function, expected_result):
diff --git a/src/functions.rs b/src/functions.rs
index 6a8abb18..8fac239b 100644
--- a/src/functions.rs
+++ b/src/functions.rs
@@ -173,6 +173,25 @@ fn regexp_replace(
     )
     .into())
 }
+
+/// Returns the number of matches of `pattern` found in `string`.
+///
+/// `start` and `flags` are optional; each is forwarded as `None` when omitted.
+#[pyfunction]
+#[pyo3(signature = (string, pattern, start=None, flags=None))]
+fn regexp_count(
+    string: PyExpr,
+    pattern: PyExpr,
+    start: Option<PyExpr>,
+    flags: Option<PyExpr>,
+) -> PyResult<PyExpr> {
+    // Unwrap the optional Python-side wrappers into their inner expressions.
+    let start = start.map(|s| s.expr);
+    let flags = flags.map(|f| f.expr);
+    let counted = functions::expr_fn::regexp_count(string.expr, pattern.expr, start, flags);
+    Ok(counted.into())
+}
+
 /// Creates a new Sort Expr
 #[pyfunction]
 fn order_by(expr: PyExpr, asc: bool, nulls_first: bool) -> PyResult<PySortExpr> {
@@ -943,6 +962,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_wrapped(wrap_pyfunction!(power))?;
     m.add_wrapped(wrap_pyfunction!(radians))?;
     m.add_wrapped(wrap_pyfunction!(random))?;
+    m.add_wrapped(wrap_pyfunction!(regexp_count))?;
     m.add_wrapped(wrap_pyfunction!(regexp_like))?;
     m.add_wrapped(wrap_pyfunction!(regexp_match))?;
     m.add_wrapped(wrap_pyfunction!(regexp_replace))?;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to