This is an automated email from the ASF dual-hosted git repository.
timsaucer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git
The following commit(s) were added to refs/heads/main by this push:
new 7c1c08f8 feat: expose regex_count function (#1066)
7c1c08f8 is described below
commit 7c1c08f8617ac97a2568eb0664e9d4ee30fceba9
Author: Nirnay Roy <[email protected]>
AuthorDate: Sat Mar 15 17:05:05 2025 +0530
feat: expose regex_count function (#1066)
* Added wrapper for regex_count function
* fix comment
---------
Co-authored-by: Nirnay Roy <[email protected]>
---
python/datafusion/functions.py | 18 ++++++++++++++++++
python/tests/test_functions.py | 4 ++++
src/functions.rs | 20 ++++++++++++++++++++
3 files changed, 42 insertions(+)
diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index 0cc7434c..26bac149 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -217,6 +217,7 @@ __all__ = [
"random",
"range",
"rank",
+ "regexp_count",
"regexp_like",
"regexp_match",
"regexp_replace",
@@ -779,6 +780,23 @@ def regexp_replace(
return Expr(f.regexp_replace(string.expr, pattern.expr, replacement.expr,
flags))
+def regexp_count(
+    string: Expr, pattern: Expr, start: Expr | None = None, flags: Expr | None = None
+) -> Expr:
+    """Returns the number of matches in a string.
+
+    Optional start position (the first position is 1) to search for the regular
+    expression. Both ``start`` and ``flags`` may be left as ``None``, in which
+    case ``None`` is passed through to the underlying binding.
+    """
+    # Unwrap each optional argument to the raw expression held in ``.expr``. An
+    # omitted argument is forwarded as ``None`` rather than substituted with the
+    # class-level ``Expr.expr`` attribute, which is not an expression instance.
+    start_expr = start.expr if start is not None else None
+    flags_expr = flags.expr if flags is not None else None
+    return Expr(f.regexp_count(string.expr, pattern.expr, start_expr, flags_expr))
+
+
def repeat(string: Expr, n: Expr) -> Expr:
"""Repeats the ``string`` to ``n`` times."""
return Expr(f.repeat(string.expr, n.expr))
diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py
index ed88a16e..161e1e3b 100644
--- a/python/tests/test_functions.py
+++ b/python/tests/test_functions.py
@@ -740,6 +740,10 @@ def test_array_function_obj_tests(stmt, py_expr):
f.regexp_replace(column("a"), literal("(ell|orl)"), literal("-")),
pa.array(["H-o", "W-d", "!"]),
),
+ (
+ f.regexp_count(column("a"), literal("(ell|orl)"), literal(1)),
+ pa.array([1, 1, 0], type=pa.int64()),
+ ),
],
)
def test_string_functions(df, function, expected_result):
diff --git a/src/functions.rs b/src/functions.rs
index 6a8abb18..8fac239b 100644
--- a/src/functions.rs
+++ b/src/functions.rs
@@ -173,6 +173,25 @@ fn regexp_replace(
)
.into())
}
+
+/// Returns the number of matches found in the string.
+///
+/// `start` and `flags` are optional on the Python side; an omitted argument is
+/// forwarded to `functions::expr_fn::regexp_count` as `None`.
+#[pyfunction]
+#[pyo3(signature = (string, pattern, start=None, flags=None))]
+fn regexp_count(
+    string: PyExpr,
+    pattern: PyExpr,
+    start: Option<PyExpr>,
+    flags: Option<PyExpr>,
+) -> PyResult<PyExpr> {
+    let start = start.map(|wrapped| wrapped.expr);
+    let flags = flags.map(|wrapped| wrapped.expr);
+    let counted = functions::expr_fn::regexp_count(string.expr, pattern.expr, start, flags);
+    Ok(counted.into())
+}
+
/// Creates a new Sort Expr
#[pyfunction]
fn order_by(expr: PyExpr, asc: bool, nulls_first: bool) ->
PyResult<PySortExpr> {
@@ -943,6 +962,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) ->
PyResult<()> {
m.add_wrapped(wrap_pyfunction!(power))?;
m.add_wrapped(wrap_pyfunction!(radians))?;
m.add_wrapped(wrap_pyfunction!(random))?;
+ m.add_wrapped(wrap_pyfunction!(regexp_count))?;
m.add_wrapped(wrap_pyfunction!(regexp_like))?;
m.add_wrapped(wrap_pyfunction!(regexp_match))?;
m.add_wrapped(wrap_pyfunction!(regexp_replace))?;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]