This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch maint-6.0.x
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 03197788f8d47dcff577761a574c2ab2d48f4a24
Author: Dragos Moldovan-Grünfeld <[email protected]>
AuthorDate: Fri Oct 22 15:47:33 2021 +0100

    ARROW-13156 [R] bindings for str_count
    
    Closes #11473 from dragosmg/ARROW-13156_str_count_bindings
    
    Lead-authored-by: Dragos Moldovan-Grünfeld <[email protected]>
    Co-authored-by: Dragoș Moldovan-Grünfeld <[email protected]>
    Signed-off-by: Nic Crane <[email protected]>
---
 r/R/dplyr-functions.R                      | 13 +++++++
 r/tests/testthat/test-dplyr-funcs-string.R | 60 ++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)

diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index dbb9d5f..717cdae 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -645,6 +645,19 @@ nse_funcs$str_ends <- function(string, pattern, negate = FALSE) {
   out
 }
 
+nse_funcs$str_count <- function(string, pattern) {
+  opts <- get_stringr_pattern_options(enexpr(pattern))
+  if (!is.string(pattern)) {
+    arrow_not_supported("`pattern` must be a length 1 character vector; other values")
+  }
+  arrow_fun <- ifelse(opts$fixed, "count_substring", "count_substring_regex")
+  Expression$create(
+    arrow_fun,
+    string,
+    options = list(pattern = opts$pattern, ignore_case = opts$ignore_case)
+  )
+}
+
 # String function helpers
 
 # format `pattern` as needed for case insensitivity and literal matching by RE2
diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R
index dd59b5a..333735b 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -1336,3 +1336,63 @@ test_that("str_starts, str_ends, startsWith, endsWith", {
     df
   )
 })
+
+test_that("str_count", {
+  df <- tibble(
+    cities = c("Kolkata", "Dar es Salaam", "Tel Aviv", "San Antonio", "Cluj Napoca", "Bern", "Bogota"),
+    dots = c("a.", "...", ".a.a", "a..a.", "ab...", "dse....", ".f..d..")
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(a_count = str_count(cities, pattern = "a")) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(p_count = str_count(cities, pattern = "d")) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(p_count = str_count(cities,
+        pattern = regex("d", ignore_case = TRUE)
+      )) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(e_count = str_count(cities, pattern = "u")) %>%
+      collect(),
+    df
+  )
+
+  # nse_funcs$str_count() is not vectorised over pattern
+  expect_dplyr_equal(
+    input %>%
+      mutate(let_count = str_count(cities, pattern = c("a", "b", "e", "g", "p", "n", "s"))) %>%
+      collect(),
+    df,
+    warning = TRUE
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(dots_count = str_count(dots, ".")) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(dots_count = str_count(dots, fixed("."))) %>%
+      collect(),
+    df
+  )
+})

Reply via email to