This is an automated email from the ASF dual-hosted git repository.

thisisnic pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 180b1c2ac8 ARROW-14832: [R] Implement bindings for stringr::str_remove and stringr::str_remove_all (#14644)
180b1c2ac8 is described below

commit 180b1c2ac8aebe9055622e7d9b947916f4cc84ae
Author: Nic Crane <[email protected]>
AuthorDate: Thu Dec 15 14:51:29 2022 +0000

    ARROW-14832: [R] Implement bindings for stringr::str_remove and stringr::str_remove_all (#14644)
    
    Authored-by: Nic Crane <[email protected]>
    Signed-off-by: Nic Crane <[email protected]>
---
 r/DESCRIPTION                              |  2 +-
 r/R/dplyr-funcs-doc.R                      |  6 ++--
 r/R/dplyr-funcs-string.R                   | 16 +++++++++
 r/man/acero.Rd                             |  6 ++--
 r/tests/testthat/test-dplyr-funcs-string.R | 56 ++++++++++++++++++++++++++++++
 5 files changed, 81 insertions(+), 5 deletions(-)

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 9bb9976355..da481c7dc5 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -41,7 +41,7 @@ Imports:
     utils,
     vctrs
 Roxygen: list(markdown = TRUE, r6 = FALSE, load = "source")
-RoxygenNote: 7.2.2
+RoxygenNote: 7.2.3
 Config/testthat/edition: 3
 Suggests:
     blob,
diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
index b8337e3069..1798e9a96c 100644
--- a/r/R/dplyr-funcs-doc.R
+++ b/r/R/dplyr-funcs-doc.R
@@ -21,7 +21,7 @@
 #'
 #' The `arrow` package contains methods for 37 `dplyr` table functions, many of
 #' which are "verbs" that do transformations to one or more tables.
-#' The package also has mappings of 207 R functions to the corresponding
+#' The package also has mappings of 209 R functions to the corresponding
 #' functions in the Arrow compute library. These allow you to write code inside
 #' of `dplyr` methods that call R functions, including many in packages like
 #' `stringr` and `lubridate`, and they will get translated to Arrow and run
@@ -83,7 +83,7 @@
 #' Functions can be called either as `pkg::fun()` or just `fun()`, i.e. both
 #' `str_sub()` and `stringr::str_sub()` work.
 #'
-#' In addition to these functions, you can call any of Arrow's 243 compute
+#' In addition to these functions, you can call any of Arrow's 246 compute
 #' functions directly. Arrow has many functions that don't map to an existing R
 #' function. In other cases where there is an R function mapping, you can still
 #' call the Arrow function directly if you don't want the adaptations that the R
@@ -317,6 +317,8 @@
 #' * [`str_length()`][stringr::str_length()]
 #' * `str_like()`: not yet in a released version of `stringr`, but it is supported in `arrow`
 #' * [`str_pad()`][stringr::str_pad()]
+#' * [`str_remove()`][stringr::str_remove()]
+#' * [`str_remove_all()`][stringr::str_remove_all()]
 #' * [`str_replace()`][stringr::str_replace()]
 #' * [`str_replace_all()`][stringr::str_replace_all()]
 #' * [`str_split()`][stringr::str_split()]: Case-insensitive string splitting and splitting into 0 parts not supported
diff --git a/r/R/dplyr-funcs-string.R b/r/R/dplyr-funcs-string.R
index 7a5d747475..16ad35148e 100644
--- a/r/R/dplyr-funcs-string.R
+++ b/r/R/dplyr-funcs-string.R
@@ -360,10 +360,26 @@ register_bindings_string_regex <- function() {
     }
   }
 
+  arrow_stringr_string_remove_function <- function(max_replacements) {
+    force(max_replacements)
+    function(string, pattern) {
+      opts <- get_stringr_pattern_options(enexpr(pattern))
+      arrow_r_string_replace_function(max_replacements)(
+        pattern = opts$pattern,
+        replacement = "",
+        x = string,
+        ignore.case = opts$ignore_case,
+        fixed = opts$fixed
+      )
+    }
+  }
+
   register_binding("base::sub", arrow_r_string_replace_function(1L))
   register_binding("base::gsub", arrow_r_string_replace_function(-1L))
   register_binding("stringr::str_replace", arrow_stringr_string_replace_function(1L))
   register_binding("stringr::str_replace_all", arrow_stringr_string_replace_function(-1L))
+  register_binding("stringr::str_remove", arrow_stringr_string_remove_function(1L))
+  register_binding("stringr::str_remove_all", arrow_stringr_string_remove_function(-1L))
 
   register_binding("base::strsplit", function(x, split, fixed = FALSE, perl = FALSE,
                                               useBytes = FALSE) {
diff --git a/r/man/acero.Rd b/r/man/acero.Rd
index 84adf081de..9931d3df92 100644
--- a/r/man/acero.Rd
+++ b/r/man/acero.Rd
@@ -6,7 +6,7 @@
 \description{
 The \code{arrow} package contains methods for 37 \code{dplyr} table functions, many of
 which are "verbs" that do transformations to one or more tables.
-The package also has mappings of 207 R functions to the corresponding
+The package also has mappings of 209 R functions to the corresponding
 functions in the Arrow compute library. These allow you to write code inside
 of \code{dplyr} methods that call R functions, including many in packages like
 \code{stringr} and \code{lubridate}, and they will get translated to Arrow and run
@@ -68,7 +68,7 @@ can assume that the function works in Acero just as it does in R.
 Functions can be called either as \code{pkg::fun()} or just \code{fun()}, i.e. both
 \code{str_sub()} and \code{stringr::str_sub()} work.
 
-In addition to these functions, you can call any of Arrow's 243 compute
+In addition to these functions, you can call any of Arrow's 246 compute
 functions directly. Arrow has many functions that don't map to an existing R
 function. In other cases where there is an R function mapping, you can still
 call the Arrow function directly if you don't want the adaptations that the R
@@ -319,6 +319,8 @@ Pattern modifiers \code{coll()} and \code{boundary()} are not supported in any f
 \item \code{\link[stringr:str_length]{str_length()}}
 \item \code{str_like()}: not yet in a released version of \code{stringr}, but it is supported in \code{arrow}
 \item \code{\link[stringr:str_pad]{str_pad()}}
+\item \code{\link[stringr:str_remove]{str_remove()}}
+\item \code{\link[stringr:str_remove]{str_remove_all()}}
 \item \code{\link[stringr:str_replace]{str_replace()}}
 \item \code{\link[stringr:str_replace]{str_replace_all()}}
 \item \code{\link[stringr:str_split]{str_split()}}: Case-insensitive string splitting and splitting into 0 parts not supported
diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R
index 346c25ec0e..2e7931a49b 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -1420,3 +1420,59 @@ test_that("str_trim()", {
     tbl
   )
 })
+
+test_that("str_remove and str_remove_all", {
+  df <- tibble(x = c("Foo", "bar"))
+
+  compare_dplyr_binding(
+    .input %>%
+      transmute(x = str_remove_all(x, "^F")) %>%
+      collect(),
+    df
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      transmute(x = str_remove_all(x, regex("^F"))) %>%
+      collect(),
+    df
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(x = str_remove(x, "^F[a-z]{2}")) %>%
+      collect(),
+    df
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      transmute(x = str_remove(x, regex("^f[A-Z]{2}", ignore_case = TRUE))) %>%
+      collect(),
+    df
+  )
+  compare_dplyr_binding(
+    .input %>%
+      transmute(
+        x = str_remove_all(x, fixed("o")),
+        x2 = stringr::str_remove_all(x, fixed("o"))
+      ) %>%
+      collect(),
+    df
+  )
+  compare_dplyr_binding(
+    .input %>%
+      transmute(
+        x = str_remove(x, fixed("O")),
+        x2 = stringr::str_remove(x, fixed("O"))
+      ) %>%
+      collect(),
+    df
+  )
+  compare_dplyr_binding(
+    .input %>%
+      transmute(x = str_remove(x, fixed("O", ignore_case = TRUE))) %>%
+      collect(),
+    df
+  )
+})

Reply via email to