This is an automated email from the ASF dual-hosted git repository.
thisisnic pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 180b1c2ac8 ARROW-14832: [R] Implement bindings for stringr::str_remove
and stringr::str_remove_all (#14644)
180b1c2ac8 is described below
commit 180b1c2ac8aebe9055622e7d9b947916f4cc84ae
Author: Nic Crane <[email protected]>
AuthorDate: Thu Dec 15 14:51:29 2022 +0000
ARROW-14832: [R] Implement bindings for stringr::str_remove and
stringr::str_remove_all (#14644)
Authored-by: Nic Crane <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
---
r/DESCRIPTION | 2 +-
r/R/dplyr-funcs-doc.R | 6 ++--
r/R/dplyr-funcs-string.R | 16 +++++++++
r/man/acero.Rd | 6 ++--
r/tests/testthat/test-dplyr-funcs-string.R | 56 ++++++++++++++++++++++++++++++
5 files changed, 81 insertions(+), 5 deletions(-)
diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 9bb9976355..da481c7dc5 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -41,7 +41,7 @@ Imports:
utils,
vctrs
Roxygen: list(markdown = TRUE, r6 = FALSE, load = "source")
-RoxygenNote: 7.2.2
+RoxygenNote: 7.2.3
Config/testthat/edition: 3
Suggests:
blob,
diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
index b8337e3069..1798e9a96c 100644
--- a/r/R/dplyr-funcs-doc.R
+++ b/r/R/dplyr-funcs-doc.R
@@ -21,7 +21,7 @@
#'
#' The `arrow` package contains methods for 37 `dplyr` table functions, many of
#' which are "verbs" that do transformations to one or more tables.
-#' The package also has mappings of 207 R functions to the corresponding
+#' The package also has mappings of 209 R functions to the corresponding
#' functions in the Arrow compute library. These allow you to write code inside
#' of `dplyr` methods that call R functions, including many in packages like
#' `stringr` and `lubridate`, and they will get translated to Arrow and run
@@ -83,7 +83,7 @@
#' Functions can be called either as `pkg::fun()` or just `fun()`, i.e. both
#' `str_sub()` and `stringr::str_sub()` work.
#'
-#' In addition to these functions, you can call any of Arrow's 243 compute
+#' In addition to these functions, you can call any of Arrow's 246 compute
#' functions directly. Arrow has many functions that don't map to an existing R
#' function. In other cases where there is an R function mapping, you can still
#' call the Arrow function directly if you don't want the adaptations that the R
@@ -317,6 +317,8 @@
#' * [`str_length()`][stringr::str_length()]
#' * `str_like()`: not yet in a released version of `stringr`, but it is supported in `arrow`
#' * [`str_pad()`][stringr::str_pad()]
+#' * [`str_remove()`][stringr::str_remove()]
+#' * [`str_remove_all()`][stringr::str_remove_all()]
#' * [`str_replace()`][stringr::str_replace()]
#' * [`str_replace_all()`][stringr::str_replace_all()]
#' * [`str_split()`][stringr::str_split()]: Case-insensitive string splitting and splitting into 0 parts not supported
diff --git a/r/R/dplyr-funcs-string.R b/r/R/dplyr-funcs-string.R
index 7a5d747475..16ad35148e 100644
--- a/r/R/dplyr-funcs-string.R
+++ b/r/R/dplyr-funcs-string.R
@@ -360,10 +360,26 @@ register_bindings_string_regex <- function() {
}
}
+ arrow_stringr_string_remove_function <- function(max_replacements) {
+ force(max_replacements)
+ function(string, pattern) {
+ opts <- get_stringr_pattern_options(enexpr(pattern))
+ arrow_r_string_replace_function(max_replacements)(
+ pattern = opts$pattern,
+ replacement = "",
+ x = string,
+ ignore.case = opts$ignore_case,
+ fixed = opts$fixed
+ )
+ }
+ }
+
register_binding("base::sub", arrow_r_string_replace_function(1L))
register_binding("base::gsub", arrow_r_string_replace_function(-1L))
register_binding("stringr::str_replace", arrow_stringr_string_replace_function(1L))
register_binding("stringr::str_replace_all", arrow_stringr_string_replace_function(-1L))
+ register_binding("stringr::str_remove", arrow_stringr_string_remove_function(1L))
+ register_binding("stringr::str_remove_all", arrow_stringr_string_remove_function(-1L))
register_binding("base::strsplit", function(x, split, fixed = FALSE, perl =
FALSE,
useBytes = FALSE) {
diff --git a/r/man/acero.Rd b/r/man/acero.Rd
index 84adf081de..9931d3df92 100644
--- a/r/man/acero.Rd
+++ b/r/man/acero.Rd
@@ -6,7 +6,7 @@
\description{
The \code{arrow} package contains methods for 37 \code{dplyr} table functions, many of
which are "verbs" that do transformations to one or more tables.
-The package also has mappings of 207 R functions to the corresponding
+The package also has mappings of 209 R functions to the corresponding
functions in the Arrow compute library. These allow you to write code inside
of \code{dplyr} methods that call R functions, including many in packages like
\code{stringr} and \code{lubridate}, and they will get translated to Arrow and
run
@@ -68,7 +68,7 @@ can assume that the function works in Acero just as it does
in R.
Functions can be called either as \code{pkg::fun()} or just \code{fun()}, i.e.
both
\code{str_sub()} and \code{stringr::str_sub()} work.
-In addition to these functions, you can call any of Arrow's 243 compute
+In addition to these functions, you can call any of Arrow's 246 compute
functions directly. Arrow has many functions that don't map to an existing R
function. In other cases where there is an R function mapping, you can still
call the Arrow function directly if you don't want the adaptations that the R
@@ -319,6 +319,8 @@ Pattern modifiers \code{coll()} and \code{boundary()} are
not supported in any f
\item \code{\link[stringr:str_length]{str_length()}}
\item \code{str_like()}: not yet in a released version of \code{stringr}, but
it is supported in \code{arrow}
\item \code{\link[stringr:str_pad]{str_pad()}}
+\item \code{\link[stringr:str_remove]{str_remove()}}
+\item \code{\link[stringr:str_remove]{str_remove_all()}}
\item \code{\link[stringr:str_replace]{str_replace()}}
\item \code{\link[stringr:str_replace]{str_replace_all()}}
\item \code{\link[stringr:str_split]{str_split()}}: Case-insensitive string
splitting and splitting into 0 parts not supported
diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R
index 346c25ec0e..2e7931a49b 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -1420,3 +1420,59 @@ test_that("str_trim()", {
tbl
)
})
+
+test_that("str_remove and str_remove_all", {
+ df <- tibble(x = c("Foo", "bar"))
+
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = str_remove_all(x, "^F")) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = str_remove_all(x, regex("^F"))) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = str_remove(x, "^F[a-z]{2}")) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = str_remove(x, regex("^f[A-Z]{2}", ignore_case = TRUE))) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ x = str_remove_all(x, fixed("o")),
+ x2 = stringr::str_remove_all(x, fixed("o"))
+ ) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ x = str_remove(x, fixed("O")),
+ x2 = stringr::str_remove(x, fixed("O"))
+ ) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = str_remove(x, fixed("O", ignore_case = TRUE))) %>%
+ collect(),
+ df
+ )
+})