This is an automated email from the ASF dual-hosted git repository.
thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 64fed4e047 GH-39191: [R] throw error when `string_replace` is passed
vector of values in `pattern` (#39219)
64fed4e047 is described below
commit 64fed4e047f6a7b6e1081921135afc86fdcef1e7
Author: Abram Fleishman <[email protected]>
AuthorDate: Tue Dec 19 01:47:47 2023 -0800
GH-39191: [R] throw error when `string_replace` is passed vector of values
in `pattern` (#39219)
### Rationale for this change
See #39191 This PR will hopefully throw an informative error message to let
the user know that while the stringr::str_replace_all function can handle a
named vector of values as the pattern argument, the arrow R package
implementation cannot.
### What changes are included in this PR?
- [ ] add tests for passing vector to the pattern argument
- [ ] add check for length > 1 to the string replace bindings
### Are these changes tested?
yes (though I need help!)
### Are there any user-facing changes?
yes. Hopefully the user will be alerted by an informative error message
that they cannot pass a vector to the pattern argument. No breaking changes are
expected.
* Closes: #39191
Authored-by: Abram B. Fleishman <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
---
r/R/dplyr-funcs-string.R | 7 ++++++-
r/tests/testthat/test-dplyr-funcs-string.R | 17 +++++++++++++++++
2 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/r/R/dplyr-funcs-string.R b/r/R/dplyr-funcs-string.R
index 9f3220e557..a21ce78edd 100644
--- a/r/R/dplyr-funcs-string.R
+++ b/r/R/dplyr-funcs-string.R
@@ -58,7 +58,6 @@ get_stringr_pattern_options <- function(pattern) {
}
ensure_opts <- function(opts) {
-
# default options for the simple cases
if (is.character(opts)) {
opts <- list(pattern = opts, fixed = FALSE, ignore_case = FALSE)
@@ -352,6 +351,12 @@ register_bindings_string_regex <- function() {
# Encapsulate some common logic for sub/gsub/str_replace/str_replace_all
arrow_r_string_replace_function <- function(max_replacements) {
function(pattern, replacement, x, ignore.case = FALSE, fixed = FALSE) {
+ if (length(pattern) != 1) {
+ stop("`pattern` must be a length 1 character vector")
+ }
+ if (length(replacement) != 1) {
+ stop("`replacement` must be a length 1 character vector")
+ }
Expression$create(
ifelse(fixed && !ignore.case, "replace_substring",
"replace_substring_regex"),
x,
diff --git a/r/tests/testthat/test-dplyr-funcs-string.R
b/r/tests/testthat/test-dplyr-funcs-string.R
index 411b5ae3c7..039220b88e 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -425,6 +425,23 @@ test_that("sub and gsub with namespacing", {
})
test_that("str_replace and str_replace_all", {
+ x <- Expression$field_ref("x")
+
+ expect_error(
+ call_binding("str_replace_all", x, c("F" = "_", "b" = "")),
+ regexp = "`pattern` must be a length 1 character vector"
+ )
+
+ expect_error(
+ call_binding("str_replace_all", x, c("F", "b"), c("_", "")),
+ regexp = "`pattern` must be a length 1 character vector"
+ )
+
+ expect_error(
+ call_binding("str_replace_all", x, c("F"), c("_", "")),
+ regexp = "`replacement` must be a length 1 character vector"
+ )
+
df <- tibble(x = c("Foo", "bar"))
compare_dplyr_binding(