This is an automated email from the ASF dual-hosted git repository.

thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 64fed4e047 GH-39191: [R] throw error when `string_replace` is passed 
vector of values in `pattern` (#39219)
64fed4e047 is described below

commit 64fed4e047f6a7b6e1081921135afc86fdcef1e7
Author: Abram Fleishman <[email protected]>
AuthorDate: Tue Dec 19 01:47:47 2023 -0800

    GH-39191: [R] throw error when `string_replace` is passed vector of values 
in `pattern` (#39219)
    
    
    
    ### Rationale for this change
    See #39191 This PR will hopefully throw an informative error message to let 
the user know that while the stringr::str_replace_all function can handle a 
named vector of values as the pattern argument, the arrow R package 
implementation cannot.
    
    ### What changes are included in this PR?
    - [ ] add tests for passing vector to the pattern argument
    - [ ] add check for length > 1 to the string replace bindings
    
    ### Are these changes tested?
    yes (though I need help!)
    
    ### Are there any user-facing changes?
     yes. Hopefully the user will be alerted by an informative error message 
that they cannot pass a vector to the pattern argument. No breaking changes are 
expected.
    
    * Closes: #39191
    
    Authored-by: Abram B. Fleishman <[email protected]>
    Signed-off-by: Nic Crane <[email protected]>
---
 r/R/dplyr-funcs-string.R                   |  7 ++++++-
 r/tests/testthat/test-dplyr-funcs-string.R | 17 +++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/r/R/dplyr-funcs-string.R b/r/R/dplyr-funcs-string.R
index 9f3220e557..a21ce78edd 100644
--- a/r/R/dplyr-funcs-string.R
+++ b/r/R/dplyr-funcs-string.R
@@ -58,7 +58,6 @@ get_stringr_pattern_options <- function(pattern) {
   }
 
   ensure_opts <- function(opts) {
-
     # default options for the simple cases
     if (is.character(opts)) {
       opts <- list(pattern = opts, fixed = FALSE, ignore_case = FALSE)
@@ -352,6 +351,12 @@ register_bindings_string_regex <- function() {
   # Encapsulate some common logic for sub/gsub/str_replace/str_replace_all
   arrow_r_string_replace_function <- function(max_replacements) {
     function(pattern, replacement, x, ignore.case = FALSE, fixed = FALSE) {
+      if (length(pattern) != 1) {
+        stop("`pattern` must be a length 1 character vector")
+      }
+      if (length(replacement) != 1) {
+        stop("`replacement` must be a length 1 character vector")
+      }
       Expression$create(
         ifelse(fixed && !ignore.case, "replace_substring", 
"replace_substring_regex"),
         x,
diff --git a/r/tests/testthat/test-dplyr-funcs-string.R 
b/r/tests/testthat/test-dplyr-funcs-string.R
index 411b5ae3c7..039220b88e 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -425,6 +425,23 @@ test_that("sub and gsub with namespacing", {
 })
 
 test_that("str_replace and str_replace_all", {
+  x <- Expression$field_ref("x")
+
+  expect_error(
+    call_binding("str_replace_all", x, c("F" = "_", "b" = "")),
+    regexp = "`pattern` must be a length 1 character vector"
+  )
+
+  expect_error(
+    call_binding("str_replace_all", x, c("F", "b"), c("_", "")),
+    regexp = "`pattern` must be a length 1 character vector"
+  )
+
+  expect_error(
+    call_binding("str_replace_all", x, c("F"), c("_", "")),
+    regexp = "`replacement` must be a length 1 character vector"
+  )
+
   df <- tibble(x = c("Foo", "bar"))
 
   compare_dplyr_binding(

Reply via email to