This is an automated email from the ASF dual-hosted git repository.

npr pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 81b94dcda6 GH-43440: [R] Unable to filter a factor column with %in% (#43446)
81b94dcda6 is described below

commit 81b94dcda6180781effc331650d9cd413a4ac115
Author: Neal Richardson <[email protected]>
AuthorDate: Sun Sep 22 11:15:40 2024 -0400

    GH-43440: [R] Unable to filter a factor column with %in% (#43446)
    
    ### Rationale for this change
    
    Fixes #43440
    
    ### What changes are included in this PR?
    
    The binding for `%in%` sends the DictionaryType's `value_type` to
    `cast_or_parse()`. It's possible that it would be better to handle this
    in `cast_or_parse()`, but it is used in lots of places and I wasn't sure
    that was correct everywhere. We could certainly find out, but that's a
    bigger testing exercise than I wanted to take on this afternoon.
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    The bug is fixed.
    * GitHub Issue: #43440
---
 r/R/dplyr-funcs-conditional.R                   | 11 +++++++++--
 r/tests/testthat/test-dplyr-funcs-conditional.R | 10 ++++++++++
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/r/R/dplyr-funcs-conditional.R b/r/R/dplyr-funcs-conditional.R
index 3ab955aa8a..52f47a128f 100644
--- a/r/R/dplyr-funcs-conditional.R
+++ b/r/R/dplyr-funcs-conditional.R
@@ -21,9 +21,16 @@ register_bindings_conditional <- function() {
     value_set <- Array$create(table)
     # If possible, `table` should be the same type as `x`
     # Try downcasting here; otherwise Acero may upcast x to table's type
+    x_type <- x$type()
+    # GH-43440: `is_in` doesn't want a DictionaryType in the value_set,
+    # so we'll cast to its value_type
+    # TODO: should this be pushed into cast_or_parse? Is this a bigger issue?
+    if (inherits(x_type, "DictionaryType")) {
+      x_type <- x_type$value_type
+    }
     try(
-      value_set <- cast_or_parse(value_set, x$type()),
-      silent = TRUE
+      value_set <- cast_or_parse(value_set, x_type),
+      silent = !getOption("arrow.debug", FALSE)
     )
 
     expr <- Expression$create("is_in", x,
diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R b/r/tests/testthat/test-dplyr-funcs-conditional.R
index d90dc827b4..24ddd342a8 100644
--- a/r/tests/testthat/test-dplyr-funcs-conditional.R
+++ b/r/tests/testthat/test-dplyr-funcs-conditional.R
@@ -26,6 +26,16 @@ tbl <- example_data
 tbl$verses <- verses[[1]]
 tbl$another_chr <- tail(letters, 10)
 
+test_that("%in% handles dictionary type", {
+  df <- tibble::tibble(x = factor(c("a", "b", "c")))
+  compare_dplyr_binding(
+    .input %>%
+      filter(x %in% "a") %>%
+      collect(),
+    df
+  )
+})
+
 test_that("if_else and ifelse", {
   compare_dplyr_binding(
     .input %>%

Reply via email to