thisisnic commented on code in PR #46878:
URL: https://github.com/apache/arrow/pull/46878#discussion_r2313533771
##########
r/R/dplyr-funcs-conditional.R:
##########
@@ -138,4 +139,61 @@ register_bindings_conditional <- function() {
)
)
}, notes = "`.ptype` and `.size` arguments not supported")
+
+ register_binding("data.table::fcase", function(..., default = NULL) {
+ n_inputs <- ...length()
Review Comment:
If we move `args <- list2(...)` up to here and evaluate the length of that,
it'll be more consistent with the rest of the codebase.
##########
r/tests/testthat/test-dplyr-funcs-conditional.R:
##########
@@ -318,6 +318,99 @@ test_that("case_when()", {
)
})
+test_that("fcase()", {
+ skip_if_not_installed("data.table")
+
+ # Basic usage with default
+ compare_dplyr_binding(
+ .input %>%
+ mutate(cw = data.table::fcase(int > 5, 1, default = 0)) %>%
+ collect(),
+ tbl
+ )
+
+ # With namespacing
+ compare_dplyr_binding(
+ .input %>%
+ mutate(cw = data.table::fcase(int > 5, 1L, default = 0L)) %>%
+ collect(),
+ tbl
+ )
+
+ # Multiple conditions
+ compare_dplyr_binding(
+ .input %>%
+ transmute(cw = data.table::fcase(lgl, dbl, !false, dbl + dbl2)) %>%
+ collect(),
+ tbl
+ )
+
+ # No default provided (should result in NAs)
+ compare_dplyr_binding(
+ .input %>%
+ transmute(cw = data.table::fcase(chr %in% letters[1:3], 1L) + 41L) %>%
+ collect(),
+ tbl
+ )
+
+ # Type coercion (int and dbl -> dbl)
+ expect_equal(
+ tbl %>%
+ mutate(i64 = as.integer64(1e10)) %>%
+ Table$create() %>%
+ transmute(cw = data.table::fcase(
+ is.na(fct), int,
+ is.na(chr), dbl,
+ TRUE, i64
+ )) %>%
+ collect(),
+ tbl %>%
+ transmute(
+ cw = ifelse(is.na(fct), int, ifelse(is.na(chr), dbl, 1e10))
+ )
+ )
Review Comment:
This feels a little clunky (as is the original test this is replicated from
tbh); please can you add a comment explaining what/why for this test?
##########
r/tests/testthat/test-dplyr-funcs-conditional.R:
##########
@@ -318,6 +318,99 @@ test_that("case_when()", {
)
})
+test_that("fcase()", {
+ skip_if_not_installed("data.table")
+
+ # Basic usage with default
+ compare_dplyr_binding(
+ .input %>%
+ mutate(cw = data.table::fcase(int > 5, 1, default = 0)) %>%
+ collect(),
+ tbl
+ )
+
+ # With namespacing
+ compare_dplyr_binding(
+ .input %>%
+ mutate(cw = data.table::fcase(int > 5, 1L, default = 0L)) %>%
+ collect(),
+ tbl
+ )
+
+ # Multiple conditions
+ compare_dplyr_binding(
+ .input %>%
+ transmute(cw = data.table::fcase(lgl, dbl, !false, dbl + dbl2)) %>%
+ collect(),
+ tbl
+ )
+
+ # No default provided (should result in NAs)
+ compare_dplyr_binding(
+ .input %>%
+ transmute(cw = data.table::fcase(chr %in% letters[1:3], 1L) + 41L) %>%
+ collect(),
+ tbl
+ )
+
+ # Type coercion (int and dbl -> dbl)
+ expect_equal(
+ tbl %>%
+ mutate(i64 = as.integer64(1e10)) %>%
+ Table$create() %>%
+ transmute(cw = data.table::fcase(
+ is.na(fct), int,
+ is.na(chr), dbl,
+ TRUE, i64
+ )) %>%
+ collect(),
+ tbl %>%
+ transmute(
+ cw = ifelse(is.na(fct), int, ifelse(is.na(chr), dbl, 1e10))
+ )
+ )
+
+ # Character results
+ compare_dplyr_binding(
+ .input %>%
+ transmute(cw = data.table::fcase(lgl, "abc", default = "def")) %>%
+ collect(),
+ tbl
+ )
+
+ # validation errors
+ expect_arrow_eval_error(
+ data.table::fcase(),
+ "No cases provided to fcase\\(\\)",
+ class = "validation_error"
+ )
+ expect_arrow_eval_error(
+ data.table::fcase(TRUE),
+ "fcase\\(\\) must have an even number of arguments, but 1 were provided.",
+ class = "validation_error"
+ )
+ expect_arrow_eval_error(
+ data.table::fcase(0L, FALSE, TRUE, FALSE),
+ "Element 1 of `...` in fcase\\(\\) must be a logical expression",
+ class = "validation_error"
+ )
Review Comment:
When I run this expression outside of an Arrow context (i..e just with
data.table 1.17.8), I get:
```
> data.table::fcase(0L, FALSE, TRUE, FALSE)
Error in data.table::fcase(0L, FALSE, TRUE, FALSE) :
Argument #1 must be logical but was of type integer.
```
Our general philosophy towards bindings is to match the error messages of
the original implementation, unless there's a reason not to - should we update
these here, or is there a reason to have them as different outputs?
##########
r/R/dplyr-funcs-conditional.R:
##########
@@ -138,4 +139,61 @@ register_bindings_conditional <- function() {
)
)
}, notes = "`.ptype` and `.size` arguments not supported")
+
+ register_binding("data.table::fcase", function(..., default = NULL) {
+ n_inputs <- ...length()
+
+ if (n_inputs == 0 && is.null(default)) {
+ validation_error("No cases provided to fcase()")
+ }
+
+ if (n_inputs %% 2L != 0L) {
+ validation_error(paste0("fcase() must have an even number of arguments,
but ", n_inputs, " were provided."))
+ }
+
+ mask <- caller_env()
+ # Separate conditions (query) and results (value)
+ args <- list2(...)
+ query <- args[seq(1L, n_inputs, by = 2L)]
+ value <- args[seq(2L, n_inputs, by = 2L)]
+
+
+ # Evaluate all arguments in the arrow context
+ for (i in seq_along(query)) {
+ query[[i]] <- arrow_eval(query[[i]], mask)
+ if (!call_binding("is.logical", query[[i]])) {
+ validation_error(paste0(
+ "Element ", 2L * i - 1L,
+ " of `...` in fcase() must be a logical expression"
Review Comment:
```suggestion
" of `...` in `fcase()` must be a logical expression"
```
Please can you update the other examples to use backticks around `fcase()`
too?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]