nealrichardson commented on code in PR #13786:
URL: https://github.com/apache/arrow/pull/13786#discussion_r945704963


##########
r/tests/testthat/test-dplyr-mutate.R:
##########
@@ -589,3 +588,127 @@ test_that("mutate() and transmute() with namespaced 
functions", {
     tbl
   )
 })
+
+test_that("Can use across() within mutate()", {
+  compare_dplyr_binding(
+    .input %>%
+      mutate(across(c(dbl, dbl2), round)) %>%
+      collect(),
+    tbl
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(across(c(dbl, dbl2), list(exp, sqrt))) %>%
+      collect(),
+    tbl
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(across(c(dbl, dbl2), list("fun1" = round, "fun2" = sqrt))) %>%
+      collect(),
+    tbl
+  )
+
+  # this is valid is neither R nor Arrow
+  expect_error(
+    expect_warning(
+      compare_dplyr_binding(
+        .input %>%
+          arrow_table() %>%
+          mutate(across(c(dbl, dbl2), list("fun1" = round(sqrt(dbl))))) %>%
+          collect(),
+        tbl,
+        warning = TRUE
+      )
+    )
+  )
+
+  # across() arguments not in default order
+  compare_dplyr_binding(
+    .input %>%
+      mutate(across(.fns = round, c(dbl, dbl2))) %>%
+      collect(),
+    tbl
+  )
+
+  # ARROW-17364: .names argument not yet supported for across()
+  expect_error(
+    tbl %>%
+      arrow_table() %>%
+      mutate(across(c(dbl, dbl2), round, .names = "{.col}.{.fn}")) %>%
+      collect(),
+    regexp = "`.names` argument to `across()` not yet supported in Arrow",
+    fixed = TRUE
+  )
+
+  # ellipses (...) are a deprecated argument
+  expect_error(
+    tbl %>%
+      arrow_table() %>%
+      mutate(across(c(dbl, dbl2), round, digits = -1)) %>%
+      collect(),
+    regexp = "`...` argument to `across()` is deprecated in dplyr and not 
supported in Arrow",
+    fixed = TRUE
+  )
+
+  # alternative ways of specifying .fns - as a list
+  compare_dplyr_binding(
+    .input %>%
+      mutate(across(1:dbl2, list(round))) %>%
+      collect(),
+    tbl
+  )
+
+  # supply .fns as a one-item vector
+  compare_dplyr_binding(
+    .input %>%
+      mutate(across(1:dbl2, c(round))) %>%
+      collect(),
+    tbl
+  )
+
+  # ARROW-17366: purrr-style lambda functions not yet supported
+  expect_error(
+    compare_dplyr_binding(
+      .input %>%
+        mutate(across(1:dbl2, ~ round(.x, digits = -1))) %>%
+        collect(),
+      tbl
+    ),
+    regexp = "purrr-style lambda functions as `.fns` argument to `across()` 
not yet supported in Arrow",
+    fixed = TRUE
+  )
+
+  # .fns = NULL, the default
+  compare_dplyr_binding(
+    .input %>%
+      mutate(across(1:dbl2, NULL)) %>%
+      collect(),
+    tbl
+  )
+
+  expect_error(
+    compare_dplyr_binding(
+      .input %>%
+        mutate(across(where(is.double))) %>%
+        collect(),
+      tbl
+    ),
+    "Unsupported selection helper"

Review Comment:
   Why is this unsupported? Do we have JIRA(s) for this? Seems like it should 
work.



##########
r/R/dplyr-mutate.R:
##########
@@ -151,3 +153,95 @@ ensure_named_exprs <- function(exprs) {
   names(exprs)[unnamed] <- map_chr(exprs[unnamed], format_expr)
   exprs
 }
+
+# Take the input quos and unfold any instances of across()
+# into individual quosures
+unfold_across <- function(.data, quos_in) {
+  quos_out <- list()
+  # Check for any expressions starting with across
+  for (quo_i in seq_along(quos_in)) {

Review Comment:
   I don't see any tests where there is more than `mutate(across(...))`, i.e. 
no other quosures in this list. We should probably test the corners of how 
across interacts with other mutate expressions.



##########
r/R/dplyr-mutate.R:
##########
@@ -24,7 +24,9 @@ mutate.arrow_dplyr_query <- function(.data,
                                      .before = NULL,
                                      .after = NULL) {
   call <- match.call()
-  exprs <- ensure_named_exprs(quos(...))
+
+  expression_list <- unfold_across(.data, quos(...))

Review Comment:
   I know you've deferred this from filter and summarize, but is that 
necessary? Is it more involved than adding in this `unfold_across` in them? 
There's no other modifications to mutate() so it seems reasonable to guess that 
it would be that straightforward in the other functions too.



##########
r/R/dplyr-mutate.R:
##########
@@ -151,3 +153,95 @@ ensure_named_exprs <- function(exprs) {
   names(exprs)[unnamed] <- map_chr(exprs[unnamed], format_expr)
   exprs
 }
+
+# Take the input quos and unfold any instances of across()
+# into individual quosures
+unfold_across <- function(.data, quos_in) {

Review Comment:
   How does this compare/contrast to how dplyr does this? 



##########
r/R/dplyr-mutate.R:
##########
@@ -151,3 +153,95 @@ ensure_named_exprs <- function(exprs) {
   names(exprs)[unnamed] <- map_chr(exprs[unnamed], format_expr)
   exprs
 }
+
+# Take the input quos and unfold any instances of across()
+# into individual quosures
+unfold_across <- function(.data, quos_in) {
+  quos_out <- list()
+  # Check for any expressions starting with across
+  for (quo_i in seq_along(quos_in)) {
+    quo_in <- quos_in[quo_i]
+    quo_expr <- quo_get_expr(quo_in[[1]])
+    quo_env <- quo_get_env(quo_in[[1]])
+
+    if (is_call(quo_expr, "across")) {
+      new_quos <- list()
+      across_call <- call_match(quo_expr, dplyr::across, defaults = TRUE)
+
+      if (!all(names(across_call[-1]) %in% c(".cols", ".fns", ".names"))) {
+        abort("`...` argument to `across()` is deprecated in dplyr and not 
supported in Arrow")
+      }
+
+      # ARROW-17364: add support for .names argument
+      if (!is.null(across_call[[".names"]])) {
+        abort("`.names` argument to `across()` not yet supported in Arrow")
+      }
+
+      # use select() to get the columns so we can take advantage of tidyselect
+      source_cols <- names(dplyr::select(.data, !!across_call[[".cols"]]))
+      funcs <- across_call[[".fns"]]
+
+      # calling across() with .fns = NULL returns all columns unchanged
+      if (is_empty(funcs)) {
+        return()
+      }
+
+      if (!is_list(funcs) && as.character(funcs)[[1]] == "~") {
+        abort(
+          paste(
+            "purrr-style lambda functions as `.fns` argument to `across()`",

Review Comment:
   Why not? We already import `purrr::as_mapper`, is this more complicated than 
`funcs <- map(funcs, as_mapper)`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to