paleolimbot commented on code in PR #14122:
URL: https://github.com/apache/arrow/pull/14122#discussion_r971098678
##########
r/tests/testthat/test-dplyr-group-by.R:
##########
@@ -166,3 +166,67 @@ test_that("group_by() with namespaced functions", {
tbl
)
})
+
+test_that("group_by() with .add", {
Review Comment:
There are a few tests in here where it's not clear how they're testing
`.add`. Maybe use an explicit `.add = FALSE` or remove `.add` from this line if
that is not what they are testing?
##########
r/R/dplyr-group-by.R:
##########
@@ -24,34 +24,25 @@ group_by.arrow_dplyr_query <- function(.data,
add = .add,
.drop =
dplyr::group_by_drop_default(.data)) {
.data <- as_adq(.data)
- new_groups <- enquos(...)
- # ... can contain expressions (i.e. can add (or rename?) columns) and so we
- # need to identify those and add them on to the query with mutate.
Specifically,
- # we want to mark as new:
- # * expressions (named or otherwise)
- # * variables that have new names
- # All others (i.e. simple references to variables) should not be (re)-added
+ expression_list <- expand_across(.data, quos(...))
+ new_groups <- ensure_named_exprs(expression_list)
- # Identify any groups with names which aren't in names of .data
- new_group_ind <- map_lgl(new_groups, ~ !(quo_name(.x) %in% names(.data)))
- # Identify any groups which don't have names
- named_group_ind <- map_lgl(names(new_groups), nzchar)
- # Retain any new groups identified above
- new_groups <- new_groups[new_group_ind | named_group_ind]
if (length(new_groups)) {
- # now either use the name that was given in ... or if that is "" then use
the expr
- names(new_groups) <- imap_chr(new_groups, ~ ifelse(.y == "", quo_name(.x),
.y))
-
# Add them to the data
.data <- dplyr::mutate(.data, !!!new_groups)
}
- if (".add" %in% names(formals(dplyr::group_by))) {
- # For compatibility with dplyr >= 1.0
- gv <- dplyr::group_by_prepare(.data, ..., .add = .add)$group_names
+
+ if (!".add" %in% names(formals(dplyr::group_by))) {
+ # For compatibility with dplyr < 1.0
+ .add <- add
+ }
Review Comment:
I know that you just copied this from before, but this probably has no
effect given `add = .add` in the formals above. Since you added a nice unit
test for `.add`, you could add one in that uses `group_by(..., add = TRUE)`
just to be sure.
##########
r/R/dplyr-group-by.R:
##########
@@ -24,34 +24,25 @@ group_by.arrow_dplyr_query <- function(.data,
add = .add,
.drop =
dplyr::group_by_drop_default(.data)) {
.data <- as_adq(.data)
- new_groups <- enquos(...)
- # ... can contain expressions (i.e. can add (or rename?) columns) and so we
- # need to identify those and add them on to the query with mutate.
Specifically,
- # we want to mark as new:
- # * expressions (named or otherwise)
- # * variables that have new names
- # All others (i.e. simple references to variables) should not be (re)-added
+ expression_list <- expand_across(.data, quos(...))
+ new_groups <- ensure_named_exprs(expression_list)
- # Identify any groups with names which aren't in names of .data
- new_group_ind <- map_lgl(new_groups, ~ !(quo_name(.x) %in% names(.data)))
- # Identify any groups which don't have names
- named_group_ind <- map_lgl(names(new_groups), nzchar)
- # Retain any new groups identified above
- new_groups <- new_groups[new_group_ind | named_group_ind]
if (length(new_groups)) {
- # now either use the name that was given in ... or if that is "" then use
the expr
- names(new_groups) <- imap_chr(new_groups, ~ ifelse(.y == "", quo_name(.x),
.y))
-
# Add them to the data
.data <- dplyr::mutate(.data, !!!new_groups)
}
- if (".add" %in% names(formals(dplyr::group_by))) {
- # For compatibility with dplyr >= 1.0
- gv <- dplyr::group_by_prepare(.data, ..., .add = .add)$group_names
+
+ if (!".add" %in% names(formals(dplyr::group_by))) {
+ # For compatibility with dplyr < 1.0
+ .add <- add
+ }
+
+ if (.add) {
+ gv <- dplyr::union(dplyr::group_vars(.data), names(new_groups))
Review Comment:
```suggestion
gv <- union(dplyr::group_vars(.data), names(new_groups))
```
##########
r/tests/testthat/test-dplyr-group-by.R:
##########
@@ -166,3 +166,67 @@ test_that("group_by() with namespaced functions", {
tbl
)
})
+
+test_that("group_by() with .add", {
+ compare_dplyr_binding(
+ .input %>%
+ group_by(dbl2) %>%
+ group_by() %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(dbl2) %>%
+ group_by(.add = TRUE) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(dbl2) %>%
+ group_by(chr) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(dbl2) %>%
+ group_by(chr, .add = TRUE) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("Can use across() within group_by()", {
+ test_groups <- c("starting_a_fight", "consoling_a_child", "petting_a_dog")
+ compare_dplyr_binding(
+ .input %>%
+ group_by(across()) %>%
+ collect(),
+ example_with_logical_factors
Review Comment:
Using this example dataset suggests to me that the behaviour of logical
factors is important here and I don't think that is the case. Maybe just use
`tbl` like all the other tests?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]