nealrichardson commented on code in PR #13150:
URL: https://github.com/apache/arrow/pull/13150#discussion_r905133620
##########
r/R/query-engine.R:
##########
@@ -101,42 +98,9 @@ ExecPlan <- R6Class("ExecPlan",
}
if (!is.null(.data$aggregations)) {
- # Project to include just the data required for each aggregation,
- # plus group_by_vars (last)
- # TODO: validate that none of names(aggregations) are the same as names(group_by_vars)
- # dplyr does not error on this but the result it gives isn't great
- node <- node$Project(summarize_projection(.data))
-
- if (grouped) {
- # We need to prefix all of the aggregation function names with "hash_"
- .data$aggregations <- lapply(.data$aggregations, function(x) {
- x[["fun"]] <- paste0("hash_", x[["fun"]])
- x
- })
- }
-
- node <- node$Aggregate(
- options = map(.data$aggregations, ~ .[c("fun", "options")]),
- target_names = names(.data$aggregations),
- out_field_names = names(.data$aggregations),
- key_names = group_vars
- )
-
- if (grouped) {
- # The result will have result columns first then the grouping cols.
- # dplyr orders group cols first, so adapt the result to meet that expectation.
- node <- node$Project(
- make_field_refs(c(group_vars, names(.data$aggregations)))
- )
- if (getOption("arrow.summarise.sort", FALSE)) {
- # Add sorting instructions for the rows too to match dplyr
- # (see below about why sorting isn't itself a Node)
- node$extras$sort <- list(
- names = group_vars,
- orders = rep(0L, length(group_vars))
- )
- }
- }
+ config_agg <- private$.set_aggregation(node, .data, grouped, group_vars)
Review Comment:
Full revert: the helpers don't seem to be helping with the lint warning.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]