rok commented on a change in pull request #9758:
URL: https://github.com/apache/arrow/pull/9758#discussion_r615313437



##########
File path: r/R/compute.R
##########
@@ -93,30 +93,36 @@ list_compute_functions <- function(pattern = NULL, ...) {
 }
 
 #' @export
-sum.ArrowDatum <- function(..., na.rm = FALSE) scalar_aggregate("sum", ..., na.rm = na.rm)
+sum.ArrowDatum <- function(..., na.rm = FALSE, na.min_count = 0) {
+  scalar_aggregate("sum", ..., na.rm = na.rm, na.min_count = na.min_count)
+}
 
 #' @export
-mean.ArrowDatum <- function(..., na.rm = FALSE) scalar_aggregate("mean", ..., na.rm = na.rm)
+mean.ArrowDatum <- function(..., na.rm = FALSE, na.min_count = 0) {
+  scalar_aggregate("mean", ..., na.rm = na.rm, na.min_count = na.min_count)
+}
 
 #' @export
-min.ArrowDatum <- function(..., na.rm = FALSE) {
-  scalar_aggregate("min_max", ..., na.rm = na.rm)$GetFieldByName("min")
+min.ArrowDatum <- function(..., na.rm = FALSE, na.min_count = 0) {
+  scalar_aggregate("min_max", ..., na.rm = na.rm, na.min_count = na.min_count)$GetFieldByName("min")
 }
 
 #' @export
-max.ArrowDatum <- function(..., na.rm = FALSE) {
-  scalar_aggregate("min_max", ..., na.rm = na.rm)$GetFieldByName("max")
+max.ArrowDatum <- function(..., na.rm = FALSE, na.min_count = 0) {
+  scalar_aggregate("min_max", ..., na.rm = na.rm, na.min_count = na.min_count)$GetFieldByName("max")
 }
 
-scalar_aggregate <- function(FUN, ..., na.rm = FALSE) {
+scalar_aggregate <- function(FUN, ..., na.rm = FALSE, na.min_count = 0) {
   a <- collect_arrays_from_dots(list(...))
-  if (!na.rm && a$null_count > 0 && (FUN %in% c("mean", "sum"))) {
-    # Arrow sum/mean function always drops NAs so handle that here
-    # https://issues.apache.org/jira/browse/ARROW-9054
-    return(Scalar$create(NA_real_))
+  if (FUN %in% c("mean", "sum") && !na.rm) {
+    na.min_count = length(a)
   }
-
-  call_function(FUN, a, options = list(na.rm = na.rm))
+  if (FUN == "min_max" && na.rm && a$null_count == length(a)) {
+    # Arrow sum/mean returns with the output type equal to input. But here R expects +/-Inf for boolean
+    # and integer types which arrow will only return with float input.
+    a <- a$cast(float64())
+  }

Review comment:
       Is there a better way to do this, e.g. just returning +/-Inf directly instead of casting the input to float64?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to