Dewey Dunnington created ARROW-15008:
----------------------------------------
Summary: [R] Not all group generic functions are supported for
Arrays
Key: ARROW-15008
URL: https://issues.apache.org/jira/browse/ARROW-15008
Project: Apache Arrow
Issue Type: Improvement
Components: R
Reporter: Dewey Dunnington
When trying to do some math with decimal types, I noticed that a lot of the
group generic functions are not implemented. Many users will use the dplyr
bindings (where these are accessible), but it's useful to do this on Arrays and
Scalars, too, particularly for decimal types whose math isn't accessible
anywhere else in R.
Some template code from carrow that might be helpful:
{code:R}
# Math group generic for carrow_vctr.
#
# For every supported member of the Math group, `x` is converted to a carrow
# array, then to an arrow::Array, and the base implementation of the
# dispatched generic (`.Generic`) is called on that Array. The result is
# returned as-is; unlike the Ops method it is not wrapped with
# as_carrow_vctr().
#
# x:   a carrow_vctr.
# ...: accepted because some Math generics take extra arguments; this method
#      does not forward them.
#
# Signals an error for any generic that is not listed in the switch().
#' @export
Math.carrow_vctr <- function(x, ...) {
  switch(
    .Generic,
    abs = , sign = , sqrt = ,
    floor = , ceiling = , trunc = ,
    round = , signif = ,
    exp = , log = , expm1 = , log1p = ,
    cos = , sin = , tan = ,
    cospi = , sinpi = , tanpi = ,
    acos = , asin = , atan = ,
    cosh = , sinh = , tanh = ,
    acosh = , asinh = , atanh = ,
    lgamma = , gamma = , digamma = , trigamma = ,
    # `cummin` (not `cumin`) is the name of the Math group member
    cumsum = , cumprod = , cummax = , cummin = {
      assert_arrow("Math group generics")
      # NOTE(review): was as_arrow_array(); changed to as_carrow_array() to
      # match the Ops/Summary methods, which all feed a carrow array into
      # from_carrow_array() -- confirm against the package's helpers.
      array <- as_carrow_array(x)
      arrow_array <- from_carrow_array(array, arrow::Array)
      getNamespace("base")[[.Generic]](arrow_array)
    },
    # nocov start
    stop(sprintf("Math generic '%s' not supported for carrow_vctr()", .Generic))
    # nocov end
  )
}
# Ops group generic for carrow_vctr.
#
# Unary form (`e2` missing): `e1` is converted to an arrow::Array, the base
# implementation of the operator is applied, and the result is converted
# back with as_carrow_vctr().
#
# Binary form: both operands are first coerced with as_carrow_vctr() (so
# either side may be a plain R vector), converted to arrow::Array objects,
# combined with the base implementation of the operator, and the result is
# converted back with as_carrow_vctr().
#
# e1, e2: operands; `e2` is missing for unary operators.
#
# Signals an error for any operator that is not listed in the switch().
#' @export
Ops.carrow_vctr <- function(e1, e2) {
  if (missing(e2)) {
    switch(
      .Generic,
      "!" = , "+" = , "-" = {
        assert_arrow("Unary Ops group generics")
        array <- as_carrow_array(e1)
        arrow_array <- from_carrow_array(array, arrow::Array)
        result <- getNamespace("base")[[.Generic]](arrow_array)
        return(as_carrow_vctr(result))
      },
      # R catches these before we do with 'invalid unary operator'
      # nocov start
      stop(sprintf("Unary '%s' not supported for carrow_vctr()", .Generic))
      # nocov end
    )
  }

  switch(
    .Generic,
    "+" = , "-" = , "*" = , "/" = , "^" = , "%%" = , "%/%" = ,
    "&" = , "|" = , "!" = ,
    "==" = , "!=" = , "<" = , "<=" = , ">=" = , ">" = {
      assert_arrow("Ops group generics")
      vctr1 <- as_carrow_vctr(e1)
      vctr2 <- as_carrow_vctr(e2)
      array1 <- as_carrow_array(vctr1)
      array2 <- as_carrow_array(vctr2)
      arrow_array1 <- from_carrow_array(array1, arrow::Array)
      arrow_array2 <- from_carrow_array(array2, arrow::Array)
      result <- getNamespace("base")[[.Generic]](arrow_array1, arrow_array2)
      as_carrow_vctr(result)
    },
    # nocov start
    stop(sprintf("Ops generic '%s' not supported for carrow_vctr()", .Generic))
    # nocov end
  )
}
# Summary group generic for carrow_vctr.
#
# Converts `x` to an arrow::Array and calls the base implementation of the
# dispatched generic (`.Generic`) on it, passing `na.rm` through. The result
# is returned as-is (it is not wrapped with as_carrow_vctr()).
#
# x:     a carrow_vctr.
# ...:   must be empty; additional vectors are not supported and trigger a
#        stopifnot() failure rather than being silently ignored.
# na.rm: forwarded to the underlying generic.
#
# assert_arrow() is called before the switch(), so it runs even when the
# generic turns out to be unsupported.
#' @export
Summary.carrow_vctr <- function(x, ..., na.rm = FALSE) {
  assert_arrow("Summary group generics")
  switch(
    .Generic,
    all = , any = ,
    sum = , prod = ,
    min = , max = ,
    range = {
      # make sure dots are empty because we ignore them
      stopifnot(...length() == 0L)
      array <- as_carrow_array(x)
      arrow_array <- from_carrow_array(array, arrow::Array)
      getNamespace("base")[[.Generic]](arrow_array, na.rm = na.rm)
    },
    # nocov start
    stop(sprintf("Summary generic '%s' not supported for carrow_vctr()", .Generic))
    # nocov end
  )
}
# Complex group generic for carrow_vctr: unconditionally signals an error,
# without the call in the message, because no member of the Complex group
# is supported.
#
# z: ignored.
#' @export
Complex.carrow_vctr <- function(z) {
  msg <- "Complex group generics are not supported for carrow_vctr"
  stop(msg, call. = FALSE)
}
{code}
And some tests that might be useful to copy:
{code:R}
test_that("Math generics work", {
# Intentionally empty placeholder: none of the Math generics are implemented
# in Arrow, so there is nothing to test here either.
})
# Checks that each binary arithmetic operator on carrow_vctr inputs gives the
# same values as the operator applied to the equivalent R vectors.
test_that("Ops numeric generics work", {
  skip_if_not_installed("arrow")

  v1 <- c(1:5, NA)
  v2 <- 6:11
  vctr1 <- as_carrow_vctr(v1)
  vctr2 <- as_carrow_vctr(v2)

  # unary expressions are broken in Arrow so these don't work
  # expect_identical(
  #   from_carrow_array(as_carrow_array(+vctr1)),
  #   +v1
  # )
  #
  # expect_identical(
  #   from_carrow_array(as_carrow_array(-vctr1)),
  #   -v1
  # )

  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 + vctr2)),
    v1 + v2
  )
  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 - vctr2)),
    v1 - v2
  )
  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 * vctr2)),
    v1 * v2
  )
  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 / vctr2)),
    v1 / v2
  )
  # the expected value is coerced to integer to match the round-tripped result
  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 ^ vctr2)),
    as.integer(v1 ^ v2)
  )
  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 %% vctr2)),
    v1 %% v2
  )
  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 %/% vctr2)),
    v1 %/% v2
  )
})
# Checks `!`, `&` and `|` on carrow_vctr inputs against the same operators on
# the equivalent R logical vectors. Currently skipped unconditionally.
test_that("Ops logical generics work", {
  skip_if_not_installed("arrow")
  skip("until logical conversion is improved")

  v1 <- c(TRUE, TRUE, FALSE, FALSE, NA, NA, NA)
  v2 <- c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, NA)
  vctr1 <- as_carrow_vctr(v1)
  vctr2 <- as_carrow_vctr(v2)

  expect_identical(
    from_carrow_array(as_carrow_array(!vctr1)),
    !v1
  )
  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 & vctr2)),
    v1 & v2
  )
  # was `vctr1 & vctr2`, which compared an AND result against `v1 | v2`
  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 | vctr2)),
    v1 | v2
  )
})
# Checks the six comparison operators on carrow_vctr inputs against the same
# operators on the equivalent R vectors. Currently skipped unconditionally.
test_that("Ops comparison generics work", {
  skip_if_not_installed("arrow")
  skip("until logical conversion is improved")

  # Convert a carrow_vctr result back to a plain R vector for comparison.
  to_r <- function(x) from_carrow_array(as_carrow_array(x))

  lhs <- c(1, 2, 3, 4, 5, 1, NA, 3, NA, 5, NA)
  rhs <- c(5, 4, 3, 2, 1, NA, 4, NA, 2, 1, NA)
  lhs_vctr <- as_carrow_vctr(lhs)
  rhs_vctr <- as_carrow_vctr(rhs)

  expect_identical(to_r(lhs_vctr == rhs_vctr), lhs == rhs)
  expect_identical(to_r(lhs_vctr != rhs_vctr), lhs != rhs)
  expect_identical(to_r(lhs_vctr < rhs_vctr), lhs < rhs)
  expect_identical(to_r(lhs_vctr <= rhs_vctr), lhs <= rhs)
  expect_identical(to_r(lhs_vctr >= rhs_vctr), lhs >= rhs)
  expect_identical(to_r(lhs_vctr > rhs_vctr), lhs > rhs)
})
# Checks sum/min/max on a carrow_vctr against the base results, with and
# without NA removal. The range/prod expectations after the skip() are kept
# for when those generics become available.
test_that("Summary numeric generics work", {
  skip_if_not_installed("arrow")

  # Convert a carrow_vctr result back to a plain R vector for comparison.
  to_r <- function(x) from_carrow_array(as_carrow_array(x))

  values <- c(1:5, NA)
  values_vctr <- as_carrow_vctr(values)

  # the expected sum is coerced to double to match the round-tripped result
  expect_identical(
    to_r(sum(values_vctr, na.rm = TRUE)),
    as.double(sum(values, na.rm = TRUE))
  )
  expect_identical(
    to_r(sum(values_vctr, na.rm = FALSE)),
    as.double(sum(values, na.rm = FALSE))
  )

  expect_identical(
    to_r(min(values_vctr, na.rm = TRUE)),
    min(values, na.rm = TRUE)
  )
  expect_identical(
    to_r(min(values_vctr, na.rm = FALSE)),
    min(values, na.rm = FALSE)
  )

  expect_identical(
    to_r(max(values_vctr, na.rm = TRUE)),
    max(values, na.rm = TRUE)
  )
  expect_identical(
    to_r(max(values_vctr, na.rm = FALSE)),
    max(values, na.rm = FALSE)
  )

  skip("not all Summary generics are implemented in Arrow")

  expect_identical(
    to_r(range(values_vctr, na.rm = TRUE)),
    range(values, na.rm = TRUE)
  )
  expect_identical(
    to_r(range(values_vctr, na.rm = FALSE)),
    range(values, na.rm = FALSE)
  )

  expect_identical(
    to_r(prod(values_vctr, na.rm = TRUE)),
    prod(values, na.rm = TRUE)
  )
  expect_identical(
    to_r(prod(values_vctr, na.rm = FALSE)),
    prod(values, na.rm = FALSE)
  )
})
# Checks any()/all() on carrow_vctr inputs against the base results for an
# all-FALSE and an all-TRUE vector (each with one NA), with and without NA
# removal. Currently skipped unconditionally.
test_that("Summary logical generics work", {
  skip_if_not_installed("arrow")
  skip("until logical conversion is fixed")

  # Convert a carrow_vctr result back to a plain R vector for comparison.
  to_r <- function(x) from_carrow_array(as_carrow_array(x))

  falsy <- c(FALSE, FALSE, NA)
  truthy <- c(TRUE, TRUE, NA)
  falsy_vctr <- as_carrow_vctr(falsy)
  truthy_vctr <- as_carrow_vctr(truthy)

  expect_identical(
    to_r(any(falsy_vctr, na.rm = TRUE)),
    any(falsy, na.rm = TRUE)
  )
  expect_identical(
    to_r(any(falsy_vctr, na.rm = FALSE)),
    any(falsy, na.rm = FALSE)
  )
  expect_identical(
    to_r(any(truthy_vctr, na.rm = TRUE)),
    any(truthy, na.rm = TRUE)
  )
  expect_identical(
    to_r(any(truthy_vctr, na.rm = FALSE)),
    any(truthy, na.rm = FALSE)
  )

  expect_identical(
    to_r(all(falsy_vctr, na.rm = TRUE)),
    all(falsy, na.rm = TRUE)
  )
  expect_identical(
    to_r(all(falsy_vctr, na.rm = FALSE)),
    all(falsy, na.rm = FALSE)
  )
  expect_identical(
    to_r(all(truthy_vctr, na.rm = TRUE)),
    all(truthy, na.rm = TRUE)
  )
  expect_identical(
    to_r(all(truthy_vctr, na.rm = FALSE)),
    all(truthy, na.rm = FALSE)
  )
})
{code}
--
This message was sent by Atlassian Jira
(v8.20.1#820001)