Dewey Dunnington created ARROW-15008:
----------------------------------------

             Summary: [R] Not all group generic functions are supported for 
Arrays
                 Key: ARROW-15008
                 URL: https://issues.apache.org/jira/browse/ARROW-15008
             Project: Apache Arrow
          Issue Type: Improvement
          Components: R
            Reporter: Dewey Dunnington


When trying to do some math with decimal types, I noticed that a lot of the 
group generic functions are not implemented. Many users will use the dplyr 
bindings (where these are accessible), but it's useful to do this on Arrays and 
Scalars, too, particularly for decimal types whose math isn't accessible 
anywhere else in R.

Some template code that might be helpful from carrow:


{code:R}
#' @export
Math.carrow_vctr <- function(x, ...) {
  switch(
    .Generic,
    abs =, sign =, sqrt =,
    floor =, ceiling =, trunc =,
    round =, signif =,
    exp =, log =, expm1 =, log1p =,
    cos =, sin =, tan =,
    cospi =, sinpi =, tanpi =,
    acos =, asin =, atan =,
    cosh =, sinh =, tanh =,
    acosh =, asinh =, atanh =,
    lgamma =, gamma =, digamma =, trigamma =,
    cumsum =, cumprod =, cummax =, cumin = {
      assert_arrow("Math group generics")
      array <- as_arrow_array(x)
      arrow_array <- from_carrow_array(array, arrow::Array)
      getNamespace("base")[[.Generic]](arrow_array)
    },
    stop(sprintf("Math generic '%s' not supported for carrow_vctr()", 
.Generic)) # nocov
  )
}

#' @export
Ops.carrow_vctr <- function(e1, e2) {
  if (missing(e2)) {
    switch(
      .Generic,
      "!" =, "+" =, "-" = {
        assert_arrow("Unary Ops group generics")
        array <- as_carrow_array(e1)
        arrow_array <- from_carrow_array(array, arrow::Array)
        result <- getNamespace("base")[[.Generic]](arrow_array)
        return(as_carrow_vctr(result))
      },
      # R catches these before we do with 'invalid unary operator'
      stop(sprintf("Unary '%s' not supported for carrow_vctr()", .Generic)) # 
nocov
    )
  }

  switch(
    .Generic,
    "+" =, "-" =, "*" =, "/" =, "^" =, "%%" =, "%/%" =,
    "&" =, "|" =, "!" =,
    "==" =, "!=" =, "<" =, "<=" =, ">=" =, ">" = {
      assert_arrow("Ops group generics")
      vctr1 <- as_carrow_vctr(e1)
      vctr2 <- as_carrow_vctr(e2)
      array1 <- as_carrow_array(vctr1)
      array2 <- as_carrow_array(vctr2)
      arrow_array1 <- from_carrow_array(array1, arrow::Array)
      arrow_array2 <- from_carrow_array(array2, arrow::Array)

      result <- getNamespace("base")[[.Generic]](arrow_array1, arrow_array2)
      as_carrow_vctr(result)
    },
    stop(sprintf("Ops generic '%s' not supported for carrow_vctr()", .Generic)) 
# nocov
  )
}

#' @export
Summary.carrow_vctr <- function(x, ..., na.rm = FALSE) {
  assert_arrow("Math group generics")
  switch(
    .Generic,
    all =, any =,
    sum =, prod =,
    min =, max =,
    range = {
      # make sure dots are empty because we ignore them
      stopifnot(...length() == 0L)

      array <- as_carrow_array(x)
      arrow_array <- from_carrow_array(array, arrow::Array)
      getNamespace("base")[[.Generic]](arrow_array, na.rm = na.rm)
    },
    stop(sprintf("Summary generic '%s' not supported for carrow_vctr()", 
.Generic)) # nocov
  )
}

#' @export
Complex.carrow_vctr <- function(z) {
  stop("Complex group generics are not supported for carrow_vctr", call. = 
FALSE)
}
{code}

And some tests that might be useful to copy:


{code:R}

test_that("Math generics work", {
  # none of these are implemented in Arrow, so none are here either
})

test_that("Ops numeric generics work", {
  skip_if_not_installed("arrow")

  v1 <- c(1:5, NA)
  v2 <- 6:11
  vctr1 <- as_carrow_vctr(v1)
  vctr2 <- as_carrow_vctr(v2)

  # unary expressions are broken in Arrow so these don't work
  # expect_identical(
  #   from_carrow_array(as_carrow_array(+vctr1)),
  #   +v1
  # )
  #
  # expect_identical(
  #   from_carrow_array(as_carrow_array(-vctr1)),
  #   -v1
  # )

  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 + vctr2)),
    v1 + v2
  )

  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 - vctr2)),
    v1 - v2
  )

  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 * vctr2)),
    v1 * v2
  )

  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 / vctr2)),
    v1 / v2
  )

  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 ^ vctr2)),
    as.integer(v1 ^ v2)
  )

  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 %% vctr2)),
    v1 %% v2
  )

  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 %/% vctr2)),
    v1 %/% v2
  )

  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 + vctr2)),
    v1 + v2
  )
})

test_that("Ops logical generics work", {
  skip_if_not_installed("arrow")
  skip("until logical conversion is improved")

  v1 <- c(TRUE, TRUE, FALSE, FALSE, NA, NA, NA)
  v2 <- c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, NA)
  vctr1 <- as_carrow_vctr(v1)
  vctr2 <- as_carrow_vctr(v2)

  expect_identical(
    from_carrow_array(as_carrow_array(!vctr1)),
    !v1
  )

  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 & vctr2)),
    v1 & v2
  )

  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 & vctr2)),
    v1 | v2
  )
})

test_that("Ops comparison generics work", {
  skip_if_not_installed("arrow")
  skip("until logical conversion is improved")

  v1 <- c(1, 2, 3, 4, 5, 1, NA, 3, NA, 5, NA)
  v2 <- c(5, 4, 3, 2, 1, NA, 4, NA, 2, 1, NA)
  vctr1 <- as_carrow_vctr(v1)
  vctr2 <- as_carrow_vctr(v2)

  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 == vctr2)),
    v1 == v2
  )

  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 != vctr2)),
    v1 != v2
  )

  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 < vctr2)),
    v1 < v2
  )

  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 <= vctr2)),
    v1 <= v2
  )

  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 >= vctr2)),
    v1 >= v2
  )

  expect_identical(
    from_carrow_array(as_carrow_array(vctr1 > vctr2)),
    v1 > v2
  )
})

test_that("Summary numeric generics work", {
  skip_if_not_installed("arrow")

  v1 <- c(1:5, NA)
  vctr1 <- as_carrow_vctr(v1)

  expect_identical(
    from_carrow_array(as_carrow_array(sum(vctr1, na.rm = TRUE))),
    as.double(sum(v1, na.rm = TRUE))
  )

  expect_identical(
    from_carrow_array(as_carrow_array(sum(vctr1, na.rm = FALSE))),
    as.double(sum(v1, na.rm = FALSE))
  )

  expect_identical(
    from_carrow_array(as_carrow_array(min(vctr1, na.rm = TRUE))),
    min(v1, na.rm = TRUE)
  )

  expect_identical(
    from_carrow_array(as_carrow_array(min(vctr1, na.rm = FALSE))),
    min(v1, na.rm = FALSE)
  )

  expect_identical(
    from_carrow_array(as_carrow_array(max(vctr1, na.rm = TRUE))),
    max(v1, na.rm = TRUE)
  )

  expect_identical(
    from_carrow_array(as_carrow_array(max(vctr1, na.rm = FALSE))),
    max(v1, na.rm = FALSE)
  )

  skip("not all Summary generics are implemented in Arrow")

  expect_identical(
    from_carrow_array(as_carrow_array(range(vctr1, na.rm = TRUE))),
    range(v1, na.rm = TRUE)
  )

  expect_identical(
    from_carrow_array(as_carrow_array(range(vctr1, na.rm = FALSE))),
    range(v1, na.rm = FALSE)
  )

  expect_identical(
    from_carrow_array(as_carrow_array(prod(vctr1, na.rm = TRUE))),
    prod(v1, na.rm = TRUE)
  )

  expect_identical(
    from_carrow_array(as_carrow_array(prod(vctr1, na.rm = FALSE))),
    prod(v1, na.rm = FALSE)
  )
})

test_that("Summary logical generics work", {
  skip_if_not_installed("arrow")
  skip("until logical conversion is fixed")

  v1 <- c(FALSE, FALSE, NA)
  v2 <- c(TRUE, TRUE, NA)
  vctr1 <- as_carrow_vctr(v1)
  vctr2 <- as_carrow_vctr(v2)

  expect_identical(
    from_carrow_array(as_carrow_array(any(vctr1, na.rm = TRUE))),
    any(v1, na.rm = TRUE)
  )

  expect_identical(
    from_carrow_array(as_carrow_array(any(vctr1, na.rm = FALSE))),
    any(v1, na.rm = FALSE)
  )

  expect_identical(
    from_carrow_array(as_carrow_array(any(vctr2, na.rm = TRUE))),
    any(v2, na.rm = TRUE)
  )

  expect_identical(
    from_carrow_array(as_carrow_array(any(vctr2, na.rm = FALSE))),
    any(v2, na.rm = FALSE)
  )

  expect_identical(
    from_carrow_array(as_carrow_array(all(vctr1, na.rm = TRUE))),
    all(v1, na.rm = TRUE)
  )

  expect_identical(
    from_carrow_array(as_carrow_array(all(vctr1, na.rm = FALSE))),
    all(v1, na.rm = FALSE)
  )

  expect_identical(
    from_carrow_array(as_carrow_array(all(vctr2, na.rm = TRUE))),
    all(v2, na.rm = TRUE)
  )

  expect_identical(
    from_carrow_array(as_carrow_array(all(vctr2, na.rm = FALSE))),
    all(v2, na.rm = FALSE)
  )
})
{code}




--
This message was sent by Atlassian Jira
(v8.20.1#820001)

Reply via email to