[
https://issues.apache.org/jira/browse/ARROW-14297?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Dragoș Moldovan-Grünfeld updated ARROW-14297:
---------------------------------------------
Description:
The behaviour of integer division (%/%) in Arrow differs from that in R.
{code}
> a <- c(1:4, NA_integer_)
> a_arrow <- Array$create(a)
>
> # expect_equal(a %/% 2, Array$create(c(0L, 1L, 1L, 2L, NA_integer_)))
> expect_identical(as.vector(a_arrow %/% 2), a %/% 2)
Error: as.vector(a_arrow%/%2) (`actual`) not identical to a%/%2 (`expected`).
`actual` is an integer vector (0, 1, 1, 2, NA)
`expected` is a double vector (0, 1, 1, 2, NA)
>
> # this actually works, but only by accident: in R, the type of the denominator
> # is what determines the type of the output
> expect_identical(as.vector(a_arrow %/% 2L), a %/% 2L)
>
> expect_identical(as.vector(a_arrow %/% 0), a %/% 0)
Error: as.vector(a_arrow%/%0) (`actual`) not identical to a%/%0 (`expected`).
`actual` is an integer vector (2147483647, 2147483647, 2147483647, 2147483647,
NA)
`expected` is a double vector (Inf, Inf, Inf, Inf, NA)
> expect_identical(as.vector(a_arrow %/% 0L), a %/% 0L)
Error: as.vector(a_arrow%/%0L) (`actual`) not identical to a%/%0L (`expected`).
`actual`: 2147483647 2147483647 2147483647 2147483647 NA
`expected`: NA NA NA NA NA
>
> b <- -a
> b_arrow <- Array$create(b)
> expect_identical(as.vector(b_arrow %/% 2), b %/% 2)
Error: as.vector(b_arrow%/%2) (`actual`) not identical to b%/%2 (`expected`).
`actual` is an integer vector (0, -1, -1, -2, NA)
`expected` is a double vector (-1, -1, -2, -2, NA)
> expect_identical(as.vector(b_arrow %/% .2), b %/% .2)
Error: as.vector(b_arrow%/%0.2) (`actual`) not identical to b%/%0.2
(`expected`).
`actual` is an integer vector (-5, -10, -15, -20, NA)
`expected` is a double vector (-5, -10, -15, -20, NA)
{code}
Note: expect_dplyr_equal() uses expect_equal(), which does not enforce type (e.g. integer vs. double), so type mismatches like the ones above go undetected.
was:
Behaviour of integer division from Arrow differs from R.
{code}
``` r
library(arrow, warn.conflicts = FALSE)
library(testthat)
#>
#> Attaching package: 'testthat'
#> The following object is masked from 'package:arrow':
#>
#> matches
a <- c(1:4, NA_integer_)
a_arrow <- Array$create(a)
# expect_equal(a %/% 2, Array$create(c(0L, 1L, 1L, 2L, NA_integer_)))
expect_identical(as.vector(a_arrow %/% 2), a %/% 2)
#> Error: as.vector(a_arrow%/%2) not identical to a%/%2.
#> Objects equal but not identical
# this actually works, but on accident as in R the type of the denominator
# is the one that determines the type of the output
expect_identical(as.vector(a_arrow %/% 2L), a %/% 2L)
expect_identical(as.vector(a_arrow %/% 0), a %/% 0)
#> Error: Invalid: divide by zero
expect_identical(as.vector(a_arrow %/% 0L), a %/% 0L)
#> Error: Invalid: divide by zero
b <- -a
b_arrow <- Array$create(b)
expect_identical(as.vector(b_arrow %/% 2), b %/% 2)
#> Error: as.vector(b_arrow%/%2) not identical to b%/%2.
#> 2/5 mismatches (average diff: 1)
#> [1] 0 - -1 == 1
#> [3] -1 - -2 == 1
expect_identical(as.vector(b_arrow %/% .2), b %/% .2)
#> Error: as.vector(b_arrow%/%0.2) not identical to b%/%0.2.
#> Objects equal but not identical
{code}
expect_dplyr_equal() uses expect_equal() which does not enforce type
> [R] smooth out integer division to better match R
> -------------------------------------------------
>
> Key: ARROW-14297
> URL: https://issues.apache.org/jira/browse/ARROW-14297
> Project: Apache Arrow
> Issue Type: Improvement
> Components: R
> Reporter: Dragoș Moldovan-Grünfeld
> Priority: Major
> Fix For: 7.0.0
>
>
> The behaviour of integer division (%/%) in Arrow differs from that in R.
> {code}
> > a <- c(1:4, NA_integer_)
> > a_arrow <- Array$create(a)
> >
> > # expect_equal(a %/% 2, Array$create(c(0L, 1L, 1L, 2L, NA_integer_)))
> > expect_identical(as.vector(a_arrow %/% 2), a %/% 2)
> Error: as.vector(a_arrow%/%2) (`actual`) not identical to a%/%2 (`expected`).
> `actual` is an integer vector (0, 1, 1, 2, NA)
> `expected` is a double vector (0, 1, 1, 2, NA)
> >
> > # this actually works, but only by accident: in R, the type of the denominator
> > # is what determines the type of the output
> > expect_identical(as.vector(a_arrow %/% 2L), a %/% 2L)
> >
> > expect_identical(as.vector(a_arrow %/% 0), a %/% 0)
> Error: as.vector(a_arrow%/%0) (`actual`) not identical to a%/%0 (`expected`).
> `actual` is an integer vector (2147483647, 2147483647, 2147483647,
> 2147483647, NA)
> `expected` is a double vector (Inf, Inf, Inf, Inf, NA)
> > expect_identical(as.vector(a_arrow %/% 0L), a %/% 0L)
> Error: as.vector(a_arrow%/%0L) (`actual`) not identical to a%/%0L
> (`expected`).
> `actual`: 2147483647 2147483647 2147483647 2147483647 NA
> `expected`: NA NA NA NA NA
> >
> > b <- -a
> > b_arrow <- Array$create(b)
> > expect_identical(as.vector(b_arrow %/% 2), b %/% 2)
> Error: as.vector(b_arrow%/%2) (`actual`) not identical to b%/%2 (`expected`).
> `actual` is an integer vector (0, -1, -1, -2, NA)
> `expected` is a double vector (-1, -1, -2, -2, NA)
> > expect_identical(as.vector(b_arrow %/% .2), b %/% .2)
> Error: as.vector(b_arrow%/%0.2) (`actual`) not identical to b%/%0.2
> (`expected`).
> `actual` is an integer vector (-5, -10, -15, -20, NA)
> `expected` is a double vector (-5, -10, -15, -20, NA)
> {code}
> Note: expect_dplyr_equal() uses expect_equal(), which does not enforce type (e.g. integer vs. double), so type mismatches like the ones above go undetected.
--
This message was sent by Atlassian Jira
(v8.3.4#803005)