[
https://issues.apache.org/jira/browse/ARROW-14297?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Dragoș Moldovan-Grünfeld updated ARROW-14297:
---------------------------------------------
Description:
Behaviour of integer division from Arrow differs from R.
{code}
``` r
library(arrow, warn.conflicts = FALSE)
library(testthat)
#>
#> Attaching package: 'testthat'
#> The following object is masked from 'package:arrow':
#>
#> matches
a <- c(1:4, NA_integer_)
a_arrow <- Array$create(a)
# expect_equal(a %/% 2, Array$create(c(0L, 1L, 1L, 2L, NA_integer_)))
expect_identical(as.vector(a_arrow %/% 2), a %/% 2)
#> Error: as.vector(a_arrow%/%2) not identical to a%/%2.
#> Objects equal but not identical
# this actually works, but only by accident, as in R the type of the denominator
# is the one that determines the type of the output
expect_identical(as.vector(a_arrow %/% 2L), a %/% 2L)
expect_identical(as.vector(a_arrow %/% 0), a %/% 0)
#> Error: Invalid: divide by zero
expect_identical(as.vector(a_arrow %/% 0L), a %/% 0L)
#> Error: Invalid: divide by zero
b <- -a
b_arrow <- Array$create(b)
expect_identical(as.vector(b_arrow %/% 2), b %/% 2)
#> Error: as.vector(b_arrow%/%2) not identical to b%/%2.
#> 2/5 mismatches (average diff: 1)
#> [1] 0 - -1 == 1
#> [3] -1 - -2 == 1
expect_identical(as.vector(b_arrow %/% .2), b %/% .2)
#> Error: as.vector(b_arrow%/%0.2) not identical to b%/%0.2.
#> Objects equal but not identical
```
{code}
expect_dplyr_equal() uses expect_equal(), which does not enforce type, so type mismatches like the ones above are not caught.
was:
Behaviour of integer division from Arrow differs from R.
{code}
``` r
library(arrow)
#>
#> Attaching package: 'arrow'
#> The following object is masked from 'package:utils':
#>
#> timestamp
a <- c(1:4, NA_integer_)
a_arrow <- Array$create(a)
# expect_equal(a %/% 2, Array$create(c(0L, 1L, 1L, 2L, NA_integer_)))
expect_identical(as.vector(a_arrow %/% 2), a %/% 2)
#> Error in expect_identical(as.vector(a_arrow%/%2), a%/%2): could not find
function "expect_identical"
# this actually works, but on accident as in R the type of the denominator
# is the one that determines the type of the output
expect_identical(as.vector(a_arrow %/% 2L), a %/% 2L)
#> Error in expect_identical(as.vector(a_arrow%/%2L), a%/%2L): could not find
function "expect_identical"
expect_identical(as.vector(a_arrow %/% 0), a %/% 0)
#> Error in expect_identical(as.vector(a_arrow%/%0), a%/%0): could not find
function "expect_identical"
expect_identical(as.vector(a_arrow %/% 0L), a %/% 0L)
#> Error in expect_identical(as.vector(a_arrow%/%0L), a%/%0L): could not find
function "expect_identical"
b <- -a
b_arrow <- Array$create(b)
expect_identical(as.vector(b_arrow %/% 2), b %/% 2)
#> Error in expect_identical(as.vector(b_arrow%/%2), b%/%2): could not find
function "expect_identical"
expect_identical(as.vector(b_arrow %/% .2), b %/% .2)
#> Error in expect_identical(as.vector(b_arrow%/%0.2), b%/%0.2): could not find
function "expect_identical"
```
<sup>Created on 2021-10-12 by the [reprex
package](https://reprex.tidyverse.org) (v2.0.1)</sup>
{code}
expect_dplyr_equal() uses expect_equal() which does not enforce type
> [R] smooth out integer division to better match R
> -------------------------------------------------
>
> Key: ARROW-14297
> URL: https://issues.apache.org/jira/browse/ARROW-14297
> Project: Apache Arrow
> Issue Type: Improvement
> Components: R
> Reporter: Dragoș Moldovan-Grünfeld
> Priority: Major
> Fix For: 7.0.0
>
>
> Behaviour of integer division from Arrow differs from R.
> {code}
> ``` r
> library(arrow, warn.conflicts = FALSE)
> library(testthat)
> #>
> #> Attaching package: 'testthat'
> #> The following object is masked from 'package:arrow':
> #>
> #> matches
> a <- c(1:4, NA_integer_)
> a_arrow <- Array$create(a)
> # expect_equal(a %/% 2, Array$create(c(0L, 1L, 1L, 2L, NA_integer_)))
> expect_identical(as.vector(a_arrow %/% 2), a %/% 2)
> #> Error: as.vector(a_arrow%/%2) not identical to a%/%2.
> #> Objects equal but not identical
> # this actually works, but only by accident, as in R the type of the denominator
> # is the one that determines the type of the output
> expect_identical(as.vector(a_arrow %/% 2L), a %/% 2L)
> expect_identical(as.vector(a_arrow %/% 0), a %/% 0)
> #> Error: Invalid: divide by zero
> expect_identical(as.vector(a_arrow %/% 0L), a %/% 0L)
> #> Error: Invalid: divide by zero
> b <- -a
> b_arrow <- Array$create(b)
> expect_identical(as.vector(b_arrow %/% 2), b %/% 2)
> #> Error: as.vector(b_arrow%/%2) not identical to b%/%2.
> #> 2/5 mismatches (average diff: 1)
> #> [1] 0 - -1 == 1
> #> [3] -1 - -2 == 1
> expect_identical(as.vector(b_arrow %/% .2), b %/% .2)
> #> Error: as.vector(b_arrow%/%0.2) not identical to b%/%0.2.
> #> Objects equal but not identical
> ```
> {code}
> expect_dplyr_equal() uses expect_equal(), which does not enforce type, so type mismatches like the ones above are not caught.
--
This message was sent by Atlassian Jira
(v8.3.4#803005)