paleolimbot commented on a change in pull request #12324:
URL: https://github.com/apache/arrow/pull/12324#discussion_r806789891
##########
File path: r/R/array.R
##########
@@ -216,6 +216,76 @@ Array$create <- function(x, type = NULL) {
#' @include arrowExports.R
Array$import_from_c <- ImportArray
+
+#' Concatenate zero or more Arrays
+#'
+#' @param ... zero or more [Array] objects to concatenate
+#' @param type An optional `type` describing the desired
+#' type for the final Array.
+#'
+#' @return An [Array]
+#' @export
+#'
+#' @examples
+#' concat_arrays(Array$create(1:3), Array$create(4:5))
+#'
+concat_arrays <- function(..., type = NULL) {
+ dots <- lapply(list2(...), Array$create, type = type)
+
+ if (length(dots) == 0 && is.null(type)) {
+ return(Array$create(logical(), type = null()))
+ } else if (length(dots) == 0) {
+ return(Array$create(logical(), type = null())$cast(type))
+ }
+
+ if (!is.null(type)) {
+ dots <- lapply(dots, function(array) array$cast(type))
+ }
+
+ arrow__Concatenate(dots)
+}
+
+# The c() method uses non-standard dispatch in R
+# and has some peculiarities when multiple types are passed to ....
+# However, with a method defined for all subclasses of Array, it will
+# do what a user expects most of the time.
Review comment:
Good call! I thought this would fail:
``` r
library(arrow, warn.conflicts = FALSE)
arr1 <- Array$create(1:3)
arr2 <- Array$create(4:5)
c(arr1, arr2)
#> Array
#> <int32>
#> [
#> 1,
#> 2,
#> 3,
#> 4,
#> 5
#> ]
class(arr1) <- c("SomeSubclass", class(arr1))
class(arr2) <- c("SomeOtherSubclass", class(arr2))
c(arr1, arr2)
#> Array
#> <int32>
#> [
#> 1,
#> 2,
#> 3,
#> 4,
#> 5
#> ]
```
...but it didn't, and removing all the extra `c()` methods didn't cause any
of the tests to break, which means that for all practical purposes we don't
need them. A better example of the madness I was thinking of is this:
``` r
library(arrow, warn.conflicts = FALSE)
c(Array$create(1:3), wk::xy(1:2, 1:2))
#> Error: Invalid: arrays to be concatenated must be identically typed, but
int32 and list<item: double> were encountered.
c( wk::xy(1:2, 1:2), Array$create(1:3))
#> Error: Can't combine 'wk_rcrd' objects that do not have identical classes.
```
(where if more than one `c()` method is defined you'll get different results
depending on which one is first).
The "right way" to do this is a full vctrs implementation but I don't think
it's time for that quite yet.
##########
File path: r/tests/testthat/test-Array.R
##########
@@ -989,6 +989,59 @@ test_that("auto int64 conversion to int can be disabled
(ARROW-10093)", {
})
})
+test_that("concat_arrays works", {
+ concat_empty <- concat_arrays()
+ expect_true(concat_empty$type == null())
+ expect_equal(concat_empty$length(), 0L)
+
+ concat_empty_typed <- concat_arrays(type = int64())
+ expect_true(concat_empty_typed$type == int64())
+ expect_equal(concat_empty$length(), 0L)
+
+ concat_int <- concat_arrays(Array$create(1:3), Array$create(4:5))
+ expect_true(concat_int$type == int32())
+ expect_true(all(concat_int == Array$create(1:5)))
+
+ concat_int64 <- concat_arrays(
+ Array$create(1:3),
+ Array$create(4:5, type = int64()),
+ type = int64()
+ )
+ expect_true(concat_int64$type == int64())
+ expect_true(all(concat_int == Array$create(1:5)))
+
+ expect_error(
+ concat_arrays(
+ Array$create(1:3),
+ Array$create(4:5, type = int64())
+ ),
+ "must be identically typed"
+ )
+})
+
+test_that("c() works for Array", {
Review comment:
Good call! This is another example of the madness around `c()` (which we
can do little about except circumvent it by eventually defining a vctrs
implementation).
``` r
library(arrow, warn.conflicts = FALSE)
c(Array$create(1L), 2L)
#> Array
#> <int32>
#> [
#> 1,
#> 2
#> ]
c(2L, Array$create(1L))
#> [[1]]
#> [1] 2
#>
#> [[2]]
#> Array
#> <int32>
#> [
#> 1
#> ]
```
<sup>Created on 2022-02-15 by the [reprex
package](https://reprex.tidyverse.org) (v2.0.1)</sup>
##########
File path: r/tests/testthat/test-Array.R
##########
@@ -989,6 +989,59 @@ test_that("auto int64 conversion to int can be disabled
(ARROW-10093)", {
})
})
+test_that("concat_arrays works", {
+ concat_empty <- concat_arrays()
+ expect_true(concat_empty$type == null())
+ expect_equal(concat_empty$length(), 0L)
+
+ concat_empty_typed <- concat_arrays(type = int64())
+ expect_true(concat_empty_typed$type == int64())
+ expect_equal(concat_empty$length(), 0L)
+
+ concat_int <- concat_arrays(Array$create(1:3), Array$create(4:5))
+ expect_true(concat_int$type == int32())
+ expect_true(all(concat_int == Array$create(1:5)))
+
+ concat_int64 <- concat_arrays(
+ Array$create(1:3),
+ Array$create(4:5, type = int64()),
+ type = int64()
+ )
+ expect_true(concat_int64$type == int64())
+ expect_true(all(concat_int == Array$create(1:5)))
+
+ expect_error(
+ concat_arrays(
+ Array$create(1:3),
+ Array$create(4:5, type = int64())
+ ),
+ "must be identically typed"
+ )
+})
+
+test_that("c() works for Array", {
Review comment:
...I added a test from the `concat_arrays()` end since we do have
control over that!
##########
File path: r/R/array.R
##########
@@ -216,6 +216,76 @@ Array$create <- function(x, type = NULL) {
#' @include arrowExports.R
Array$import_from_c <- ImportArray
+
+#' Concatenate zero or more Arrays
+#'
+#' @param ... zero or more [Array] objects to concatenate
+#' @param type An optional `type` describing the desired
+#' type for the final Array.
+#'
+#' @return An [Array]
+#' @export
+#'
+#' @examples
+#' concat_arrays(Array$create(1:3), Array$create(4:5))
+#'
+concat_arrays <- function(..., type = NULL) {
+ dots <- lapply(list2(...), Array$create, type = type)
+
+ if (length(dots) == 0 && is.null(type)) {
+ return(Array$create(logical(), type = null()))
+ } else if (length(dots) == 0) {
+ return(Array$create(logical(), type = null())$cast(type))
+ }
+
+ if (!is.null(type)) {
+ dots <- lapply(dots, function(array) array$cast(type))
+ }
+
+ arrow__Concatenate(dots)
+}
+
+# The c() method uses non-standard dispatch in R
+# and has some peculiarities when multiple types are passed to ....
+# However, with a method defined for all subclasses of Array, it will
+# do what a user expects most of the time.
Review comment:
As @wjones127 noted above, if we *don't* define it we get even weirder
behaviour. I think it's more intuitive to define it than omit but am happy to
leave it out too. The upside is that it's the first thing an R user might think
to do (as Will thought to do immediately based on the comment above).
##########
File path: r/tests/testthat/test-Array.R
##########
@@ -989,6 +989,65 @@ test_that("auto int64 conversion to int can be disabled
(ARROW-10093)", {
})
})
+test_that("concat_arrays works", {
+ concat_empty <- concat_arrays()
+ expect_true(concat_empty$type == null())
+ expect_equal(concat_empty$length(), 0L)
+
+ concat_empty_typed <- concat_arrays(type = int64())
+ expect_true(concat_empty_typed$type == int64())
+ expect_equal(concat_empty$length(), 0L)
+
+ concat_int <- concat_arrays(Array$create(1:3), Array$create(4:5))
+ expect_true(concat_int$type == int32())
+ expect_true(all(concat_int == Array$create(1:5)))
+
+ concat_int64 <- concat_arrays(
+ Array$create(1:3),
+ Array$create(4:5, type = int64()),
+ type = int64()
+ )
+ expect_true(concat_int64$type == int64())
+ expect_true(all(concat_int == Array$create(1:5)))
+
+ expect_error(
+ concat_arrays(
+ Array$create(1:3),
+ Array$create(4:5, type = int64())
+ ),
+ "must be identically typed"
+ )
+})
+
+test_that("concat_arrays() coerces its input to Array", {
+ concat_ints <- concat_arrays(1L, 2L)
+ expect_true(concat_ints$type == int32())
+ expect_true(all(concat_ints == Array$create(c(1L, 2L))))
Review comment:
Done!
##########
File path: r/R/array.R
##########
@@ -216,6 +216,76 @@ Array$create <- function(x, type = NULL) {
#' @include arrowExports.R
Array$import_from_c <- ImportArray
+
+#' Concatenate zero or more Arrays
+#'
+#' @param ... zero or more [Array] objects to concatenate
+#' @param type An optional `type` describing the desired
+#' type for the final Array.
+#'
+#' @return An [Array]
+#' @export
+#'
+#' @examples
+#' concat_arrays(Array$create(1:3), Array$create(4:5))
+#'
+concat_arrays <- function(..., type = NULL) {
+ dots <- lapply(list2(...), Array$create, type = type)
+
+ if (length(dots) == 0 && is.null(type)) {
+ return(Array$create(logical(), type = null()))
+ } else if (length(dots) == 0) {
+ return(Array$create(logical(), type = null())$cast(type))
+ }
+
+ if (!is.null(type)) {
+ dots <- lapply(dots, function(array) array$cast(type))
+ }
+
+ arrow__Concatenate(dots)
+}
+
+# The c() method uses non-standard dispatch in R
+# and has some peculiarities when multiple types are passed to ....
+# However, with a method defined for all subclasses of Array, it will
+# do what a user expects most of the time.
Review comment:
Done!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]