paleolimbot commented on a change in pull request #12324:
URL: https://github.com/apache/arrow/pull/12324#discussion_r806789891



##########
File path: r/R/array.R
##########
@@ -216,6 +216,76 @@ Array$create <- function(x, type = NULL) {
 #' @include arrowExports.R
 Array$import_from_c <- ImportArray
 
+
+#' Concatenate zero or more Arrays
+#'
+#' @param ... zero or more [Array] objects to concatenate
+#' @param type An optional `type` describing the desired
+#'   type for the final Array.
+#'
+#' @return An [Array]
+#' @export
+#'
+#' @examples
+#' concat_arrays(Array$create(1:3), Array$create(4:5))
+#'
+concat_arrays <- function(..., type = NULL) {
+  dots <- lapply(list2(...), Array$create, type = type)
+
+  if (length(dots) == 0 && is.null(type)) {
+    return(Array$create(logical(), type = null()))
+  } else if (length(dots) == 0) {
+    return(Array$create(logical(), type = null())$cast(type))
+  }
+
+  if (!is.null(type)) {
+    dots <- lapply(dots, function(array) array$cast(type))
+  }
+
+  arrow__Concatenate(dots)
+}
+
+# The c() method uses non-standard dispatch in R
+# and has some peculiarities when multiple types are passed to ....
+# However, with a method defined for all subclasses of Array, it will
+# do what a user expects most of the time.

Review comment:
       Good call! I thought this would fail:
   
   ``` r
   library(arrow, warn.conflicts = FALSE)
   
   arr1 <- Array$create(1:3)
   arr2 <- Array$create(4:5)
   c(arr1, arr2)
   #> Array
   #> <int32>
   #> [
   #>   1,
   #>   2,
   #>   3,
   #>   4,
   #>   5
   #> ]
   
   class(arr1) <- c("SomeSubclass", class(arr1))
   class(arr2) <- c("SomeOtherSubclass", class(arr2))
   c(arr1, arr2)
   #> Array
   #> <int32>
   #> [
   #>   1,
   #>   2,
   #>   3,
   #>   4,
   #>   5
   #> ]
   ```
   
   ...but it didn't, and removing all the extra `c()` methods didn't cause any 
of the tests to break, which means that for all practical purposes we don't 
need them. A better example of the madness I was thinking of is this:
   
   ``` r
   library(arrow, warn.conflicts = FALSE)
   c(Array$create(1:3), wk::xy(1:2, 1:2))
   #> Error: Invalid: arrays to be concatenated must be identically typed, but 
int32 and list<item: double> were encountered.
   c( wk::xy(1:2, 1:2), Array$create(1:3))
   #> Error: Can't combine 'wk_rcrd' objects that do not have identical classes.
   ```
   
   (where if more than one `c()` method is defined you'll get different results 
depending on which one is first).
   
   The "right way" to do this is a full vctrs implementation but I don't think 
it's time for that quite yet.

##########
File path: r/tests/testthat/test-Array.R
##########
@@ -989,6 +989,59 @@ test_that("auto int64 conversion to int can be disabled 
(ARROW-10093)", {
   })
 })
 
+test_that("concat_arrays works", {
+  concat_empty <- concat_arrays()
+  expect_true(concat_empty$type == null())
+  expect_equal(concat_empty$length(), 0L)
+
+  concat_empty_typed <- concat_arrays(type = int64())
+  expect_true(concat_empty_typed$type == int64())
+  expect_equal(concat_empty$length(), 0L)
+
+  concat_int <- concat_arrays(Array$create(1:3), Array$create(4:5))
+  expect_true(concat_int$type == int32())
+  expect_true(all(concat_int == Array$create(1:5)))
+
+  concat_int64 <- concat_arrays(
+    Array$create(1:3),
+    Array$create(4:5, type = int64()),
+    type = int64()
+  )
+  expect_true(concat_int64$type == int64())
+  expect_true(all(concat_int == Array$create(1:5)))
+
+  expect_error(
+    concat_arrays(
+      Array$create(1:3),
+      Array$create(4:5, type = int64())
+    ),
+    "must be identically typed"
+  )
+})
+
+test_that("c() works for Array", {

Review comment:
       Good call! This is another example of the madness around `c()` (which we 
can do little about except circumvent it by eventually defining a vctrs 
implementation).
   
   ``` r
   library(arrow, warn.conflicts = FALSE)
   c(Array$create(1L), 2L)
   #> Array
   #> <int32>
   #> [
   #>   1,
   #>   2
   #> ]
   c(2L, Array$create(1L))
   #> [[1]]
   #> [1] 2
   #> 
   #> [[2]]
   #> Array
   #> <int32>
   #> [
   #>   1
   #> ]
   ```
   
   <sup>Created on 2022-02-15 by the [reprex 
package](https://reprex.tidyverse.org) (v2.0.1)</sup>

##########
File path: r/tests/testthat/test-Array.R
##########
@@ -989,6 +989,59 @@ test_that("auto int64 conversion to int can be disabled 
(ARROW-10093)", {
   })
 })
 
+test_that("concat_arrays works", {
+  concat_empty <- concat_arrays()
+  expect_true(concat_empty$type == null())
+  expect_equal(concat_empty$length(), 0L)
+
+  concat_empty_typed <- concat_arrays(type = int64())
+  expect_true(concat_empty_typed$type == int64())
+  expect_equal(concat_empty$length(), 0L)
+
+  concat_int <- concat_arrays(Array$create(1:3), Array$create(4:5))
+  expect_true(concat_int$type == int32())
+  expect_true(all(concat_int == Array$create(1:5)))
+
+  concat_int64 <- concat_arrays(
+    Array$create(1:3),
+    Array$create(4:5, type = int64()),
+    type = int64()
+  )
+  expect_true(concat_int64$type == int64())
+  expect_true(all(concat_int == Array$create(1:5)))
+
+  expect_error(
+    concat_arrays(
+      Array$create(1:3),
+      Array$create(4:5, type = int64())
+    ),
+    "must be identically typed"
+  )
+})
+
+test_that("c() works for Array", {

Review comment:
       ...I added a test from the `concat_arrays()` end since we do have 
control over that!

##########
File path: r/R/array.R
##########
@@ -216,6 +216,76 @@ Array$create <- function(x, type = NULL) {
 #' @include arrowExports.R
 Array$import_from_c <- ImportArray
 
+
+#' Concatenate zero or more Arrays
+#'
+#' @param ... zero or more [Array] objects to concatenate
+#' @param type An optional `type` describing the desired
+#'   type for the final Array.
+#'
+#' @return An [Array]
+#' @export
+#'
+#' @examples
+#' concat_arrays(Array$create(1:3), Array$create(4:5))
+#'
+concat_arrays <- function(..., type = NULL) {
+  dots <- lapply(list2(...), Array$create, type = type)
+
+  if (length(dots) == 0 && is.null(type)) {
+    return(Array$create(logical(), type = null()))
+  } else if (length(dots) == 0) {
+    return(Array$create(logical(), type = null())$cast(type))
+  }
+
+  if (!is.null(type)) {
+    dots <- lapply(dots, function(array) array$cast(type))
+  }
+
+  arrow__Concatenate(dots)
+}
+
+# The c() method uses non-standard dispatch in R
+# and has some peculiarities when multiple types are passed to ....
+# However, with a method defined for all subclasses of Array, it will
+# do what a user expects most of the time.

Review comment:
       As @wjones127 noted above, if we *don't* define it we get even weirder 
behaviour. I think it's more intuitive to define it than omit but am happy to 
leave it out too. The upside is that it's the first thing an R user might think 
to do (as Will thought to do immediately based on the comment above).

##########
File path: r/tests/testthat/test-Array.R
##########
@@ -989,6 +989,65 @@ test_that("auto int64 conversion to int can be disabled 
(ARROW-10093)", {
   })
 })
 
+test_that("concat_arrays works", {
+  concat_empty <- concat_arrays()
+  expect_true(concat_empty$type == null())
+  expect_equal(concat_empty$length(), 0L)
+
+  concat_empty_typed <- concat_arrays(type = int64())
+  expect_true(concat_empty_typed$type == int64())
+  expect_equal(concat_empty$length(), 0L)
+
+  concat_int <- concat_arrays(Array$create(1:3), Array$create(4:5))
+  expect_true(concat_int$type == int32())
+  expect_true(all(concat_int == Array$create(1:5)))
+
+  concat_int64 <- concat_arrays(
+    Array$create(1:3),
+    Array$create(4:5, type = int64()),
+    type = int64()
+  )
+  expect_true(concat_int64$type == int64())
+  expect_true(all(concat_int == Array$create(1:5)))
+
+  expect_error(
+    concat_arrays(
+      Array$create(1:3),
+      Array$create(4:5, type = int64())
+    ),
+    "must be identically typed"
+  )
+})
+
+test_that("concat_arrays() coerces its input to Array", {
+  concat_ints <- concat_arrays(1L, 2L)
+  expect_true(concat_ints$type == int32())
+  expect_true(all(concat_ints == Array$create(c(1L, 2L))))

Review comment:
       Done!

##########
File path: r/R/array.R
##########
@@ -216,6 +216,76 @@ Array$create <- function(x, type = NULL) {
 #' @include arrowExports.R
 Array$import_from_c <- ImportArray
 
+
+#' Concatenate zero or more Arrays
+#'
+#' @param ... zero or more [Array] objects to concatenate
+#' @param type An optional `type` describing the desired
+#'   type for the final Array.
+#'
+#' @return An [Array]
+#' @export
+#'
+#' @examples
+#' concat_arrays(Array$create(1:3), Array$create(4:5))
+#'
+concat_arrays <- function(..., type = NULL) {
+  dots <- lapply(list2(...), Array$create, type = type)
+
+  if (length(dots) == 0 && is.null(type)) {
+    return(Array$create(logical(), type = null()))
+  } else if (length(dots) == 0) {
+    return(Array$create(logical(), type = null())$cast(type))
+  }
+
+  if (!is.null(type)) {
+    dots <- lapply(dots, function(array) array$cast(type))
+  }
+
+  arrow__Concatenate(dots)
+}
+
+# The c() method uses non-standard dispatch in R
+# and has some peculiarities when multiple types are passed to ....
+# However, with a method defined for all subclasses of Array, it will
+# do what a user expects most of the time.

Review comment:
       Done!




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to