paleolimbot commented on a change in pull request #12751:
URL: https://github.com/apache/arrow/pull/12751#discussion_r839098244



##########
File path: r/R/chunked-array.R
##########
@@ -148,6 +153,12 @@ ChunkedArray$create <- function(..., type = NULL) {
   ChunkedArray__from_list(list2(...), type)
 }
 
+#' @export
+c.ChunkedArray <- function(...) {
+  arrays <- list(...)
+  do.call(ChunkedArray$create, unlist(lapply(arrays, function(arr) 
arr$chunks)))

Review comment:
       - Maybe `chunked_arrays` instead of `arrays`? (Given that both appear 
here and the difference matters)
   - If you can rig it so that you do `ChunkedArrays$create(!!! 
list_of_arrays)` it might be cleaner (and might give a nicer error message when 
it fails)
   - What happens when the arrays have different types? I think I rigged 
something in `concat_arrays()` so that you can specify the final type which 
might be worth adopting here too.

##########
File path: r/R/record-batch.R
##########
@@ -189,3 +189,36 @@ record_batch <- RecordBatch$create
 
 #' @export
 names.RecordBatch <- function(x) x$names()
+
+#' @export
+rbind.RecordBatch <- function(...) {
+  stop("Use Table$create to combine record batches")
+}
+
+#' @export
+cbind.RecordBatch <- function(...) {
+  batches <- list(...)
+
+  # Assert they have the same length
+  unequal_length_idx <- which.min(lapply(batches, function(x) x$num_rows == 
batches[[1]]$num_rows))

Review comment:
       See the note below about maybe using `vctrs::vec_size_common()` here

##########
File path: r/R/array.R
##########
@@ -256,7 +255,7 @@ concat_arrays <- function(..., type = NULL) {
 #' @rdname concat_arrays
 #' @export
 c.Array <- function(...) {
-  concat_arrays(...)
+  stop("Use concat_arrays() to create a new array, ChunkedArray$create() to 
create a zero-copy concatenation")

Review comment:
       It's not perfect, but I think we're generally trying to converge around 
`abort()` for error messages.
   
   ```suggestion
     abort("Use concat_arrays() to create a new array, ChunkedArray$create() to 
create a zero-copy concatenation")
   ```

##########
File path: r/tests/testthat/test-Array.R
##########
@@ -1035,30 +1035,6 @@ test_that("concat_arrays() coerces its input to Array", {
   )
 })
 
-test_that("c() works for Array", {

Review comment:
       You should probably test the error here (`expect_snapshot(..., error = 
TRUE)` or `expect_error()`)

##########
File path: r/R/table.R
##########
@@ -149,6 +149,64 @@ Table$create <- function(..., schema = NULL) {
 #' @export
 names.Table <- function(x) x$ColumnNames()
 
+#' @export
+rbind.Table <- function(...) {
+  tables <- list(...)
+
+  # assert they have same schema
+  schema <- tables[[1]]$schema
+  unequal_schema_idx <- which.min(lapply(tables, function(x) x$schema == 
schema))
+  if (unequal_schema_idx != 1) {
+    stop(paste0(
+      sprintf("Schema at index %i does not match the first schema\n", 
unequal_schema_idx),
+      "Schema 1:\n",
+      tables[[1]]$schema$ToString(),
+      sprintf("\nSchema %i:\n", unequal_schema_idx),
+      tables[[unequal_schema_idx]]$schema$ToString()
+    ))
+  }
+
+  # create chunked array from each column
+  columns <- vector(mode = "list", length = tables[[1]]$num_columns)
+  for (i in seq_len(length(columns))) {
+    columns[[i]] <- do.call(c, lapply(tables, function(tab) tab[[i]]))
+  }
+
+  # return new table
+  args <- columns
+  names(args) <- names(schema)
+  args$schema <- schema
+  do.call(Table$create, args)
+}
+
+#' @export
+cbind.Table <- function(...) {
+  tables <- list(...)
+
+  # Assert they have the same length
+  unequal_length_idx <- which.min(lapply(tables, function(x) x$num_rows == 
tables[[1]]$num_rows))

Review comment:
       You could use:
   
   ``` r
   vctrs::vec_size_common(seq_len(5), seq_len(10))
   #> Error in `stop_vctrs()`:
   #> ! Can't recycle `..1` (size 5) to match `..2` (size 10).
   ```
   
   ...with the slight complication that you're not actually recycling Tables 
with 1 row to match the length of the data. Maybe it's worth considering that 
(it's what `dplyr::bind_cols()`, which is powered by `vctrs::vec_cbind()` does).
   
   

##########
File path: r/R/table.R
##########
@@ -149,6 +149,64 @@ Table$create <- function(..., schema = NULL) {
 #' @export
 names.Table <- function(x) x$ColumnNames()
 
+#' @export
+rbind.Table <- function(...) {
+  tables <- list(...)
+
+  # assert they have same schema
+  schema <- tables[[1]]$schema

Review comment:
       If we use `UnionDataset` + `InMemoryDataset` do we have a better chance 
of combining tables with different schemas (at least filling missing columns 
with nulls and/or reconciling schemas that are equal except for the order of 
the columns?).

##########
File path: r/tests/testthat/test-RecordBatch.R
##########
@@ -513,6 +513,26 @@ test_that("record_batch() with different length arrays", {
   expect_error(record_batch(a = 1:5, b = 1:6), msg)
 })
 
+test_that("RecordBatch supports cbind", {
+  expect_error(
+    cbind(
+      RecordBatch$create(a = 1:10, ),
+      RecordBatch$create(a = c("a", "b"))
+    ),
+    regexp = "unequal number of rows"
+  )
+
+  batches <- list(
+    RecordBatch$create(a = c(1, 2), b = c("a", "b")),
+    RecordBatch$create(a = c("d", "c")),
+    RecordBatch$create(c = c(2, 3))
+  )
+
+  expected <- RecordBatch$create(
+    do.call(cbind, lapply(batches, function(batch) as.data.frame(batch))))
+  expect_equal(do.call(cbind, batches), expected, ignore_attr = TRUE)
+})
+

Review comment:
       ...and test the error for `rbind()` too

##########
File path: r/R/record-batch.R
##########
@@ -189,3 +189,36 @@ record_batch <- RecordBatch$create
 
 #' @export
 names.RecordBatch <- function(x) x$names()
+
+#' @export
+rbind.RecordBatch <- function(...) {
+  stop("Use Table$create to combine record batches")

Review comment:
       ```suggestion
     abort("Use Table$create to combine record batches")
   ```

##########
File path: r/R/record-batch.R
##########
@@ -189,3 +189,36 @@ record_batch <- RecordBatch$create
 
 #' @export
 names.RecordBatch <- function(x) x$names()
+
+#' @export
+rbind.RecordBatch <- function(...) {
+  stop("Use Table$create to combine record batches")
+}
+
+#' @export
+cbind.RecordBatch <- function(...) {
+  batches <- list(...)
+
+  # Assert they have the same length
+  unequal_length_idx <- which.min(lapply(batches, function(x) x$num_rows == 
batches[[1]]$num_rows))
+  if (unequal_length_idx != 1) {
+    stop(

Review comment:
       ```suggestion
       abort(
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to