nealrichardson commented on code in PR #12751:
URL: https://github.com/apache/arrow/pull/12751#discussion_r852384536
##########
r/R/table.R:
##########
@@ -149,6 +149,77 @@ Table$create <- function(..., schema = NULL) {
#' @export
names.Table <- function(x) x$ColumnNames()
+#' @export
+rbind.Table <- function(...) {
+ tables <- list(...)
+
+ # assert they have same schema
+ schema <- tables[[1]]$schema
+ unequal_schema_idx <- which.min(lapply(tables, function(x) x$schema ==
schema))
+ if (unequal_schema_idx != 1) {
+ abort(c(
+ sprintf("Schema at index %i does not match the first schema",
unequal_schema_idx),
+ i = paste0("Schema 1:\n", schema$ToString()),
+ i = paste0(sprintf("Schema %d:\n", unequal_schema_idx),
+ tables[[unequal_schema_idx]]$schema$ToString())
+ ))
+ }
+
+ # create chunked array for each column
+ columns <- map(seq_len(tables[[1]]$num_columns), function(i) {
+ do.call(c, map(tables, ~ .[[i]]))
+ })
+
+ Table$create(!!!set_names(columns, names(schema)), schema = schema)
+}
+
+#' @export
+cbind.Table <- function(...) {
Review Comment:
This is basically the same as `cbind.RecordBatch`, right? Is there any way we
can consolidate the two?
##########
r/R/table.R:
##########
@@ -149,6 +149,77 @@ Table$create <- function(..., schema = NULL) {
#' @export
names.Table <- function(x) x$ColumnNames()
+#' @export
+rbind.Table <- function(...) {
+ tables <- list(...)
+
+ # assert they have same schema
+ schema <- tables[[1]]$schema
+ unequal_schema_idx <- which.min(lapply(tables, function(x) x$schema ==
schema))
+ if (unequal_schema_idx != 1) {
+ abort(c(
+ sprintf("Schema at index %i does not match the first schema",
unequal_schema_idx),
+ i = paste0("Schema 1:\n", schema$ToString()),
+ i = paste0(sprintf("Schema %d:\n", unequal_schema_idx),
+ tables[[unequal_schema_idx]]$schema$ToString())
+ ))
+ }
+
+ # create chunked array for each column
+ columns <- map(seq_len(tables[[1]]$num_columns), function(i) {
+ do.call(c, map(tables, ~ .[[i]]))
+ })
+
+ Table$create(!!!set_names(columns, names(schema)), schema = schema)
+}
+
+#' @export
+cbind.Table <- function(...) {
+ call <- sys.call()
+ inputs <- list(...)
+ num_rows <- inputs[[1]]$num_rows
+
+ # These names are only used for scalar or arrays
+ arg_names <- if (is.null(names(inputs))) character(length(inputs)) else
names(inputs)
+ arg_names <- make.names(arg_names, unique = TRUE)
+
+ tables <- map(seq_along(inputs), function(i) {
+ input <- inputs[[i]]
+ name <- arg_names[i]
+
Review Comment:
I believe you can factor all of the length checks out into a single
`cbind_check_length(num_rows, NROW(input), i, call)` helper
(at least for everything except the scalar recycling case).
##########
r/R/table.R:
##########
@@ -149,6 +149,77 @@ Table$create <- function(..., schema = NULL) {
#' @export
names.Table <- function(x) x$ColumnNames()
+#' @export
+rbind.Table <- function(...) {
+ tables <- list(...)
+
+ # assert they have same schema
+ schema <- tables[[1]]$schema
+ unequal_schema_idx <- which.min(lapply(tables, function(x) x$schema ==
schema))
+ if (unequal_schema_idx != 1) {
+ abort(c(
+ sprintf("Schema at index %i does not match the first schema",
unequal_schema_idx),
+ i = paste0("Schema 1:\n", schema$ToString()),
+ i = paste0(sprintf("Schema %d:\n", unequal_schema_idx),
+ tables[[unequal_schema_idx]]$schema$ToString())
+ ))
+ }
+
+ # create chunked array for each column
+ columns <- map(seq_len(tables[[1]]$num_columns), function(i) {
+ do.call(c, map(tables, ~ .[[i]]))
+ })
+
+ Table$create(!!!set_names(columns, names(schema)), schema = schema)
+}
+
+#' @export
+cbind.Table <- function(...) {
+ call <- sys.call()
+ inputs <- list(...)
+ num_rows <- inputs[[1]]$num_rows
+
+ # These names are only used for scalar or arrays
+ arg_names <- if (is.null(names(inputs))) character(length(inputs)) else
names(inputs)
+ arg_names <- make.names(arg_names, unique = TRUE)
+
+ tables <- map(seq_along(inputs), function(i) {
Review Comment:
Can `imap()` or `map2()` help here?
https://purrr.tidyverse.org/reference/imap.html
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]