Github user felixcheung commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22954#discussion_r232170936

    --- Diff: R/pkg/R/SQLContext.R ---
    @@ -147,6 +147,55 @@ getDefaultSqlSource <- function() {
       l[["spark.sql.sources.default"]]
     }
     
    +writeToTempFileInArrow <- function(rdf, numPartitions) {
    +  # R API in Arrow is not yet released. CRAN requires to add the package in requireNamespace
    +  # at DESCRIPTION. Later, CRAN checks if the package is available or not. Therefore, it works
    +  # around by avoiding direct requireNamespace.
    +  requireNamespace1 <- requireNamespace
    +  if (requireNamespace1("arrow", quietly = TRUE)) {
    +    record_batch <- get("record_batch", envir = asNamespace("arrow"), inherits = FALSE)
    +    record_batch_stream_writer <- get(
    +      "record_batch_stream_writer", envir = asNamespace("arrow"), inherits = FALSE)
    +    file_output_stream <- get(
    +      "file_output_stream", envir = asNamespace("arrow"), inherits = FALSE)
    +    write_record_batch <- get(
    +      "write_record_batch", envir = asNamespace("arrow"), inherits = FALSE)
    +
    +    # Currently arrow requires withr; otherwise, write APIs don't work.
    +    # Direct 'require' is not recommended by CRAN. Here's a workaround.
    +    require1 <- require
    +    if (require1("withr", quietly = TRUE)) {
    +      numPartitions <- if (!is.null(numPartitions)) {
    +        numToInt(numPartitions)
    +      } else {
    +        1
    +      }
    +      fileName <- tempfile(pattern = "spark-arrow", fileext = ".tmp")
    +      chunk <- as.integer(ceiling(nrow(rdf) / numPartitions))
    +      rdf_slices <- split(rdf, rep(1:ceiling(nrow(rdf) / chunk), each = chunk)[1:nrow(rdf)])
    --- End diff --

    `1 : ceiling`? `1 : nrow`?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org