Repository: spark Updated Branches: refs/heads/master db5165246 -> eeb58722a
[SPARK-12198][SPARKR] SparkR support read.parquet and deprecate parquetFile SparkR support ```read.parquet``` and deprecate ```parquetFile```. This change is similar to #10145 for ```jsonFile```. Author: Yanbo Liang <yblia...@gmail.com> Closes #10191 from yanboliang/spark-12198. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/eeb58722 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/eeb58722 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/eeb58722 Branch: refs/heads/master Commit: eeb58722ad73441eeb5f35f864be3c5392cfd426 Parents: db51652 Author: Yanbo Liang <yblia...@gmail.com> Authored: Thu Dec 10 09:44:53 2015 -0800 Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu> Committed: Thu Dec 10 09:44:53 2015 -0800 ---------------------------------------------------------------------- R/pkg/NAMESPACE | 1 + R/pkg/R/SQLContext.R | 16 ++++++++++++++-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 11 +++++++---- 3 files changed, 22 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/eeb58722/R/pkg/NAMESPACE ---------------------------------------------------------------------- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 565a2b1..ba64bc5 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -270,6 +270,7 @@ export("as.DataFrame", "loadDF", "parquetFile", "read.df", + "read.parquet", "sql", "table", "tableNames", http://git-wip-us.apache.org/repos/asf/spark/blob/eeb58722/R/pkg/R/SQLContext.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 85541c8..f678c70 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -256,18 +256,30 @@ jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) { } } - #' Create a DataFrame from a Parquet file.
#' #' Loads a Parquet file, returning the result as a DataFrame. #' #' @param sqlContext SQLContext to use -#' @param ... Path(s) of parquet file(s) to read. +#' @param path Path of file to read. A vector of multiple paths is allowed. #' @return DataFrame +#' @rdname read.parquet +#' @name read.parquet #' @export +read.parquet <- function(sqlContext, path) { + # Allow the user to have a more flexible definiton of the text file path + paths <- as.list(suppressWarnings(normalizePath(path))) + read <- callJMethod(sqlContext, "read") + sdf <- callJMethod(read, "parquet", paths) + dataFrame(sdf) +} +#' @rdname read.parquet +#' @name parquetFile +#' @export # TODO: Implement saveasParquetFile and write examples for both parquetFile <- function(sqlContext, ...) { + .Deprecated("read.parquet") # Allow the user to have a more flexible definiton of the text file path paths <- lapply(list(...), function(x) suppressWarnings(normalizePath(x))) sdf <- callJMethod(sqlContext, "parquetFile", paths) http://git-wip-us.apache.org/repos/asf/spark/blob/eeb58722/R/pkg/inst/tests/testthat/test_sparkSQL.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 39fc94a..222c04a 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1420,22 +1420,25 @@ test_that("mutate(), transform(), rename() and names()", { detach(airquality) }) -test_that("write.df() on DataFrame and works with parquetFile", { +test_that("write.df() on DataFrame and works with read.parquet", { df <- jsonFile(sqlContext, jsonPath) write.df(df, parquetPath, "parquet", mode="overwrite") - parquetDF <- parquetFile(sqlContext, parquetPath) + parquetDF <- read.parquet(sqlContext, parquetPath) expect_is(parquetDF, "DataFrame") expect_equal(count(df), count(parquetDF)) }) -test_that("parquetFile works with multiple input paths", { 
+test_that("read.parquet()/parquetFile() works with multiple input paths", { df <- jsonFile(sqlContext, jsonPath) write.df(df, parquetPath, "parquet", mode="overwrite") parquetPath2 <- tempfile(pattern = "parquetPath2", fileext = ".parquet") write.df(df, parquetPath2, "parquet", mode="overwrite") - parquetDF <- parquetFile(sqlContext, parquetPath, parquetPath2) + parquetDF <- read.parquet(sqlContext, c(parquetPath, parquetPath2)) expect_is(parquetDF, "DataFrame") expect_equal(count(parquetDF), count(df) * 2) + parquetDF2 <- suppressWarnings(parquetFile(sqlContext, parquetPath, parquetPath2)) + expect_is(parquetDF2, "DataFrame") + expect_equal(count(parquetDF2), count(df) * 2) # Test if varargs works with variables saveMode <- "overwrite" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org