spark git commit: [SPARK-12198][SPARKR] SparkR support read.parquet and deprecate parquetFile

shivaram Thu, 10 Dec 2015 09:45:07 -0800

Repository: spark
Updated Branches:
  refs/heads/master db5165246 -> eeb58722a



[SPARK-12198][SPARKR] SparkR support read.parquet and deprecate parquetFile

SparkR now supports ```read.parquet``` and deprecates ```parquetFile```. This change 
is similar to #10145 for ```jsonFile```.

Author: Yanbo Liang <[email protected]>

Closes #10191 from yanboliang/spark-12198.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/eeb58722
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/eeb58722
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/eeb58722

Branch: refs/heads/master
Commit: eeb58722ad73441eeb5f35f864be3c5392cfd426
Parents: db51652
Author: Yanbo Liang <[email protected]>
Authored: Thu Dec 10 09:44:53 2015 -0800
Committer: Shivaram Venkataraman <[email protected]>
Committed: Thu Dec 10 09:44:53 2015 -0800

----------------------------------------------------------------------
 R/pkg/NAMESPACE                           |  1 +
 R/pkg/R/SQLContext.R                      | 16 ++++++++++++++--
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 11 +++++++----
 3 files changed, 22 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/eeb58722/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 565a2b1..ba64bc5 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -270,6 +270,7 @@ export("as.DataFrame",
        "loadDF",
        "parquetFile",
        "read.df",
+       "read.parquet",
        "sql",
        "table",
        "tableNames",

http://git-wip-us.apache.org/repos/asf/spark/blob/eeb58722/R/pkg/R/SQLContext.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 85541c8..f678c70 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -256,18 +256,30 @@ jsonRDD <- function(sqlContext, rdd, schema = NULL, 
samplingRatio = 1.0) {
   }
 }
 
-
 #' Create a DataFrame from a Parquet file.
 #'
 #' Loads a Parquet file, returning the result as a DataFrame.
 #'
 #' @param sqlContext SQLContext to use
-#' @param ... Path(s) of parquet file(s) to read.
+#' @param path Path of file to read. A vector of multiple paths is allowed.
 #' @return DataFrame
+#' @rdname read.parquet
+#' @name read.parquet
 #' @export
+read.parquet <- function(sqlContext, path) {
+  # Allow the user to have a more flexible definiton of the text file path
+  paths <- as.list(suppressWarnings(normalizePath(path)))
+  read <- callJMethod(sqlContext, "read")
+  sdf <- callJMethod(read, "parquet", paths)
+  dataFrame(sdf)
+}
 
+#' @rdname read.parquet
+#' @name parquetFile
+#' @export
 # TODO: Implement saveasParquetFile and write examples for both
 parquetFile <- function(sqlContext, ...) {
+  .Deprecated("read.parquet")
   # Allow the user to have a more flexible definiton of the text file path
   paths <- lapply(list(...), function(x) suppressWarnings(normalizePath(x)))
   sdf <- callJMethod(sqlContext, "parquetFile", paths)

http://git-wip-us.apache.org/repos/asf/spark/blob/eeb58722/R/pkg/inst/tests/testthat/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R 
b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 39fc94a..222c04a 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1420,22 +1420,25 @@ test_that("mutate(), transform(), rename() and 
names()", {
   detach(airquality)
 })
 
-test_that("write.df() on DataFrame and works with parquetFile", {
+test_that("write.df() on DataFrame and works with read.parquet", {
   df <- jsonFile(sqlContext, jsonPath)
   write.df(df, parquetPath, "parquet", mode="overwrite")
-  parquetDF <- parquetFile(sqlContext, parquetPath)
+  parquetDF <- read.parquet(sqlContext, parquetPath)
   expect_is(parquetDF, "DataFrame")
   expect_equal(count(df), count(parquetDF))
 })
 
-test_that("parquetFile works with multiple input paths", {
+test_that("read.parquet()/parquetFile() works with multiple input paths", {
   df <- jsonFile(sqlContext, jsonPath)
   write.df(df, parquetPath, "parquet", mode="overwrite")
   parquetPath2 <- tempfile(pattern = "parquetPath2", fileext = ".parquet")
   write.df(df, parquetPath2, "parquet", mode="overwrite")
-  parquetDF <- parquetFile(sqlContext, parquetPath, parquetPath2)
+  parquetDF <- read.parquet(sqlContext, c(parquetPath, parquetPath2))
   expect_is(parquetDF, "DataFrame")
   expect_equal(count(parquetDF), count(df) * 2)
+  parquetDF2 <- suppressWarnings(parquetFile(sqlContext, parquetPath, 
parquetPath2))
+  expect_is(parquetDF2, "DataFrame")
+  expect_equal(count(parquetDF2), count(df) * 2)
 
   # Test if varargs works with variables
   saveMode <- "overwrite"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-12198][SPARKR] SparkR support read.parquet and deprecate parquetFile

Reply via email to