Repository: spark Updated Branches: refs/heads/master 5e3868ba1 -> 1e0aba90b
[SPARK-10888] [SPARKR] Added as.DataFrame as a synonym for createDataFrame. as.DataFrame is a more R-style signature. Also, I'd like to know if we could make the context, e.g. sqlContext, global, so that we do not have to specify it as an argument each time we create a DataFrame. Author: Narine Kokhlikyan <[email protected]> Closes #8952 from NarineK/sparkrasDataFrame. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1e0aba90 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1e0aba90 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1e0aba90 Branch: refs/heads/master Commit: 1e0aba90b9e73834af70d196f7f869b062d98d94 Parents: 5e3868b Author: Narine Kokhlikyan <[email protected]> Authored: Tue Oct 13 10:09:05 2015 -0700 Committer: Shivaram Venkataraman <[email protected]> Committed: Tue Oct 13 10:09:05 2015 -0700 ---------------------------------------------------------------------- R/pkg/NAMESPACE | 3 ++- R/pkg/R/SQLContext.R | 17 +++++++++++++---- R/pkg/inst/tests/test_sparkSQL.R | 15 +++++++++++++++ 3 files changed, 30 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/1e0aba90/R/pkg/NAMESPACE ---------------------------------------------------------------------- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 95d949e..41986a5 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -228,7 +228,8 @@ exportMethods("agg") export("sparkRSQL.init", "sparkRHive.init") -export("cacheTable", +export("as.DataFrame", + "cacheTable", "clearCache", "createDataFrame", "createExternalTable", http://git-wip-us.apache.org/repos/asf/spark/blob/1e0aba90/R/pkg/R/SQLContext.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 66c7e30..399f536 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -64,21 
+64,23 @@ infer_type <- function(x) { } } -#' Create a DataFrame from an RDD +#' Create a DataFrame #' -#' Converts an RDD to a DataFrame by infer the types. +#' Converts R data.frame or list into DataFrame. #' #' @param sqlContext A SQLContext #' @param data An RDD or list or data.frame #' @param schema a list of column names or named list (StructType), optional #' @return an DataFrame +#' @rdname createDataFrame #' @export #' @examples #'\dontrun{ #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) -#' rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x))) -#' df <- createDataFrame(sqlContext, rdd) +#' df1 <- as.DataFrame(sqlContext, iris) +#' df2 <- as.DataFrame(sqlContext, list(3,4,5,6)) +#' df3 <- createDataFrame(sqlContext, iris) #' } # TODO(davies): support sampling and infer type from NA @@ -151,6 +153,13 @@ createDataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0 dataFrame(sdf) } +#' @rdname createDataFrame +#' @aliases createDataFrame +#' @export +as.DataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0) { + createDataFrame(sqlContext, data, schema, samplingRatio) +} + # toDF # # Converts an RDD to a DataFrame by infer the types. 
http://git-wip-us.apache.org/repos/asf/spark/blob/1e0aba90/R/pkg/inst/tests/test_sparkSQL.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index af6efa4..b599994 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -89,17 +89,28 @@ test_that("structType and structField", { test_that("create DataFrame from RDD", { rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) }) df <- createDataFrame(sqlContext, rdd, list("a", "b")) + dfAsDF <- as.DataFrame(sqlContext, rdd, list("a", "b")) expect_is(df, "DataFrame") + expect_is(dfAsDF, "DataFrame") expect_equal(count(df), 10) + expect_equal(count(dfAsDF), 10) expect_equal(nrow(df), 10) + expect_equal(nrow(dfAsDF), 10) expect_equal(ncol(df), 2) + expect_equal(ncol(dfAsDF), 2) expect_equal(dim(df), c(10, 2)) + expect_equal(dim(dfAsDF), c(10, 2)) expect_equal(columns(df), c("a", "b")) + expect_equal(columns(dfAsDF), c("a", "b")) expect_equal(dtypes(df), list(c("a", "int"), c("b", "string"))) + expect_equal(dtypes(dfAsDF), list(c("a", "int"), c("b", "string"))) df <- createDataFrame(sqlContext, rdd) + dfAsDF <- as.DataFrame(sqlContext, rdd) expect_is(df, "DataFrame") + expect_is(dfAsDF, "DataFrame") expect_equal(columns(df), c("_1", "_2")) + expect_equal(columns(dfAsDF), c("_1", "_2")) schema <- structType(structField(x = "a", type = "integer", nullable = TRUE), structField(x = "b", type = "string", nullable = TRUE)) @@ -130,9 +141,13 @@ test_that("create DataFrame from RDD", { schema <- structType(structField("name", "string"), structField("age", "integer"), structField("height", "float")) df2 <- createDataFrame(sqlContext, df.toRDD, schema) + df2AsDF <- as.DataFrame(sqlContext, df.toRDD, schema) expect_equal(columns(df2), c("name", "age", "height")) + expect_equal(columns(df2AsDF), c("name", "age", "height")) expect_equal(dtypes(df2), list(c("name", "string"), 
c("age", "int"), c("height", "float"))) + expect_equal(dtypes(df2AsDF), list(c("name", "string"), c("age", "int"), c("height", "float"))) expect_equal(collect(where(df2, df2$name == "Bob")), c("Bob", 16, 176.5)) + expect_equal(collect(where(df2AsDF, df2$name == "Bob")), c("Bob", 16, 176.5)) localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18), --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
