spark git commit: [SPARK-16096][SPARKR] add union and deprecate unionAll
Repository: spark Updated Branches: refs/heads/master 918c91954 -> dbfdae4e4 [SPARK-16096][SPARKR] add union and deprecate unionAll ## What changes were proposed in this pull request? add union and deprecate unionAll, separate roxygen2 doc for rbind (since their usage and parameter lists are quite different) `explode` is also deprecated - but seems like replacement is a combination of calls; not sure if we should deprecate it in SparkR, yet. ## How was this patch tested? unit tests, manual checks for r doc Author: Felix Cheung Closes #13805 from felixcheung/runion. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dbfdae4e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dbfdae4e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dbfdae4e Branch: refs/heads/master Commit: dbfdae4e41a900de01b48639d6554d32edbb2e0b Parents: 918c919 Author: Felix Cheung Authored: Tue Jun 21 13:36:50 2016 -0700 Committer: Shivaram Venkataraman Committed: Tue Jun 21 13:36:50 2016 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/DataFrame.R | 43 -- R/pkg/R/generics.R| 6 +++- R/pkg/inst/tests/testthat/test_context.R | 2 +- R/pkg/inst/tests/testthat/test_sparkSQL.R | 8 +++-- 5 files changed, 47 insertions(+), 13 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/dbfdae4e/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index ea42888..2272d8b 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -107,6 +107,7 @@ exportMethods("arrange", "summary", "take", "transform", + "union", "unionAll", "unique", "unpersist", http://git-wip-us.apache.org/repos/asf/spark/blob/dbfdae4e/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index ed0bb85..725cbf2 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2251,7 +2251,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) { cols } -#' rbind +#' Return a new SparkDataFrame containing the union of rows #' #' Return a new SparkDataFrame containing the union of rows in this SparkDataFrame #' and another SparkDataFrame. This is equivalent to `UNION ALL` in SQL. @@ -2261,39 +2261,64 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) { #' @param y A SparkDataFrame #' @return A SparkDataFrame containing the result of the union. #' @family SparkDataFrame functions -#' @rdname rbind -#' @name unionAll +#' @rdname union +#' @name union +#' @seealso \link{rbind} #' @export #' @examples #'\dontrun{ #' sparkR.session() #' df1 <- read.json(path) #' df2 <- read.json(path2) -#' unioned <- unionAll(df, df2) +#' unioned <- union(df, df2) +#' unions <- rbind(df, df2, df3, df4) #' } +#' @note union since 2.0.0 +setMethod("union", + signature(x = "SparkDataFrame", y = "SparkDataFrame"), + function(x, y) { +unioned <- callJMethod(x@sdf, "union", y@sdf) +dataFrame(unioned) + }) + +#' unionAll is deprecated - use union instead +#' @rdname union +#' @name unionAll +#' @export #' @note unionAll since 1.4.0 setMethod("unionAll", signature(x = "SparkDataFrame", y = "SparkDataFrame"), function(x, y) { -unioned <- callJMethod(x@sdf, "unionAll", y@sdf) -dataFrame(unioned) +.Deprecated("union") +union(x, y) }) #' Union two or more SparkDataFrames #' -#' Returns a new SparkDataFrame containing rows of all parameters. +#' Union two or more SparkDataFrames. This is equivalent to `UNION ALL` in SQL. +#' Note that this does not remove duplicate rows across the two SparkDataFrames. #' +#' @param x A SparkDataFrame +#' @param ... Additional SparkDataFrame +#' @return A SparkDataFrame containing the result of the union. +#' @family SparkDataFrame functions #' @rdname rbind #' @name rbind +#' @seealso \link{union} #' @export +#' @examples +#'\dontrun{ +#' sparkR.session() +#' unions <- rbind(df, df2, df3, df4) +#' } #' @note rbind since 1.5.0 setMethod("rbind", signature(... = "SparkDataFrame"), function(x, ..., deparse.level = 1) { if (nargs() == 3) { - unionAll(x, ...) + union(x, ...) } else { - unionAll(x, Recall(..., deparse.level = 1)) + union(x, Recall(..., deparse.level = 1)) } }) http://git-wip-us.apache.org/repos/asf/spark/blob/dbfdae4e/R/pkg/R/
spark git commit: [SPARK-16096][SPARKR] add union and deprecate unionAll
Repository: spark Updated Branches: refs/heads/branch-2.0 591bf7909 -> aeda9a153 [SPARK-16096][SPARKR] add union and deprecate unionAll ## What changes were proposed in this pull request? add union and deprecate unionAll, separate roxygen2 doc for rbind (since their usage and parameter lists are quite different) `explode` is also deprecated - but seems like replacement is a combination of calls; not sure if we should deprecate it in SparkR, yet. ## How was this patch tested? unit tests, manual checks for r doc Author: Felix Cheung Closes #13805 from felixcheung/runion. (cherry picked from commit dbfdae4e41a900de01b48639d6554d32edbb2e0b) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aeda9a15 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aeda9a15 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aeda9a15 Branch: refs/heads/branch-2.0 Commit: aeda9a153c117921e95cf204daab0df3202f1d95 Parents: 591bf79 Author: Felix Cheung Authored: Tue Jun 21 13:36:50 2016 -0700 Committer: Shivaram Venkataraman Committed: Tue Jun 21 13:36:58 2016 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/DataFrame.R | 43 -- R/pkg/R/generics.R| 6 +++- R/pkg/inst/tests/testthat/test_context.R | 2 +- R/pkg/inst/tests/testthat/test_sparkSQL.R | 8 +++-- 5 files changed, 47 insertions(+), 13 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/aeda9a15/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index ea42888..2272d8b 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -107,6 +107,7 @@ exportMethods("arrange", "summary", "take", "transform", + "union", "unionAll", "unique", "unpersist", http://git-wip-us.apache.org/repos/asf/spark/blob/aeda9a15/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index ed0bb85..725cbf2 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2251,7 +2251,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) { cols } -#' rbind +#' Return a new SparkDataFrame containing the union of rows #' #' Return a new SparkDataFrame containing the union of rows in this SparkDataFrame #' and another SparkDataFrame. This is equivalent to `UNION ALL` in SQL. @@ -2261,39 +2261,64 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) { #' @param y A SparkDataFrame #' @return A SparkDataFrame containing the result of the union. #' @family SparkDataFrame functions -#' @rdname rbind -#' @name unionAll +#' @rdname union +#' @name union +#' @seealso \link{rbind} #' @export #' @examples #'\dontrun{ #' sparkR.session() #' df1 <- read.json(path) #' df2 <- read.json(path2) -#' unioned <- unionAll(df, df2) +#' unioned <- union(df, df2) +#' unions <- rbind(df, df2, df3, df4) #' } +#' @note union since 2.0.0 +setMethod("union", + signature(x = "SparkDataFrame", y = "SparkDataFrame"), + function(x, y) { +unioned <- callJMethod(x@sdf, "union", y@sdf) +dataFrame(unioned) + }) + +#' unionAll is deprecated - use union instead +#' @rdname union +#' @name unionAll +#' @export #' @note unionAll since 1.4.0 setMethod("unionAll", signature(x = "SparkDataFrame", y = "SparkDataFrame"), function(x, y) { -unioned <- callJMethod(x@sdf, "unionAll", y@sdf) -dataFrame(unioned) +.Deprecated("union") +union(x, y) }) #' Union two or more SparkDataFrames #' -#' Returns a new SparkDataFrame containing rows of all parameters. +#' Union two or more SparkDataFrames. This is equivalent to `UNION ALL` in SQL. +#' Note that this does not remove duplicate rows across the two SparkDataFrames. #' +#' @param x A SparkDataFrame +#' @param ... Additional SparkDataFrame +#' @return A SparkDataFrame containing the result of the union. +#' @family SparkDataFrame functions #' @rdname rbind #' @name rbind +#' @seealso \link{union} #' @export +#' @examples +#'\dontrun{ +#' sparkR.session() +#' unions <- rbind(df, df2, df3, df4) +#' } #' @note rbind since 1.5.0 setMethod("rbind", signature(... = "SparkDataFrame"), function(x, ..., deparse.level = 1) { if (nargs() == 3) { - unionAll(x, ...) + union(x, ...) } else { - unionAll(x, Recall(..., deparse.level = 1)) + union(x, Recall(..., dep