spark git commit: [SPARK-16096][SPARKR] add union and deprecate unionAll

2016-06-21 Thread shivaram
Repository: spark
Updated Branches:
  refs/heads/master 918c91954 -> dbfdae4e4


[SPARK-16096][SPARKR] add union and deprecate unionAll

## What changes were proposed in this pull request?

Add `union` and deprecate `unionAll`; give `rbind` its own roxygen2 doc page (since the 
usage and parameter lists of `union` and `rbind` are quite different).

`explode` is also deprecated, but its replacement seems to be a combination of 
calls; not sure yet whether we should deprecate it in SparkR.

## How was this patch tested?

Unit tests, plus manual checks of the R docs.

Author: Felix Cheung 

Closes #13805 from felixcheung/runion.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dbfdae4e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dbfdae4e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dbfdae4e

Branch: refs/heads/master
Commit: dbfdae4e41a900de01b48639d6554d32edbb2e0b
Parents: 918c919
Author: Felix Cheung 
Authored: Tue Jun 21 13:36:50 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Jun 21 13:36:50 2016 -0700

--
 R/pkg/NAMESPACE   |  1 +
 R/pkg/R/DataFrame.R   | 43 --
 R/pkg/R/generics.R|  6 +++-
 R/pkg/inst/tests/testthat/test_context.R  |  2 +-
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  8 +++--
 5 files changed, 47 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/dbfdae4e/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index ea42888..2272d8b 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -107,6 +107,7 @@ exportMethods("arrange",
   "summary",
   "take",
   "transform",
+  "union",
   "unionAll",
   "unique",
   "unpersist",

http://git-wip-us.apache.org/repos/asf/spark/blob/dbfdae4e/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index ed0bb85..725cbf2 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2251,7 +2251,7 @@ generateAliasesForIntersectedCols <- function (x, 
intersectedColNames, suffix) {
   cols
 }
 
-#' rbind
+#' Return a new SparkDataFrame containing the union of rows
 #'
 #' Return a new SparkDataFrame containing the union of rows in this 
SparkDataFrame
 #' and another SparkDataFrame. This is equivalent to `UNION ALL` in SQL.
@@ -2261,39 +2261,64 @@ generateAliasesForIntersectedCols <- function (x, 
intersectedColNames, suffix) {
 #' @param y A SparkDataFrame
 #' @return A SparkDataFrame containing the result of the union.
 #' @family SparkDataFrame functions
-#' @rdname rbind
-#' @name unionAll
+#' @rdname union
+#' @name union
+#' @seealso \link{rbind}
 #' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
 #' df1 <- read.json(path)
 #' df2 <- read.json(path2)
-#' unioned <- unionAll(df, df2)
+#' unioned <- union(df, df2)
+#' unions <- rbind(df, df2, df3, df4)
 #' }
+#' @note union since 2.0.0
+setMethod("union",
+  signature(x = "SparkDataFrame", y = "SparkDataFrame"),
+  function(x, y) {
+unioned <- callJMethod(x@sdf, "union", y@sdf)
+dataFrame(unioned)
+  })
+
+#' unionAll is deprecated - use union instead
+#' @rdname union
+#' @name unionAll
+#' @export
 #' @note unionAll since 1.4.0
 setMethod("unionAll",
   signature(x = "SparkDataFrame", y = "SparkDataFrame"),
   function(x, y) {
-unioned <- callJMethod(x@sdf, "unionAll", y@sdf)
-dataFrame(unioned)
+.Deprecated("union")
+union(x, y)
   })
 
 #' Union two or more SparkDataFrames
 #'
-#' Returns a new SparkDataFrame containing rows of all parameters.
+#' Union two or more SparkDataFrames. This is equivalent to `UNION ALL` in SQL.
+#' Note that this does not remove duplicate rows across the two 
SparkDataFrames.
 #'
+#' @param x A SparkDataFrame
+#' @param ... Additional SparkDataFrame
+#' @return A SparkDataFrame containing the result of the union.
+#' @family SparkDataFrame functions
 #' @rdname rbind
 #' @name rbind
+#' @seealso \link{union}
 #' @export
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' unions <- rbind(df, df2, df3, df4)
+#' }
 #' @note rbind since 1.5.0
 setMethod("rbind",
   signature(... = "SparkDataFrame"),
   function(x, ..., deparse.level = 1) {
 if (nargs() == 3) {
-  unionAll(x, ...)
+  union(x, ...)
 } else {
-  unionAll(x, Recall(..., deparse.level = 1))
+  union(x, Recall(..., deparse.level = 1))
 }
   })
 

http://git-wip-us.apache.org/repos/asf/spark/blob/dbfdae4e/R/pkg/R/

spark git commit: [SPARK-16096][SPARKR] add union and deprecate unionAll

2016-06-21 Thread shivaram
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 591bf7909 -> aeda9a153


[SPARK-16096][SPARKR] add union and deprecate unionAll

## What changes were proposed in this pull request?

Add `union` and deprecate `unionAll`; give `rbind` its own roxygen2 doc page (since the 
usage and parameter lists of `union` and `rbind` are quite different).

`explode` is also deprecated, but its replacement seems to be a combination of 
calls; not sure yet whether we should deprecate it in SparkR.

## How was this patch tested?

Unit tests, plus manual checks of the R docs.

Author: Felix Cheung 

Closes #13805 from felixcheung/runion.

(cherry picked from commit dbfdae4e41a900de01b48639d6554d32edbb2e0b)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aeda9a15
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aeda9a15
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aeda9a15

Branch: refs/heads/branch-2.0
Commit: aeda9a153c117921e95cf204daab0df3202f1d95
Parents: 591bf79
Author: Felix Cheung 
Authored: Tue Jun 21 13:36:50 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Tue Jun 21 13:36:58 2016 -0700

--
 R/pkg/NAMESPACE   |  1 +
 R/pkg/R/DataFrame.R   | 43 --
 R/pkg/R/generics.R|  6 +++-
 R/pkg/inst/tests/testthat/test_context.R  |  2 +-
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  8 +++--
 5 files changed, 47 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/aeda9a15/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index ea42888..2272d8b 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -107,6 +107,7 @@ exportMethods("arrange",
   "summary",
   "take",
   "transform",
+  "union",
   "unionAll",
   "unique",
   "unpersist",

http://git-wip-us.apache.org/repos/asf/spark/blob/aeda9a15/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index ed0bb85..725cbf2 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2251,7 +2251,7 @@ generateAliasesForIntersectedCols <- function (x, 
intersectedColNames, suffix) {
   cols
 }
 
-#' rbind
+#' Return a new SparkDataFrame containing the union of rows
 #'
 #' Return a new SparkDataFrame containing the union of rows in this 
SparkDataFrame
 #' and another SparkDataFrame. This is equivalent to `UNION ALL` in SQL.
@@ -2261,39 +2261,64 @@ generateAliasesForIntersectedCols <- function (x, 
intersectedColNames, suffix) {
 #' @param y A SparkDataFrame
 #' @return A SparkDataFrame containing the result of the union.
 #' @family SparkDataFrame functions
-#' @rdname rbind
-#' @name unionAll
+#' @rdname union
+#' @name union
+#' @seealso \link{rbind}
 #' @export
 #' @examples
 #'\dontrun{
 #' sparkR.session()
 #' df1 <- read.json(path)
 #' df2 <- read.json(path2)
-#' unioned <- unionAll(df, df2)
+#' unioned <- union(df, df2)
+#' unions <- rbind(df, df2, df3, df4)
 #' }
+#' @note union since 2.0.0
+setMethod("union",
+  signature(x = "SparkDataFrame", y = "SparkDataFrame"),
+  function(x, y) {
+unioned <- callJMethod(x@sdf, "union", y@sdf)
+dataFrame(unioned)
+  })
+
+#' unionAll is deprecated - use union instead
+#' @rdname union
+#' @name unionAll
+#' @export
 #' @note unionAll since 1.4.0
 setMethod("unionAll",
   signature(x = "SparkDataFrame", y = "SparkDataFrame"),
   function(x, y) {
-unioned <- callJMethod(x@sdf, "unionAll", y@sdf)
-dataFrame(unioned)
+.Deprecated("union")
+union(x, y)
   })
 
 #' Union two or more SparkDataFrames
 #'
-#' Returns a new SparkDataFrame containing rows of all parameters.
+#' Union two or more SparkDataFrames. This is equivalent to `UNION ALL` in SQL.
+#' Note that this does not remove duplicate rows across the two 
SparkDataFrames.
 #'
+#' @param x A SparkDataFrame
+#' @param ... Additional SparkDataFrame
+#' @return A SparkDataFrame containing the result of the union.
+#' @family SparkDataFrame functions
 #' @rdname rbind
 #' @name rbind
+#' @seealso \link{union}
 #' @export
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' unions <- rbind(df, df2, df3, df4)
+#' }
 #' @note rbind since 1.5.0
 setMethod("rbind",
   signature(... = "SparkDataFrame"),
   function(x, ..., deparse.level = 1) {
 if (nargs() == 3) {
-  unionAll(x, ...)
+  union(x, ...)
 } else {
-  unionAll(x, Recall(..., deparse.level = 1))
+  union(x, Recall(..., dep