spark git commit: [SPARK-11260][SPARKR] with() function support

2015-11-05 Thread shivaram
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 3e1e03d03 -> 975ae4e6f


[SPARK-11260][SPARKR] with() function support

Author: adrian555 
Author: Adrian Zhuang 

Closes #9443 from adrian555/with.

(cherry picked from commit b9455d1f1810e1e3f472014f665ad3ad3122bcc0)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/975ae4e6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/975ae4e6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/975ae4e6

Branch: refs/heads/branch-1.6
Commit: 975ae4e6feff9d1ca8c146ea9b2d8300579c78bd
Parents: 3e1e03d
Author: adrian555 
Authored: Thu Nov 5 14:47:38 2015 -0800
Committer: Shivaram Venkataraman 
Committed: Thu Nov 5 14:47:50 2015 -0800

--
 R/pkg/NAMESPACE                  |  1 +
 R/pkg/R/DataFrame.R              | 30 ++++++++++++++++++++++++------
 R/pkg/R/generics.R               |  4 ++++
 R/pkg/R/utils.R                  | 13 +++++++++++++
 R/pkg/inst/tests/test_sparkSQL.R |  9 +++++++++
 5 files changed, 51 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/975ae4e6/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index cd9537a..56b8ed0 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -83,6 +83,7 @@ exportMethods("arrange",
   "unique",
   "unpersist",
   "where",
+  "with",
   "withColumn",
   "withColumnRenamed",
   "write.df")

http://git-wip-us.apache.org/repos/asf/spark/blob/975ae4e6/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index df5bc81..44ce941 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2126,11 +2126,29 @@ setMethod("as.data.frame",
 setMethod("attach",
   signature(what = "DataFrame"),
   function(what, pos = 2, name = deparse(substitute(what)), warn.conflicts = TRUE) {
-cols <- columns(what)
-stopifnot(length(cols) > 0)
-newEnv <- new.env()
-for (i in 1:length(cols)) {
-  assign(x = cols[i], value = what[, cols[i]], envir = newEnv)
-}
+newEnv <- assignNewEnv(what)
 attach(newEnv, pos = pos, name = name, warn.conflicts = warn.conflicts)
   })
+
+#' Evaluate a R expression in an environment constructed from a DataFrame
+#' with() allows access to columns of a DataFrame by simply referring to
+#' their name. It appends every column of a DataFrame into a new
+#' environment. Then, the given expression is evaluated in this new
+#' environment.
+#'
+#' @rdname with
+#' @title Evaluate a R expression in an environment constructed from a DataFrame
+#' @param data (DataFrame) DataFrame to use for constructing an environment.
+#' @param expr (expression) Expression to evaluate.
+#' @param ... arguments to be passed to future methods.
+#' @examples
+#' \dontrun{
+#' with(irisDf, nrow(Sepal_Width))
+#' }
+#' @seealso \link{attach}
+setMethod("with",
+  signature(data = "DataFrame"),
+  function(data, expr, ...) {
+newEnv <- assignNewEnv(data)
+eval(substitute(expr), envir = newEnv, enclos = newEnv)
+  })

http://git-wip-us.apache.org/repos/asf/spark/blob/975ae4e6/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 0b35340..083d37f 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1043,3 +1043,7 @@ setGeneric("as.data.frame")
 #' @rdname attach
 #' @export
 setGeneric("attach")
+
+#' @rdname with
+#' @export
+setGeneric("with")

http://git-wip-us.apache.org/repos/asf/spark/blob/975ae4e6/R/pkg/R/utils.R
--
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index 0b9e295..db3b2c4 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -623,3 +623,16 @@ convertNamedListToEnv <- function(namedList) {
   }
   env
 }
+
+# Assign a new environment for attach() and with() methods
+assignNewEnv <- function(data) {
+  stopifnot(class(data) == "DataFrame")
+  cols <- columns(data)
+  stopifnot(length(cols) > 0)
+
+  env <- new.env()
+  for (i in 1:length(cols)) {
+assign(x = cols[i], value = data[, cols[i]], envir = env)
+  }
+  env
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/975ae4e6/R/pkg/inst/tests/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index b4a4d03..816315b 100644
--- a/R/pkg/inst/tests/t

spark git commit: [SPARK-11260][SPARKR] with() function support

2015-11-05 Thread shivaram
Repository: spark
Updated Branches:
  refs/heads/master 8a5314efd -> b9455d1f1


[SPARK-11260][SPARKR] with() function support

Author: adrian555 
Author: Adrian Zhuang 

Closes #9443 from adrian555/with.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b9455d1f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b9455d1f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b9455d1f

Branch: refs/heads/master
Commit: b9455d1f1810e1e3f472014f665ad3ad3122bcc0
Parents: 8a5314e
Author: adrian555 
Authored: Thu Nov 5 14:47:38 2015 -0800
Committer: Shivaram Venkataraman 
Committed: Thu Nov 5 14:47:38 2015 -0800

--
 R/pkg/NAMESPACE                  |  1 +
 R/pkg/R/DataFrame.R              | 30 ++++++++++++++++++++++++------
 R/pkg/R/generics.R               |  4 ++++
 R/pkg/R/utils.R                  | 13 +++++++++++++
 R/pkg/inst/tests/test_sparkSQL.R |  9 +++++++++
 5 files changed, 51 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b9455d1f/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index cd9537a..56b8ed0 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -83,6 +83,7 @@ exportMethods("arrange",
   "unique",
   "unpersist",
   "where",
+  "with",
   "withColumn",
   "withColumnRenamed",
   "write.df")

http://git-wip-us.apache.org/repos/asf/spark/blob/b9455d1f/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index df5bc81..44ce941 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2126,11 +2126,29 @@ setMethod("as.data.frame",
 setMethod("attach",
   signature(what = "DataFrame"),
   function(what, pos = 2, name = deparse(substitute(what)), warn.conflicts = TRUE) {
-cols <- columns(what)
-stopifnot(length(cols) > 0)
-newEnv <- new.env()
-for (i in 1:length(cols)) {
-  assign(x = cols[i], value = what[, cols[i]], envir = newEnv)
-}
+newEnv <- assignNewEnv(what)
 attach(newEnv, pos = pos, name = name, warn.conflicts = warn.conflicts)
   })
+
+#' Evaluate a R expression in an environment constructed from a DataFrame
+#' with() allows access to columns of a DataFrame by simply referring to
+#' their name. It appends every column of a DataFrame into a new
+#' environment. Then, the given expression is evaluated in this new
+#' environment.
+#'
+#' @rdname with
+#' @title Evaluate a R expression in an environment constructed from a DataFrame
+#' @param data (DataFrame) DataFrame to use for constructing an environment.
+#' @param expr (expression) Expression to evaluate.
+#' @param ... arguments to be passed to future methods.
+#' @examples
+#' \dontrun{
+#' with(irisDf, nrow(Sepal_Width))
+#' }
+#' @seealso \link{attach}
+setMethod("with",
+  signature(data = "DataFrame"),
+  function(data, expr, ...) {
+newEnv <- assignNewEnv(data)
+eval(substitute(expr), envir = newEnv, enclos = newEnv)
+  })

http://git-wip-us.apache.org/repos/asf/spark/blob/b9455d1f/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 0b35340..083d37f 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1043,3 +1043,7 @@ setGeneric("as.data.frame")
 #' @rdname attach
 #' @export
 setGeneric("attach")
+
+#' @rdname with
+#' @export
+setGeneric("with")

http://git-wip-us.apache.org/repos/asf/spark/blob/b9455d1f/R/pkg/R/utils.R
--
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index 0b9e295..db3b2c4 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -623,3 +623,16 @@ convertNamedListToEnv <- function(namedList) {
   }
   env
 }
+
+# Assign a new environment for attach() and with() methods
+assignNewEnv <- function(data) {
+  stopifnot(class(data) == "DataFrame")
+  cols <- columns(data)
+  stopifnot(length(cols) > 0)
+
+  env <- new.env()
+  for (i in 1:length(cols)) {
+assign(x = cols[i], value = data[, cols[i]], envir = env)
+  }
+  env
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/b9455d1f/R/pkg/inst/tests/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index b4a4d03..816315b 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -1494,6 +1494,15 @@ test_that("attach() on a DataFrame", {