Repository: spark
Updated Branches:
  refs/heads/master ae2370e72 -> 26e760581


[SPARK-9871] [SPARKR] Add expression functions into SparkR which have a 
variable parameter

### Summary

- Add `lit` function
- Add `concat`, `greatest`, `least` functions

I think we need to improve the `collect` function before we can implement the 
`struct` function, since `collect` doesn't work with arguments that include a 
nested `list` variable. It seems that a list returned for a `struct` still has 
`jobj` classes, so it would be better to solve this problem in a separate issue.

### JIRA
[[SPARK-9871] Add expression functions into SparkR which have a variable 
parameter - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9871)

Author: Yu ISHIKAWA <[email protected]>

Closes #8194 from yu-iskw/SPARK-9856.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/26e76058
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/26e76058
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/26e76058

Branch: refs/heads/master
Commit: 26e760581fdf7ca913da93fa80e73b7ddabcedf6
Parents: ae2370e
Author: Yu ISHIKAWA <[email protected]>
Authored: Sun Aug 16 23:33:20 2015 -0700
Committer: Shivaram Venkataraman <[email protected]>
Committed: Sun Aug 16 23:33:20 2015 -0700

----------------------------------------------------------------------
 R/pkg/NAMESPACE                  |  4 ++++
 R/pkg/R/functions.R              | 42 +++++++++++++++++++++++++++++++++++
 R/pkg/R/generics.R               | 16 +++++++++++++
 R/pkg/inst/tests/test_sparkSQL.R | 13 +++++++++++
 4 files changed, 75 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/26e76058/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index b2d92bd..fd9dfdf 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -98,6 +98,7 @@ exportMethods("abs",
               "contains",
               "cos",
               "cosh",
+              "concat",
               "countDistinct",
               "desc",
               "endsWith",
@@ -106,10 +107,13 @@ exportMethods("abs",
               "floor",
               "getField",
               "getItem",
+              "greatest",
               "hypot",
               "isNotNull",
               "isNull",
+              "lit",
               "last",
+              "least",
               "like",
               "log",
               "log10",

http://git-wip-us.apache.org/repos/asf/spark/blob/26e76058/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index a15d2d5..6eef4d6 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -67,6 +67,14 @@ createFunctions <- function() {
 
 createFunctions()
 
+#' @rdname functions
+#' @param x A literal R value, or an existing Column (in which case its `jc` slot
+#'          is passed on instead of the Column wrapper itself).
+#' @return Creates a Column class of literal value.
+setMethod("lit", signature("ANY"),
+          function(x) {
+            # Select the argument with a scalar `if`: `ifelse()` is vectorised and
+            # rebuilds its result from the test vector, so it cannot be relied on
+            # to hand the `jc` slot through unchanged.
+            value <- if (inherits(x, "Column")) x@jc else x
+            jc <- callJStatic("org.apache.spark.sql.functions", "lit", value)
+            column(jc)
+          })
+
 #' Approx Count Distinct
 #'
 #' @rdname functions
@@ -94,6 +102,40 @@ setMethod("countDistinct",
           })
 
 #' @rdname functions
+#' @return Concatenates multiple input string columns together into a single
+#'         string column.
+setMethod("concat",
+          signature(x = "Column"),
+          function(x, ...) {
+            # Unwrap every Column argument to its `jc` slot, then pass them all
+            # to the JVM-side `concat` as one sequence.
+            jrefs <- lapply(list(x, ...), function(col) col@jc)
+            column(callJStatic("org.apache.spark.sql.functions", "concat",
+                               listToSeq(jrefs)))
+          })
+
+#' @rdname functions
+#' @return Returns the greatest value of the list of column names, skipping
+#'         null values. This function takes at least 2 parameters. It will
+#'         return null if all parameters are null.
+setMethod("greatest",
+          signature(x = "Column"),
+          function(x, ...) {
+            # At least one column besides `x` must be supplied.
+            stopifnot(length(list(...)) > 0)
+            # Unwrap every Column argument to its `jc` slot before the JVM call.
+            jrefs <- lapply(list(x, ...), function(col) col@jc)
+            column(callJStatic("org.apache.spark.sql.functions", "greatest",
+                               listToSeq(jrefs)))
+          })
+
+#' @rdname functions
+#' @return Returns the least value of the list of column names, skipping null
+#'         values. This function takes at least 2 parameters. It will return
+#'         null iff all parameters are null.
+setMethod("least",
+          signature(x = "Column"),
+          function(x, ...) {
+            # At least one column besides `x` must be supplied.
+            stopifnot(length(list(...)) > 0)
+            # Unwrap every Column argument to its `jc` slot before the JVM call.
+            jrefs <- lapply(list(x, ...), function(col) col@jc)
+            column(callJStatic("org.apache.spark.sql.functions", "least",
+                               listToSeq(jrefs)))
+          })
+
+#' @rdname functions
 #' @aliases ceil
 setMethod("ceiling",
           signature(x = "Column"),

http://git-wip-us.apache.org/repos/asf/spark/blob/26e76058/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index f11e7fc..5c1cc98 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -684,6 +684,10 @@ setGeneric("ceil", function(x) { standardGeneric("ceil") })
 
 #' @rdname functions
 #' @export
+setGeneric("concat", function(x, ...) { standardGeneric("concat") })
+
+#' @rdname functions
+#' @export
 setGeneric("crc32", function(x) { standardGeneric("crc32") })
 
 #' @rdname functions
@@ -704,6 +708,10 @@ setGeneric("explode", function(x) { 
standardGeneric("explode") })
 
 #' @rdname functions
 #' @export
+setGeneric("greatest", function(x, ...) { standardGeneric("greatest") })
+
+#' @rdname functions
+#' @export
 setGeneric("hex", function(x) { standardGeneric("hex") })
 
 #' @rdname functions
@@ -724,10 +732,18 @@ setGeneric("last_day", function(x) { 
standardGeneric("last_day") })
 
 #' @rdname functions
 #' @export
+setGeneric("least", function(x, ...) { standardGeneric("least") })
+
+#' @rdname functions
+#' @export
 setGeneric("levenshtein", function(y, x) { standardGeneric("levenshtein") })
 
 #' @rdname functions
 #' @export
+setGeneric("lit", function(x) { standardGeneric("lit") })
+
+#' @rdname functions
+#' @export
 setGeneric("lower", function(x) { standardGeneric("lower") })
 
 #' @rdname functions

http://git-wip-us.apache.org/repos/asf/spark/blob/26e76058/R/pkg/inst/tests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index c77f633..83caba8 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -580,6 +580,11 @@ test_that("select with column", {
   df2 <- select(df, df$age)
   expect_equal(columns(df2), c("age"))
   expect_equal(count(df2), 3)
+
+  df3 <- select(df, lit("x"))
+  expect_equal(columns(df3), c("x"))
+  expect_equal(count(df3), 3)
+  expect_equal(collect(select(df3, "x"))[[1, 1]], "x")
 })
 
 test_that("selectExpr() on a DataFrame", {
@@ -712,6 +717,14 @@ test_that("string operators", {
   expect_equal(count(where(df, startsWith(df$name, "A"))), 1)
   expect_equal(first(select(df, substr(df$name, 1, 2)))[[1]], "Mi")
   expect_equal(collect(select(df, cast(df$age, "string")))[[2, 1]], "30")
+  expect_equal(collect(select(df, concat(df$name, lit(":"), df$age)))[[2, 1]], 
"Andy:30")
+})
+
+test_that("greatest() and least() on a DataFrame", {
+  # Two rows with numeric columns `a` and `b`; `b` is the larger value in each.
+  rows <- list(list(a = 1, b = 2), list(a = 3, b = 4))
+  df <- createDataFrame(sqlContext, rows)
+
+  maxima <- collect(select(df, greatest(df$a, df$b)))
+  expect_equal(maxima[, 1], c(2, 4))
+
+  minima <- collect(select(df, least(df$a, df$b)))
+  expect_equal(minima[, 1], c(1, 3))
 })
 
 test_that("group by", {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to