spark git commit: [SPARK-24186][R][SQL] change reverse and concat to collection functions in R

gurwls223 Sun, 13 May 2018 18:49:29 -0700

Repository: spark
Updated Branches:
  refs/heads/master 2fa33649d -> 3f0e801c1



[SPARK-24186][R][SQL] change reverse and concat to collection functions in R

## What changes were proposed in this pull request?

`reverse` and `concat` are already defined in functions.R as column string 
functions. Since these two functions are now categorized as collection 
functions in Scala and Python, we do the same in R.

## How was this patch tested?

Add test in test_sparkSQL.R

Author: Huaxin Gao <[email protected]>

Closes #21307 from huaxingao/spark_24186.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3f0e801c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3f0e801c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3f0e801c

Branch: refs/heads/master
Commit: 3f0e801c11e600ed28491924e550d3ba93f19c19
Parents: 2fa3364
Author: Huaxin Gao <[email protected]>
Authored: Mon May 14 09:48:54 2018 +0800
Committer: hyukjinkwon <[email protected]>
Committed: Mon May 14 09:48:54 2018 +0800

----------------------------------------------------------------------
 R/pkg/R/functions.R                   | 35 +++++++++++++++---------------
 R/pkg/R/generics.R                    |  4 ++--
 R/pkg/tests/fulltests/test_sparkSQL.R | 17 +++++++++++++--
 3 files changed, 35 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/3f0e801c/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 77d70cb..fcb3521 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -208,7 +208,7 @@ NULL
 #' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
 #' head(select(tmp, array_max(tmp$v1), array_min(tmp$v1)))
 #' head(select(tmp, array_position(tmp$v1, 21), array_sort(tmp$v1)))
-#' head(select(tmp, flatten(tmp$v1)))
+#' head(select(tmp, flatten(tmp$v1), reverse(tmp$v1)))
 #' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
 #' head(tmp2)
 #' head(select(tmp, posexplode(tmp$v1)))
@@ -218,7 +218,10 @@ NULL
 #' tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
 #' head(select(tmp3, map_keys(tmp3$v3)))
 #' head(select(tmp3, map_values(tmp3$v3)))
-#' head(select(tmp3, element_at(tmp3$v3, "Valiant")))}
+#' head(select(tmp3, element_at(tmp3$v3, "Valiant")))
+#' tmp4 <- mutate(df, v4 = create_array(df$mpg, df$cyl), v5 = create_array(df$hp))
+#' head(select(tmp4, concat(tmp4$v4, tmp4$v5)))
+#' head(select(tmp, concat(df$mpg, df$cyl, df$hp)))}
 NULL
 
 #' Window functions for Column operations
@@ -1260,9 +1263,9 @@ setMethod("quarter",
           })
 
 #' @details
-#' \code{reverse}: Reverses the string column and returns it as a new string column.
+#' \code{reverse}: Returns a reversed string or an array with reverse order of elements.
 #'
-#' @rdname column_string_functions
+#' @rdname column_collection_functions
 #' @aliases reverse reverse,Column-method
 #' @note reverse since 1.5.0
 setMethod("reverse",
@@ -2055,20 +2058,10 @@ setMethod("countDistinct",
 
 #' @details
 #' \code{concat}: Concatenates multiple input columns together into a single column.
-#' If all inputs are binary, concat returns an output as binary. Otherwise, it returns as string.
+#' The function works with strings, binary and compatible array columns.
 #'
-#' @rdname column_string_functions
+#' @rdname column_collection_functions
 #' @aliases concat concat,Column-method
-#' @examples
-#'
-#' \dontrun{
-#' # concatenate strings
-#' tmp <- mutate(df, s1 = concat(df$Class, df$Sex),
-#'                   s2 = concat(df$Class, df$Sex, df$Age),
-#'                   s3 = concat(df$Class, df$Sex, df$Age, df$Class),
-#'                   s4 = concat_ws("_", df$Class, df$Sex),
-#'                   s5 = concat_ws("+", df$Class, df$Sex, df$Age, df$Survived))
-#' head(tmp)}
 #' @note concat since 1.5.0
 setMethod("concat",
           signature(x = "Column"),
@@ -2409,6 +2402,13 @@ setMethod("shiftRightUnsigned", signature(y = "Column", x = "numeric"),
 #' @param sep separator to use.
 #' @rdname column_string_functions
 #' @aliases concat_ws concat_ws,character,Column-method
+#' @examples
+#'
+#' \dontrun{
+#' # concatenate strings
+#' tmp <- mutate(df, s1 = concat_ws("_", df$Class, df$Sex),
+#'                   s2 = concat_ws("+", df$Class, df$Sex, df$Age, df$Survived))
+#' head(tmp)}
 #' @note concat_ws since 1.5.0
 setMethod("concat_ws", signature(sep = "character", x = "Column"),
           function(sep, x, ...) {
@@ -3063,7 +3063,8 @@ setMethod("array_sort",
           })
 
 #' @details
-#' \code{flatten}: Transforms an array of arrays into a single array.
+#' \code{flatten}: Creates a single array from an array of arrays.
+#' If a structure of nested arrays is deeper than two levels, only one level of nesting is removed.
 #'
 #' @rdname column_collection_functions
 #' @aliases flatten flatten,Column-method

http://git-wip-us.apache.org/repos/asf/spark/blob/3f0e801c/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index fbc4113..61da30b 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -817,7 +817,7 @@ setGeneric("collect_set", function(x) { standardGeneric("collect_set") })
 #' @rdname column
 setGeneric("column", function(x) { standardGeneric("column") })
 
-#' @rdname column_string_functions
+#' @rdname column_collection_functions
 #' @name NULL
 setGeneric("concat", function(x, ...) { standardGeneric("concat") })
 
@@ -1134,7 +1134,7 @@ setGeneric("regexp_replace",
 #' @name NULL
 setGeneric("repeat_string", function(x, n) { standardGeneric("repeat_string") })
 
-#' @rdname column_string_functions
+#' @rdname column_collection_functions
 #' @name NULL
 setGeneric("reverse", function(x) { standardGeneric("reverse") })
 

http://git-wip-us.apache.org/repos/asf/spark/blob/3f0e801c/R/pkg/tests/fulltests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index 2a550b9..13b55ac 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1479,7 +1479,7 @@ test_that("column functions", {
   df5 <- createDataFrame(list(list(a = "010101")))
   expect_equal(collect(select(df5, conv(df5$a, 2, 16)))[1, 1], "15")
 
-  # Test array_contains(), array_max(), array_min(), array_position() and element_at()
+  # Test array_contains(), array_max(), array_min(), array_position(), element_at() and reverse()
   df <- createDataFrame(list(list(list(1L, 2L, 3L)), list(list(6L, 5L, 4L))))
   result <- collect(select(df, array_contains(df[[1]], 1L)))[[1]]
   expect_equal(result, c(TRUE, FALSE))
@@ -1496,6 +1496,13 @@ test_that("column functions", {
   result <- collect(select(df, element_at(df[[1]], 1L)))[[1]]
   expect_equal(result, c(1, 6))
 
+  result <- collect(select(df, reverse(df[[1]])))[[1]]
+  expect_equal(result, list(list(3L, 2L, 1L), list(4L, 5L, 6L)))
+
+  df2 <- createDataFrame(list(list("abc")))
+  result <- collect(select(df2, reverse(df2[[1]])))[[1]]
+  expect_equal(result, "cba")
+
   # Test array_sort() and sort_array()
   df <- createDataFrame(list(list(list(2L, 1L, 3L, NA)), list(list(NA, 6L, 5L, NA, 4L))))
 
@@ -1512,7 +1519,13 @@ test_that("column functions", {
   result <- collect(select(df, slice(df[[1]], 2L, 2L)))[[1]]
   expect_equal(result, list(list(2L, 3L), list(5L)))
 
-  # Test flattern
+  # Test concat()
+  df <- createDataFrame(list(list(list(1L, 2L, 3L), list(4L, 5L, 6L)),
+                        list(list(7L, 8L, 9L), list(10L, 11L, 12L))))
+  result <- collect(select(df, concat(df[[1]], df[[2]])))[[1]]
+  expect_equal(result, list(list(1L, 2L, 3L, 4L, 5L, 6L), list(7L, 8L, 9L, 10L, 11L, 12L)))
+
+  # Test flatten()
   df <- createDataFrame(list(list(list(list(1L, 2L), list(3L, 4L))),
                         list(list(list(5L, 6L), list(7L, 8L)))))
   result <- collect(select(df, flatten(df[[1]])))[[1]]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-24186][R][SQL] change reverse and concat to collection functions in R

Reply via email to