Repository: spark Updated Branches: refs/heads/master e3dabdf6e -> 5902125ac
[SPARK-24198][SPARKR][SQL] Adding slice function to SparkR ## What changes were proposed in this pull request? The PR adds the `slice` function to SparkR. The function returns a subset of consecutive elements from the given array. ``` > df <- createDataFrame(cbind(model = rownames(mtcars), mtcars)) > tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp)) > head(select(tmp, slice(tmp$v1, 2L, 2L))) ``` ``` slice(v1, 2, 2) 1 6, 110 2 6, 110 3 4, 93 4 6, 110 5 8, 175 6 6, 105 ``` ## How was this patch tested? A test added into R/pkg/tests/fulltests/test_sparkSQL.R Author: Marek Novotny <[email protected]> Closes #21298 from mn-mikke/SPARK-24198. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5902125a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5902125a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5902125a Branch: refs/heads/master Commit: 5902125ac7ad25a0cb7aa3d98825c8290ee33c12 Parents: e3dabdf Author: Marek Novotny <[email protected]> Authored: Sat May 12 19:21:42 2018 +0800 Committer: hyukjinkwon <[email protected]> Committed: Sat May 12 19:21:42 2018 +0800 ---------------------------------------------------------------------- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 17 +++++++++++++++++ R/pkg/R/generics.R | 4 ++++ R/pkg/tests/fulltests/test_sparkSQL.R | 5 +++++ 4 files changed, 27 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/5902125a/R/pkg/NAMESPACE ---------------------------------------------------------------------- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 5f82096..c575fe2 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -352,6 +352,7 @@ exportMethods("%<=>%", "sinh", "size", "skewness", + "slice", "sort_array", "soundex", "spark_partition_id", http://git-wip-us.apache.org/repos/asf/spark/blob/5902125a/R/pkg/R/functions.R 
---------------------------------------------------------------------- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 4964594..77d70cb 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -212,6 +212,7 @@ NULL #' tmp2 <- mutate(tmp, v2 = explode(tmp$v1)) #' head(tmp2) #' head(select(tmp, posexplode(tmp$v1))) +#' head(select(tmp, slice(tmp$v1, 2L, 2L))) #' head(select(tmp, sort_array(tmp$v1))) #' head(select(tmp, sort_array(tmp$v1, asc = FALSE))) #' tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl)) @@ -3143,6 +3144,22 @@ setMethod("size", }) #' @details +#' \code{slice}: Returns an array containing all the elements in x from the index start +#' (or starting from the end if start is negative) with the specified length. +#' +#' @rdname column_collection_functions +#' @param start an index indicating the first element occurring in the result. +#' @param length a number of consecutive elements chosen for the result. +#' @aliases slice slice,Column-method +#' @note slice since 2.4.0 +setMethod("slice", + signature(x = "Column"), + function(x, start, length) { + jc <- callJStatic("org.apache.spark.sql.functions", "slice", x@jc, start, length) + column(jc) + }) + +#' @details #' \code{sort_array}: Sorts the input array in ascending or descending order according to #' the natural ordering of the array elements. NA elements will be placed at the beginning of #' the returned array in ascending order or at the end of the returned array in descending order. 
http://git-wip-us.apache.org/repos/asf/spark/blob/5902125a/R/pkg/R/generics.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 5faa51e..fbc4113 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1196,6 +1196,10 @@ setGeneric("skewness", function(x) { standardGeneric("skewness") }) #' @rdname column_collection_functions #' @name NULL +setGeneric("slice", function(x, start, length) { standardGeneric("slice") }) + +#' @rdname column_collection_functions +#' @name NULL setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array") }) #' @rdname column_string_functions http://git-wip-us.apache.org/repos/asf/spark/blob/5902125a/R/pkg/tests/fulltests/test_sparkSQL.R ---------------------------------------------------------------------- diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index b8bfded..2a550b9 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1507,6 +1507,11 @@ test_that("column functions", { result <- collect(select(df, sort_array(df[[1]])))[[1]] expect_equal(result, list(list(NA, 1L, 2L, 3L), list(NA, NA, 4L, 5L, 6L))) + # Test slice() + df <- createDataFrame(list(list(list(1L, 2L, 3L)), list(list(4L, 5L)))) + result <- collect(select(df, slice(df[[1]], 2L, 2L)))[[1]] + expect_equal(result, list(list(2L, 3L), list(5L))) + # Test flattern df <- createDataFrame(list(list(list(list(1L, 2L), list(3L, 4L))), list(list(list(5L, 6L), list(7L, 8L))))) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
