Repository: spark Updated Branches: refs/heads/master afc364146 -> 66a7d6b30
[SPARK-22920][SPARKR] sql functions for current_date, current_timestamp, rtrim/ltrim/trim with trimString ## What changes were proposed in this pull request? Add sql functions ## How was this patch tested? manual, unit tests Author: Felix Cheung <[email protected]> Closes #20105 from felixcheung/rsqlfuncs. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/66a7d6b3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/66a7d6b3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/66a7d6b3 Branch: refs/heads/master Commit: 66a7d6b30fe5581d09ef660abe2a9c8c334d29f2 Parents: afc3641 Author: Felix Cheung <[email protected]> Authored: Fri Dec 29 10:51:43 2017 -0800 Committer: Felix Cheung <[email protected]> Committed: Fri Dec 29 10:51:43 2017 -0800 ---------------------------------------------------------------------- R/pkg/DESCRIPTION | 1 + R/pkg/NAMESPACE | 2 + R/pkg/R/functions.R | 105 ++++++++++++++++++++++++----- R/pkg/R/generics.R | 17 ++++- R/pkg/tests/fulltests/test_sparkSQL.R | 4 +- 5 files changed, 106 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/66a7d6b3/R/pkg/DESCRIPTION ---------------------------------------------------------------------- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index d1c846c..6d46c31 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -59,3 +59,4 @@ Collate: 'window.R' RoxygenNote: 5.0.1 VignetteBuilder: knitr +NeedsCompilation: no http://git-wip-us.apache.org/repos/asf/spark/blob/66a7d6b3/R/pkg/NAMESPACE ---------------------------------------------------------------------- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index ce3eec0..3219c6f 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -228,6 +228,8 @@ exportMethods("%<=>%", "crc32", "create_array", "create_map", + "current_date", + "current_timestamp", "hash", "cume_dist", "date_add", 
http://git-wip-us.apache.org/repos/asf/spark/blob/66a7d6b3/R/pkg/R/functions.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 3a96f94..fff230d 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -39,7 +39,8 @@ NULL #' Date time functions defined for \code{Column}. #' #' @param x Column to compute on. In \code{window}, it must be a time Column of -#' \code{TimestampType}. +#' \code{TimestampType}. This is not used with \code{current_date} and +#' \code{current_timestamp} #' @param format The format for the given dates or timestamps in Column \code{x}. See the #' format used in the following methods: #' \itemize{ @@ -1109,10 +1110,11 @@ setMethod("lower", }) #' @details -#' \code{ltrim}: Trims the spaces from left end for the specified string value. +#' \code{ltrim}: Trims the spaces from left end for the specified string value. Optionally a +#' \code{trimString} can be specified. #' #' @rdname column_string_functions -#' @aliases ltrim ltrim,Column-method +#' @aliases ltrim ltrim,Column,missing-method #' @export #' @examples #' @@ -1128,12 +1130,24 @@ setMethod("lower", #' head(tmp)} #' @note ltrim since 1.5.0 setMethod("ltrim", - signature(x = "Column"), - function(x) { + signature(x = "Column", trimString = "missing"), + function(x, trimString) { jc <- callJStatic("org.apache.spark.sql.functions", "ltrim", x@jc) column(jc) }) +#' @param trimString a character string to trim with +#' @rdname column_string_functions +#' @aliases ltrim,Column,character-method +#' @export +#' @note ltrim(Column, character) since 2.3.0 +setMethod("ltrim", + signature(x = "Column", trimString = "character"), + function(x, trimString) { + jc <- callJStatic("org.apache.spark.sql.functions", "ltrim", x@jc, trimString) + column(jc) + }) + #' @details #' \code{max}: Returns the maximum value of the expression in a group. 
#' @@ -1348,19 +1362,31 @@ setMethod("bround", }) #' @details -#' \code{rtrim}: Trims the spaces from right end for the specified string value. +#' \code{rtrim}: Trims the spaces from right end for the specified string value. Optionally a +#' \code{trimString} can be specified. #' #' @rdname column_string_functions -#' @aliases rtrim rtrim,Column-method +#' @aliases rtrim rtrim,Column,missing-method #' @export #' @note rtrim since 1.5.0 setMethod("rtrim", - signature(x = "Column"), - function(x) { + signature(x = "Column", trimString = "missing"), + function(x, trimString) { jc <- callJStatic("org.apache.spark.sql.functions", "rtrim", x@jc) column(jc) }) +#' @rdname column_string_functions +#' @aliases rtrim,Column,character-method +#' @export +#' @note rtrim(Column, character) since 2.3.0 +setMethod("rtrim", + signature(x = "Column", trimString = "character"), + function(x, trimString) { + jc <- callJStatic("org.apache.spark.sql.functions", "rtrim", x@jc, trimString) + column(jc) + }) + #' @details #' \code{sd}: Alias for \code{stddev_samp}. #' @@ -1789,19 +1815,31 @@ setMethod("to_timestamp", }) #' @details -#' \code{trim}: Trims the spaces from both ends for the specified string column. +#' \code{trim}: Trims the spaces from both ends for the specified string column. Optionally a +#' \code{trimString} can be specified. 
#' #' @rdname column_string_functions -#' @aliases trim trim,Column-method +#' @aliases trim trim,Column,missing-method #' @export #' @note trim since 1.5.0 setMethod("trim", - signature(x = "Column"), - function(x) { + signature(x = "Column", trimString = "missing"), + function(x, trimString) { jc <- callJStatic("org.apache.spark.sql.functions", "trim", x@jc) column(jc) }) +#' @rdname column_string_functions +#' @aliases trim,Column,character-method +#' @export +#' @note trim(Column, character) since 2.3.0 +setMethod("trim", + signature(x = "Column", trimString = "character"), + function(x, trimString) { + jc <- callJStatic("org.apache.spark.sql.functions", "trim", x@jc, trimString) + column(jc) + }) + #' @details #' \code{unbase64}: Decodes a BASE64 encoded string column and returns it as a binary column. #' This is the reverse of base64. @@ -2777,11 +2815,11 @@ setMethod("rpad", signature(x = "Column", len = "numeric", pad = "character"), }) #' @details -#' \code{substring_index}: Returns the substring from string str before count occurrences of -#' the delimiter delim. If count is positive, everything the left of the final delimiter -#' (counting from left) is returned. If count is negative, every to the right of the final -#' delimiter (counting from the right) is returned. substring_index performs a case-sensitive -#' match when searching for delim. +#' \code{substring_index}: Returns the substring from string (\code{x}) before \code{count} +#' occurrences of the delimiter (\code{delim}). If \code{count} is positive, everything to the left of +#' the final delimiter (counting from left) is returned. If \code{count} is negative, everything to the +#' right of the final delimiter (counting from the right) is returned. \code{substring_index} +#' performs a case-sensitive match when searching for the delimiter. #' #' @param delim a delimiter string. #' @param count number of occurrences of \code{delim} before the substring is returned. 
@@ -3504,3 +3542,34 @@ setMethod("date_trunc", jc <- callJStatic("org.apache.spark.sql.functions", "date_trunc", format, x@jc) column(jc) }) + +#' @details +#' \code{current_date}: Returns the current date as a date column. +#' +#' @rdname column_datetime_functions +#' @aliases current_date current_date,missing-method +#' @export +#' @examples +#' \dontrun{ +#' head(select(df, current_date(), current_timestamp()))} +#' @note current_date since 2.3.0 +setMethod("current_date", + signature("missing"), + function() { + jc <- callJStatic("org.apache.spark.sql.functions", "current_date") + column(jc) + }) + +#' @details +#' \code{current_timestamp}: Returns the current timestamp as a timestamp column. +#' +#' @rdname column_datetime_functions +#' @aliases current_timestamp current_timestamp,missing-method +#' @export +#' @note current_timestamp since 2.3.0 +setMethod("current_timestamp", + signature("missing"), + function() { + jc <- callJStatic("org.apache.spark.sql.functions", "current_timestamp") + column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/66a7d6b3/R/pkg/R/generics.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index d5d0bc9..5369c32 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1027,6 +1027,17 @@ setGeneric("hash", function(x, ...) 
{ standardGeneric("hash") }) #' @name NULL setGeneric("cume_dist", function(x = "missing") { standardGeneric("cume_dist") }) +#' @rdname column_datetime_functions +#' @export +#' @name NULL +setGeneric("current_date", function(x = "missing") { standardGeneric("current_date") }) + +#' @rdname column_datetime_functions +#' @export +#' @name NULL +setGeneric("current_timestamp", function(x = "missing") { standardGeneric("current_timestamp") }) + + #' @rdname column_datetime_diff_functions #' @export #' @name NULL @@ -1230,7 +1241,7 @@ setGeneric("lpad", function(x, len, pad) { standardGeneric("lpad") }) #' @rdname column_string_functions #' @export #' @name NULL -setGeneric("ltrim", function(x) { standardGeneric("ltrim") }) +setGeneric("ltrim", function(x, trimString) { standardGeneric("ltrim") }) #' @rdname column_collection_functions #' @export @@ -1380,7 +1391,7 @@ setGeneric("rpad", function(x, len, pad) { standardGeneric("rpad") }) #' @rdname column_string_functions #' @export #' @name NULL -setGeneric("rtrim", function(x) { standardGeneric("rtrim") }) +setGeneric("rtrim", function(x, trimString) { standardGeneric("rtrim") }) #' @rdname column_aggregate_functions #' @export @@ -1520,7 +1531,7 @@ setGeneric("translate", function(x, matchingString, replaceString) { standardGen #' @rdname column_string_functions #' @export #' @name NULL -setGeneric("trim", function(x) { standardGeneric("trim") }) +setGeneric("trim", function(x, trimString) { standardGeneric("trim") }) #' @rdname column_string_functions #' @export http://git-wip-us.apache.org/repos/asf/spark/blob/66a7d6b3/R/pkg/tests/fulltests/test_sparkSQL.R ---------------------------------------------------------------------- diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 650e7c0..1b7d53f 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1427,7 +1427,7 @@ test_that("column functions", { c9 <- signum(c) + sin(c) + 
sinh(c) + size(c) + stddev(c) + soundex(c) + sqrt(c) + sum(c) c10 <- sumDistinct(c) + tan(c) + tanh(c) + toDegrees(c) + toRadians(c) c11 <- to_date(c) + trim(c) + unbase64(c) + unhex(c) + upper(c) - c12 <- variance(c) + c12 <- variance(c) + ltrim(c, "a") + rtrim(c, "b") + trim(c, "c") c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1) c14 <- cume_dist() + ntile(1) + corr(c, c1) c15 <- dense_rank() + percent_rank() + rank() + row_number() @@ -1441,7 +1441,7 @@ test_that("column functions", { c23 <- trunc(c, "year") + trunc(c, "yyyy") + trunc(c, "yy") + trunc(c, "month") + trunc(c, "mon") + trunc(c, "mm") c24 <- date_trunc("hour", c) + date_trunc("minute", c) + date_trunc("week", c) + - date_trunc("quarter", c) + date_trunc("quarter", c) + current_date() + current_timestamp() # Test if base::is.nan() is exposed expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE)) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
