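spark git commit: [SPARK-22920][SPARKR] sql functions for current_date, current_timestamp, rtrim/ltrim/trim with trimString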

felixcheung Fri, 29 Dec 2017 10:52:18 -0800

Repository: spark
Updated Branches:
  refs/heads/master afc364146 -> 66a7d6b30



[SPARK-22920][SPARKR] sql functions for current_date, current_timestamp, 
rtrim/ltrim/trim with trimString

## What changes were proposed in this pull request?

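Add SQL functions to SparkR: `current_date`, `current_timestamp`, and an optional `trimString` argument for `ltrim`, `rtrim`, and `trim`.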

## How was this patch tested?

manual, unit tests

Author: Felix Cheung <[email protected]>

Closes #20105 from felixcheung/rsqlfuncs.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/66a7d6b3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/66a7d6b3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/66a7d6b3

Branch: refs/heads/master
Commit: 66a7d6b30fe5581d09ef660abe2a9c8c334d29f2
Parents: afc3641
Author: Felix Cheung <[email protected]>
Authored: Fri Dec 29 10:51:43 2017 -0800
Committer: Felix Cheung <[email protected]>
Committed: Fri Dec 29 10:51:43 2017 -0800

----------------------------------------------------------------------
 R/pkg/DESCRIPTION                     |   1 +
 R/pkg/NAMESPACE                       |   2 +
 R/pkg/R/functions.R                   | 105 ++++++++++++++++++++++++-----
 R/pkg/R/generics.R                    |  17 ++++-
 R/pkg/tests/fulltests/test_sparkSQL.R |   4 +-
 5 files changed, 106 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/66a7d6b3/R/pkg/DESCRIPTION
----------------------------------------------------------------------
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index d1c846c..6d46c31 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -59,3 +59,4 @@ Collate:
     'window.R'
 RoxygenNote: 5.0.1
 VignetteBuilder: knitr
+NeedsCompilation: no

http://git-wip-us.apache.org/repos/asf/spark/blob/66a7d6b3/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index ce3eec0..3219c6f 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -228,6 +228,8 @@ exportMethods("%<=>%",
               "crc32",
               "create_array",
               "create_map",
+              "current_date",
+              "current_timestamp",
               "hash",
               "cume_dist",
               "date_add",

http://git-wip-us.apache.org/repos/asf/spark/blob/66a7d6b3/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 3a96f94..fff230d 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -39,7 +39,8 @@ NULL
 #' Date time functions defined for \code{Column}.
 #'
 #' @param x Column to compute on. In \code{window}, it must be a time Column of
-#'          \code{TimestampType}.
+#'          \code{TimestampType}. This is not used with \code{current_date} and
+#'          \code{current_timestamp}
 #' @param format The format for the given dates or timestamps in Column 
\code{x}. See the
 #'               format used in the following methods:
 #'               \itemize{
@@ -1109,10 +1110,11 @@ setMethod("lower",
           })
 
 #' @details
-#' \code{ltrim}: Trims the spaces from left end for the specified string value.
+#' \code{ltrim}: Trims the spaces from left end for the specified string 
value. Optionally a
+#' \code{trimString} can be specified.
 #'
 #' @rdname column_string_functions
-#' @aliases ltrim ltrim,Column-method
+#' @aliases ltrim ltrim,Column,missing-method
 #' @export
 #' @examples
 #'
@@ -1128,12 +1130,24 @@ setMethod("lower",
 #' head(tmp)}
 #' @note ltrim since 1.5.0
 setMethod("ltrim",
-          signature(x = "Column"),
-          function(x) {
+          signature(x = "Column", trimString = "missing"),
+          function(x, trimString) {
             jc <- callJStatic("org.apache.spark.sql.functions", "ltrim", x@jc)
             column(jc)
           })
 
+#' @param trimString a character string to trim with
+#' @rdname column_string_functions
+#' @aliases ltrim,Column,character-method
+#' @export
+#' @note ltrim(Column, character) since 2.3.0
+setMethod("ltrim",
+          signature(x = "Column", trimString = "character"),
+          function(x, trimString) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "ltrim", x@jc, 
trimString)
+            column(jc)
+          })
+
 #' @details
 #' \code{max}: Returns the maximum value of the expression in a group.
 #'
@@ -1348,19 +1362,31 @@ setMethod("bround",
           })
 
 #' @details
-#' \code{rtrim}: Trims the spaces from right end for the specified string 
value.
+#' \code{rtrim}: Trims the spaces from right end for the specified string 
value. Optionally a
+#' \code{trimString} can be specified.
 #'
 #' @rdname column_string_functions
-#' @aliases rtrim rtrim,Column-method
+#' @aliases rtrim rtrim,Column,missing-method
 #' @export
 #' @note rtrim since 1.5.0
 setMethod("rtrim",
-          signature(x = "Column"),
-          function(x) {
+          signature(x = "Column", trimString = "missing"),
+          function(x, trimString) {
             jc <- callJStatic("org.apache.spark.sql.functions", "rtrim", x@jc)
             column(jc)
           })
 
+#' @rdname column_string_functions
+#' @aliases rtrim,Column,character-method
+#' @export
+#' @note rtrim(Column, character) since 2.3.0
+setMethod("rtrim",
+          signature(x = "Column", trimString = "character"),
+          function(x, trimString) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "rtrim", x@jc, 
trimString)
+            column(jc)
+          })
+
 #' @details
 #' \code{sd}: Alias for \code{stddev_samp}.
 #'
@@ -1789,19 +1815,31 @@ setMethod("to_timestamp",
           })
 
 #' @details
-#' \code{trim}: Trims the spaces from both ends for the specified string 
column.
+#' \code{trim}: Trims the spaces from both ends for the specified string 
column. Optionally a
+#' \code{trimString} can be specified.
 #'
 #' @rdname column_string_functions
-#' @aliases trim trim,Column-method
+#' @aliases trim trim,Column,missing-method
 #' @export
 #' @note trim since 1.5.0
 setMethod("trim",
-          signature(x = "Column"),
-          function(x) {
+          signature(x = "Column", trimString = "missing"),
+          function(x, trimString) {
             jc <- callJStatic("org.apache.spark.sql.functions", "trim", x@jc)
             column(jc)
           })
 
+#' @rdname column_string_functions
+#' @aliases trim,Column,character-method
+#' @export
+#' @note trim(Column, character) since 2.3.0
+setMethod("trim",
+          signature(x = "Column", trimString = "character"),
+          function(x, trimString) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "trim", x@jc, 
trimString)
+            column(jc)
+          })
+
 #' @details
 #' \code{unbase64}: Decodes a BASE64 encoded string column and returns it as a 
binary column.
 #' This is the reverse of base64.
@@ -2777,11 +2815,11 @@ setMethod("rpad", signature(x = "Column", len = 
"numeric", pad = "character"),
           })
 
 #' @details
-#' \code{substring_index}: Returns the substring from string str before count 
occurrences of
-#' the delimiter delim. If count is positive, everything the left of the final 
delimiter
-#' (counting from left) is returned. If count is negative, every to the right 
of the final
-#' delimiter (counting from the right) is returned. substring_index performs a 
case-sensitive
-#' match when searching for delim.
+#' \code{substring_index}: Returns the substring from string (\code{x}) before 
\code{count}
+#' occurrences of the delimiter (\code{delim}). If \code{count} is positive, 
everything to the left of
+#' the final delimiter (counting from left) is returned. If \code{count} is 
negative, everything to the
+#' right of the final delimiter (counting from the right) is returned. 
\code{substring_index}
+#' performs a case-sensitive match when searching for the delimiter.
 #'
 #' @param delim a delimiter string.
 #' @param count number of occurrences of \code{delim} before the substring is 
returned.
@@ -3504,3 +3542,34 @@ setMethod("date_trunc",
             jc <- callJStatic("org.apache.spark.sql.functions", "date_trunc", 
format, x@jc)
             column(jc)
           })
+
+#' @details
+#' \code{current_date}: Returns the current date as a date column.
+#'
+#' @rdname column_datetime_functions
+#' @aliases current_date current_date,missing-method
+#' @export
+#' @examples
+#' \dontrun{
+#' head(select(df, current_date(), current_timestamp()))}
+#' @note current_date since 2.3.0
+setMethod("current_date",
+          signature("missing"),
+          function() {
+            jc <- callJStatic("org.apache.spark.sql.functions", "current_date")
+            column(jc)
+          })
+
+#' @details
+#' \code{current_timestamp}: Returns the current timestamp as a timestamp 
column.
+#'
+#' @rdname column_datetime_functions
+#' @aliases current_timestamp current_timestamp,missing-method
+#' @export
+#' @note current_timestamp since 2.3.0
+setMethod("current_timestamp",
+          signature("missing"),
+          function() {
+            jc <- callJStatic("org.apache.spark.sql.functions", 
"current_timestamp")
+            column(jc)
+          })

http://git-wip-us.apache.org/repos/asf/spark/blob/66a7d6b3/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index d5d0bc9..5369c32 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1027,6 +1027,17 @@ setGeneric("hash", function(x, ...) { 
standardGeneric("hash") })
 #' @name NULL
 setGeneric("cume_dist", function(x = "missing") { standardGeneric("cume_dist") 
})
 
+#' @rdname column_datetime_functions
+#' @export
+#' @name NULL
+setGeneric("current_date", function(x = "missing") { 
standardGeneric("current_date") })
+
+#' @rdname column_datetime_functions
+#' @export
+#' @name NULL
+setGeneric("current_timestamp", function(x = "missing") { 
standardGeneric("current_timestamp") })
+
+
 #' @rdname column_datetime_diff_functions
 #' @export
 #' @name NULL
@@ -1230,7 +1241,7 @@ setGeneric("lpad", function(x, len, pad) { 
standardGeneric("lpad") })
 #' @rdname column_string_functions
 #' @export
 #' @name NULL
-setGeneric("ltrim", function(x) { standardGeneric("ltrim") })
+setGeneric("ltrim", function(x, trimString) { standardGeneric("ltrim") })
 
 #' @rdname column_collection_functions
 #' @export
@@ -1380,7 +1391,7 @@ setGeneric("rpad", function(x, len, pad) { 
standardGeneric("rpad") })
 #' @rdname column_string_functions
 #' @export
 #' @name NULL
-setGeneric("rtrim", function(x) { standardGeneric("rtrim") })
+setGeneric("rtrim", function(x, trimString) { standardGeneric("rtrim") })
 
 #' @rdname column_aggregate_functions
 #' @export
@@ -1520,7 +1531,7 @@ setGeneric("translate", function(x, matchingString, 
replaceString) { standardGen
 #' @rdname column_string_functions
 #' @export
 #' @name NULL
-setGeneric("trim", function(x) { standardGeneric("trim") })
+setGeneric("trim", function(x, trimString) { standardGeneric("trim") })
 
 #' @rdname column_string_functions
 #' @export

http://git-wip-us.apache.org/repos/asf/spark/blob/66a7d6b3/R/pkg/tests/fulltests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R 
b/R/pkg/tests/fulltests/test_sparkSQL.R
index 650e7c0..1b7d53f 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1427,7 +1427,7 @@ test_that("column functions", {
   c9 <- signum(c) + sin(c) + sinh(c) + size(c) + stddev(c) + soundex(c) + 
sqrt(c) + sum(c)
   c10 <- sumDistinct(c) + tan(c) + tanh(c) + toDegrees(c) + toRadians(c)
   c11 <- to_date(c) + trim(c) + unbase64(c) + unhex(c) + upper(c)
-  c12 <- variance(c)
+  c12 <- variance(c) + ltrim(c, "a") + rtrim(c, "b") + trim(c, "c")
   c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1)
   c14 <- cume_dist() + ntile(1) + corr(c, c1)
   c15 <- dense_rank() + percent_rank() + rank() + row_number()
@@ -1441,7 +1441,7 @@ test_that("column functions", {
   c23 <- trunc(c, "year") + trunc(c, "yyyy") + trunc(c, "yy") +
     trunc(c, "month") + trunc(c, "mon") + trunc(c, "mm")
   c24 <- date_trunc("hour", c) + date_trunc("minute", c) + date_trunc("week", 
c) +
-    date_trunc("quarter", c)
+    date_trunc("quarter", c) + current_date() + current_timestamp()
 
   # Test if base::is.nan() is exposed
   expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE))


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-22920][SPARKR] sql functions for current_date, current_timestamp, rtrim/ltrim/trim with trimString

Reply via email to