Repository: spark
Updated Branches:
  refs/heads/master f6084a88f -> aeb45df66


[SPARK-22844][R] Adds date_trunc in R API

## What changes were proposed in this pull request?

This PR adds `date_trunc` to the R API, as shown below:

```r
> df <- createDataFrame(list(list(a = as.POSIXlt("2012-12-13 12:34:00"))))
> head(select(df, date_trunc("hour", df$a)))
  date_trunc(hour, a)
1 2012-12-13 12:00:00
```

## How was this patch tested?

Unit tests added in `R/pkg/tests/fulltests/test_sparkSQL.R`.

Author: hyukjinkwon <[email protected]>

Closes #20031 from HyukjinKwon/r-datetrunc.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aeb45df6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aeb45df6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aeb45df6

Branch: refs/heads/master
Commit: aeb45df668a97a2d48cfd4079ed62601390979ba
Parents: f6084a8
Author: hyukjinkwon <[email protected]>
Authored: Sun Dec 24 01:18:11 2017 +0900
Committer: hyukjinkwon <[email protected]>
Committed: Sun Dec 24 01:18:11 2017 +0900

----------------------------------------------------------------------
 R/pkg/NAMESPACE                       |  1 +
 R/pkg/R/functions.R                   | 34 ++++++++++++++++++++++++++----
 R/pkg/R/generics.R                    |  5 +++++
 R/pkg/tests/fulltests/test_sparkSQL.R |  3 +++
 4 files changed, 39 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/aeb45df6/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 57838f5..dce64e1 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -230,6 +230,7 @@ exportMethods("%<=>%",
               "date_add",
               "date_format",
               "date_sub",
+              "date_trunc",
               "datediff",
               "dayofmonth",
               "dayofweek",

http://git-wip-us.apache.org/repos/asf/spark/blob/aeb45df6/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 237ef06..3a96f94 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -40,10 +40,17 @@ NULL
 #'
 #' @param x Column to compute on. In \code{window}, it must be a time Column of
 #'          \code{TimestampType}.
-#' @param format For \code{to_date} and \code{to_timestamp}, it is the string 
to use to parse
-#'               Column \code{x} to DateType or TimestampType. For 
\code{trunc}, it is the string
-#'               to use to specify the truncation method. For example, "year", 
"yyyy", "yy" for
-#'               truncate by year, or "month", "mon", "mm" for truncate by 
month.
+#' @param format The format for the given dates or timestamps in Column 
\code{x}. See the
+#'               format used in the following methods:
+#'               \itemize{
+#'               \item \code{to_date} and \code{to_timestamp}: it is the 
string to use to parse
+#'                    Column \code{x} to DateType or TimestampType.
+#'               \item \code{trunc}: it is the string to use to specify the 
truncation method.
+#'                    For example, "year", "yyyy", "yy" for truncate by year, 
or "month", "mon",
+#'                    "mm" for truncate by month.
+#'               \item \code{date_trunc}: it is similar to \code{trunc}'s 
but additionally
+#'                    supports "day", "dd", "second", "minute", "hour", "week" 
and "quarter".
+#'               }
 #' @param ... additional argument(s).
 #' @name column_datetime_functions
 #' @rdname column_datetime_functions
@@ -3478,3 +3485,22 @@ setMethod("trunc",
                               x@jc, as.character(format))
             column(jc)
           })
+
+#' @details
+#' \code{date_trunc}: Returns timestamp truncated to the unit specified by the 
format.
+#'
+#' @rdname column_datetime_functions
+#' @aliases date_trunc date_trunc,character,Column-method
+#' @export
+#' @examples
+#'
+#' \dontrun{
+#' head(select(df, df$time, date_trunc("hour", df$time), date_trunc("minute", 
df$time),
+#'             date_trunc("week", df$time), date_trunc("quarter", df$time)))}
+#' @note date_trunc since 2.3.0
+setMethod("date_trunc",
+          signature(format = "character", x = "Column"),
+          function(format, x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "date_trunc", 
format, x@jc)
+            column(jc)
+          })

http://git-wip-us.apache.org/repos/asf/spark/blob/aeb45df6/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 8fcf269..5ddaa66 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1046,6 +1046,11 @@ setGeneric("date_sub", function(y, x) { 
standardGeneric("date_sub") })
 #' @rdname column_datetime_functions
 #' @export
 #' @name NULL
+setGeneric("date_trunc", function(format, x) { standardGeneric("date_trunc") })
+
+#' @rdname column_datetime_functions
+#' @export
+#' @name NULL
 setGeneric("dayofmonth", function(x) { standardGeneric("dayofmonth") })
 
 #' @rdname column_datetime_functions

http://git-wip-us.apache.org/repos/asf/spark/blob/aeb45df6/R/pkg/tests/fulltests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R 
b/R/pkg/tests/fulltests/test_sparkSQL.R
index d87f5d2..6cc0188 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1418,6 +1418,8 @@ test_that("column functions", {
   c22 <- not(c)
   c23 <- trunc(c, "year") + trunc(c, "yyyy") + trunc(c, "yy") +
     trunc(c, "month") + trunc(c, "mon") + trunc(c, "mm")
+  c24 <- date_trunc("hour", c) + date_trunc("minute", c) + date_trunc("week", 
c) +
+    date_trunc("quarter", c)
 
   # Test if base::is.nan() is exposed
   expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE))
@@ -1729,6 +1731,7 @@ test_that("date functions on a DataFrame", {
   expect_gt(collect(select(df2, unix_timestamp()))[1, 1], 0)
   expect_gt(collect(select(df2, unix_timestamp(df2$b)))[1, 1], 0)
   expect_gt(collect(select(df2, unix_timestamp(lit("2015-01-01"), 
"yyyy-MM-dd")))[1, 1], 0)
+  expect_equal(collect(select(df2, month(date_trunc("yyyy", df2$b))))[, 1], 
c(1, 1))
 
   l3 <- list(list(a = 1000), list(a = -1000))
   df3 <- createDataFrame(l3)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to