Repository: spark
Updated Branches:
refs/heads/master f6084a88f -> aeb45df66
[SPARK-22844][R] Adds date_trunc in R API
## What changes were proposed in this pull request?
This PR adds `date_trunc` to the R API, as shown below:
```r
> df <- createDataFrame(list(list(a = as.POSIXlt("2012-12-13 12:34:00"))))
> head(select(df, date_trunc("hour", df$a)))
date_trunc(hour, a)
1 2012-12-13 12:00:00
```
## How was this patch tested?
Unit tests were added in `R/pkg/tests/fulltests/test_sparkSQL.R`.
Author: hyukjinkwon <[email protected]>
Closes #20031 from HyukjinKwon/r-datetrunc.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aeb45df6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aeb45df6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aeb45df6
Branch: refs/heads/master
Commit: aeb45df668a97a2d48cfd4079ed62601390979ba
Parents: f6084a8
Author: hyukjinkwon <[email protected]>
Authored: Sun Dec 24 01:18:11 2017 +0900
Committer: hyukjinkwon <[email protected]>
Committed: Sun Dec 24 01:18:11 2017 +0900
----------------------------------------------------------------------
R/pkg/NAMESPACE | 1 +
R/pkg/R/functions.R | 34 ++++++++++++++++++++++++++----
R/pkg/R/generics.R | 5 +++++
R/pkg/tests/fulltests/test_sparkSQL.R | 3 +++
4 files changed, 39 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/aeb45df6/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 57838f5..dce64e1 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -230,6 +230,7 @@ exportMethods("%<=>%",
"date_add",
"date_format",
"date_sub",
+ "date_trunc",
"datediff",
"dayofmonth",
"dayofweek",
http://git-wip-us.apache.org/repos/asf/spark/blob/aeb45df6/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 237ef06..3a96f94 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -40,10 +40,17 @@ NULL
#'
#' @param x Column to compute on. In \code{window}, it must be a time Column of
#' \code{TimestampType}.
-#' @param format For \code{to_date} and \code{to_timestamp}, it is the string
to use to parse
-#' Column \code{x} to DateType or TimestampType. For
\code{trunc}, it is the string
-#' to use to specify the truncation method. For example, "year",
"yyyy", "yy" for
-#' truncate by year, or "month", "mon", "mm" for truncate by
month.
+#' @param format The format for the given dates or timestamps in Column
\code{x}. See the
+#' format used in the following methods:
+#' \itemize{
+#' \item \code{to_date} and \code{to_timestamp}: it is the
string to use to parse
+#' Column \code{x} to DateType or TimestampType.
+#' \item \code{trunc}: it is the string to use to specify the
truncation method.
+#' For example, "year", "yyyy", "yy" for truncate by year,
or "month", "mon",
+#' "mm" for truncate by month.
+#' \item \code{date_trunc}: it is similar with \code{trunc}'s
but additionally
+#' supports "day", "dd", "second", "minute", "hour", "week"
and "quarter".
+#' }
#' @param ... additional argument(s).
#' @name column_datetime_functions
#' @rdname column_datetime_functions
@@ -3478,3 +3485,22 @@ setMethod("trunc",
x@jc, as.character(format))
column(jc)
})
+
+#' @details
+#' \code{date_trunc}: Returns timestamp truncated to the unit specified by the
format.
+#'
+#' @rdname column_datetime_functions
+#' @aliases date_trunc date_trunc,character,Column-method
+#' @export
+#' @examples
+#'
+#' \dontrun{
+#' head(select(df, df$time, date_trunc("hour", df$time), date_trunc("minute",
df$time),
+#' date_trunc("week", df$time), date_trunc("quarter", df$time)))}
+#' @note date_trunc since 2.3.0
+setMethod("date_trunc",
+ signature(format = "character", x = "Column"),
+ function(format, x) {
+ jc <- callJStatic("org.apache.spark.sql.functions", "date_trunc",
format, x@jc)
+ column(jc)
+ })
http://git-wip-us.apache.org/repos/asf/spark/blob/aeb45df6/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 8fcf269..5ddaa66 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1046,6 +1046,11 @@ setGeneric("date_sub", function(y, x) {
standardGeneric("date_sub") })
#' @rdname column_datetime_functions
#' @export
#' @name NULL
+setGeneric("date_trunc", function(format, x) { standardGeneric("date_trunc") })
+
+#' @rdname column_datetime_functions
+#' @export
+#' @name NULL
setGeneric("dayofmonth", function(x) { standardGeneric("dayofmonth") })
#' @rdname column_datetime_functions
http://git-wip-us.apache.org/repos/asf/spark/blob/aeb45df6/R/pkg/tests/fulltests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R
b/R/pkg/tests/fulltests/test_sparkSQL.R
index d87f5d2..6cc0188 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1418,6 +1418,8 @@ test_that("column functions", {
c22 <- not(c)
c23 <- trunc(c, "year") + trunc(c, "yyyy") + trunc(c, "yy") +
trunc(c, "month") + trunc(c, "mon") + trunc(c, "mm")
+ c24 <- date_trunc("hour", c) + date_trunc("minute", c) + date_trunc("week",
c) +
+ date_trunc("quarter", c)
# Test if base::is.nan() is exposed
expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE))
@@ -1729,6 +1731,7 @@ test_that("date functions on a DataFrame", {
expect_gt(collect(select(df2, unix_timestamp()))[1, 1], 0)
expect_gt(collect(select(df2, unix_timestamp(df2$b)))[1, 1], 0)
expect_gt(collect(select(df2, unix_timestamp(lit("2015-01-01"),
"yyyy-MM-dd")))[1, 1], 0)
+ expect_equal(collect(select(df2, month(date_trunc("yyyy", df2$b))))[, 1],
c(1, 1))
l3 <- list(list(a = 1000), list(a = -1000))
df3 <- createDataFrame(l3)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]