This is an automated email from the ASF dual-hosted git repository.
thisisnic pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 90aac16761 ARROW-16394: [R] Implement lubridate's parsers with year,
month and date components
90aac16761 is described below
commit 90aac16761b7dbf5fe931bc8837cad5116939270
Author: Dragoș Moldovan-Grünfeld <[email protected]>
AuthorDate: Thu May 12 15:41:40 2022 +0100
ARROW-16394: [R] Implement lubridate's parsers with year, month and date
components
This PR adds bindings for lubridate's parsers with **y**ear, **m**onth, and
**d**ay components, allowing the following to work correctly:
``` r
library(dplyr, warn.conflicts = FALSE)
library(arrow, warn.conflicts = FALSE)
library(lubridate, warn.conflicts = FALSE)
test_df <- tibble::tibble(
ymd_string = c("2022-05-11", "2022/05/12", "22.05-13")
)
test_df %>%
mutate(ymd_date = ymd(ymd_string))
#> # A tibble: 3 × 2
#> ymd_string ymd_date
#> <chr> <date>
#> 1 2022-05-11 2022-05-11
#> 2 2022/05/12 2022-05-12
#> 3 22.05-13 2022-05-13
test_df %>%
arrow_table() %>%
mutate(ymd_date = ymd(ymd_string)) %>%
collect()
#> # A tibble: 3 × 2
#> ymd_string ymd_date
#> <chr> <date>
#> 1 2022-05-11 2022-05-11
#> 2 2022/05/12 2022-05-12
#> 3 22.05-13 2022-05-13
```
<sup>Created on 2022-05-11 by the [reprex
package](https://reprex.tidyverse.org) (v2.0.1)</sup>
Closes #13118 from dragosmg/ymd_parsers
Authored-by: Dragoș Moldovan-Grünfeld <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
---
r/R/dplyr-funcs-datetime.R | 30 ++++++++++++++++-
r/tests/testthat/test-dplyr-funcs-datetime.R | 50 ++++++++++++++++++++++++++--
2 files changed, 76 insertions(+), 4 deletions(-)
diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index 5e78b94fe5..e0c65d64cc 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -516,6 +516,34 @@ register_bindings_datetime_parsers <- function() {
coalesce_output <- build_expr("coalesce", args = parse_attempt_expressions)
- build_expr("assume_timezone", coalesce_output, options = list(timezone =
tz))
+ # we need this binding to be able to handle a NULL `tz`, which will then be
+ # used by bindings such as `ymd` to return, based on whether tz is NULL or
+ # not, a date or timestamp
+ if (!is.null(tz)) {
+ build_expr("assume_timezone", coalesce_output, options = list(timezone =
tz))
+ } else {
+ coalesce_output
+ }
+
})
+
+ ymd_parser_vec <- c("ymd", "ydm", "mdy", "myd", "dmy", "dym")
+
+ ymd_parser_map_factory <- function(order) {
+ force(order)
+ function(x, tz = NULL) {
+ parse_x <- call_binding("parse_date_time", x, order, tz)
+ if (is.null(tz)) {
+ # we cast so we can mimic the behaviour of the `tz` argument in
lubridate
+ # "If NULL (default), a Date object is returned. Otherwise a POSIXct
with
+ # time zone attribute set to tz."
+ parse_x <- parse_x$cast(date32())
+ }
+ parse_x
+ }
+ }
+
+ for (ymd_order in ymd_parser_vec) {
+ register_binding(ymd_order, ymd_parser_map_factory(ymd_order))
+ }
}
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R
b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 859c20ce02..42448e8243 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -1254,7 +1254,7 @@ test_that("`decimal_date()` and `date_decimal()`", {
mutate(
decimal_date_from_POSIXct = decimal_date(b),
decimal_date_from_r_POSIXct_obj = decimal_date(as.POSIXct("2022-03-25
15:37:01")),
- decimal_date_from_r_date_obj = decimal_date(ymd("2022-03-25")),
+ decimal_date_from_r_date_obj = decimal_date(as.Date("2022-03-25")),
decimal_date_from_date = decimal_date(c),
date_from_decimal = date_decimal(a),
date_from_decimal_r_obj = date_decimal(2022.178)
@@ -1640,7 +1640,8 @@ test_that("`as_datetime()`", {
})
test_that("parse_date_time() works with year, month, and date components", {
- # string processing requires RE2 library (not available on Windows with R
3.6)
+ # these functions' internals use some string processing which requires the
+ # RE2 library (not available on Windows with R 3.6)
skip_if_not_available("re2")
compare_dplyr_binding(
.input %>%
@@ -1700,7 +1701,8 @@ test_that("parse_date_time() works with year, month, and
date components", {
})
test_that("parse_date_time() works with a mix of formats and orders", {
- # string processing requires RE2 library (not available on Windows with R
3.6)
+ # these functions' internals use some string processing which requires the
+ # RE2 library (not available on Windows with R 3.6)
skip_if_not_available("re2")
test_df <- tibble(
string_combi = c("2021-09-1", "2/09//2021", "09.3.2021")
@@ -1732,3 +1734,45 @@ test_that("parse_date_time() doesn't work with hour,
minutes, and second compone
'"ymd_HMS" `orders` not supported in Arrow'
)
})
+
+test_that("year, month, day date/time parsers work", {
+ test_df <- tibble::tibble(
+ ymd_string = c("2022-05-11", "2022/05/12", "22.05-13"),
+ ydm_string = c("2022-11-05", "2022/12/05", "22.13-05"),
+ mdy_string = c("05-11-2022", "05/12/2022", "05.13-22"),
+ myd_string = c("05-2022-11", "05/2022/12", "05.22-14"),
+ dmy_string = c("11-05-2022", "12/05/2022", "13.05-22"),
+ dym_string = c("11-2022-05", "12/2022/05", "13.22-05")
+ )
+
+ # these functions' internals use some string processing which requires the
+ # RE2 library (not available on Windows with R 3.6)
+ skip_if_not_available("re2")
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ ymd_date = ymd(ymd_string),
+ ydm_date = ydm(ydm_string),
+ mdy_date = mdy(mdy_string),
+ myd_date = myd(myd_string),
+ dmy_date = dmy(dmy_string),
+ dym_date = dym(dym_string)
+ ) %>%
+ collect(),
+ test_df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ ymd_date = ymd(ymd_string, tz = "Pacific/Marquesas"),
+ ydm_date = ydm(ydm_string, tz = "Pacific/Marquesas"),
+ mdy_date = mdy(mdy_string, tz = "Pacific/Marquesas"),
+ myd_date = myd(myd_string, tz = "Pacific/Marquesas"),
+ dmy_date = dmy(dmy_string, tz = "Pacific/Marquesas"),
+ dym_date = dym(dym_string, tz = "Pacific/Marquesas")
+ ) %>%
+ collect(),
+ test_df
+ )
+})