This is an automated email from the ASF dual-hosted git repository.

thisisnic pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 90aac16761 ARROW-16394: [R] Implement lubridate's parsers with year, 
month and date components
90aac16761 is described below

commit 90aac16761b7dbf5fe931bc8837cad5116939270
Author: Dragoș Moldovan-Grünfeld <[email protected]>
AuthorDate: Thu May 12 15:41:40 2022 +0100

    ARROW-16394: [R] Implement lubridate's parsers with year, month and date 
components
    
    This PR adds bindings for lubridate's parsers with **y**ear, **m**onth, and 
**d**ay components, allowing the following to work correctly:
    ``` r
    library(dplyr, warn.conflicts = FALSE)
    library(arrow, warn.conflicts = FALSE)
    library(lubridate, warn.conflicts = FALSE)
    
    test_df <- tibble::tibble(
      ymd_string = c("2022-05-11", "2022/05/12", "22.05-13")
    )
    
    test_df %>%
      mutate(ymd_date = ymd(ymd_string))
    #> # A tibble: 3 × 2
    #>   ymd_string ymd_date
    #>   <chr>      <date>
    #> 1 2022-05-11 2022-05-11
    #> 2 2022/05/12 2022-05-12
    #> 3 22.05-13   2022-05-13
    
    test_df %>%
      arrow_table() %>%
      mutate(ymd_date = ymd(ymd_string)) %>%
      collect()
    #> # A tibble: 3 × 2
    #>   ymd_string ymd_date
    #>   <chr>      <date>
    #> 1 2022-05-11 2022-05-11
    #> 2 2022/05/12 2022-05-12
    #> 3 22.05-13   2022-05-13
    ```
    
    <sup>Created on 2022-05-11 by the [reprex 
package](https://reprex.tidyverse.org) (v2.0.1)</sup>
    
    Closes #13118 from dragosmg/ymd_parsers
    
    Authored-by: Dragoș Moldovan-Grünfeld <[email protected]>
    Signed-off-by: Nic Crane <[email protected]>
---
 r/R/dplyr-funcs-datetime.R                   | 30 ++++++++++++++++-
 r/tests/testthat/test-dplyr-funcs-datetime.R | 50 ++++++++++++++++++++++++++--
 2 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index 5e78b94fe5..e0c65d64cc 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -516,6 +516,34 @@ register_bindings_datetime_parsers <- function() {
 
     coalesce_output <- build_expr("coalesce", args = parse_attempt_expressions)
 
-    build_expr("assume_timezone", coalesce_output, options = list(timezone = 
tz))
+    # we need this binding to be able to handle a NULL `tz`, which will then be
+    # used by bindings such as `ymd` to return, based on whether tz is NULL or
+    # not, a date or timestamp
+    if (!is.null(tz)) {
+      build_expr("assume_timezone", coalesce_output, options = list(timezone = 
tz))
+    } else {
+      coalesce_output
+    }
+
   })
+
+  ymd_parser_vec <- c("ymd", "ydm", "mdy", "myd", "dmy", "dym")
+
+  ymd_parser_map_factory <- function(order) {
+    force(order)
+    function(x, tz = NULL) {
+      parse_x <- call_binding("parse_date_time", x, order, tz)
+      if (is.null(tz)) {
+        # we cast so we can mimic the behaviour of the `tz` argument in 
lubridate
+        # "If NULL (default), a Date object is returned. Otherwise a POSIXct 
with
+        # time zone attribute set to tz."
+        parse_x <- parse_x$cast(date32())
+      }
+      parse_x
+    }
+  }
+
+  for (ymd_order in ymd_parser_vec) {
+    register_binding(ymd_order, ymd_parser_map_factory(ymd_order))
+  }
 }
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R 
b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 859c20ce02..42448e8243 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -1254,7 +1254,7 @@ test_that("`decimal_date()` and `date_decimal()`", {
       mutate(
         decimal_date_from_POSIXct = decimal_date(b),
         decimal_date_from_r_POSIXct_obj = decimal_date(as.POSIXct("2022-03-25 
15:37:01")),
-        decimal_date_from_r_date_obj = decimal_date(ymd("2022-03-25")),
+        decimal_date_from_r_date_obj = decimal_date(as.Date("2022-03-25")),
         decimal_date_from_date = decimal_date(c),
         date_from_decimal = date_decimal(a),
         date_from_decimal_r_obj = date_decimal(2022.178)
@@ -1640,7 +1640,8 @@ test_that("`as_datetime()`", {
 })
 
 test_that("parse_date_time() works with year, month, and date components", {
-  # string processing requires RE2 library (not available on Windows with R 
3.6)
+  # these functions' internals use some string processing which requires the
+  # RE2 library (not available on Windows with R 3.6)
   skip_if_not_available("re2")
   compare_dplyr_binding(
     .input %>%
@@ -1700,7 +1701,8 @@ test_that("parse_date_time() works with year, month, and 
date components", {
 })
 
 test_that("parse_date_time() works with a mix of formats and orders", {
-  # string processing requires RE2 library (not available on Windows with R 
3.6)
+  # these functions' internals use some string processing which requires the
+  # RE2 library (not available on Windows with R 3.6)
   skip_if_not_available("re2")
   test_df <- tibble(
     string_combi = c("2021-09-1", "2/09//2021", "09.3.2021")
@@ -1732,3 +1734,45 @@ test_that("parse_date_time() doesn't work with hour, 
minutes, and second compone
     '"ymd_HMS" `orders` not supported in Arrow'
   )
 })
+
+test_that("year, month, day date/time parsers work", {
+  test_df <- tibble::tibble(
+    ymd_string = c("2022-05-11", "2022/05/12", "22.05-13"),
+    ydm_string = c("2022-11-05", "2022/12/05", "22.13-05"),
+    mdy_string = c("05-11-2022", "05/12/2022", "05.13-22"),
+    myd_string = c("05-2022-11", "05/2022/12", "05.22-14"),
+    dmy_string = c("11-05-2022", "12/05/2022", "13.05-22"),
+    dym_string = c("11-2022-05", "12/2022/05", "13.22-05")
+  )
+
+  # these functions' internals use some string processing which requires the
+  # RE2 library (not available on Windows with R 3.6)
+  skip_if_not_available("re2")
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        ymd_date = ymd(ymd_string),
+        ydm_date = ydm(ydm_string),
+        mdy_date = mdy(mdy_string),
+        myd_date = myd(myd_string),
+        dmy_date = dmy(dmy_string),
+        dym_date = dym(dym_string)
+      ) %>%
+      collect(),
+    test_df
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        ymd_date = ymd(ymd_string, tz = "Pacific/Marquesas"),
+        ydm_date = ydm(ydm_string, tz = "Pacific/Marquesas"),
+        mdy_date = mdy(mdy_string, tz = "Pacific/Marquesas"),
+        myd_date = myd(myd_string, tz = "Pacific/Marquesas"),
+        dmy_date = dmy(dmy_string, tz = "Pacific/Marquesas"),
+        dym_date = dym(dym_string, tz = "Pacific/Marquesas")
+      ) %>%
+      collect(),
+    test_df
+  )
+})

Reply via email to