jonkeane commented on a change in pull request #12433:
URL: https://github.com/apache/arrow/pull/12433#discussion_r817205283



##########
File path: r/R/dplyr-funcs-type.R
##########
@@ -76,6 +76,60 @@ register_bindings_type_cast <- function() {
   register_binding("as.numeric", function(x) {
     Expression$create("cast", x, options = cast_options(to_type = float64()))
   })
+  register_binding("as.Date", function(x,
+                                       format = NULL,
+                                       tryFormats = "%Y-%m-%d",
+                                       origin = "1970-01-01",
+                                       tz = "UTC") {
+
+    if (call_binding("is.Date", x)) {
+      # base::as.Date() first converts to the desired timezone and then 
extracts
+      # the date, which is why we need to go through timestamp() first
+      return(x)
+
+    # cast from POSIXct
+    } else if (call_binding("is.POSIXct", x)) {
+      if (tz == "UTC") {
+        x <- build_expr("cast", x, options = cast_options(to_type = 
timestamp(timezone = tz)))
+      } else {
+        abort("`as.Date()` with a timezone different to 'UTC' is not supported 
in Arrow")
+      }

Review comment:
       We chatted about this a bit earlier: have you tried to get rid of this 
bit here? I suspect that you're actually close, and this should totally be 
possible. 
   
   You might need to have `tz` here be hardcoded as `UTC`, eg:
   
   ```
   x <- build_expr("cast", x, options = cast_options(to_type = 
timestamp(timezone ="UTC")))
   ```
   
   But I _think_ that should do it and just work.
   
   Here's a test I did with arrow from the master branch:
   ```
   > df <- data.frame(time = as.POSIXct("2020-01-01 22:00:00", tz = 
"America/Chicago"))
   > df %>% arrow_table() %>% mutate(strftime = strftime(time, "%Y-%m-%d"), 
as_date = cast(time, date32()), as_date_utc = cast(cast(time, 
timestamp(timezone = "UTC")), date32())) %>% collect()
                    time   strftime    as_date as_date_utc
   1 2020-01-01 22:00:00 2020-01-01 2020-01-01  2020-01-02
   ```

##########
File path: r/R/dplyr-funcs-type.R
##########
@@ -76,6 +76,60 @@ register_bindings_type_cast <- function() {
   register_binding("as.numeric", function(x) {
     Expression$create("cast", x, options = cast_options(to_type = float64()))
   })
+  register_binding("as.Date", function(x,
+                                       format = NULL,
+                                       tryFormats = "%Y-%m-%d",
+                                       origin = "1970-01-01",
+                                       tz = "UTC") {
+
+    if (call_binding("is.Date", x)) {
+      # base::as.Date() first converts to the desired timezone and then 
extracts
+      # the date, which is why we need to go through timestamp() first
+      return(x)
+
+    # cast from POSIXct
+    } else if (call_binding("is.POSIXct", x)) {
+      if (tz == "UTC") {
+        x <- build_expr("cast", x, options = cast_options(to_type = 
timestamp(timezone = tz)))
+      } else {
+        abort("`as.Date()` with a timezone different to 'UTC' is not supported 
in Arrow")
+      }
+
+    # cast from character
+    } else if (call_binding("is.character", x)) {
+      # this could be improved with tryFormats once strptime returns NA and we
+      # can use coalesce - https://issues.apache.org/jira/browse/ARROW-15659
+      # TODO revisit once https://issues.apache.org/jira/browse/ARROW-15659 is 
done
+      if (is.null(format)) {
+        if (length(tryFormats) == 1) {
+          format <- tryFormats[1]
+        } else {
+          abort("`as.Date()` with multiple `tryFormats` is not supported in 
Arrow yet")
+        }
+      }
+      x <- build_expr("strptime", x, options = list(format = format, unit = 
0L))

Review comment:
       What you have is a bit more exhaustive (and the hard-coded `[[1]]` in 
the call isn't typical), but this should have the same behavior and save us one 
level of if/else nesting (which is not usually a big deal, but in this function 
we've got a bunch already!).
   
   Alternatively, you could make the default `format` _be_ `tryFormats[[1]]` in 
the signature. That would accomplish the same thing and you could have just the 
`if() { abort() }` and then `build_expr(... format = format)`
   
   ```suggestion
         if (is.null(format) && length(tryFormats) > 1) {
           abort("`as.Date()` with multiple `tryFormats` is not supported in 
Arrow yet")
         }
         
         format <- format %||% tryFormats[[1]]
         x <- build_expr("strptime", x, options = list(format = format, unit = 
0L))
   ```

##########
File path: r/R/dplyr-funcs-type.R
##########
@@ -76,6 +76,60 @@ register_bindings_type_cast <- function() {
   register_binding("as.numeric", function(x) {
     Expression$create("cast", x, options = cast_options(to_type = float64()))
   })
+  register_binding("as.Date", function(x,
+                                       format = NULL,
+                                       tryFormats = "%Y-%m-%d",
+                                       origin = "1970-01-01",
+                                       tz = "UTC") {
+
+    if (call_binding("is.Date", x)) {
+      # base::as.Date() first converts to the desired timezone and then 
extracts
+      # the date, which is why we need to go through timestamp() first
+      return(x)
+
+    # cast from POSIXct
+    } else if (call_binding("is.POSIXct", x)) {
+      if (tz == "UTC") {
+        x <- build_expr("cast", x, options = cast_options(to_type = 
timestamp(timezone = tz)))
+      } else {
+        abort("`as.Date()` with a timezone different to 'UTC' is not supported 
in Arrow")
+      }
+
+    # cast from character
+    } else if (call_binding("is.character", x)) {
+      # this could be improved with tryFormats once strptime returns NA and we
+      # can use coalesce - https://issues.apache.org/jira/browse/ARROW-15659
+      # TODO revisit once https://issues.apache.org/jira/browse/ARROW-15659 is 
done
+      if (is.null(format)) {
+        if (length(tryFormats) == 1) {
+          format <- tryFormats[1]
+        } else {
+          abort("`as.Date()` with multiple `tryFormats` is not supported in 
Arrow yet")
+        }
+      }
+      x <- build_expr("strptime", x, options = list(format = format, unit = 
0L))
+
+    # cast from numeric
+    } else if (call_binding("is.numeric", x)) {
+      # the origin argument will be better supported once we implement temporal
+      # arithmetic (https://issues.apache.org/jira/browse/ARROW-14947)
+      # TODO revisit once the above has been sorted
+      if (!call_binding("is.integer", x)) {
+        # Arrow does not support direct casting from double to date so we have
+        # to convert to integers first - casting to int32() would error so we
+        # need to use `floor` before casting. `floor` is also a bit safer than
+        # int32() with `safe = FALSE` since it doesn't switch to `ceiling` for
+        # negative numbers
+        # TODO revisit if arrow decides to support double -> date casting
+        x <- call_binding("floor", x)
+        x <- build_expr("cast", x, options = cast_options(to_type = int32()))
+      }
+      if (origin != "1970-01-01") {
+        abort("`as.Date()` with an `origin` different than '1970-01-01' is not 
supported in Arrow")
+      }
+    }

Review comment:
       Minor, but could we move this `origin` check to the top of the function? 
There's not much computation going on above so I'm not super worried about the 
time it takes to get to the error, but it's helpful to have argument validation 
at the top of a function.

##########
File path: r/tests/testthat/test-dplyr-funcs-datetime.R
##########
@@ -801,3 +801,99 @@ test_that("dst extracts daylight savings time correctly", {
     test_df
   )
 })
+
+test_that("date works in arrow", {
+  # https://issues.apache.org/jira/browse/ARROW-13168
+  skip_on_os("windows")
+  # this date is specific since lubridate::date() is different from 
base::as.Date()
+  # since as.Date returns the UTC date and date() doesn't
+  test_df <- tibble(
+    posixct_date = as.POSIXct(c("2012-03-26 23:12:13", NA), tz = 
"America/New_York"),
+    integer_var = c(32L, NA))
+
+  r_date_object <- lubridate::ymd_hms("2012-03-26 23:12:13")
+
+  # we can't (for now) use namespacing, so we need to make sure 
lubridate::date()
+  # and not base::date() is being used. This is due to the way testthat runs 
and
+  # normal use of arrow would not have to do this explicitly.
+  # TODO remove once https://issues.apache.org/jira/browse/ARROW-14575 is done
+  date <- lubridate::date
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(a_date = date(posixct_date)) %>%
+      collect(),
+    test_df
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(a_date_base = as.Date(posixct_date)) %>%
+      collect(),
+    test_df
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(date_from_r_object = date(r_date_object)) %>%
+      collect(),
+    test_df
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(as_date_from_r_object = as.Date(r_date_object)) %>%
+      collect(),
+    test_df
+  )
+
+  # date from integer supported in arrow (similar to base::as.Date()), but in
+  # Arrow it assumes a fixed origin "1970-01-01". However this is not supported
+  # by lubridate. lubridate::date(integer_var) errors without an `origin`
+  expect_equal(
+    test_df %>%
+      arrow_table() %>%
+      select(integer_var) %>%
+      mutate(date_int = date(integer_var)) %>%
+      collect(),
+    tibble(integer_var = c(32L, NA),
+           date_int = as.Date(c("1970-02-02", NA)))
+  )
+})
+
+test_that("date() errors with unsupported inputs", {
+  test_df <- tibble::tibble(
+    posixct_var = as.POSIXct("2022-02-25 00:00:01", tz = "Europe/London"),
+    date_var = as.Date("2022-02-25"),
+    character_ymd_var = "2022-02-25 00:00:01",
+    character_ydm_var = "2022/25/02 00:00:01",
+    integer_var = 32L,
+    double_var = 34.56,
+    boolean_var = TRUE
+  )
+
+  expect_error(
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_char = date(character_ymd_var)) %>%
+      collect(),
+    regexp = "Unsupported cast from string to date32 using function 
cast_date32"
+  )
+
+  expect_error(
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_bool = date(boolean_var)) %>%
+      collect(),
+    regexp = "Unsupported cast from bool to date32 using function cast_date32"
+  )
+
+  expect_error(
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_double = date(double_var)) %>%
+      collect(),
+    regexp = "Unsupported cast from double to date32 using function 
cast_date32"
+  )

Review comment:
       Should we remove columns from this data.frame that we're not using in 
the tests?
   
   Also is it possible to use R-scalar values in the pipelines themselves:
   
   ```
     expect_error(
         test_df %>%
           arrow_table() %>%
           mutate(date_char = date("2022-02-25 00:00:01")) %>%
           collect(),
         regexp = "Unsupported cast from string to date32 using function 
cast_date32"
       )
   ```
   
   That would be a bit cleaner + easier to parse IMO

##########
File path: r/tests/testthat/test-dplyr-funcs-datetime.R
##########
@@ -801,3 +801,99 @@ test_that("dst extracts daylight savings time correctly", {
     test_df
   )
 })
+
+test_that("date works in arrow", {
+  # https://issues.apache.org/jira/browse/ARROW-13168
+  skip_on_os("windows")
+  # this date is specific since lubridate::date() is different from 
base::as.Date()
+  # since as.Date returns the UTC date and date() doesn't
+  test_df <- tibble(
+    posixct_date = as.POSIXct(c("2012-03-26 23:12:13", NA), tz = 
"America/New_York"),
+    integer_var = c(32L, NA))
+
+  r_date_object <- lubridate::ymd_hms("2012-03-26 23:12:13")
+
+  # we can't (for now) use namespacing, so we need to make sure 
lubridate::date()
+  # and not base::date() is being used. This is due to the way testthat runs 
and
+  # normal use of arrow would not have to do this explicitly.
+  # TODO remove once https://issues.apache.org/jira/browse/ARROW-14575 is done
+  date <- lubridate::date
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(a_date = date(posixct_date)) %>%
+      collect(),
+    test_df
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(a_date_base = as.Date(posixct_date)) %>%
+      collect(),
+    test_df
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(date_from_r_object = date(r_date_object)) %>%
+      collect(),
+    test_df
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(as_date_from_r_object = as.Date(r_date_object)) %>%
+      collect(),
+    test_df
+  )
+
+  # date from integer supported in arrow (similar to base::as.Date()), but in
+  # Arrow it assumes a fixed origin "1970-01-01". However this is not supported
+  # by lubridate. lubridate::date(integer_var) errors without an `origin`
+  expect_equal(
+    test_df %>%
+      arrow_table() %>%
+      select(integer_var) %>%
+      mutate(date_int = date(integer_var)) %>%
+      collect(),
+    tibble(integer_var = c(32L, NA),
+           date_int = as.Date(c("1970-02-02", NA)))
+  )
+})
+
+test_that("date() errors with unsupported inputs", {
+  test_df <- tibble::tibble(
+    posixct_var = as.POSIXct("2022-02-25 00:00:01", tz = "Europe/London"),
+    date_var = as.Date("2022-02-25"),
+    character_ymd_var = "2022-02-25 00:00:01",
+    character_ydm_var = "2022/25/02 00:00:01",
+    integer_var = 32L,
+    double_var = 34.56,
+    boolean_var = TRUE
+  )
+
+  expect_error(
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_char = date(character_ymd_var)) %>%
+      collect(),
+    regexp = "Unsupported cast from string to date32 using function 
cast_date32"
+  )
+
+  expect_error(
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_bool = date(boolean_var)) %>%
+      collect(),
+    regexp = "Unsupported cast from bool to date32 using function cast_date32"
+  )
+
+  expect_error(
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_double = date(double_var)) %>%
+      collect(),
+    regexp = "Unsupported cast from double to date32 using function 
cast_date32"
+  )
+

Review comment:
       ```suggestion
   ```

##########
File path: r/tests/testthat/test-dplyr-funcs-type.R
##########
@@ -768,3 +769,138 @@ test_that("nested structs can be created from scalars and 
existing data frames",
     tibble(a = 1:2)
   )
 })
+
+test_that("as.Date() converts successfully from date, timestamp, integer, char 
and double", {
+  test_df <- tibble::tibble(
+    posixct_var = as.POSIXct("2022-02-25 00:00:01", tz = "Europe/London"),
+    date_var = as.Date("2022-02-25"),
+    character_ymd_var = "2022-02-25 00:00:01",
+    character_ydm_var = "2022/25/02 00:00:01",
+    integer_var = 32L,
+    double_var = 34.56
+  )
+
+  # casting from POSIXct treated separately so we can skip on Windows
+  # TODO move the test for casting from POSIXct below once ARROW-13168 is done
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        date_dv = as.Date(date_var),
+        date_char_ymd = as.Date(character_ymd_var, format = "%Y-%m-%d 
%H:%M:%S"),
+        date_char_ydm = as.Date(character_ydm_var, format = "%Y/%d/%m 
%H:%M:%S"),
+        date_int = as.Date(integer_var, origin = "1970-01-01")
+      ) %>%
+      collect(),
+    test_df
+  )
+
+  # the way we go about it is a bit different, but the result is the same =>
+  # testing without compare_dplyr_binding()
+  expect_equal(
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_double = as.Date(double_var)) %>%
+      collect(),
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_double = as.Date(double_var, origin = "1970-01-01")) %>%
+      collect()
+  )

Review comment:
       And same for the test below too

##########
File path: r/tests/testthat/test-dplyr-funcs-type.R
##########
@@ -768,3 +769,138 @@ test_that("nested structs can be created from scalars and 
existing data frames",
     tibble(a = 1:2)
   )
 })
+
+test_that("as.Date() converts successfully from date, timestamp, integer, char 
and double", {
+  test_df <- tibble::tibble(
+    posixct_var = as.POSIXct("2022-02-25 00:00:01", tz = "Europe/London"),
+    date_var = as.Date("2022-02-25"),
+    character_ymd_var = "2022-02-25 00:00:01",
+    character_ydm_var = "2022/25/02 00:00:01",
+    integer_var = 32L,
+    double_var = 34.56
+  )
+
+  # casting from POSIXct treated separately so we can skip on Windows
+  # TODO move the test for casting from POSIXct below once ARROW-13168 is done
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        date_dv = as.Date(date_var),
+        date_char_ymd = as.Date(character_ymd_var, format = "%Y-%m-%d 
%H:%M:%S"),
+        date_char_ydm = as.Date(character_ydm_var, format = "%Y/%d/%m 
%H:%M:%S"),
+        date_int = as.Date(integer_var, origin = "1970-01-01")
+      ) %>%
+      collect(),
+    test_df
+  )
+
+  # the way we go about it is a bit different, but the result is the same =>
+  # testing without compare_dplyr_binding()
+  expect_equal(
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_double = as.Date(double_var)) %>%
+      collect(),
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_double = as.Date(double_var, origin = "1970-01-01")) %>%
+      collect()
+  )
+
+  expect_equal(
+    test_df %>%
+      record_batch() %>%
+      mutate(date_double = as.Date(double_var)) %>%
+      collect(),
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_double = as.Date(double_var, origin = "1970-01-01")) %>%
+      collect()
+  )
+
+  # actual and expected differ due to doubles are accounted for (floored in
+  # arrow and rounded to the next decimal in R)
+  expect_error(
+    compare_dplyr_binding(
+      .input %>%
+        mutate(date_double = as.Date(double_var, origin = "1970-01-01")) %>%
+        collect(),
+      test_df
+    )
+  )

Review comment:
       Is this what we want? When I was reading the `floor` stuff in the code I 
actually assumed that we were doing that because that's what R did. But seeing 
this test now, it sounds like we probably ought to `round` instead of `floor`, 
yeah? 
   
   Or is there some other reason why `floor` is what we want to do in R?

##########
File path: r/tests/testthat/test-dplyr-funcs-type.R
##########
@@ -768,3 +769,138 @@ test_that("nested structs can be created from scalars and 
existing data frames",
     tibble(a = 1:2)
   )
 })
+
+test_that("as.Date() converts successfully from date, timestamp, integer, char 
and double", {
+  test_df <- tibble::tibble(
+    posixct_var = as.POSIXct("2022-02-25 00:00:01", tz = "Europe/London"),
+    date_var = as.Date("2022-02-25"),
+    character_ymd_var = "2022-02-25 00:00:01",
+    character_ydm_var = "2022/25/02 00:00:01",
+    integer_var = 32L,
+    double_var = 34.56
+  )
+
+  # casting from POSIXct treated separately so we can skip on Windows
+  # TODO move the test for casting from POSIXct below once ARROW-13168 is done
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        date_dv = as.Date(date_var),
+        date_char_ymd = as.Date(character_ymd_var, format = "%Y-%m-%d 
%H:%M:%S"),
+        date_char_ydm = as.Date(character_ydm_var, format = "%Y/%d/%m 
%H:%M:%S"),
+        date_int = as.Date(integer_var, origin = "1970-01-01")
+      ) %>%
+      collect(),
+    test_df
+  )
+
+  # the way we go about it is a bit different, but the result is the same =>
+  # testing without compare_dplyr_binding()
+  expect_equal(
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_double = as.Date(double_var)) %>%
+      collect(),
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_double = as.Date(double_var, origin = "1970-01-01")) %>%
+      collect()
+  )

Review comment:
       Could you tell me more about this test and what it's testing? What I'm 
seeing is basically testing that the default is `origin = "1970-01-01"`, but 
maybe I'm missing something?

##########
File path: r/R/dplyr-funcs-type.R
##########
@@ -76,6 +76,60 @@ register_bindings_type_cast <- function() {
   register_binding("as.numeric", function(x) {
     Expression$create("cast", x, options = cast_options(to_type = float64()))
   })
+  register_binding("as.Date", function(x,
+                                       format = NULL,
+                                       tryFormats = "%Y-%m-%d",
+                                       origin = "1970-01-01",
+                                       tz = "UTC") {
+
+    if (call_binding("is.Date", x)) {
+      # base::as.Date() first converts to the desired timezone and then 
extracts
+      # the date, which is why we need to go through timestamp() first
+      return(x)
+
+    # cast from POSIXct
+    } else if (call_binding("is.POSIXct", x)) {
+      if (tz == "UTC") {
+        x <- build_expr("cast", x, options = cast_options(to_type = 
timestamp(timezone = tz)))
+      } else {
+        abort("`as.Date()` with a timezone different to 'UTC' is not supported 
in Arrow")
+      }
+
+    # cast from character
+    } else if (call_binding("is.character", x)) {
+      # this could be improved with tryFormats once strptime returns NA and we
+      # can use coalesce - https://issues.apache.org/jira/browse/ARROW-15659
+      # TODO revisit once https://issues.apache.org/jira/browse/ARROW-15659 is 
done
+      if (is.null(format)) {
+        if (length(tryFormats) == 1) {
+          format <- tryFormats[1]
+        } else {
+          abort("`as.Date()` with multiple `tryFormats` is not supported in 
Arrow yet")
+        }
+      }
+      x <- build_expr("strptime", x, options = list(format = format, unit = 
0L))
+
+    # cast from numeric
+    } else if (call_binding("is.numeric", x)) {
+      # the origin argument will be better supported once we implement temporal
+      # arithmetic (https://issues.apache.org/jira/browse/ARROW-14947)
+      # TODO revisit once the above has been sorted
+      if (!call_binding("is.integer", x)) {

Review comment:
       Could we collapse these into this?
   
   ```
   } else if (call_binding("is.numeric", x) & !call_binding("is.integer", x)) {
   ```
   
   I know eventually we might need to split them out, but again this will give 
us one less indentation level

##########
File path: r/tests/testthat/test-dplyr-funcs-type.R
##########
@@ -768,3 +769,138 @@ test_that("nested structs can be created from scalars and 
existing data frames",
     tibble(a = 1:2)
   )
 })
+
+test_that("as.Date() converts successfully from date, timestamp, integer, char 
and double", {
+  test_df <- tibble::tibble(
+    posixct_var = as.POSIXct("2022-02-25 00:00:01", tz = "Europe/London"),
+    date_var = as.Date("2022-02-25"),
+    character_ymd_var = "2022-02-25 00:00:01",
+    character_ydm_var = "2022/25/02 00:00:01",
+    integer_var = 32L,
+    double_var = 34.56
+  )
+
+  # casting from POSIXct treated separately so we can skip on Windows
+  # TODO move the test for casting from POSIXct below once ARROW-13168 is done
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        date_dv = as.Date(date_var),
+        date_char_ymd = as.Date(character_ymd_var, format = "%Y-%m-%d 
%H:%M:%S"),
+        date_char_ydm = as.Date(character_ydm_var, format = "%Y/%d/%m 
%H:%M:%S"),
+        date_int = as.Date(integer_var, origin = "1970-01-01")
+      ) %>%
+      collect(),
+    test_df
+  )
+
+  # the way we go about it is a bit different, but the result is the same =>
+  # testing without compare_dplyr_binding()
+  expect_equal(
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_double = as.Date(double_var)) %>%
+      collect(),
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_double = as.Date(double_var, origin = "1970-01-01")) %>%
+      collect()
+  )
+
+  expect_equal(
+    test_df %>%
+      record_batch() %>%
+      mutate(date_double = as.Date(double_var)) %>%
+      collect(),
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_double = as.Date(double_var, origin = "1970-01-01")) %>%
+      collect()
+  )
+
+  # actual and expected differ due to doubles are accounted for (floored in
+  # arrow and rounded to the next decimal in R)
+  expect_error(
+    compare_dplyr_binding(
+      .input %>%
+        mutate(date_double = as.Date(double_var, origin = "1970-01-01")) %>%
+        collect(),
+      test_df
+    )
+  )
+
+  expect_equal(
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_double = as.Date(double_var, origin = "1970-01-01")) %>%
+      collect(),
+    test_df %>%
+      mutate(date_double = as.Date(double_var, origin = "1970-01-01")),
+    # the absolute value for date_double is different due to arrow casting from
+    # integer and r from double => testing with a tolerance of 0.6
+    # `actual$date_double`: 34.0
+    # `expected$date_double`: 34.6
+    tolerance = 0.6
+  )
+
+  expect_equal(
+    test_df %>%
+      record_batch() %>%
+      mutate(date_double = as.Date(double_var, origin = "1970-01-01")) %>%
+      collect(),
+    test_df %>%
+      mutate(date_double = as.Date(double_var, origin = "1970-01-01")),
+    # the absolute value for date_double is different due to arrow casting from
+    # integer and r from double => testing with a tolerance of 0.6
+    # `actual$date_double`: 34.0
+    # `expected$date_double`: 34.6
+    tolerance = 0.6
+  )
+
+  # currently we do not support an origin different to "1970-01-01"
+  expect_warning(
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_int = as.Date(integer_var, origin = "1970-01-03")) %>%
+      collect(),
+    regexp = "`as.Date()` with an `origin` different than '1970-01-01' is not 
supported in Arrow",
+    fixed = TRUE
+

Review comment:
       ```suggestion
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to