dragosmg commented on a change in pull request #12433: URL: https://github.com/apache/arrow/pull/12433#discussion_r815947864
########## File path: r/R/dplyr-funcs-type.R ########## @@ -76,6 +76,53 @@ register_bindings_type_cast <- function() { register_binding("as.numeric", function(x) { Expression$create("cast", x, options = cast_options(to_type = float64())) }) + register_binding("as.Date", function(x, + format = NULL, + origin = "1970-01-01", + tz = "UTC") { + # base::as.Date() first converts to UTC and then extracts the date, which is + # why we need to go through timestamp() first - see unit tests for the real + # life impact of the difference between lubridate::date() and base::as.Date() + # browser() + if (call_binding("is.Date", x)) { + # arrow_date <- build_expr("cast", x, options = cast_options(to_type = date32())) + return(x) + } else if (call_binding("is.POSIXct", x)) { + if (tz == "UTC") { + arrow_timestamp <- build_expr("cast", x, options = cast_options(to_type = timestamp(timezone = tz))) + return(build_expr("cast", arrow_timestamp, options = cast_options(to_type = date32()))) + } else { + abort("`as.Date()` with a timezone different to 'UTC' is not supported in Arrow") + } + } else if (call_binding("is.character", x)) { + # this could be improved with tryFormats once strptime returns NA and we + # can use coalesce - https://issues.apache.org/jira/browse/ARROW-15659 + # TODO revisit once https://issues.apache.org/jira/browse/ARROW-15659 is done + if (!is.null(format)) { + arrow_timestamp <- call_binding("strptime", x, format, unit = "s") + return(build_expr("cast", arrow_timestamp, options = cast_options(to_type = date32()))) + } else { + abort("`as.Date()` without `format` is not supported in Arrow") + } + + } else if (call_binding("is.numeric", x)) { + # the origin argument will be better supported once we implement temporal + # arithmetic (https://issues.apache.org/jira/browse/ARROW-14947) + # TODO revisit once the above has been sorted + if (!call_binding("is.integer", x)) { + # Arrow does not support direct casting from double to date so we have + # to convert to 
integers first - casting to int32() would error so we + # need to use round before casting + x <- call_binding("floor", x) + x <- build_expr("cast", x, options = (cast_options(to_type = int32()))) + } + if (origin == "1970-01-01") { + return(build_expr("cast", x, options = cast_options(to_type = date32()))) + } else { + abort("`as.Date()` with an `origin` different than '1970-01-01' is not supported in Arrow") Review comment: [ARROW-15799](https://issues.apache.org/jira/browse/ARROW-15799) tracks revisiting `as.Date()` and updating the handling of the `origin` argument. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org