jonkeane commented on a change in pull request #12506:
URL: https://github.com/apache/arrow/pull/12506#discussion_r828002690
##########
File path: r/R/dplyr-funcs-datetime.R
##########
@@ -189,6 +189,65 @@ register_bindings_datetime <- function() {
})
}
+register_bindings_duration <- function() {
+ register_binding("difftime", function(time1,
+ time2,
+ tz,
+ units = c("auto", "secs", "mins",
+ "hours", "days", "weeks")) {
+ units <- match.arg(units)
+ if (units != "secs") {
+ abort("`difftime()` with units other than seconds not supported in
Arrow")
+ }
Review comment:
We should probably make `"secs"` the default for this argument. I know
it's `"auto"` in base R, but we don't (yet? may never?) have the facility to map
from `"auto"` to the duration that R would give, and we don't want the default
argument to always cause a failure.
It might also be nice to mention in the error message that the supported value
is `"secs"` — I always end up needing to go look up which abbreviation it is.
##########
File path: r/R/dplyr-funcs-datetime.R
##########
@@ -189,6 +189,65 @@ register_bindings_datetime <- function() {
})
}
+register_bindings_duration <- function() {
+ register_binding("difftime", function(time1,
+ time2,
+ tz,
+ units = c("auto", "secs", "mins",
+ "hours", "days", "weeks")) {
+ units <- match.arg(units)
+ if (units != "secs") {
+ abort("`difftime()` with units other than seconds not supported in
Arrow")
+ }
+
+ if (!missing(tz)) {
+ warn("`tz` is an optional argument to `difftime()` in R and will not be
used in Arrow")
+ }
+
+ time1 <- build_expr("cast", time1, options = cast_options(to_type =
timestamp()))
+ time2 <- build_expr("cast", time2, options = cast_options(to_type =
timestamp()))
Review comment:
I'm not sure if this is really necessary, since casting to the same type
should be a no-op, but you might want to confirm that it is. If it does add
overhead, wrap these in `if (!is.timestamp(time1)) { cast }`.
##########
File path: r/R/dplyr-funcs-datetime.R
##########
@@ -189,6 +189,65 @@ register_bindings_datetime <- function() {
})
}
+register_bindings_duration <- function() {
+ register_binding("difftime", function(time1,
+ time2,
+ tz,
+ units = c("auto", "secs", "mins",
+ "hours", "days", "weeks")) {
+ units <- match.arg(units)
+ if (units != "secs") {
+ abort("`difftime()` with units other than seconds not supported in
Arrow")
+ }
+
+ if (!missing(tz)) {
+ warn("`tz` is an optional argument to `difftime()` in R and will not be
used in Arrow")
+ }
+
+ time1 <- build_expr("cast", time1, options = cast_options(to_type =
timestamp()))
+ time2 <- build_expr("cast", time2, options = cast_options(to_type =
timestamp()))
+
+ build_expr("cast", time1 - time2, options = cast_options(to_type =
duration("s")))
+ })
+
+ register_binding("as.difftime", function(x,
+ format = "%X",
+ units = "auto") {
+ # windows doesn't seem to like "%X"
+ if (format == "%X" & tolower(Sys.info()[["sysname"]]) == "windows") {
+ format <- "%H:%M:%S"
+ }
+
+ if (units != "secs") {
+ abort("`as.difftime()` with units other than seconds not supported in
Arrow")
+ }
Review comment:
Same thing here: having the default always error is not great. Let's
make the default `"secs"`.
##########
File path: r/R/dplyr-funcs-datetime.R
##########
@@ -189,6 +189,65 @@ register_bindings_datetime <- function() {
})
}
+register_bindings_duration <- function() {
+ register_binding("difftime", function(time1,
+ time2,
+ tz,
+ units = c("auto", "secs", "mins",
+ "hours", "days", "weeks")) {
+ units <- match.arg(units)
+ if (units != "secs") {
+ abort("`difftime()` with units other than seconds not supported in
Arrow")
+ }
+
+ if (!missing(tz)) {
+ warn("`tz` is an optional argument to `difftime()` in R and will not be
used in Arrow")
+ }
+
+ time1 <- build_expr("cast", time1, options = cast_options(to_type =
timestamp()))
+ time2 <- build_expr("cast", time2, options = cast_options(to_type =
timestamp()))
+
+ build_expr("cast", time1 - time2, options = cast_options(to_type =
duration("s")))
+ })
+
+ register_binding("as.difftime", function(x,
+ format = "%X",
+ units = "auto") {
+ # windows doesn't seem to like "%X"
+ if (format == "%X" & tolower(Sys.info()[["sysname"]]) == "windows") {
+ format <- "%H:%M:%S"
+ }
+
+ if (units != "secs") {
+ abort("`as.difftime()` with units other than seconds not supported in
Arrow")
+ }
+
+ if (call_binding("is.character", x)) {
+ x <- build_expr("strptime", x, options = list(format = format, unit =
0L))
+ y <- build_expr("strptime", "0:0:0", options = list(format = "%H:%M:%S",
unit = 0L))
+ diff_x_y <- call_binding("difftime", x, y, units = "secs")
+ return(diff_x_y)
+ }
Review comment:
Have you looked into calling `strptime` on `x` and then converting the result
to `time32()`? I think that, plus a cast to duration, will do what you want
without needing to construct a separate `y` here.
##########
File path: r/tests/testthat/test-dplyr-funcs-datetime.R
##########
@@ -974,3 +974,181 @@ test_that("date() errors with unsupported inputs", {
regexp = "Unsupported cast from double to date32 using function
cast_date32"
)
})
+
+test_that("difftime works correctly", {
Review comment:
```suggestion
test_that("difftime()", {
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]