jonkeane commented on a change in pull request #12506:
URL: https://github.com/apache/arrow/pull/12506#discussion_r828002690



##########
File path: r/R/dplyr-funcs-datetime.R
##########
@@ -189,6 +189,65 @@ register_bindings_datetime <- function() {
   })
 }
 
+register_bindings_duration <- function() {
+  register_binding("difftime", function(time1,
+                                        time2,
+                                        tz,
+                                        units = c("auto", "secs", "mins",
+                                                  "hours", "days", "weeks")) {
+    units <- match.arg(units)
+    if (units != "secs") {
+      abort("`difftime()` with units other than seconds not supported in 
Arrow")
+    }

Review comment:
       We should probably make "secs" the default for this argument. I know 
it's "auto" in base R, but we don't (yet? may never?) have the facility to map 
from "auto" to the units that R would pick, and we don't want the default 
argument to always cause a failure.
   
   It might be nice to also mention that the argument name is `"secs"` in the 
message — I always end up needing to go look up which abbreviation it is.

##########
File path: r/R/dplyr-funcs-datetime.R
##########
@@ -189,6 +189,65 @@ register_bindings_datetime <- function() {
   })
 }
 
+register_bindings_duration <- function() {
+  register_binding("difftime", function(time1,
+                                        time2,
+                                        tz,
+                                        units = c("auto", "secs", "mins",
+                                                  "hours", "days", "weeks")) {
+    units <- match.arg(units)
+    if (units != "secs") {
+      abort("`difftime()` with units other than seconds not supported in 
Arrow")
+    }
+
+    if (!missing(tz)) {
+      warn("`tz` is an optional argument to `difftime()` in R and will not be 
used in Arrow")
+    }
+
+    time1 <- build_expr("cast", time1, options = cast_options(to_type = 
timestamp()))
+    time2 <- build_expr("cast", time2, options = cast_options(to_type = 
timestamp()))

Review comment:
       Casting to the same type should be a no-op, so I'm not sure this is 
really necessary, but you might want to confirm that it is a no-op; if the cast 
does add overhead, wrap these in `if (!is.timestamp(time1)) { cast }`.

##########
File path: r/R/dplyr-funcs-datetime.R
##########
@@ -189,6 +189,65 @@ register_bindings_datetime <- function() {
   })
 }
 
+register_bindings_duration <- function() {
+  register_binding("difftime", function(time1,
+                                        time2,
+                                        tz,
+                                        units = c("auto", "secs", "mins",
+                                                  "hours", "days", "weeks")) {
+    units <- match.arg(units)
+    if (units != "secs") {
+      abort("`difftime()` with units other than seconds not supported in 
Arrow")
+    }
+
+    if (!missing(tz)) {
+      warn("`tz` is an optional argument to `difftime()` in R and will not be 
used in Arrow")
+    }
+
+    time1 <- build_expr("cast", time1, options = cast_options(to_type = 
timestamp()))
+    time2 <- build_expr("cast", time2, options = cast_options(to_type = 
timestamp()))
+
+    build_expr("cast", time1 - time2, options = cast_options(to_type = 
duration("s")))
+  })
+
+  register_binding("as.difftime", function(x,
+                                           format = "%X",
+                                           units = "auto") {
+    # windows doesn't seem to like "%X"
+    if (format == "%X" & tolower(Sys.info()[["sysname"]]) == "windows") {
+      format <- "%H:%M:%S"
+    }
+
+    if (units != "secs") {
+      abort("`as.difftime()` with units other than seconds not supported in 
Arrow")
+    }

Review comment:
       Same thing here: having the default always error is not great. Let's 
make the default `"secs"`.

##########
File path: r/R/dplyr-funcs-datetime.R
##########
@@ -189,6 +189,65 @@ register_bindings_datetime <- function() {
   })
 }
 
+register_bindings_duration <- function() {
+  register_binding("difftime", function(time1,
+                                        time2,
+                                        tz,
+                                        units = c("auto", "secs", "mins",
+                                                  "hours", "days", "weeks")) {
+    units <- match.arg(units)
+    if (units != "secs") {
+      abort("`difftime()` with units other than seconds not supported in 
Arrow")
+    }
+
+    if (!missing(tz)) {
+      warn("`tz` is an optional argument to `difftime()` in R and will not be 
used in Arrow")
+    }
+
+    time1 <- build_expr("cast", time1, options = cast_options(to_type = 
timestamp()))
+    time2 <- build_expr("cast", time2, options = cast_options(to_type = 
timestamp()))
+
+    build_expr("cast", time1 - time2, options = cast_options(to_type = 
duration("s")))
+  })
+
+  register_binding("as.difftime", function(x,
+                                           format = "%X",
+                                           units = "auto") {
+    # windows doesn't seem to like "%X"
+    if (format == "%X" & tolower(Sys.info()[["sysname"]]) == "windows") {
+      format <- "%H:%M:%S"
+    }
+
+    if (units != "secs") {
+      abort("`as.difftime()` with units other than seconds not supported in 
Arrow")
+    }
+
+    if (call_binding("is.character", x)) {
+      x <- build_expr("strptime", x, options = list(format = format, unit = 
0L))
+      y <- build_expr("strptime", "0:0:0", options = list(format = "%H:%M:%S", 
unit = 0L))
+      diff_x_y <- call_binding("difftime", x, y, units = "secs")
+      return(diff_x_y)
+    }

Review comment:
       Have you looked into doing strptime on `x` and then converting to 
`time32()`? I think that, plus casting to duration, will do what you want 
without needing to construct a separate `y` here.

##########
File path: r/tests/testthat/test-dplyr-funcs-datetime.R
##########
@@ -974,3 +974,181 @@ test_that("date() errors with unsupported inputs", {
     regexp = "Unsupported cast from double to date32 using function 
cast_date32"
   )
 })
+
+test_that("difftime works correctly", {

Review comment:
       ```suggestion
   test_that("difftime()", {
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to