paleolimbot commented on a change in pull request #12506:
URL: https://github.com/apache/arrow/pull/12506#discussion_r823262420
##########
File path: r/R/dplyr-funcs-type.R
##########
@@ -120,6 +120,42 @@ register_bindings_type_cast <- function() {
}
build_expr("cast", x, options = cast_options(to_type = date32()))
})
+ register_binding("as.difftime", function(x,
+ format = "%X",
+ units = "auto",
+ tz = "UTC") {
+ # windows doesn't seem to like "%X"
+ if (format == "%X" & tolower(Sys.info()[["sysname"]]) == "windows") {
+ format <- "%H:%M:%S"
+ }
+
+ if (units != "secs") {
+ abort("`as.difftime()` with units other than seconds not supported in
Arrow")
+ }
+
+ if (call_binding("is.character", x)) {
+ x <- build_expr("strptime", x, options = list(format = format, tz = tz,
unit = 0L))
+ y <- build_expr("strptime", "0:0:0", options = list(format = "%H:%M:%S",
tz = tz, unit = 0L))
+ diff_x_y <- call_binding("difftime", x, y, units = "secs", tz = tz)
+ return(diff_x_y)
+ }
+
+ # numeric -> duration not supported in Arrow yet so we use time23() as
Review comment:
```suggestion
# numeric -> duration not supported in Arrow yet so we use time32() as
```
##########
File path: r/tests/testthat/test-dplyr-funcs-datetime.R
##########
@@ -904,3 +904,82 @@ test_that("date() errors with unsupported inputs", {
regexp = "Unsupported cast from double to date32 using function
cast_date32"
)
})
+test_that("difftime works correctly", {
+ test_df <- tibble(
+ time1 = as.POSIXct(
+ c("2021-02-20", "2021-07-31", "2021-10-30", "2021-01-31")
+ ),
+ time2 = as.POSIXct(
+ c("2021-02-20 00:02:01", "2021-07-31 00:03:54", "2021-10-30 00:05:45",
"2021-01-31 00:07:36")
+ ),
+ secs = c(121L, 234L, 345L, 456L)
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ secs2 = difftime(time1, time2, units = "secs")
+ ) %>%
+ collect(),
+ test_df,
+ ignore_attr = TRUE
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ secs2 = difftime(as.POSIXct("2022-03-07"), time1, units = "secs")
+ ) %>%
+ collect(),
+ test_df,
+ ignore_attr = TRUE
+ )
+
+ # units other than "secs" not supported in arrow
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ mins = difftime(time1, time2, units = "mins"),
+ hours = difftime(time1, time2, units = "hours"),
+ days = difftime(time1, time2, units = "days"),
+ weeks = difftime(time1, time2, units = "weeks")) %>%
Review comment:
```suggestion
weeks = difftime(time1, time2, units = "weeks")
) %>%
```
##########
File path: r/R/dplyr-funcs-type.R
##########
@@ -120,6 +120,42 @@ register_bindings_type_cast <- function() {
}
build_expr("cast", x, options = cast_options(to_type = date32()))
})
+ register_binding("as.difftime", function(x,
+ format = "%X",
+ units = "auto",
+ tz = "UTC") {
+ # windows doesn't seem to like "%X"
+ if (format == "%X" & tolower(Sys.info()[["sysname"]]) == "windows") {
+ format <- "%H:%M:%S"
+ }
+
+ if (units != "secs") {
+ abort("`as.difftime()` with units other than seconds not supported in
Arrow")
+ }
+
+ if (call_binding("is.character", x)) {
+ x <- build_expr("strptime", x, options = list(format = format, tz = tz,
unit = 0L))
+ y <- build_expr("strptime", "0:0:0", options = list(format = "%H:%M:%S",
tz = tz, unit = 0L))
+ diff_x_y <- call_binding("difftime", x, y, units = "secs", tz = tz)
+ return(diff_x_y)
+ }
+
+ # numeric -> duration not supported in Arrow yet so we use time23() as
+ # intermediate step
+ # TODO revisit once https://issues.apache.org/jira/browse/ARROW-15862 done
+ if (call_binding("is.numeric", x)) {
Review comment:
You might be able to reduce the complexity below the threshold by doing:
```r
if (call_binding("is.integer")) { ... }
if (call_binding("is.double")) { stop("not supported") }
```
(rather than nesting them)
##########
File path: r/tests/testthat/test-dplyr-funcs-datetime.R
##########
@@ -904,3 +904,82 @@ test_that("date() errors with unsupported inputs", {
regexp = "Unsupported cast from double to date32 using function
cast_date32"
)
})
+test_that("difftime works correctly", {
+ test_df <- tibble(
+ time1 = as.POSIXct(
+ c("2021-02-20", "2021-07-31", "2021-10-30", "2021-01-31")
+ ),
+ time2 = as.POSIXct(
+ c("2021-02-20 00:02:01", "2021-07-31 00:03:54", "2021-10-30 00:05:45",
"2021-01-31 00:07:36")
+ ),
+ secs = c(121L, 234L, 345L, 456L)
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ secs2 = difftime(time1, time2, units = "secs")
+ ) %>%
+ collect(),
+ test_df,
+ ignore_attr = TRUE
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ secs2 = difftime(as.POSIXct("2022-03-07"), time1, units = "secs")
+ ) %>%
+ collect(),
+ test_df,
+ ignore_attr = TRUE
+ )
+
+ # units other than "secs" not supported in arrow
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ mins = difftime(time1, time2, units = "mins"),
+ hours = difftime(time1, time2, units = "hours"),
+ days = difftime(time1, time2, units = "days"),
+ weeks = difftime(time1, time2, units = "weeks")) %>%
+ collect(),
+ test_df,
+ warning = TRUE,
+ ignore_attr = TRUE
+ )
+
+ skip_on_os("windows")
+ test_df_with_tz <- tibble(
+ time1 = as.POSIXct(
+ c("2021-02-20", "2021-07-31", "2021-10-30", "2021-01-31"),
+ tz = "Europe/London"
+ ),
+ time2 = as.POSIXct(
+ c("2021-02-20 00:02:01", "2021-07-31 00:03:54", "2021-10-30 00:05:45",
"2021-01-31 00:07:36"),
+ tz = "America/Chicago"
+ ),
+ secs = c(121L, 234L, 345L, 456L)
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(secs2 = difftime(time2, time1, units = "secs", tz =
"Pacific/Marquesas")) %>%
+ collect(),
+ test_df_with_tz
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ secs2 =
+ difftime(
Review comment:
```suggestion
secs2 = difftime(
```
##########
File path: r/tests/testthat/test-dplyr-funcs-datetime.R
##########
@@ -904,3 +904,82 @@ test_that("date() errors with unsupported inputs", {
regexp = "Unsupported cast from double to date32 using function
cast_date32"
)
})
+test_that("difftime works correctly", {
+ test_df <- tibble(
+ time1 = as.POSIXct(
+ c("2021-02-20", "2021-07-31", "2021-10-30", "2021-01-31")
+ ),
+ time2 = as.POSIXct(
+ c("2021-02-20 00:02:01", "2021-07-31 00:03:54", "2021-10-30 00:05:45",
"2021-01-31 00:07:36")
+ ),
+ secs = c(121L, 234L, 345L, 456L)
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ secs2 = difftime(time1, time2, units = "secs")
+ ) %>%
+ collect(),
+ test_df,
+ ignore_attr = TRUE
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ secs2 = difftime(as.POSIXct("2022-03-07"), time1, units = "secs")
+ ) %>%
+ collect(),
+ test_df,
+ ignore_attr = TRUE
+ )
+
+ # units other than "secs" not supported in arrow
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ mins = difftime(time1, time2, units = "mins"),
+ hours = difftime(time1, time2, units = "hours"),
+ days = difftime(time1, time2, units = "days"),
+ weeks = difftime(time1, time2, units = "weeks")) %>%
+ collect(),
+ test_df,
+ warning = TRUE,
+ ignore_attr = TRUE
+ )
+
+ skip_on_os("windows")
+ test_df_with_tz <- tibble(
+ time1 = as.POSIXct(
+ c("2021-02-20", "2021-07-31", "2021-10-30", "2021-01-31"),
+ tz = "Europe/London"
+ ),
+ time2 = as.POSIXct(
+ c("2021-02-20 00:02:01", "2021-07-31 00:03:54", "2021-10-30 00:05:45",
"2021-01-31 00:07:36"),
+ tz = "America/Chicago"
+ ),
+ secs = c(121L, 234L, 345L, 456L)
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(secs2 = difftime(time2, time1, units = "secs", tz =
"Pacific/Marquesas")) %>%
+ collect(),
Review comment:
```suggestion
.input %>%
mutate(secs2 = difftime(time2, time1, units = "secs", tz =
"Pacific/Marquesas")) %>%
collect(),
```
##########
File path: r/R/dplyr-funcs-type.R
##########
@@ -120,6 +120,42 @@ register_bindings_type_cast <- function() {
}
build_expr("cast", x, options = cast_options(to_type = date32()))
})
+ register_binding("as.difftime", function(x,
+ format = "%X",
+ units = "auto",
+ tz = "UTC") {
+ # windows doesn't seem to like "%X"
+ if (format == "%X" & tolower(Sys.info()[["sysname"]]) == "windows") {
+ format <- "%H:%M:%S"
+ }
+
+ if (units != "secs") {
+ abort("`as.difftime()` with units other than seconds not supported in
Arrow")
+ }
+
+ if (call_binding("is.character", x)) {
+ x <- build_expr("strptime", x, options = list(format = format, tz = tz,
unit = 0L))
+ y <- build_expr("strptime", "0:0:0", options = list(format = "%H:%M:%S",
tz = tz, unit = 0L))
+ diff_x_y <- call_binding("difftime", x, y, units = "secs", tz = tz)
+ return(diff_x_y)
+ }
+
+ # numeric -> duration not supported in Arrow yet so we use time23() as
+ # intermediate step
+ # TODO revisit once https://issues.apache.org/jira/browse/ARROW-15862 done
+ if (call_binding("is.numeric", x)) {
+ if (call_binding("is.integer", x)) {
+ x <- build_expr("cast", x, options = cast_options(to_type =
time32(unit = "s")))
+ y <- build_expr("cast", 0L, options = cast_options(to_type =
time32(unit = "s")))
+ diff_x_y <- call_binding("difftime", x, y, units = "secs", tz = tz)
+ return(diff_x_y)
+ } else {
+ abort("`as.difftime()` with double/float inputs not supported in Arrow ")
Review comment:
```suggestion
abort("`as.difftime()` with double/float inputs not supported in
Arrow ")
```
##########
File path: r/R/dplyr-funcs-type.R
##########
@@ -120,6 +120,42 @@ register_bindings_type_cast <- function() {
}
build_expr("cast", x, options = cast_options(to_type = date32()))
})
+ register_binding("as.difftime", function(x,
Review comment:
My instinct would have been to look for this in dplyr-funcs-datetime.R
(but also fine here if there are other functions like it that I'm not
remembering!)
##########
File path: r/tests/testthat/test-dplyr-funcs-type.R
##########
@@ -874,3 +874,54 @@ test_that("as.Date() converts successfully from date,
timestamp, integer, char a
test_df
)
})
+
+test_that("as.difftime() works properly", {
+ test_df <- tibble(
+ hms_string = c("0:7:45", "12:34:56"),
+ hm_string = c("7:45", "12:34"),
+ int = c(30L, 75L),
+ dbl = c(31, 76)
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(hms_difftime = as.difftime(hms_string, units = "secs")) %>%
+ collect(),
Review comment:
```suggestion
mutate(hms_difftime = as.difftime(hms_string, units = "secs")) %>%
collect(),
```
##########
File path: r/tests/testthat/test-dplyr-funcs-datetime.R
##########
@@ -904,3 +904,82 @@ test_that("date() errors with unsupported inputs", {
regexp = "Unsupported cast from double to date32 using function
cast_date32"
)
})
+test_that("difftime works correctly", {
+ test_df <- tibble(
+ time1 = as.POSIXct(
+ c("2021-02-20", "2021-07-31", "2021-10-30", "2021-01-31")
+ ),
+ time2 = as.POSIXct(
+ c("2021-02-20 00:02:01", "2021-07-31 00:03:54", "2021-10-30 00:05:45",
"2021-01-31 00:07:36")
+ ),
+ secs = c(121L, 234L, 345L, 456L)
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ secs2 = difftime(time1, time2, units = "secs")
+ ) %>%
+ collect(),
+ test_df,
+ ignore_attr = TRUE
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ secs2 = difftime(as.POSIXct("2022-03-07"), time1, units = "secs")
+ ) %>%
+ collect(),
+ test_df,
+ ignore_attr = TRUE
+ )
+
+ # units other than "secs" not supported in arrow
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ mins = difftime(time1, time2, units = "mins"),
Review comment:
If it's important to test all of these cases (is it?), you should
probably have each in their own `compare_dplyr_binding()` block (my reading of
this is that any one of those cases could *not* throw a warning and the test
would still pass).
##########
File path: r/tests/testthat/test-dplyr-funcs-datetime.R
##########
@@ -904,3 +904,82 @@ test_that("date() errors with unsupported inputs", {
regexp = "Unsupported cast from double to date32 using function
cast_date32"
)
})
+test_that("difftime works correctly", {
+ test_df <- tibble(
+ time1 = as.POSIXct(
+ c("2021-02-20", "2021-07-31", "2021-10-30", "2021-01-31")
+ ),
+ time2 = as.POSIXct(
+ c("2021-02-20 00:02:01", "2021-07-31 00:03:54", "2021-10-30 00:05:45",
"2021-01-31 00:07:36")
+ ),
+ secs = c(121L, 234L, 345L, 456L)
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ secs2 = difftime(time1, time2, units = "secs")
+ ) %>%
+ collect(),
+ test_df,
+ ignore_attr = TRUE
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ secs2 = difftime(as.POSIXct("2022-03-07"), time1, units = "secs")
+ ) %>%
+ collect(),
+ test_df,
+ ignore_attr = TRUE
+ )
+
+ # units other than "secs" not supported in arrow
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ mins = difftime(time1, time2, units = "mins"),
+ hours = difftime(time1, time2, units = "hours"),
+ days = difftime(time1, time2, units = "days"),
+ weeks = difftime(time1, time2, units = "weeks")) %>%
+ collect(),
+ test_df,
+ warning = TRUE,
+ ignore_attr = TRUE
+ )
+
+ skip_on_os("windows")
+ test_df_with_tz <- tibble(
+ time1 = as.POSIXct(
+ c("2021-02-20", "2021-07-31", "2021-10-30", "2021-01-31"),
+ tz = "Europe/London"
+ ),
+ time2 = as.POSIXct(
+ c("2021-02-20 00:02:01", "2021-07-31 00:03:54", "2021-10-30 00:05:45",
"2021-01-31 00:07:36"),
+ tz = "America/Chicago"
+ ),
+ secs = c(121L, 234L, 345L, 456L)
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(secs2 = difftime(time2, time1, units = "secs", tz =
"Pacific/Marquesas")) %>%
+ collect(),
+ test_df_with_tz
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ secs2 =
+ difftime(
+ as.POSIXct("2022-03-07", tz = "Europe/Bucharest"),
+ time1,
+ units = "secs",
+ tz = "Pacific/Marquesas")
Review comment:
```suggestion
tz = "Pacific/Marquesas"
)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]