This is an automated email from the ASF dual-hosted git repository.
thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 2c66d62885 GH-45643: [R] Implement hms functions to create and
manipulate time of day variables (#46206)
2c66d62885 is described below
commit 2c66d6288543b28985c20bb17776d51ce843a670
Author: Nic Crane <[email protected]>
AuthorDate: Sat May 10 11:26:02 2025 -0400
GH-45643: [R] Implement hms functions to create and manipulate time of day
variables (#46206)
### Rationale for this change
Add support for hms functions for creating time objects
### What changes are included in this PR?
Implementing `hms::hms()` and `hms::as_hms()`
### Are these changes tested?
Yes
### Are there any user-facing changes?
Yes
* GitHub Issue: #45643
Authored-by: Nic Crane <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
---
r/R/dplyr-funcs-datetime.R | 57 +++++++++++++++++++++++
r/R/dplyr-funcs-doc.R | 5 +++
r/man/acero.Rd | 7 +++
r/tests/testthat/test-dplyr-funcs-datetime.R | 67 ++++++++++++++++++++++++++++
4 files changed, 136 insertions(+)
diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index 47c8193688..b8bc2b6063 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -27,6 +27,7 @@ register_bindings_datetime <- function() {
register_bindings_duration_helpers()
register_bindings_datetime_parsers()
register_bindings_datetime_rounding()
+ register_bindings_hms()
}
register_bindings_datetime_utility <- function() {
@@ -826,3 +827,59 @@ register_bindings_datetime_rounding <- function() {
}
)
}
+
+register_bindings_hms <- function() {
+ numeric_to_time32 <- function(x) {
+ # The only numeric which can be cast to time32 is int32 so double cast to
make sure
+ cast(cast(x, int32()), time32(unit = "s"))
+ }
+
+ datetime_to_time32 <- function(datetime) {
+ hour <- call_binding("hour", datetime)
+ min <- call_binding("minute", datetime)
+ sec <- call_binding("second", datetime)
+
+ return(call_binding("hms::hms", seconds = sec, minutes = min, hours =
hour))
+ }
+
+ register_binding(
+ "hms::hms",
+ function(seconds = 0, minutes = 0, hours = 0, days = 0) {
+ if (!call_binding("is.numeric", seconds) || !call_binding("is.numeric",
minutes) ||
+ !call_binding("is.numeric", hours) || !call_binding("is.numeric",
days)) {
+ abort("All arguments must be numeric or NA_real_")
+ }
+
+ total_secs <- seconds +
+ Expression$create("multiply_checked", minutes, 60) +
+ Expression$create("multiply_checked", hours, 3600) +
+ Expression$create("multiply_checked", days, 86400)
+
+ return(numeric_to_time32(total_secs))
+ }
+ )
+
+ register_binding(
+ "hms::as_hms",
+ function(x = numeric()) {
+ if (call_binding("is.POSIXct", x)) {
+ return(datetime_to_time32(x))
+ }
+
+ if (call_binding("is.numeric", x)) {
+ return(numeric_to_time32(x))
+ }
+
+ if (call_binding("is.character", x)) {
+ dash <- call_binding("gsub", ":", "-", x)
+ as_date_time_string <- call_binding("str_c", "1970-01-01", dash, sep =
"-")
+ as_date_time <- Expression$create(
+ "strptime",
+ as_date_time_string,
+ options = list(format = "%Y-%m-%d-%H-%M-%S", unit = 0L)
+ )
+ return(datetime_to_time32(as_date_time))
+ }
+ }
+ )
+}
diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
index 470c89ecc3..64e4aab0f2 100644
--- a/r/R/dplyr-funcs-doc.R
+++ b/r/R/dplyr-funcs-doc.R
@@ -297,6 +297,11 @@
#' * [`ymd_hms()`][lubridate::ymd_hms()]: `locale` argument not supported
#' * [`yq()`][lubridate::yq()]: `locale` argument not supported
#'
+#' ## hms
+#'
+#' * [`hms()`][hms::hms()]: subsecond times not supported
+#' * [`hms()`][hms::as_hms()]: subsecond times not supported
+#'
#' ## methods
#'
#' * [`is()`][methods::is()]
diff --git a/r/man/acero.Rd b/r/man/acero.Rd
index aceb533a15..345bb099d5 100644
--- a/r/man/acero.Rd
+++ b/r/man/acero.Rd
@@ -285,6 +285,13 @@ On Linux and OS X additionally a, A, b, B, Om, p, r are
available.
}
}
+\subsection{hms}{
+\itemize{
+\item \code{\link[hms:hms]{hms()}}: subsecond times not supported
+\item \code{\link[hms:hms]{hms()}}: subsecond times not supported
+}
+}
+
\subsection{methods}{
\itemize{
\item \code{\link[methods:is]{is()}}
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R
b/r/tests/testthat/test-dplyr-funcs-datetime.R
index d613a9cc5c..a71ae72164 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -20,6 +20,7 @@ skip_on_r_older_than("3.5")
library(lubridate, warn.conflicts = FALSE)
library(dplyr, warn.conflicts = FALSE)
+library(hms)
skip_if_not_available("acero")
# Skip these tests on CRAN due to build times > 10 mins
@@ -3726,3 +3727,69 @@ test_that("with_tz() and force_tz() can add timezone to
timestamp without timezo
)
)
})
+
+test_that("hms::hms", {
+ test_df <- tibble::tibble(
+ s = c(1, 2, 0, NA),
+ m = c(3, 4, 0, NA),
+ h = c(5, 6, 0, NA),
+ d = c(7, 8, 0, NA)
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ time = hms::hms(s),
+ time2 = hms::hms(s, m),
+ time3 = hms::hms(s, m, h),
+ time4 = hms::hms(s, m, h, d),
+ time5 = hms::hms(days = d)
+ ) %>%
+ collect(),
+ test_df
+ )
+
+ expect_error(
+ call_binding("hms::hms", "nonsense"),
+ regexp = "All arguments must be numeric or NA"
+ )
+
+ # Works for NA_real_
+ expect_silent(
+ call_binding("hms::hms", seconds = NA_real_)
+ )
+
+ # raw NA is logical so we error here
+ expect_error(
+ call_binding("hms::hms", seconds = NA),
+ regexp = "All arguments must be numeric or NA_real_"
+ )
+
+})
+
+test_that("hms::as_hms", {
+ test_df <- tibble(
+ hms_string = c("0:7:45", "12:34:56"),
+ int = c(30L, 75L),
+ integerish_dbl = c(31, 76),
+ dbl = c(31.2, 76.4),
+ datetime = as.POSIXct(c(1645243500, 1745243500), tz = "UTC")
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ x = hms::as_hms(hms_string),
+ x2 = hms::as_hms(int),
+ x3 = hms::as_hms(integerish_dbl),
+ x4 = hms::as_hms(datetime)
+ ) %>%
+ collect(),
+ test_df
+ )
+
+ expect_error(
+ arrow_table(test_df) %>% mutate(y = hms::as_hms(dbl)) %>% collect(),
+ "was truncated converting to int32"
+ )
+})