This is an automated email from the ASF dual-hosted git repository.
rok pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 0330353a93 ARROW-14819: [R] Binding for lubridate::qday (#13440)
0330353a93 is described below
commit 0330353a93eff799616bf66e0e994236393458f7
Author: Rok Mihevc <[email protected]>
AuthorDate: Wed Jul 20 09:54:09 2022 +0200
ARROW-14819: [R] Binding for lubridate::qday (#13440)
This adds lubridate-like `qday` function. Counts number of days elapsed
since beginning of the quarter.
Lead-authored-by: Rok <[email protected]>
Co-authored-by: Rok Mihevc <[email protected]>
Signed-off-by: Rok <[email protected]>
---
r/NEWS.md | 1 +
r/R/dplyr-funcs-datetime.R | 8 ++++++
r/src/compute.cpp | 29 ++++++++++++++++++++
r/tests/testthat/test-dplyr-funcs-datetime.R | 40 ++++++++++++++++++++++++++++
4 files changed, 78 insertions(+)
diff --git a/r/NEWS.md b/r/NEWS.md
index 59245b971d..560e484c33 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -31,6 +31,7 @@
Instead of these, use the `read_ipc_file()` and `write_ipc_file()` for IPC
files, or,
`read_ipc_stream()` and `write_ipc_stream()` for IPC streams.
* `write_parquet()` now defaults to writing Parquet format version 2.4 (was
1.0). Previously deprecated arguments `properties` and `arrow_properties` have
been removed; if you need to deal with these lower-level properties objects
directly, use `ParquetFileWriter`, which `write_parquet()` wraps.
+* added `lubridate::qday()` (day of quarter)
# arrow 8.0.0
diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index 7d11cdc113..1db6c647d5 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -209,6 +209,14 @@ register_bindings_datetime_components <- function() {
build_expr("month", x)
})
+ register_binding("lubridate::qday", function(x) {
+ # We calculate day of quarter by flooring timestamp to beginning of
quarter and
+ # calculating days between beginning of quarter and timestamp/date in
question.
+ # Since we use one one-based numbering we add one.
+ floored_x <- build_expr("floor_temporal", x, options = list(unit = 9L))
+ build_expr("days_between", floored_x, x) + Expression$scalar(1L)
+ })
+
register_binding("lubridate::am", function(x) {
hour <- Expression$create("hour", x)
hour < 12
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 0db558972e..885af3f7ab 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -519,6 +519,35 @@ std::shared_ptr<arrow::compute::FunctionOptions>
make_compute_options(
return out;
}
+ if (func_name == "round_temporal" || func_name == "floor_temporal" ||
+ func_name == "ceil_temporal") {
+ using Options = arrow::compute::RoundTemporalOptions;
+
+ int64_t multiple = 1;
+ enum arrow::compute::CalendarUnit unit = arrow::compute::CalendarUnit::DAY;
+ bool week_starts_monday = true;
+ bool ceil_is_strictly_greater = true;
+ bool calendar_based_origin = true;
+
+ if (!Rf_isNull(options["multiple"])) {
+ multiple = cpp11::as_cpp<int64_t>(options["multiple"]);
+ }
+ if (!Rf_isNull(options["unit"])) {
+ unit = cpp11::as_cpp<enum arrow::compute::CalendarUnit>(options["unit"]);
+ }
+ if (!Rf_isNull(options["week_starts_monday"])) {
+ week_starts_monday = cpp11::as_cpp<bool>(options["week_starts_monday"]);
+ }
+ if (!Rf_isNull(options["ceil_is_strictly_greater"])) {
+ ceil_is_strictly_greater =
cpp11::as_cpp<bool>(options["ceil_is_strictly_greater"]);
+ }
+ if (!Rf_isNull(options["calendar_based_origin"])) {
+ calendar_based_origin =
cpp11::as_cpp<bool>(options["calendar_based_origin"]);
+ }
+ return std::make_shared<Options>(multiple, unit, week_starts_monday,
+ ceil_is_strictly_greater,
calendar_based_origin);
+ }
+
if (func_name == "round_to_multiple") {
using Options = arrow::compute::RoundToMultipleOptions;
auto out = std::make_shared<Options>(Options::Defaults());
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R
b/r/tests/testthat/test-dplyr-funcs-datetime.R
index f054373640..6caf061fc8 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -574,6 +574,26 @@ test_that("extract yday from timestamp", {
)
})
+test_that("extract qday from timestamp", {
+ test_df <- tibble::tibble(
+ time = as.POSIXct(seq(as.Date("1999-12-31", tz = "UTC"),
as.Date("2001-01-01", tz = "UTC"), by = "day"))
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = qday(time)) %>%
+ collect(),
+ test_df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = qday(as.POSIXct("2022-06-29 12:35"))) %>%
+ collect(),
+ test_df
+ )
+})
+
test_that("extract hour from timestamp", {
compare_dplyr_binding(
.input %>%
@@ -790,6 +810,26 @@ test_that("extract yday from date", {
)
})
+test_that("extract qday from date", {
+ test_df <- tibble::tibble(
+ date = seq(as.Date("1999-12-31"), as.Date("2001-01-01"), by = "day")
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = qday(date)) %>%
+ collect(),
+ test_df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = qday(as.Date("2022-06-29"))) %>%
+ collect(),
+ test_df
+ )
+})
+
test_that("leap_year mirror lubridate", {
compare_dplyr_binding(
.input %>%