dragosmg commented on code in PR #13196:
URL: https://github.com/apache/arrow/pull/13196#discussion_r905009285
##########
r/R/dplyr-datetime-helpers.R:
##########
@@ -200,20 +236,193 @@ build_formats <- function(orders) {
unique(formats)
}
+#' Build formats from a single order
+#'
+#' @param order a single string date-time format, such as `"ymd"` or
`"ymd_hms"`
+#'
+#' @return a vector of all possible formats derived from the input
+#' order
+#'
+#' @noRd
build_format_from_order <- function(order) {
- year_chars <- c("%y", "%Y")
- month_chars <- c("%m", "%B", "%b")
- day_chars <- "%d"
-
- outcome <- switch(
- order,
- "ymd" = expand.grid(year_chars, month_chars, day_chars),
- "ydm" = expand.grid(year_chars, day_chars, month_chars),
- "mdy" = expand.grid(month_chars, day_chars, year_chars),
- "myd" = expand.grid(month_chars, year_chars, day_chars),
- "dmy" = expand.grid(day_chars, month_chars, year_chars),
- "dym" = expand.grid(day_chars, year_chars, month_chars)
+ char_list <- list(
+ "y" = c("%y", "%Y"),
+ "m" = c("%m", "%B", "%b"),
+ "d" = "%d",
+ "H" = "%H",
+ "M" = "%M",
+ "S" = "%S",
+ "I" = "%I"
+ )
+
+ split_order <- strsplit(order, split = "")[[1]]
+
+ outcome <- expand.grid(char_list[split_order])
+ # we combine formats with and without the "-" separator, we will later
+ # coalesce through all of them (benchmarking indicated this is a more
+ # computationally efficient approach rather than figuring out if a string has
+ # separators or not and applying only )
+ # during parsing if the string to be parsed does not contain a separator
+ formats_with_sep <- do.call(paste, c(outcome, sep = "-"))
+ formats_without_sep <- do.call(paste, c(outcome, sep = ""))
+ c(formats_with_sep, formats_without_sep)
+}
+
+#' Process data in preparation for parsing
+#'
+#' `process_data_for_parsing()` takes a data column and a vector of `orders`
and
+#' prepares several versions of the input data:
+#' * `processed_x` is a version of `x` where all separators were replaced
with
+#' `"-"` and multiple separators were collapsed into a single one. This
element
+#' is only set to an empty list when the `orders` argument indicate we're only
+#' interested in parsing the augmented version of `x`.
+#' * each of the other 3 elements augment `x` in some way
+#' * `augmented_x_ym` - builds the `ym` and `my` formats by adding `"01"`
+#' (to indicate the first day of the month)
+#' * `augmented_x_yq` - transforms the `yq` format to `ymd`, by deriving the
+#' first month of the quarter and adding `"01"` to indicate the first day
+#' * `augmented_x_qy` - transforms the `qy` format to `ymd` in a similar
+#' manner to `"yq"`
+#'
+#' @param x an Expression corresponding to a character or numeric vector of
+#' dates to be parsed.
+#' @param orders a character vector of date-time formats.
+#'
+#' @return a list made up of 4 lists, each a different version of x:
+#' * `processed_x`
+#' * `augmented_x_ym`
+#' * `augmented_x_yq`
+#' * `augmented_x_qy`
+#' @noRd
+process_data_for_parsing <- function(x,
+ orders) {
Review Comment:
Done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]