dragosmg commented on code in PR #13196:
URL: https://github.com/apache/arrow/pull/13196#discussion_r880569374


##########
r/R/dplyr-datetime-helpers.R:
##########
@@ -201,19 +218,100 @@ build_formats <- function(orders) {
 }
 
 build_format_from_order <- function(order) {
-  year_chars <- c("%y", "%Y")
-  month_chars <- c("%m", "%B", "%b")
-  day_chars <- "%d"
-
-  outcome <- switch(
-    order,
-    "ymd" = expand.grid(year_chars, month_chars, day_chars),
-    "ydm" = expand.grid(year_chars, day_chars, month_chars),
-    "mdy" = expand.grid(month_chars, day_chars, year_chars),
-    "myd" = expand.grid(month_chars, year_chars, day_chars),
-    "dmy" = expand.grid(day_chars, month_chars, year_chars),
-    "dym" = expand.grid(day_chars, year_chars, month_chars)
+  char_list <- list(
+    "y" = c("%y", "%Y"),
+    "m" = c("%m", "%B", "%b"),
+    "d" = "%d",
+    "H" = "%H",
+    "M" = "%M",
+    "S" = "%S",
+    "I" = "%I"
+  )
+
+  split_order <- strsplit(order, split = "")[[1]]
+
+  outcome <- expand.grid(char_list[split_order])
+  formats_with_sep <- do.call(paste, c(outcome, sep = "-"))
+  formats_without_sep <- do.call(paste, c(outcome, sep = ""))
+  c(formats_with_sep, formats_without_sep)
+}
+
+process_data_for_parsing <- function(x,
+                                     orders) {
+
+  processed_x <- x$cast(string())
+
+  # make all separators (non-letters and non-numbers) into "-"
+  processed_x <- call_binding("gsub", "[^A-Za-z0-9]", "-", processed_x)
+  # collapse multiple separators into a single one
+  processed_x <- call_binding("gsub", "-{2,}", "-", processed_x)
+
+  # we need to transform `x` when orders are `ym`, `my`, and `yq`
+  # for `ym` and `my` orders we add a day ("01")
+  # TODO revisit after https://issues.apache.org/jira/browse/ARROW-16627
+  augmented_x_ym <- NULL
+  if (any(orders %in% c("ym", "my"))) {
+    # add day as "-01" if there is a "-" separator and as "01" if not
+    augmented_x_ym <- call_binding(
+      "if_else",
+      call_binding("grepl", "-", processed_x),
+      call_binding("paste0", processed_x, "-01"),
+      call_binding("paste0", processed_x, "01")
+    )
+  }
+
+  # for `yq` we need to transform the quarter into the start month (lubridate
+  # behaviour) and then add 01 to parse to the first day of the quarter
+  augmented_x_yq <- NULL
+  if (any(orders == "yq")) {
+    # extract everything that comes after the `-` separator, i.e. the quarter
+    # (e.g. 4 from 2022-4)
+    quarter_x <- call_binding("gsub", "^.*?-", "", processed_x)
+    # we should probably error if quarter is not in 1:4
+    # extract everything that comes before the `-`, i.e. the year (e.g. 2002
+    # in 2002-4)
+    year_x <- call_binding("gsub", "-.*$", "", processed_x)
+    quarter_x <- quarter_x$cast(int32())
+    month_x <- (quarter_x - 1) * 3 + 1
+    augmented_x_yq <- call_binding("paste0", year_x, "-", month_x, "-01")
+  }
+
+  list(
+    "augmented_x_ym" = augmented_x_ym,
+    "augmented_x_yq" = augmented_x_yq,
+    "processed_x" = processed_x
+  )
+}
+
+attempt_parsing <- function(x,
+                            orders) {
+  # translate orders into possible formats
+  formats <- build_formats(orders)
+
+  processed_data <- process_data_for_parsing(x, orders)
+
+  parse_attempt_exprs_list <- map(processed_data, build_strptime_exprs, 
formats)
+
+  # if all orders are in c("ym", "my", "yq") only attempt to parse the 
augmented_x
+  if (all(orders %in% c("ym", "my", "yq"))) {
+    parse_attempt_exprs_list$processed_x <- list()
+  }
+
+  purrr::flatten(parse_attempt_exprs_list)

Review Comment:
   It's a list of lists of expressions (one list of `Expressions` for each variant 
of `x`). We need to remove one level of nesting in order for the resulting `list` to play 
nicely with `build_expr("coalesce", args = list)`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to