thisisnic commented on code in PR #12154:
URL: https://github.com/apache/arrow/pull/12154#discussion_r913830229


##########
r/R/util.R:
##########
@@ -215,3 +215,138 @@ handle_csv_read_error <- function(e, schema, call) {
 is_compressed <- function(compression) {
   !identical(compression, "uncompressed")
 }
+
+parse_period_unit <- function(x) {
+
+  # the regexp also matches fractional multiples but, as in lubridate,
+  # only integer multiples of a known unit are supported
+  match_info <- regexpr(
+    pattern = " *(?<multiple>[0-9.,]+)? *(?<unit>[^ \t\n]+)",
+    text = x[[1]],
+    perl = TRUE
+  )
+
+  capture_start <- attr(match_info, "capture.start")
+  capture_length <- attr(match_info, "capture.length")
+  capture_end <- capture_start + capture_length - 1L
+
+  str_unit <- substr(x, capture_start[[2]], capture_end[[2]])
+  str_multiple <- substr(x, capture_start[[1]], capture_end[[1]])
+
+  known_units <- c("nanosecond", "microsecond", "millisecond", "second",
+                   "minute", "hour", "day", "week", "month", "quarter", "year")
+
+  # match the period unit
+  str_unit_start <- substr(str_unit, 1, 3)
+  unit <- as.integer(pmatch(str_unit_start, known_units)) - 1L
+
+  if (any(is.na(unit))) {
+    abort(sprintf("Unknown unit '%s'", str_unit))
+  }
+
+  # empty string in multiple interpreted as 1
+  if (capture_length[[1]] == 0) {
+    multiple <- 1L
+
+  } else {
+
+    # special cases: interpret fractions of 1 second as integer
+    # multiples of nanoseconds, microseconds, or milliseconds
+    # to mirror lubridate syntax
+    multiple <- as.numeric(str_multiple)
+
+    if (unit == 3L && multiple < 10^-6) {
+      unit <- 0L
+      multiple <- 10^9 * multiple
+    }
+    if (unit == 3L && multiple < 10^-3) {
+      unit <- 1L
+      multiple <- 10^6 * multiple
+    }
+    if (unit == 3L && multiple < 1) {
+      unit <- 2L
+      multiple <- 10^3 * multiple
+    }
+
+    multiple <- as.integer(multiple)
+  }
+
+
+  # more special cases: lubridate imposes sensible maximum
+  # values on the number of seconds, minutes and hours
+  if (unit == 3L && multiple > 60) {
+    abort("Rounding with second > 60 is not supported")
+  }
+  if (unit == 4L && multiple > 60) {
+    abort("Rounding with minute > 60 is not supported")
+  }
+  if (unit == 5L && multiple > 24) {
+    abort("Rounding with hour > 24 is not supported")
+  }
+
+  return(list(unit = unit, multiple = multiple))
+}
+
+
+# handles round/ceil/floor when unit is week and week_start is
+# a non-standard value (not Monday or Sunday)
+shift_temporal_to_week <- function(fn, x, week_start, options) {

Review Comment:
   Please can you add in a brief usage example or comment here just to make it 
more skimmable?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to