dragosmg commented on code in PR #12589:
URL: https://github.com/apache/arrow/pull/12589#discussion_r861841576
##########
r/R/dplyr-funcs-datetime.R:
##########
@@ -357,6 +357,62 @@ register_bindings_duration <- function() {
delta <- delta$cast(int64())
start + delta$cast(duration("s"))
})
+ # Binding for `fast_strptime()`: checks the arguments and then builds a
+ # "strptime" expression from `x`, with `format` and `unit` as its options.
+ #
+ # Arguments:
+ # - x: input to parse; handed unchanged to the "strptime" expression.
+ # - format: a single format string; more than one value is rejected.
+ # - tz: must not be supplied by the caller (see the `missing()` check below).
+ # - lt: must be FALSE; the default, TRUE, is rejected.
+ # - cutoff_2000: only the default, 68L, is accepted.
+ # - unit: passed through make_valid_time_unit() before being used.
+ #
+ # Rejected inputs are reported via arrow_not_supported(), which is defined
+ # elsewhere (presumably it signals an error -- TODO confirm).
+ register_binding("fast_strptime", function(x,
+ format,
+ tz = "UTC",
+ lt = TRUE,
+ cutoff_2000 = 68L,
+ unit = "s") {
+ # TODO support multiple formats once
+ # https://issues.apache.org/jira/browse/ARROW-15665 is done
+ if (length(format) > 1) {
+ arrow_not_supported("multiple values for `format`")
+ }
+
+ # `missing()` is FALSE for any explicitly supplied `tz`, so even
+ # `tz = "UTC"` is rejected here
+ if (!missing(tz)) {
+ arrow_not_supported("Time zone argument")
+ }
+ # `lt` controls the output: `lt = TRUE` returns a POSIXlt (which doesn't play
+ # well with mutate, for example)
+ if (lt) {
+ arrow_not_supported("`lt = TRUE` argument")
+ }
+
+ if (cutoff_2000 != 68L) {
+ arrow_not_supported("`cutoff_2000` != 68L argument")
+ }
+
+ unit <- make_valid_time_unit(unit, c(valid_time64_units, valid_time32_units))
+
+ build_expr("strptime", x, options = list(format = format, unit = unit))
+ })
+ register_binding("parse_date_time", function(x,
+ orders,
+ tz = "UTC") {
+
+ # make all separators (non-letters and non-numbers) into "-"
+ x <- call_binding("gsub", "[^A-Za-z0-9]", "-", x)
+ # collapse multiple separators into a single one
+ x <- call_binding("gsub", "-{2,}", "-", x)
+
+ # TODO figure out how to parse strings that have no separators)
+ # we could insert separators at the "likely" positions, but it might be
+ # tricky given the possible combinations between dmy formats + locale
+
+ # each order is translated into 6 possible formats
+ formats <- build_formats(orders)
+ coalesce_output <- build_expr(
+ "coalesce",
+ build_expr("strptime", x, options = list(format = formats[1], unit = 0L,
error_is_null = TRUE)),
+ build_expr("strptime", x, options = list(format = formats[2], unit = 0L,
error_is_null = TRUE)),
+ build_expr("strptime", x, options = list(format = formats[3], unit = 0L,
error_is_null = TRUE)),
+ build_expr("strptime", x, options = list(format = formats[4], unit = 0L,
error_is_null = TRUE)),
+ build_expr("strptime", x, options = list(format = formats[5], unit = 0L,
error_is_null = TRUE)),
+ build_expr("strptime", x, options = list(format = formats[6], unit = 0L,
error_is_null = TRUE))
+ )
+
+ build_expr("assume_timezone", coalesce_output, options = list(timezone =
tz))
Review Comment:
Sure. Good call. Done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]