ianmcook commented on a change in pull request #10190:
URL: https://github.com/apache/arrow/pull/10190#discussion_r628341781
##########
File path: r/R/dplyr.R
##########
@@ -539,6 +541,44 @@ arrow_stringr_string_replace_function <- function(FUN,
max_replacements) {
}
}
+arrow_r_string_split_function <- function(FUN, reverse = FALSE, max_splits =
-1) {
+ function(x, split, fixed = FALSE, perl = FALSE, useBytes = FALSE) {
+
+ assert_that(is.string(split))
+
+ # if !fixed but no regex metachars in split pattern, allow to proceed as
split isn't regex
+ if (!fixed && contains_regex(split)) {
+ stop("Regular expression matching not supported in strsplit for Arrow",
call. = FALSE)
+ }
+ if (fixed && perl) {
+ warning("Argument 'perl = TRUE' will be ignored", call. = FALSE)
+ }
+ FUN("split_pattern", x, options = list(pattern = split, reverse = reverse,
max_splits = max_splits))
+ }
+}
+
+arrow_stringr_string_split_function <- function(FUN, reverse = FALSE) {
+ function(string, pattern, n = Inf, simplify = FALSE) {
+ opts <- get_stringr_pattern_options(enexpr(pattern))
+ if (!opts$fixed && contains_regex(opts$pattern)) {
+ stop("Regular expression matching not supported in str_split() for
Arrow", call. = FALSE)
+ }
+ if (opts$ignore_case) {
+ stop("Case-insensitive string splitting not supported in Arrow", call. =
FALSE)
+ }
+ if (n == 0) {
+ stop("Splitting strings into zero parts not supported in Arrow" , call.
= FALSE)
+ }
+ if (identical(n, Inf)) {
+ n <- 0L
+ }
+ if (simplify) {
+ warning("Argument 'simplify = TRUE' will be ignored", call. = FALSE)
+ }
+ FUN("split_pattern", string, options = list(pattern = opts$pattern,
reverse = reverse, max_splits = n - 1L))
Review comment:
I'm quite sure.
I'll open a new PR and add a couple of R tests to exercise this.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]