ianmcook commented on a change in pull request #10190:
URL: https://github.com/apache/arrow/pull/10190#discussion_r628601274



##########
File path: r/tests/testthat/test-dplyr-string-functions.R
##########
@@ -239,7 +254,172 @@ test_that("str_replace and str_replace_all", {
 
 })
 
-test_that("backreferences in pattern", {
+test_that("strsplit and str_split", {
+
+  df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = strsplit(x, "and")) %>%
+      collect(),
+    df
+  )
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = strsplit(x, "and.*", fixed = TRUE)) %>%
+      collect(),
+    df
+  )
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_split(x, "and")) %>%
+      collect(),
+    df
+  )
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_split(x, "and", n = 2)) %>%
+      collect(),
+    df
+  )
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_split(x, fixed("and"), n = 2)) %>%
+      collect(),
+    df
+  )
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_split(x, regex("and"), n = 2)) %>%
+      collect(),
+    df
+  )
+
+})
+
+test_that("arrow_*_split_whitespace functions", {
+
+  # use only ASCII whitespace characters
+  df_ascii <- tibble(x = c("Foo\nand bar", "baz\tand qux and quux"))
+
+  # use only non-ASCII whitespace characters
+  df_utf8 <- tibble(x = c("Foo\u00A0and\u2000bar", "baz\u2006and\u1680qux\u3000and\u2008quux"))
+
+  df_split <- tibble(x = list(c("Foo", "and", "bar"), c("baz", "and", "qux", "and", "quux")))
+
+  expect_equivalent(
+    df_ascii %>%
+      Table$create() %>%
+      mutate(x = arrow_ascii_split_whitespace(x)) %>%

Review comment:
       Added in https://github.com/apache/arrow/pull/10271




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Reply via email to