edponce commented on a change in pull request #11232:
URL: https://github.com/apache/arrow/pull/11232#discussion_r718772363



##########
File path: r/tests/testthat/test-dplyr-string-functions.R
##########
@@ -471,6 +471,46 @@ test_that("strsplit and str_split", {
   )
 })
 
+test_that("str_to_lower, str_to_upper, and str_to_title", {
+  df <- tibble(x = c("Foo", " B\na R", "ⱭɽⱤoW", "ıI"))

Review comment:
       Unicode handling is already tested in C++, so I will remove the Unicode characters from this test.

##########
File path: r/R/dplyr-functions.R
##########
@@ -330,6 +330,35 @@ arrow_string_join_function <- function(null_handling, null_replacement = NULL) {
   }
 }
 
+# Currently, Arrow does not support a locale option for string case conversion
+# functions, unlike stringr's API, so the 'locale' argument is only valid
+# for stringr's default value ("en"). The following are string functions that
+# take a 'locale' option as their second argument:
+#   str_to_lower
+#   str_to_upper
+#   str_to_title
+#
+# Arrow locale will be supported with ARROW-14126
+.arrow_string_function_with_locale_arg <- function(func, string, locale) {
+  if (!identical(locale, "en")) {
+    stop("Providing a value for 'locale' other than the default ('en') is not supported by Arrow. ",
+    "To change locale, use 'Sys.setlocale()'", call. = FALSE)
+  }
+  Expression$create(func, string)
+}
+
+nse_funcs$str_to_lower <- function(string, locale = "en") {
+  .arrow_string_function_with_locale_arg("utf8_lower", string, locale)
+}
+
+nse_funcs$str_to_upper <- function(string, locale = "en") {
+  .arrow_string_function_with_locale_arg("utf8_upper", string, locale)
+}
+
+nse_funcs$str_to_title <- function(string, locale = "en") {
+  .arrow_string_function_with_locale_arg("utf8_title", string, locale)
+}

Review comment:
       I agree this is a better approach.

##########
File path: r/R/dplyr-functions.R
##########
@@ -330,6 +330,35 @@ arrow_string_join_function <- function(null_handling, null_replacement = NULL) {
   }
 }
 
+# Currently, Arrow does not support a locale option for string case conversion
+# functions, unlike stringr's API, so the 'locale' argument is only valid
+# for stringr's default value ("en"). The following are string functions that
+# take a 'locale' option as their second argument:
+#   str_to_lower
+#   str_to_upper
+#   str_to_title
+#
+# Arrow locale will be supported with ARROW-14126
+.arrow_string_function_with_locale_arg <- function(func, string, locale) {
+  if (!identical(locale, "en")) {
+    stop("Providing a value for 'locale' other than the default ('en') is not supported by Arrow. ",
+    "To change locale, use 'Sys.setlocale()'", call. = FALSE)
+  }
+  Expression$create(func, string)
+}
+
+nse_funcs$str_to_lower <- function(string, locale = "en") {
+  .arrow_string_function_with_locale_arg("utf8_lower", string, locale)
+}
+
+nse_funcs$str_to_upper <- function(string, locale = "en") {
+  .arrow_string_function_with_locale_arg("utf8_upper", string, locale)
+}
+
+nse_funcs$str_to_title <- function(string, locale = "en") {
+  .arrow_string_function_with_locale_arg("utf8_title", string, locale)
+}

Review comment:
       Should `stop_if_locale_provided` be a "hidden" function (i.e., one whose name begins with a dot)?

##########
File path: r/R/dplyr-functions.R
##########
@@ -330,6 +330,35 @@ arrow_string_join_function <- function(null_handling, null_replacement = NULL) {
   }
 }
 
+# Currently, Arrow does not support a locale option for string case conversion
+# functions, unlike stringr's API, so the 'locale' argument is only valid
+# for stringr's default value ("en"). The following are string functions that
+# take a 'locale' option as their second argument:
+#   str_to_lower
+#   str_to_upper
+#   str_to_title
+#
+# Arrow locale will be supported with ARROW-14126
+.arrow_string_function_with_locale_arg <- function(func, string, locale) {
+  if (!identical(locale, "en")) {
+    stop("Providing a value for 'locale' other than the default ('en') is not supported by Arrow. ",
+    "To change locale, use 'Sys.setlocale()'", call. = FALSE)
+  }
+  Expression$create(func, string)
+}
+
+nse_funcs$str_to_lower <- function(string, locale = "en") {
+  .arrow_string_function_with_locale_arg("utf8_lower", string, locale)
+}
+
+nse_funcs$str_to_upper <- function(string, locale = "en") {
+  .arrow_string_function_with_locale_arg("utf8_upper", string, locale)
+}
+
+nse_funcs$str_to_title <- function(string, locale = "en") {
+  .arrow_string_function_with_locale_arg("utf8_title", string, locale)
+}

Review comment:
       Ok, thanks for the guidance.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to