This is an automated email from the ASF dual-hosted git repository.
jonkeane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 435d8bae14 GH-48664: [R] Implement support for keepNA = FALSE in
base::nchar() (#48665)
435d8bae14 is described below
commit 435d8bae14f02ded1a1219b4ecbaa585105fbbc8
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Mon Jan 19 00:10:58 2026 +0900
GH-48664: [R] Implement support for keepNA = FALSE in base::nchar() (#48665)
### Rationale for this change
Should address TODO added in commit 81e1fbc1de (ARROW-17665)
https://github.com/apache/arrow/blob/744f0ec2cf9f8716fcea408d67ede9c14a7e6954/r/R/dplyr-funcs-string.R#L488
This change implements the `keepNA = FALSE` handling that the TODO left unimplemented.
### What changes are included in this PR?
This PR implements support for `keepNA = FALSE` in `base::nchar()`.
### Are these changes tested?
Yes. A unit test was added.
### Are there any user-facing changes?
Yes. Now `nchar(..., keepNA = FALSE)` should work.
* GitHub Issue: #48664
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Jonathan Keane <[email protected]>
---
r/NEWS.md | 4 ++++
r/R/dplyr-funcs-string.R | 17 ++++++++++++-----
r/tests/testthat/test-dplyr-funcs-string.R | 10 ++++++++++
3 files changed, 26 insertions(+), 5 deletions(-)
diff --git a/r/NEWS.md b/r/NEWS.md
index e9f7a591ce..9b3cef7e2d 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -26,6 +26,10 @@
- Ensure `MAKEFLAGS` being passed during compilation (#48341)
- Update bundled RE2 to enable Alpine Linux builds (#48011)
+## New features
+
+- `nchar()` supports the `keepNA = FALSE` argument to replace `NA` values with
`2` (@HyukjinKwon, #48665).
+
# arrow 22.0.0
## New features
diff --git a/r/R/dplyr-funcs-string.R b/r/R/dplyr-funcs-string.R
index 36b88fc2f1..158bae2db8 100644
--- a/r/R/dplyr-funcs-string.R
+++ b/r/R/dplyr-funcs-string.R
@@ -481,18 +481,25 @@ register_bindings_string_other <- function() {
if (allowNA) {
arrow_not_supported("allowNA = TRUE")
}
- if (is.na(keepNA)) {
+ keepNA_is_na <- is.na(keepNA)
+ if (keepNA_is_na) {
keepNA <- !identical(type, "width")
}
- if (!keepNA) {
- # TODO: I think there is a fill_null kernel we could use, set null to 2
+ if (keepNA && !keepNA_is_na) {
arrow_not_supported("keepNA = TRUE")
}
if (identical(type, "bytes")) {
- Expression$create("binary_length", x)
+ result <- Expression$create("binary_length", x)
} else {
- Expression$create("utf8_length", x)
+ result <- Expression$create("utf8_length", x)
}
+
+ if (!keepNA) {
+ # When keepNA = FALSE, NA values should return 2 (length of "NA" as
string)
+ result <- Expression$create("coalesce", result, Expression$scalar(2L))
+ }
+
+ result
},
notes = "`allowNA = TRUE` and `keepNA = TRUE` not supported"
)
diff --git a/r/tests/testthat/test-dplyr-funcs-string.R
b/r/tests/testthat/test-dplyr-funcs-string.R
index 26b091b9e0..58da3ea233 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -1442,6 +1442,16 @@ test_that("nchar with namespacing", {
)
})
+test_that("nchar with keepNA = FALSE", {
+ df <- tibble(x = c("foo", NA_character_, "bar"))
+ compare_dplyr_binding(
+ .input |>
+ mutate(n = nchar(x, keepNA = FALSE)) |>
+ collect(),
+ df
+ )
+})
+
test_that("str_trim()", {
compare_dplyr_binding(
.input |>