This is an automated email from the ASF dual-hosted git repository.

jonkeane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 435d8bae14 GH-48664: [R] Implement support for keepNA = FALSE in 
base::nchar() (#48665)
435d8bae14 is described below

commit 435d8bae14f02ded1a1219b4ecbaa585105fbbc8
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Mon Jan 19 00:10:58 2026 +0900

    GH-48664: [R] Implement support for keepNA = FALSE in base::nchar() (#48665)
    
    ### Rationale for this change
    
    Should address TODO added in commit 81e1fbc1de (ARROW-17665)
    
    
https://github.com/apache/arrow/blob/744f0ec2cf9f8716fcea408d67ede9c14a7e6954/r/R/dplyr-funcs-string.R#L488
    
    Implemented the `keepNA = FALSE` argument so that it works as intended.
    
    ### What changes are included in this PR?
    
    This PR implements support for `keepNA = FALSE` in `base::nchar()`.
    
    ### Are these changes tested?
    
    A unit test was added.
    
    ### Are there any user-facing changes?
    
    Yes. Now `nchar(..., keepNA = FALSE)` should work.
    * GitHub Issue: #48664
    
    Authored-by: Hyukjin Kwon <[email protected]>
    Signed-off-by: Jonathan Keane <[email protected]>
---
 r/NEWS.md                                  |  4 ++++
 r/R/dplyr-funcs-string.R                   | 17 ++++++++++++-----
 r/tests/testthat/test-dplyr-funcs-string.R | 10 ++++++++++
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/r/NEWS.md b/r/NEWS.md
index e9f7a591ce..9b3cef7e2d 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -26,6 +26,10 @@
 - Ensure `MAKEFLAGS` being passed during compilation (#48341)
 - Update bundled RE2 to enable Alpine Linux builds (#48011)
 
+## New features
+
+- `nchar()` supports the `keepNA = FALSE` argument to replace `NA` values with 
`2` (@HyukjinKwon, #48665).
+
 # arrow 22.0.0
 
 ## New features
diff --git a/r/R/dplyr-funcs-string.R b/r/R/dplyr-funcs-string.R
index 36b88fc2f1..158bae2db8 100644
--- a/r/R/dplyr-funcs-string.R
+++ b/r/R/dplyr-funcs-string.R
@@ -481,18 +481,25 @@ register_bindings_string_other <- function() {
       if (allowNA) {
         arrow_not_supported("allowNA = TRUE")
       }
-      if (is.na(keepNA)) {
+      keepNA_is_na <- is.na(keepNA)
+      if (keepNA_is_na) {
         keepNA <- !identical(type, "width")
       }
-      if (!keepNA) {
-        # TODO: I think there is a fill_null kernel we could use, set null to 2
+      if (keepNA && !keepNA_is_na) {
         arrow_not_supported("keepNA = TRUE")
       }
       if (identical(type, "bytes")) {
-        Expression$create("binary_length", x)
+        result <- Expression$create("binary_length", x)
       } else {
-        Expression$create("utf8_length", x)
+        result <- Expression$create("utf8_length", x)
       }
+
+      if (!keepNA) {
+        # When keepNA = FALSE, NA values should return 2 (length of "NA" as 
string)
+        result <- Expression$create("coalesce", result, Expression$scalar(2L))
+      }
+
+      result
     },
     notes = "`allowNA = TRUE` and `keepNA = TRUE` not supported"
   )
diff --git a/r/tests/testthat/test-dplyr-funcs-string.R 
b/r/tests/testthat/test-dplyr-funcs-string.R
index 26b091b9e0..58da3ea233 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -1442,6 +1442,16 @@ test_that("nchar with namespacing", {
   )
 })
 
+test_that("nchar with keepNA = FALSE", {
+  df <- tibble(x = c("foo", NA_character_, "bar"))
+  compare_dplyr_binding(
+    .input |>
+      mutate(n = nchar(x, keepNA = FALSE)) |>
+      collect(),
+    df
+  )
+})
+
 test_that("str_trim()", {
   compare_dplyr_binding(
     .input |>

Reply via email to