ianmcook commented on a change in pull request #10547:
URL: https://github.com/apache/arrow/pull/10547#discussion_r654432688



##########
File path: r/R/dplyr-functions.R
##########
@@ -215,6 +215,52 @@ nse_funcs$nchar <- function(x, type = "chars", allowNA = 
FALSE, keepNA = NA) {
   }
 }
 
+nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) 
{
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
+}
+
+nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
+}
+
+nse_funcs$str_c <- function(..., sep = "", collapse = NULL) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
+}
+
+arrow_string_join_function <- function(null_handling, null_replacement = NULL) 
{
+  # the binary_join_element_wise kernel takes the separator as the last 
argument
+  function(...) {
+    dots <- list(...) # sep is the last value in dots
+    for (i in seq_along(dots)) {
+      # handle scalar literal args, and cast all args to string for
+      # consistency with base::paste(), base::paste0(), and stringr::str_c()
+      if (!inherits(dots[[i]], "Expression")) {
+        assert_that(
+          length(dots[[i]]) == 1,
+          msg = "Literal vectors of length != 1 not supported in string 
concatenation"
+        )
+        # handle scalar literal NA consistent with the binary_join_element_wise
+        # kernel's handling of nulls in the data
+        if (null_handling == NullHandlingBehavior$REPLACE && is.na(dots[[i]])) 
{
+          dots[[i]] <- null_replacement
+        }

Review comment:
       This is necessary (for reasons I don't fully understand) to enable use 
of `NA` literal scalars in the `paste()`, `paste0()`, and `str_c()` 
expressions. See the discussion about this at 
https://github.com/apache/arrow/pull/10520#issuecomment-862962267

##########
File path: r/R/dplyr-functions.R
##########
@@ -215,6 +215,52 @@ nse_funcs$nchar <- function(x, type = "chars", allowNA = 
FALSE, keepNA = NA) {
   }
 }
 
+nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) 
{
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
+}
+
+nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
+}
+
+nse_funcs$str_c <- function(..., sep = "", collapse = NULL) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
+}
+
+arrow_string_join_function <- function(null_handling, null_replacement = NULL) 
{
+  # the binary_join_element_wise kernel takes the separator as the last 
argument
+  function(...) {
+    dots <- list(...) # sep is the last value in dots
+    for (i in seq_along(dots)) {
+      # handle scalar literal args, and cast all args to string for
+      # consistency with base::paste(), base::paste0(), and stringr::str_c()
+      if (!inherits(dots[[i]], "Expression")) {
+        assert_that(
+          length(dots[[i]]) == 1,
+          msg = "Literal vectors of length != 1 not supported in string 
concatenation"
+        )
+        # handle scalar literal NA consistent with the binary_join_element_wise
+        # kernel's handling of nulls in the data
+        if (null_handling == NullHandlingBehavior$REPLACE && is.na(dots[[i]])) 
{
+          dots[[i]] <- null_replacement
+        }
+        dots[[i]] <- Expression$scalar(as.character(dots[[i]]))
+      } else {
+        dots[[i]] <- nse_funcs$as.character(dots[[i]])
+      }
+    }
+    args <- c(function_name = "binary_join_element_wise", dots)
+    args$options <- list(
+      null_handling = null_handling,
+      null_replacement = null_replacement
+    )
+    do.call(Expression$create, args)

Review comment:
       @nealrichardson do you think this should call `compute___expr__call` 
directly to avoid the need for the `do.call` and so forth? (That's the arrow 
internal function that `Expression$create` calls.)

##########
File path: r/R/dplyr-functions.R
##########
@@ -215,6 +215,52 @@ nse_funcs$nchar <- function(x, type = "chars", allowNA = 
FALSE, keepNA = NA) {
   }
 }
 
+nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) 
{
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
+}
+
+nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
+}
+
+nse_funcs$str_c <- function(..., sep = "", collapse = NULL) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
+}
+
+arrow_string_join_function <- function(null_handling, null_replacement = NULL) 
{
+  # the binary_join_element_wise kernel takes the separator as the last 
argument
+  function(...) {
+    dots <- list(...) # sep is the last value in dots
+    for (i in seq_along(dots)) {
+      # handle scalar literal args, and cast all args to string for
+      # consistency with base::paste(), base::paste0(), and stringr::str_c()
+      if (!inherits(dots[[i]], "Expression")) {
+        assert_that(
+          length(dots[[i]]) == 1,
+          msg = "Literal vectors of length != 1 not supported in string 
concatenation"
+        )
+        # handle scalar literal NA consistent with the binary_join_element_wise
+        # kernel's handling of nulls in the data
+        if (null_handling == NullHandlingBehavior$REPLACE && is.na(dots[[i]])) 
{
+          dots[[i]] <- null_replacement
+        }
+        dots[[i]] <- Expression$scalar(as.character(dots[[i]]))
+      } else {
+        dots[[i]] <- nse_funcs$as.character(dots[[i]])
+      }
+    }
+    args <- c(function_name = "binary_join_element_wise", dots)
+    args$options <- list(
+      null_handling = null_handling,
+      null_replacement = null_replacement
+    )
+    do.call(Expression$create, args)

Review comment:
       If yes, I will include a comment in the code to explain.

##########
File path: r/R/dplyr-functions.R
##########
@@ -215,6 +215,52 @@ nse_funcs$nchar <- function(x, type = "chars", allowNA = 
FALSE, keepNA = NA) {
   }
 }
 
+nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) 
{
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
+}
+
+nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
+}
+
+nse_funcs$str_c <- function(..., sep = "", collapse = NULL) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
+}
+
+arrow_string_join_function <- function(null_handling, null_replacement = NULL) 
{
+  # the binary_join_element_wise kernel takes the separator as the last 
argument
+  function(...) {
+    dots <- list(...) # sep is the last value in dots
+    for (i in seq_along(dots)) {
+      # handle scalar literal args, and cast all args to string for
+      # consistency with base::paste(), base::paste0(), and stringr::str_c()
+      if (!inherits(dots[[i]], "Expression")) {
+        assert_that(
+          length(dots[[i]]) == 1,
+          msg = "Literal vectors of length != 1 not supported in string 
concatenation"
+        )
+        # handle scalar literal NA consistent with the binary_join_element_wise
+        # kernel's handling of nulls in the data
+        if (null_handling == NullHandlingBehavior$REPLACE && is.na(dots[[i]])) 
{
+          dots[[i]] <- null_replacement
+        }
+        dots[[i]] <- Expression$scalar(as.character(dots[[i]]))
+      } else {
+        dots[[i]] <- nse_funcs$as.character(dots[[i]])
+      }
+    }
+    args <- c(function_name = "binary_join_element_wise", dots)
+    args$options <- list(
+      null_handling = null_handling,
+      null_replacement = null_replacement
+    )
+    do.call(Expression$create, args)

Review comment:
       Oh, disregard this, I see now that I can pass `args` instead of `...` to 
`Expression$create`. 

##########
File path: r/R/dplyr-functions.R
##########
@@ -215,6 +215,52 @@ nse_funcs$nchar <- function(x, type = "chars", allowNA = 
FALSE, keepNA = NA) {
   }
 }
 
+nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) 
{
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
+}
+
+nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
+}
+
+nse_funcs$str_c <- function(..., sep = "", collapse = NULL) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
+}
+
+arrow_string_join_function <- function(null_handling, null_replacement = NULL) 
{
+  # the binary_join_element_wise kernel takes the separator as the last 
argument
+  function(...) {
+    dots <- list(...) # sep is the last value in dots
+    for (i in seq_along(dots)) {
+      # handle scalar literal args, and cast all args to string for
+      # consistency with base::paste(), base::paste0(), and stringr::str_c()
+      if (!inherits(dots[[i]], "Expression")) {
+        assert_that(
+          length(dots[[i]]) == 1,
+          msg = "Literal vectors of length != 1 not supported in string 
concatenation"
+        )
+        # handle scalar literal NA consistent with the binary_join_element_wise
+        # kernel's handling of nulls in the data
+        if (null_handling == NullHandlingBehavior$REPLACE && is.na(dots[[i]])) 
{
+          dots[[i]] <- null_replacement
+        }

Review comment:
       This is necessary (for reasons I don't fully understand) to enable use 
of `NA` literal scalars in the `paste()`, `paste0()`, and `str_c()` 
expressions. See the discussion about this at 
https://github.com/apache/arrow/pull/10520#issuecomment-862962267

##########
File path: r/R/dplyr-functions.R
##########
@@ -215,6 +215,54 @@ nse_funcs$nchar <- function(x, type = "chars", allowNA = 
FALSE, keepNA = NA) {
   }
 }
 
+nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) 
{
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
+}
+
+nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
+}
+
+nse_funcs$str_c <- function(..., sep = "", collapse = NULL) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
+}
+
+arrow_string_join_function <- function(null_handling, null_replacement = NULL) 
{
+  # the `binary_join_element_wise` Arrow C++ compute kernel takes the separator
+  # as the last argument, so pass `sep` as the last dots arg to this function
+  function(...) {
+    args <- lapply(list(...), function(arg) {
+      # handle scalar literal args, and cast all args to string for
+      # consistency with base::paste(), base::paste0(), and stringr::str_c()
+      if (!inherits(arg, "Expression")) {
+        assert_that(
+          length(arg) == 1,
+          msg = "Literal vectors of length != 1 not supported in string 
concatenation"
+        )
+        # handle scalar literal NA consistent with the binary_join_element_wise
+        # kernel's handling of nulls in the data
+        if (null_handling == NullHandlingBehavior$REPLACE && is.na(arg)) {
+          arg <- null_replacement
+        }

Review comment:
       This is necessary (for reasons I don't fully understand) to enable use 
of `NA` literal scalars in the `paste()`, `paste0()`, and `str_c()` 
expressions. See the discussion about this at 
https://github.com/apache/arrow/pull/10520#issuecomment-862962267

##########
File path: r/R/dplyr-functions.R
##########
@@ -215,6 +215,54 @@ nse_funcs$nchar <- function(x, type = "chars", allowNA = 
FALSE, keepNA = NA) {
   }
 }
 
+nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) 
{
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
+}
+
+nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
+}
+
+nse_funcs$str_c <- function(..., sep = "", collapse = NULL) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
+}
+
+arrow_string_join_function <- function(null_handling, null_replacement = NULL) 
{
+  # the `binary_join_element_wise` Arrow C++ compute kernel takes the separator
+  # as the last argument, so pass `sep` as the last dots arg to this function
+  function(...) {
+    args <- lapply(list(...), function(arg) {
+      # handle scalar literal args, and cast all args to string for
+      # consistency with base::paste(), base::paste0(), and stringr::str_c()
+      if (!inherits(arg, "Expression")) {
+        assert_that(
+          length(arg) == 1,
+          msg = "Literal vectors of length != 1 not supported in string 
concatenation"
+        )
+        # handle scalar literal NA consistent with the binary_join_element_wise
+        # kernel's handling of nulls in the data
+        if (null_handling == NullHandlingBehavior$REPLACE && is.na(arg)) {
+          arg <- null_replacement
+        }
+        Expression$scalar(as.character(arg))
+      } else {
+        nse_funcs$as.character(arg)

Review comment:
       It might be better to check first whether `arg` is a string and only 
cast to string if it's not, but the type checking functions do not always work 
here for the reason described in ARROW-13118, and I can confirm from running 
some tests that `as.character()` incurs virtually zero compute cost when called 
on a string array.

##########
File path: r/R/dplyr-functions.R
##########
@@ -215,6 +215,54 @@ nse_funcs$nchar <- function(x, type = "chars", allowNA = 
FALSE, keepNA = NA) {
   }
 }
 
+nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) 
{
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
+}
+
+nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
+}
+
+nse_funcs$str_c <- function(..., sep = "", collapse = NULL) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
+}
+
+arrow_string_join_function <- function(null_handling, null_replacement = NULL) 
{
+  # the `binary_join_element_wise` Arrow C++ compute kernel takes the separator
+  # as the last argument, so pass `sep` as the last dots arg to this function
+  function(...) {
+    args <- lapply(list(...), function(arg) {
+      # handle scalar literal args, and cast all args to string for
+      # consistency with base::paste(), base::paste0(), and stringr::str_c()
+      if (!inherits(arg, "Expression")) {
+        assert_that(
+          length(arg) == 1,
+          msg = "Literal vectors of length != 1 not supported in string 
concatenation"
+        )
+        # handle scalar literal NA consistent with the binary_join_element_wise
+        # kernel's handling of nulls in the data
+        if (null_handling == NullHandlingBehavior$REPLACE && is.na(arg)) {
+          arg <- null_replacement
+        }
+        Expression$scalar(as.character(arg))
+      } else {
+        nse_funcs$as.character(arg)

Review comment:
       It might be better to check first whether `arg` is a string and only 
cast to string if it's not, but the type checking functions do not always work 
here for the reason described in ARROW-13118, and I can confirm from running 
some tests that `nse_funcs$as.character()` incurs virtually zero compute cost 
when called on a string array.

##########
File path: r/R/dplyr-functions.R
##########
@@ -215,6 +215,54 @@ nse_funcs$nchar <- function(x, type = "chars", allowNA = 
FALSE, keepNA = NA) {
   }
 }
 
+nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) 
{
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
+}
+
+nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
+}
+
+nse_funcs$str_c <- function(..., sep = "", collapse = NULL) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
+}
+
+arrow_string_join_function <- function(null_handling, null_replacement = NULL) 
{
+  # the `binary_join_element_wise` Arrow C++ compute kernel takes the separator
+  # as the last argument, so pass `sep` as the last dots arg to this function
+  function(...) {
+    args <- lapply(list(...), function(arg) {
+      # handle scalar literal args, and cast all args to string for
+      # consistency with base::paste(), base::paste0(), and stringr::str_c()
+      if (!inherits(arg, "Expression")) {
+        assert_that(
+          length(arg) == 1,
+          msg = "Literal vectors of length != 1 not supported in string 
concatenation"
+        )
+        # handle scalar literal NA consistent with the binary_join_element_wise
+        # kernel's handling of nulls in the data
+        if (null_handling == NullHandlingBehavior$REPLACE && is.na(arg)) {
+          arg <- null_replacement
+        }
+        Expression$scalar(as.character(arg))
+      } else {
+        nse_funcs$as.character(arg)

Review comment:
       It might be better to check first whether `arg` is a string and only 
cast to string if it's not, but the type checking functions do not always work 
here for the reason described in ARROW-13118, and I can confirm from running 
some tests that `nse_funcs$as.character()` incurs virtually zero compute cost 
when called on a Datum with string type.

##########
File path: r/R/dplyr-functions.R
##########
@@ -215,6 +215,54 @@ nse_funcs$nchar <- function(x, type = "chars", allowNA = 
FALSE, keepNA = NA) {
   }
 }
 
+nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) 
{
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
+}
+
+nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
+}
+
+nse_funcs$str_c <- function(..., sep = "", collapse = NULL) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
+}
+
+arrow_string_join_function <- function(null_handling, null_replacement = NULL) 
{
+  # the `binary_join_element_wise` Arrow C++ compute kernel takes the separator
+  # as the last argument, so pass `sep` as the last dots arg to this function
+  function(...) {
+    args <- lapply(list(...), function(arg) {
+      # handle scalar literal args, and cast all args to string for
+      # consistency with base::paste(), base::paste0(), and stringr::str_c()
+      if (!inherits(arg, "Expression")) {
+        assert_that(
+          length(arg) == 1,
+          msg = "Literal vectors of length != 1 not supported in string 
concatenation"
+        )
+        # handle scalar literal NA consistent with the binary_join_element_wise
+        # kernel's handling of nulls in the data
+        if (null_handling == NullHandlingBehavior$REPLACE && is.na(arg)) {
+          arg <- null_replacement
+        }

Review comment:
       Will do, thanks David!

##########
File path: r/R/dplyr-functions.R
##########
@@ -215,6 +215,54 @@ nse_funcs$nchar <- function(x, type = "chars", allowNA = 
FALSE, keepNA = NA) {
   }
 }
 
+nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) 
{
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
+}
+
+nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
+}
+
+nse_funcs$str_c <- function(..., sep = "", collapse = NULL) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
+}
+
+arrow_string_join_function <- function(null_handling, null_replacement = NULL) 
{
+  # the `binary_join_element_wise` Arrow C++ compute kernel takes the separator
+  # as the last argument, so pass `sep` as the last dots arg to this function
+  function(...) {
+    args <- lapply(list(...), function(arg) {
+      # handle scalar literal args, and cast all args to string for
+      # consistency with base::paste(), base::paste0(), and stringr::str_c()
+      if (!inherits(arg, "Expression")) {
+        assert_that(
+          length(arg) == 1,
+          msg = "Literal vectors of length != 1 not supported in string 
concatenation"
+        )
+        # handle scalar literal NA consistent with the binary_join_element_wise
+        # kernel's handling of nulls in the data
+        if (null_handling == NullHandlingBehavior$REPLACE && is.na(arg)) {
+          arg <- null_replacement
+        }

Review comment:
       @lidavidm I applied the patch in 9dda54a but unfortunately the problem 
persists

##########
File path: r/R/dplyr-functions.R
##########
@@ -215,6 +215,54 @@ nse_funcs$nchar <- function(x, type = "chars", allowNA = 
FALSE, keepNA = NA) {
   }
 }
 
+nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) 
{
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
+}
+
+nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
+}
+
+nse_funcs$str_c <- function(..., sep = "", collapse = NULL) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
+}
+
+arrow_string_join_function <- function(null_handling, null_replacement = NULL) 
{
+  # the `binary_join_element_wise` Arrow C++ compute kernel takes the separator
+  # as the last argument, so pass `sep` as the last dots arg to this function
+  function(...) {
+    args <- lapply(list(...), function(arg) {
+      # handle scalar literal args, and cast all args to string for
+      # consistency with base::paste(), base::paste0(), and stringr::str_c()
+      if (!inherits(arg, "Expression")) {
+        assert_that(
+          length(arg) == 1,
+          msg = "Literal vectors of length != 1 not supported in string 
concatenation"
+        )
+        # handle scalar literal NA consistent with the binary_join_element_wise
+        # kernel's handling of nulls in the data
+        if (null_handling == NullHandlingBehavior$REPLACE && is.na(arg)) {
+          arg <- null_replacement
+        }

Review comment:
       Aha, sorry, yes—I didn't look closely enough at the error messages. It's 
now only the case where the _separator_ is a literal `NA` that's causing the 
function to emit `NA`, and that is the documented behavior:
   > Null separators emit  null.
   So I think we're all good now. Thanks!

##########
File path: r/R/dplyr-functions.R
##########
@@ -215,6 +215,54 @@ nse_funcs$nchar <- function(x, type = "chars", allowNA = 
FALSE, keepNA = NA) {
   }
 }
 
+nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) 
{
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
+}
+
+nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
+}
+
+nse_funcs$str_c <- function(..., sep = "", collapse = NULL) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
+}
+
+arrow_string_join_function <- function(null_handling, null_replacement = NULL) 
{
+  # the `binary_join_element_wise` Arrow C++ compute kernel takes the separator
+  # as the last argument, so pass `sep` as the last dots arg to this function
+  function(...) {
+    args <- lapply(list(...), function(arg) {
+      # handle scalar literal args, and cast all args to string for
+      # consistency with base::paste(), base::paste0(), and stringr::str_c()
+      if (!inherits(arg, "Expression")) {
+        assert_that(
+          length(arg) == 1,
+          msg = "Literal vectors of length != 1 not supported in string 
concatenation"
+        )
+        # handle scalar literal NA consistent with the binary_join_element_wise
+        # kernel's handling of nulls in the data
+        if (null_handling == NullHandlingBehavior$REPLACE && is.na(arg)) {
+          arg <- null_replacement
+        }

Review comment:
       Aha, sorry, yes—I didn't look closely enough at the error messages. It's 
now only the case where the _separator_ is a literal `NA` that's causing the 
function to emit `NA`, and that is the documented behavior:
   > Null separators emit  null.
   
   So I think we're all good now. Thanks!

##########
File path: cpp/src/arrow/compute/kernels/scalar_string.cc
##########
@@ -3587,10 +3587,14 @@ void AddBinaryJoin(FunctionRegistry* registry) {
         "binary_join_element_wise", Arity::VarArgs(/*min_args=*/1),
         &binary_join_element_wise_doc, &kDefaultJoinOptions);
     for (const auto& ty : BaseBinaryTypes()) {
-      DCHECK_OK(
-          func->AddKernel({InputType(ty)}, ty,
+      ScalarKernel kernel{KernelSignature::Make({InputType(ty)}, ty, 
/*is_varargs=*/true),
                           
GenerateTypeAgnosticVarBinaryBase<BinaryJoinElementWise>(ty),
-                          BinaryJoinElementWiseState::Init));
+                          BinaryJoinElementWiseState::Init};
+      // This is redundant but expression simplification uses this to 
potentially replace
+      // calls with null

Review comment:
       Done

##########
File path: r/R/dplyr-functions.R
##########
@@ -215,6 +215,54 @@ nse_funcs$nchar <- function(x, type = "chars", allowNA = 
FALSE, keepNA = NA) {
   }
 }
 
+nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) 
{
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
+}
+
+nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
+}
+
+nse_funcs$str_c <- function(..., sep = "", collapse = NULL) {
+  assert_that(is.null(collapse))
+  arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
+}
+
+arrow_string_join_function <- function(null_handling, null_replacement = NULL) 
{
+  # the `binary_join_element_wise` Arrow C++ compute kernel takes the separator
+  # as the last argument, so pass `sep` as the last dots arg to this function
+  function(...) {
+    args <- lapply(list(...), function(arg) {
+      # handle scalar literal args, and cast all args to string for
+      # consistency with base::paste(), base::paste0(), and stringr::str_c()
+      if (!inherits(arg, "Expression")) {
+        assert_that(
+          length(arg) == 1,
+          msg = "Literal vectors of length != 1 not supported in string 
concatenation"
+        )
+        # handle scalar literal NA consistent with the binary_join_element_wise
+        # kernel's handling of nulls in the data
+        if (null_handling == NullHandlingBehavior$REPLACE && is.na(arg)) {
+          arg <- null_replacement
+        }

Review comment:
       Comment fixed in 1f24ffd




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to