This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push: new b557147 [SPARK-26199][SPARK-31517][R] Fix strategy for handling ... names in mutate b557147 is described below commit b5571476a5171495770a556b0b1da044263d0ebe Author: Michael Chirico <michael.chir...@grabtaxi.com> AuthorDate: Thu Dec 17 17:20:45 2020 +0900 [SPARK-26199][SPARK-31517][R] Fix strategy for handling ... names in mutate ### What changes were proposed in this pull request? Change the strategy for how the varargs are handled in the default `mutate` method ### Why are the changes needed? Bugfix -- `deparse` + `sapply` not working as intended due to `width.cutoff` ### Does this PR introduce any user-facing change? Yes, bugfix. Shouldn't change any working code. ### How was this patch tested? None! yet. Closes #28386 from MichaelChirico/r-mutate-deparse. Lead-authored-by: Michael Chirico <michael.chir...@grabtaxi.com> Co-authored-by: Michael Chirico <michaelchiri...@gmail.com> Signed-off-by: HyukjinKwon <gurwls...@apache.org> --- R/pkg/R/DataFrame.R | 18 ++++++++++-------- R/pkg/tests/fulltests/test_sparkSQL.R | 15 +++++++++++++++ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 31a651e..8ca338f 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2277,16 +2277,17 @@ setMethod("mutate", # For named arguments, use the names for arguments as the column names # For unnamed arguments, use the argument symbols as the column names - args <- sapply(substitute(list(...))[-1], deparse) ns <- names(cols) - if (!is.null(ns)) { - lapply(seq_along(args), function(i) { - if (ns[[i]] != "") { - args[[i]] <<- ns[[i]] - } + if (is.null(ns)) ns <- rep("", length(cols)) + named_idx <- nzchar(ns) + if (!all(named_idx)) { + # SPARK-31517: deparse uses width.cutoff on wide input and the + # output is length>1, so need to collapse it to scalar + colsub <- substitute(list(...))[-1L] + ns[!named_idx] <- sapply(which(!named_idx), function(ii) { + paste(gsub("^\\s*|\\s*$", "", deparse(colsub[[ii]])), collapse = " ") }) } - ns <- args # The last column of the same name in the specific columns takes effect deDupCols <- list() @@ -3444,7 +3445,8 @@ setMethod("as.data.frame", #' @note attach since 1.6.0 setMethod("attach", signature(what = "SparkDataFrame"), - function(what, pos = 2L, name = deparse(substitute(what), backtick = FALSE), + function(what, pos = 2L, + name = paste(deparse(substitute(what), backtick = FALSE), collapse = " "), warn.conflicts = TRUE) { args <- as.list(environment()) # capture all parameters - this must be the first line newEnv <- assignNewEnv(args$what) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index c623f53..ebf08b9 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -2884,6 +2884,15 @@ test_that("mutate(), transform(), rename() and names()", { expect_equal(nrow(result), 153) expect_equal(ncol(result), 2) detach(airquality) + + # ensure long inferred names are handled without error (SPARK-26199) + # test implicitly assumes eval(formals(deparse)$width.cutoff) = 60 + # (which has always been true as of 2020-11-15) + newDF <- mutate( + df, + df$age + 12345678901234567890 + 12345678901234567890 + 12345678901234 + ) + expect_match(tail(columns(newDF), 1L), "234567890", fixed = TRUE) }) test_that("read/write ORC files", { @@ -3273,6 +3282,12 @@ test_that("attach() on a DataFrame", { stat3 <- summary(df[, "age", drop = F]) expect_equal(collect(stat3)[8, "age"], "30") expect_error(age) + + # attach method uses deparse(); ensure no errors from a very long input + abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop <- df # nolint + attach(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop) + expect_true(any(grepl("abcdefghijklmnopqrstuvwxyz", search()))) + detach("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop") }) test_that("with() on a DataFrame", { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org