This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push: new 82e461a [SPARK-35381][R] Fix lambda variable name issues in nested higher order functions at R APIs 82e461a is described below commit 82e461ab6152870ba5bae2ca64c4af29dcb86db3 Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Wed May 12 16:52:39 2021 +0900 [SPARK-35381][R] Fix lambda variable name issues in nested higher order functions at R APIs What changes were proposed in this pull request? This PR fixes the same issue as https://github.com/apache/spark/pull/32424 ```r df <- sql("SELECT array(1, 2, 3) as numbers, array('a', 'b', 'c') as letters") collect(select( df, array_transform("numbers", function(number) { array_transform("letters", function(latter) { struct(alias(number, "n"), alias(latter, "l")) }) }) )) ``` **Before:** ``` ... a, a, b, b, c, c, a, a, b, b, c, c, a, a, b, b, c, c ``` **After:** ``` ... 1, a, 1, b, 1, c, 2, a, 2, b, 2, c, 3, a, 3, b, 3, c ``` Why are the changes needed? To produce the correct results. Does this PR introduce any user-facing change? Yes, it fixes the results to be correct as mentioned above. How was this patch tested? Manually tested as above, and a unit test was added. Closes #32517 from HyukjinKwon/SPARK-35381. Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> (cherry picked from commit ecb48ccb7db11f15b9420aaee57594dc4f9d448f) Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- R/pkg/R/functions.R | 7 ++++++- R/pkg/tests/fulltests/test_sparkSQL.R | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 43b25a1..28e4ef8 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -3578,7 +3578,12 @@ unresolved_named_lambda_var <- function(...) { "org.apache.spark.sql.Column", newJObject( "org.apache.spark.sql.catalyst.expressions.UnresolvedNamedLambdaVariable", - list(...) 
+ lapply(list(...), function(x) { + handledCallJStatic( + "org.apache.spark.sql.catalyst.expressions.UnresolvedNamedLambdaVariable", + "freshVarName", + x) + }) ) ) column(jc) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index ebf08b9..2326897 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -2153,6 +2153,20 @@ test_that("higher order functions", { expect_error(array_transform("xs", function(...) 42)) }) +test_that("SPARK-34794: lambda vars must be resolved properly in nested higher order functions", { + df <- sql("SELECT array(1, 2, 3) as numbers, array('a', 'b', 'c') as letters") + ret <- first(select( + df, + array_transform("numbers", function(number) { + array_transform("letters", function(latter) { + struct(alias(number, "n"), alias(latter, "l")) + }) + }) + )) + + expect_equal(1, ret[[1]][[1]][[1]][[1]]$n) +}) + test_that("group by, agg functions", { df <- read.json(jsonPath) df1 <- agg(df, name = "max", age = "sum") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org