This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push: new f4c8c48 [SPARK-30998][SQL][2.4] ClassCastException when a generator having nested inner generators f4c8c48 is described below commit f4c8c4892197b8c5425a8013a09e9b379444e6fc Author: Takeshi Yamamuro <yamam...@apache.org> AuthorDate: Tue Mar 3 23:47:40 2020 +0900 [SPARK-30998][SQL][2.4] ClassCastException when a generator having nested inner generators ### What changes were proposed in this pull request? The query below failed in branch-2.4: ``` scala> sql("select array(array(1, 2), array(3)) ar").select(explode(explode($"ar"))).show() 20/03/01 13:51:56 ERROR Executor: Exception in task 0.0 in stage 0.0 (TID 0) java.lang.ClassCastException: scala.collection.mutable.ArrayOps$ofRef cannot be cast to org.apache.spark.sql.catalyst.util.ArrayData at org.apache.spark.sql.catalyst.expressions.ExplodeBase.eval(generators.scala:313) at org.apache.spark.sql.execution.GenerateExec.$anonfun$doExecute$8(GenerateExec.scala:108) at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490) at scala.collection.Iterator$ConcatIterator.hasNext(Iterator.scala:222) at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) ... ``` This PR modifies the `hasNestedGenerator` code in `ExtractGenerator` so that it correctly catches nested inner generators. This backport PR comes from https://github.com/apache/spark/pull/27750# ### Why are the changes needed? A bug fix. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Added tests. Closes #27769 from maropu/SPARK-20998-BRANCH-2.4. 
Authored-by: Takeshi Yamamuro <yamam...@apache.org> Signed-off-by: Takeshi Yamamuro <yamam...@apache.org> --- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 16 +++++++++++++--- .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 19 +++++++++++++++++++ .../org/apache/spark/sql/GeneratorFunctionSuite.scala | 8 ++++++++ 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 0fedf7f..61f77be 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1681,10 +1681,20 @@ class Analyzer( } private def hasNestedGenerator(expr: NamedExpression): Boolean = { + def hasInnerGenerator(g: Generator): Boolean = g match { + // Since `GeneratorOuter` is just a wrapper of generators, we skip it here + case go: GeneratorOuter => + hasInnerGenerator(go.child) + case _ => + g.children.exists { _.find { + case _: Generator => true + case _ => false + }.isDefined } + } CleanupAliases.trimNonTopLevelAliases(expr) match { - case UnresolvedAlias(_: Generator, _) => false - case Alias(_: Generator, _) => false - case MultiAlias(_: Generator, _) => false + case UnresolvedAlias(g: Generator, _) => hasInnerGenerator(g) + case Alias(g: Generator, _) => hasInnerGenerator(g) + case MultiAlias(g: Generator, _) => hasInnerGenerator(g) case other => hasGenerator(other) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 45319aa..337902f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -395,6 +395,25 @@ class AnalysisErrorSuite extends AnalysisTest { ) errorTest( + "SPARK-30998: unsupported nested inner generators", + { + val nestedListRelation = LocalRelation( + AttributeReference("nestedList", ArrayType(ArrayType(IntegerType)))()) + nestedListRelation.select(Explode(Explode($"nestedList"))) + }, + "Generators are not supported when it's nested in expressions, but got: " + + "explode(explode(nestedList))" :: Nil + ) + + errorTest( + "SPARK-30998: unsupported nested inner generators for aggregates", + testRelation.select(Explode(Explode( + CreateArray(CreateArray(min($"a") :: max($"a") :: Nil) :: Nil)))), + "Generators are not supported when it's nested in expressions, but got: " + + "explode(explode(array(array(min(a), max(a)))))" :: Nil + ) + + errorTest( "generator appears in operator which is not Project", listRelation.sortBy(Explode('list).asc), "Generators are not supported outside the SELECT clause, but got: Sort" :: Nil diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala index 8280a3c..df66b49 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala @@ -308,6 +308,14 @@ class GeneratorFunctionSuite extends QueryTest with SharedSQLContext { sql("select * from values 1, 2 lateral view outer empty_gen() a as b"), Row(1, null) :: Row(2, null) :: Nil) } + + test("SPARK-30998: Unsupported nested inner generators") { + val errMsg = intercept[AnalysisException] { + sql("SELECT array(array(1, 2), array(3)) v").select(explode(explode($"v"))).collect + }.getMessage + assert(errMsg.contains("Generators are not supported when it's nested in expressions, " + + "but got: explode(explode(v))")) + } } case class EmptyGenerator() extends 
Generator { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org