This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 3e884ec [SPARK-30811][SQL] CTE should not cause stack overflow when it refers to non-existent table with same name 3e884ec is described below commit 3e884ec40a7b6e4f184d7c507c29e9f8eae0a721 Author: herman <her...@databricks.com> AuthorDate: Wed Feb 19 10:17:46 2020 -0800 [SPARK-30811][SQL] CTE should not cause stack overflow when it refers to non-existent table with same name ### Why are the changes needed? This ports the tests introduced in https://github.com/apache/spark/commit/7285eea6839d40cbac15101c633a9a572eb3b603 to master to avoid future regressions. ### Background A query with Common Table Expressions can cause a stack overflow when it contains a CTE that refers to a non-existing table with the same name. The name of the table needs to have a database qualifier. This is caused by a couple of things: - CTESubstitution runs analysis on the CTE, but this does not throw an exception because the table has a database qualifier. The reason we don't fail is that we re-attempt to resolve the relation in a later rule; - CTESubstitution replace logic does not check if the table it is replacing has a database; it shouldn't replace the relation if it does. So now we will happily replace nonexist.t with t; Note that this is not an issue for master or the spark-3.0 branch. ### Does this PR introduce any user-facing change? No ### How was this patch tested? Added regression tests to `AnalysisErrorSuite` and `DataFrameSuite`. Closes #27635 from hvanhovell/SPARK-30811-master. 
Authored-by: herman <her...@databricks.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> (cherry picked from commit c92d437c4639c3e5ae3bfc38bc70e7c38fe4cfad) Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../spark/sql/catalyst/analysis/AnalysisErrorSuite.scala | 12 ++++++++++++ .../src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 8 ++++++++ 2 files changed, 20 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 5cc0453..8f62b0b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.analysis import org.scalatest.Assertions._ import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ @@ -652,4 +653,15 @@ class AnalysisErrorSuite extends AnalysisTest { assertAnalysisError(plan, "Aggregate/Window/Generate expressions are not valid in where clause of the query" :: Nil) } + + test("SPARK-30811: CTE should not cause stack overflow when " + + "it refers to non-existent table with same name") { + val plan = With( + UnresolvedRelation(TableIdentifier("t")), + Seq("t" -> SubqueryAlias("t", + Project( + Alias(Literal(1), "x")() :: Nil, + UnresolvedRelation(TableIdentifier("t", Option("nonexist"))))))) + assertAnalysisError(plan, "Table or view not found:" :: Nil) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 694e576..42a9073 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -2298,6 +2298,14 @@ class DataFrameSuite extends QueryTest fail("emptyDataFrame should be foldable") } } + + test("SPARK-30811: CTE should not cause stack overflow when " + + "it refers to non-existent table with same name") { + val e = intercept[AnalysisException] { + sql("WITH t AS (SELECT 1 FROM nonexist.t) SELECT * FROM t") + } + assert(e.getMessage.contains("Table or view not found:")) + } } case class GroupByKey(a: Int, b: Int) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org