This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 3e884ec  [SPARK-30811][SQL] CTE should not cause stack overflow when 
it refers to non-existent table with same name
3e884ec is described below

commit 3e884ec40a7b6e4f184d7c507c29e9f8eae0a721
Author: herman <her...@databricks.com>
AuthorDate: Wed Feb 19 10:17:46 2020 -0800

    [SPARK-30811][SQL] CTE should not cause stack overflow when it refers to 
non-existent table with same name
    
    ### Why are the changes needed?
    This ports the tests introduced in 
https://github.com/apache/spark/commit/7285eea6839d40cbac15101c633a9a572eb3b603 
to master to avoid future regressions.
    
    ### Background
    A query with Common Table Expressions can cause a stack overflow when it 
contains a CTE that refers to a non-existing table with the same name. The name 
of the table needs to have a database qualifier. This is caused by a couple of 
things:
    
    - CTESubstitution runs analysis on the CTE, but this does not throw an 
exception because the table has a database qualifier. The reason we don't fail 
is that we re-attempt to resolve the relation in a later rule;
    - The CTESubstitution replace logic does not check whether the table it is 
replacing has a database qualifier; it shouldn't replace the relation if it 
does. So now we will happily replace nonexist.t with t;
    
    Note that this is not an issue for master or the spark-3.0 branch.
    
    ### Does this PR introduce any user-facing change?
    No
    
    ### How was this patch tested?
    Added regression test to `AnalysisErrorSuite` and `DataFrameSuite`.
    
    Closes #27635 from hvanhovell/SPARK-30811-master.
    
    Authored-by: herman <her...@databricks.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
    (cherry picked from commit c92d437c4639c3e5ae3bfc38bc70e7c38fe4cfad)
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../spark/sql/catalyst/analysis/AnalysisErrorSuite.scala     | 12 ++++++++++++
 .../src/test/scala/org/apache/spark/sql/DataFrameSuite.scala |  8 ++++++++
 2 files changed, 20 insertions(+)

diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index 5cc0453..8f62b0b 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.analysis
 import org.scalatest.Assertions._
 
 import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
 import org.apache.spark.sql.catalyst.expressions._
@@ -652,4 +653,15 @@ class AnalysisErrorSuite extends AnalysisTest {
     assertAnalysisError(plan,
       "Aggregate/Window/Generate expressions are not valid in where clause of 
the query" :: Nil)
   }
+
+  test("SPARK-30811: CTE should not cause stack overflow when " +
+    "it refers to non-existent table with same name") {
+    val plan = With(
+      UnresolvedRelation(TableIdentifier("t")),
+      Seq("t" -> SubqueryAlias("t",
+        Project(
+          Alias(Literal(1), "x")() :: Nil,
+          UnresolvedRelation(TableIdentifier("t", Option("nonexist")))))))
+    assertAnalysisError(plan, "Table or view not found:" :: Nil)
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 694e576..42a9073 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -2298,6 +2298,14 @@ class DataFrameSuite extends QueryTest
         fail("emptyDataFrame should be foldable")
     }
   }
+
+  test("SPARK-30811: CTE should not cause stack overflow when " +
+    "it refers to non-existent table with same name") {
+    val e = intercept[AnalysisException] {
+      sql("WITH t AS (SELECT 1 FROM nonexist.t) SELECT * FROM t")
+    }
+    assert(e.getMessage.contains("Table or view not found:"))
+  }
 }
 
 case class GroupByKey(a: Int, b: Int)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to