This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new bea9f09 [SPARK-34504][SQL] Avoid unnecessary resolving of SQL temp
views for DDL commands
bea9f09 is described below
commit bea9f0912dcf4e55137038c7833f438977afd491
Author: Wenchen Fan <[email protected]>
AuthorDate: Wed Mar 17 11:16:51 2021 +0800
[SPARK-34504][SQL] Avoid unnecessary resolving of SQL temp views for DDL
commands
For DDL commands like DROP VIEW, they don't really need to resolve the view
(parse and analyze the view SQL text), they just need to get the view metadata.
This PR fixes the rule `ResolveTempViews` to only resolve the temp view for
`UnresolvedRelation`. This also fixes a bug for DROP VIEW, as previously it
tried to resolve the view and failed to drop invalid views.
Why are the changes needed? Bug fix.
Does this PR introduce any user-facing change? No.
How was this patch tested? New test.
Closes #31853 from cloud-fan/view-resolve.
Authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit af553735b10812d303de411034540e3d90199a26)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/catalyst/analysis/Analyzer.scala | 33 +++++++++++-----------
.../spark/sql/execution/SQLViewTestSuite.scala | 21 ++++++++++++++
2 files changed, 37 insertions(+), 17 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 8644f8c..f98f33b 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -872,16 +872,16 @@ class Analyzer(override val catalogManager:
CatalogManager)
object ResolveTempViews extends Rule[LogicalPlan] {
def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp {
case u @ UnresolvedRelation(ident, _, isStreaming) =>
- lookupTempView(ident, isStreaming, performCheck = true).getOrElse(u)
+ lookupAndResolveTempView(ident, isStreaming).getOrElse(u)
case i @ InsertIntoStatement(UnresolvedRelation(ident, _, false), _, _,
_, _, _) =>
- lookupTempView(ident, performCheck = true)
+ lookupAndResolveTempView(ident)
.map(view => i.copy(table = view))
.getOrElse(i)
// TODO (SPARK-27484): handle streaming write commands when we have them.
case write: V2WriteCommand =>
write.table match {
case UnresolvedRelation(ident, _, false) =>
-            lookupTempView(ident, performCheck = true).map(EliminateSubqueryAliases(_)).map {
+            lookupAndResolveTempView(ident).map(EliminateSubqueryAliases(_)).map {
case r: DataSourceV2Relation => write.withNewTable(r)
              case _ => throw new AnalysisException("Cannot write into temp view " +
                s"${ident.quoted} as it's not a data source v2 relation.")
@@ -905,10 +905,9 @@ class Analyzer(override val catalogManager: CatalogManager)
.getOrElse(u)
}
- def lookupTempView(
+ private def lookupTempView(
identifier: Seq[String],
- isStreaming: Boolean = false,
- performCheck: Boolean = false): Option[LogicalPlan] = {
+ isStreaming: Boolean = false): Option[LogicalPlan] = {
// Permanent View can't refer to temp views, no need to lookup at all.
if (isResolvingView && !referredTempViewNames.contains(identifier))
return None
@@ -922,7 +921,13 @@ class Analyzer(override val catalogManager: CatalogManager)
         throw new AnalysisException(s"${identifier.quoted} is not a temp view of streaming " +
           s"logical plan, please use batch API such as `DataFrameReader.table` to read it.")
}
- tmpView.map(ResolveRelations.resolveViews(_, performCheck))
+ tmpView
+ }
+
+ private def lookupAndResolveTempView(
+ identifier: Seq[String],
+ isStreaming: Boolean = false): Option[LogicalPlan] = {
+      lookupTempView(identifier, isStreaming).map(ResolveRelations.resolveViews)
}
}
@@ -1076,7 +1081,7 @@ class Analyzer(override val catalogManager:
CatalogManager)
// look at `AnalysisContext.catalogAndNamespace` when resolving relations
with single-part name.
// If `AnalysisContext.catalogAndNamespace` is non-empty, analyzer will
expand single-part names
// with it, instead of current catalog and namespace.
-    def resolveViews(plan: LogicalPlan, performCheck: Boolean = false): LogicalPlan = plan match {
+    def resolveViews(plan: LogicalPlan): LogicalPlan = plan match {
// The view's child should be a logical plan parsed from the
`desc.viewText`, the variable
// `viewText` should be defined, or else we throw an error on the
generation of the View
// operator.
@@ -1097,16 +1102,10 @@ class Analyzer(override val catalogManager:
CatalogManager)
}
// Fail the analysis eagerly because outside AnalysisContext, the
unresolved operators
// inside a view maybe resolved incorrectly.
- // But for commands like `DropViewCommand`, resolving view is
unnecessary even though
- // there is unresolved node. So use the `performCheck` flag to skip
the analysis check
- // for these commands.
- // TODO(SPARK-34504): avoid unnecessary view resolving and remove the
`performCheck` flag
- if (performCheck) {
- checkAnalysis(newChild)
- }
+ checkAnalysis(newChild)
view.copy(child = newChild)
case p @ SubqueryAlias(_, view: View) =>
- p.copy(child = resolveViews(view, performCheck))
+ p.copy(child = resolveViews(view))
case _ => plan
}
@@ -1144,7 +1143,7 @@ class Analyzer(override val catalogManager:
CatalogManager)
case u: UnresolvedRelation =>
lookupRelation(u.multipartIdentifier, u.options, u.isStreaming)
- .map(resolveViews(_, performCheck = true)).getOrElse(u)
+ .map(resolveViews).getOrElse(u)
case u @ UnresolvedTable(identifier, cmd) =>
lookupTableOrView(identifier).map {
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
index 88218b1..52aa6d82 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
@@ -293,6 +293,27 @@ abstract class SQLViewTestSuite extends QueryTest with
SQLTestUtils {
}
}
}
+
+ test("SPARK-34504: drop an invalid view") {
+ // TODO: fix dropping non-existing global temp views.
+ assume(viewTypeString != "GLOBAL TEMPORARY VIEW")
+ withTable("t") {
+ sql("CREATE TABLE t(s STRUCT<i: INT, j: INT>) USING json")
+ val viewName = createView("v", "SELECT s.i FROM t")
+ withView(viewName) {
+ assert(spark.table(viewName).collect().isEmpty)
+
+        // re-create the table without nested field `i` which is referred by the view.
+ sql("DROP TABLE t")
+ sql("CREATE TABLE t(s STRUCT<j: INT>) USING json")
+ val e = intercept[AnalysisException](spark.table(viewName))
+ assert(e.message.contains("No such struct field i in j"))
+
+ // drop invalid view should be fine
+ sql(s"DROP VIEW $viewName")
+ }
+ }
+ }
}
class LocalTempViewTestSuite extends SQLViewTestSuite with SharedSparkSession {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]