imback82 commented on a change in pull request #31368:
URL: https://github.com/apache/spark/pull/31368#discussion_r566323136
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
##########
@@ -450,45 +450,40 @@ case class InsertIntoDir(
*
* @param desc A view description(CatalogTable) that provides necessary
information to resolve the
* view.
- * @param output The output of a view operator, this is generated during
planning the view, so that
* we are able to decouple the output from the underlying
structure.
* @param child The logical plan of a view operator, it should be a logical
plan parsed from the
* `CatalogTable.viewText`, should throw an error if the
`viewText` is not defined.
*/
case class View(
desc: CatalogTable,
isTempView: Boolean,
- output: Seq[Attribute],
- child: LogicalPlan) extends LogicalPlan with MultiInstanceRelation {
-
- override def producedAttributes: AttributeSet = outputSet
-
- override lazy val resolved: Boolean = child.resolved
-
- override def children: Seq[LogicalPlan] = child :: Nil
+ child: LogicalPlan) extends UnaryNode {
- override def newInstance(): LogicalPlan = copy(output =
output.map(_.newInstance()))
+ override def output: Seq[Attribute] = child.output
override def simpleString(maxFields: Int): String = {
s"View (${desc.identifier}, ${output.mkString("[", ",", "]")})"
}
- override def doCanonicalize(): LogicalPlan = {
- def sameOutput(
- outerProject: Seq[NamedExpression], innerProject: Seq[NamedExpression]):
Boolean = {
- outerProject.length == innerProject.length &&
- outerProject.zip(innerProject).forall {
- case(outer, inner) => outer.name == inner.name && outer.dataType ==
inner.dataType
- }
- }
+ override def doCanonicalize(): LogicalPlan = child match {
+ case p: Project if p.resolved && canRemoveProject(p) =>
p.child.canonicalized
+ case _ => child.canonicalized
+ }
- val eliminated = EliminateView(this) match {
- case Project(viewProjectList, child @ Project(queryProjectList, _))
- if sameOutput(viewProjectList, queryProjectList) =>
- child
- case other => other
+ // When resolving a SQL view, we use an extra Project to add cast and alias
to make sure the view
+ // output schema doesn't change even if the table referenced by the view is
changed after view
+ // creation. We should remove this extra Project during canonicalize if it
does nothing.
+ // See more details in `SessionCatalog.fromCatalogTable`.
+ private def canRemoveProject(p: Project): Boolean = {
+ p.output.length == p.child.output.length &&
p.projectList.zipWithIndex.forall {
Review comment:
nit: you can do `p.projectList.zip(p.child.output).forall` instead so
that you don't need to reference the output by index?
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
##########
@@ -230,7 +230,7 @@ object LogicalPlanIntegrity {
// NOTE: we still need to filter resolved expressions here because the
output of
// some resolved logical plans can have unresolved references,
// e.g., outer references in `ExistenceJoin`.
- p.output.filter(_.resolved).map { a => (a.exprId, a.dataType) }
+ p.output.filter(_.resolved).map { a => (a.exprId, a.dataType.asNullable)
}
Review comment:
should we add a test for the query that failed with the previous logic?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]