ljfgem commented on code in PR #35636:
URL: https://github.com/apache/spark/pull/35636#discussion_r953092252


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala:
##########
@@ -447,6 +454,74 @@ class Analyzer(override val catalogManager: CatalogManager)
     }
   }
 
+  /**
+   * Substitute persisted views in parsed plans with parsed view sql text.
+   */
+  case class ViewSubstitution(sqlParser: ParserInterface) extends 
Rule[LogicalPlan] {
+
+    def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp {
+      case u @ UnresolvedRelation(nameParts, _, _) if 
v1SessionCatalog.isTempView(nameParts) =>
+        u
+      case u @ UnresolvedRelation(
+          parts @ NonSessionCatalogAndIdentifier(catalog, ident), _, _) if 
!isSQLOnFile(parts) =>
+        CatalogV2Util.loadView(catalog, ident)
+            .map(createViewRelation(parts.quoted, _))
+            .getOrElse(u)
+    }
+
+    private def isSQLOnFile(parts: Seq[String]): Boolean = parts match {
+      case Seq(_, path) if path.contains("/") => true
+      case _ => false
+    }
+
+    private def createViewRelation(name: String, view: V2View): LogicalPlan = {
+      if (!catalogManager.isCatalogRegistered(view.currentCatalog)) {
+        throw new AnalysisException(
+          s"Invalid current catalog '${view.currentCatalog}' in view '$name'")
+      }
+
+      val child = parseViewText(name, view.sql)
+      val desc = V2ViewDescription(name, view)
+      val qualifiedChild = desc.viewCatalogAndNamespace match {
+        case Seq() =>
+          // Views from Spark 2.2 or prior do not store catalog or namespace,
+          // however its sql text should already be fully qualified.
+          child
+        case catalogAndNamespace =>
+          // Substitute CTEs within the view before qualifying table 
identifiers
+          qualifyTableIdentifiers(CTESubstitution.apply(child), 
catalogAndNamespace)
+      }
+
+      // The relation is a view, so we wrap the relation by:
+      // 1. Add a [[View]] operator over the relation to keep track of the 
view desc;
+      // 2. Wrap the logical plan in a [[SubqueryAlias]] which tracks the name 
of the view.
+      SubqueryAlias(name, View(desc, false, qualifiedChild))

Review Comment:
   Hi @jzhuge, I found that with the original Hive session catalog, the 
resolved plan of the view `select * from default.t` looks like:
   ```
   'SubqueryAlias spark_catalog.default.test_view
   +- View (`default`.`test_view`, ['intCol,'structCol,'boolCol])
      +- 'Project [upcast(getviewcolumnbynameandordinal(`default`.`test_view`, 
intCol, 0, 1), IntegerType) AS intCol#6, 
upcast(getviewcolumnbynameandordinal(`default`.`test_view`, structCol, 0, 1), 
StructField(doubleCol,DoubleType,true), StructField(stringCol,StringType,true)) 
AS structCol#7, upcast(getviewcolumnbynameandordinal(`default`.`test_view`, 
boolCol, 0, 1), BooleanType) AS boolCol#8]
         +- 'Project [*]
            +- 'UnresolvedRelation [default, t], [], false
   ```
   Looks like the project node:
   ```
      +- 'Project [upcast(getviewcolumnbynameandordinal(`default`.`test_view`, 
intCol, 0, 1), IntegerType) AS intCol#6, 
upcast(getviewcolumnbynameandordinal(`default`.`test_view`, structCol, 0, 1), 
StructField(doubleCol,DoubleType,true), StructField(stringCol,StringType,true)) 
AS structCol#7, upcast(getviewcolumnbynameandordinal(`default`.`test_view`, 
boolCol, 0, 1), BooleanType) AS boolCol#8]
   ```
   is added based on the schema stored in the view's desc (`CatalogTable`), so 
that the schema info (casing and nullability) is preserved.
   
   But in the current implementation, there is no such project node based on 
the view's schema. Do you think we need to add a similar project node if the 
view's schema is provided?
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to