imback82 commented on a change in pull request #31652:
URL: https://github.com/apache/spark/pull/31652#discussion_r591735546
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
##########
@@ -596,19 +546,102 @@ object ViewHelper {
(collectTempViews(child), collectTempFunctions(child))
}
+ /**
+ * Returns a [[TemporaryViewRelation]] that contains information about a
temporary view
+ * to create, given an analyzed plan of the view. If a temp view is to be
replaced and it is
+ * cached, it will be uncached before being replaced.
+ *
+ * @param name the name of the temporary view to create/replace.
+ * @param session the spark session.
+ * @param replace if true and the existing view is cached, it will be
uncached.
+ * @param getRawTempView the function that returns an optional raw plan of
the local or
+ * global temporary view.
+ * @param originalText the original SQL text of this view, can be None if
this view is created via
+ * Dataset API or
spark.sql.legacy.storeAnalyzedPlanForView is set to true.
+ * @param userSpecifiedColumns the output column names and optional comments
specified by users,
+ * can be Nil if not specified.
+ * @param analyzedPlan the logical plan that represents the view; this is
used to generate the
+ * logical plan for temporary view and the view schema.
+ */
+ def createTemporaryViewRelation(
+ name: TableIdentifier,
+ session: SparkSession,
+ replace: Boolean,
+ getRawTempView: String => Option[LogicalPlan],
+ originalText: Option[String],
+ userSpecifiedColumns: Seq[(String, Option[String])],
+ analyzedPlan: LogicalPlan): TemporaryViewRelation = {
+ val aliasedPlan = aliasPlan(session, analyzedPlan, userSpecifiedColumns)
+ val uncache = getRawTempView(name.table).map { r =>
+ needsToUncache(r, aliasedPlan)
+ }.getOrElse(false)
+ if (replace && uncache) {
+ logInfo(s"Try to uncache ${name.quotedString} before replacing.")
+ checkCyclicViewReference(analyzedPlan, Seq(name), name)
+ CommandUtils.uncacheTableOrView(session, name.quotedString)
+ }
+ if (!conf.storeAnalyzedPlanForView && originalText.nonEmpty) {
+ TemporaryViewRelation(
+ prepareTemporaryView(
+ name,
+ session,
+ analyzedPlan,
+ aliasedPlan.schema,
+ originalText.get))
+ } else {
+ TemporaryViewRelation(
+ prepareTemporaryViewStoringAnalyzedPlan(name, aliasedPlan),
+ Some(aliasedPlan))
+ }
+ }
+
+ /**
+ * If `userSpecifiedColumns` is defined, alias the analyzed plan to the user
specified columns,
+ * else return the analyzed plan directly.
+ */
+ def aliasPlan(
+ session: SparkSession,
+ analyzedPlan: LogicalPlan,
+ userSpecifiedColumns: Seq[(String, Option[String])]): LogicalPlan = {
+ if (userSpecifiedColumns.isEmpty) {
+ analyzedPlan
+ } else {
+ val projectList = analyzedPlan.output.zip(userSpecifiedColumns).map {
+ case (attr, (colName, None)) => Alias(attr, colName)()
+ case (attr, (colName, Some(colComment))) =>
+ val meta = new MetadataBuilder().putString("comment",
colComment).build()
+ Alias(attr, colName)(explicitMetadata = Some(meta))
+ }
+ session.sessionState.executePlan(Project(projectList,
analyzedPlan)).analyzed
+ }
+ }
+
+ /**
+ * Checks if need to uncache the temp view being replaced.
+ */
+ private def needsToUncache(
+ rawTempView: LogicalPlan,
+ aliasedPlan: LogicalPlan): Boolean = rawTempView match {
+ // If TemporaryViewRelation doesn't store the analyzed view, always
uncache.
+ case TemporaryViewRelation(_, None) => true
+ // Do not need to uncache if the to-be-replaced temp view plan and the new
plan are the
+ // same-result plans.
+ case TemporaryViewRelation(_, Some(p)) => !p.sameResult(aliasedPlan)
+ case p => !p.sameResult(aliasedPlan)
Review comment:
We need to first update `CREATE TEMP VIEW USING`
(https://github.com/apache/spark/blob/3a299aa6480ac22501512cd0310d31a441d7dfdc/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala#L93-L100),
which I will get to right after this PR.
Once that follow-up PR is done, we can update `createTempView` to take in
`TemporaryViewRelation`
(https://github.com/apache/spark/pull/31273/files#r580757641), and then I can
safely remove this line.
Let me actually create a JIRA to capture these as subtasks.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]