Github user gatorsmile commented on a diff in the pull request:

    https://github.com/apache/spark/pull/12633#discussion_r60839961
  
    --- Diff: 
sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala ---
    @@ -85,68 +88,74 @@ case class CreateViewCommand(
         } else {
           // Create the view if it doesn't exist.
           sessionState.catalog.createTable(
    -        prepareTable(sqlContext, analzyedPlan), ignoreIfExists = false)
    +        prepareTable(sqlContext, analyzedPlan), ignoreIfExists = false)
         }
     
         Seq.empty[Row]
       }
     
    -  private def prepareTable(sqlContext: SQLContext, analzyedPlan: 
LogicalPlan): CatalogTable = {
    -    val expandedText = if (sqlContext.conf.canonicalView) {
    -      try rebuildViewQueryString(sqlContext, analzyedPlan) catch {
    -        case NonFatal(e) => wrapViewTextWithSelect(analzyedPlan)
    +  /**
     +   * Returns a [[CatalogTable]] that can be used to save in the catalog. 
This method canonicalizes the
     +   * SQL based on the analyzed plan, and also creates the proper schema 
for the view.
    +   */
    +  private def prepareTable(sqlContext: SQLContext, analyzedPlan: 
LogicalPlan): CatalogTable = {
    +    val viewSQL: String =
    +      if (sqlContext.conf.canonicalView) {
    +        val logicalPlan =
    +          if (tableDesc.schema.isEmpty) {
    +            analyzedPlan
    +          } else {
    +            val projectList = 
analyzedPlan.output.zip(tableDesc.schema).map {
    +              case (attr, col) => Alias(attr, col.name)()
    +            }
    +            sqlContext.executePlan(Project(projectList, 
analyzedPlan)).analyzed
    +          }
    +        new SQLBuilder(logicalPlan).toSQL
    +      } else {
    +        // When user specified column names for view, we should create a 
project to do the renaming.
    +        // When no column name specified, we still need to create a 
project to declare the columns
    +        // we need, to make us more robust to top level `*`s.
    +        val viewOutput = {
    +          val columnNames = analyzedPlan.output.map(f => quote(f.name))
    +          if (tableDesc.schema.isEmpty) {
    +            columnNames.mkString(", ")
    +          } else {
    +            columnNames.zip(tableDesc.schema.map(f => quote(f.name))).map {
    +              case (name, alias) => s"$name AS $alias"
    +            }.mkString(", ")
    +          }
    +        }
    +
    +        val viewText = tableDesc.viewText.get
    +        val viewName = quote(tableDesc.identifier.table)
    +        s"SELECT $viewOutput FROM ($viewText) $viewName"
           }
    -    } else {
    -      wrapViewTextWithSelect(analzyedPlan)
    +
    +    // Validate the view SQL - make sure we can parse it and analyze it.
    +    // If we cannot analyze the generated query, there is probably a bug 
in SQL generation.
    +    try {
    +      sqlContext.sql(viewSQL).queryExecution.assertAnalyzed()
    +    } catch {
    +      case NonFatal(e) =>
    +        throw new RuntimeException(
    +          "Failed to analyze the canonicalized SQL. It is possible there 
is a bug in Spark.", e)
         }
     
    -    val viewSchema = {
    +    val viewSchema: Seq[CatalogColumn] = {
           if (tableDesc.schema.isEmpty) {
    -        analzyedPlan.output.map { a =>
    +        analyzedPlan.output.map { a =>
               CatalogColumn(a.name, a.dataType.simpleString)
             }
           } else {
    -        analzyedPlan.output.zip(tableDesc.schema).map { case (a, col) =>
    +        analyzedPlan.output.zip(tableDesc.schema).map { case (a, col) =>
               CatalogColumn(col.name, a.dataType.simpleString, nullable = 
true, col.comment)
             }
           }
         }
     
    -    tableDesc.copy(schema = viewSchema, viewText = Some(expandedText))
    -  }
    -
    -  private def wrapViewTextWithSelect(analzyedPlan: LogicalPlan): String = {
    -    // When user specified column names for view, we should create a 
project to do the renaming.
    -    // When no column name specified, we still need to create a project to 
declare the columns
    -    // we need, to make us more robust to top level `*`s.
    -    val viewOutput = {
    -      val columnNames = analzyedPlan.output.map(f => quote(f.name))
    -      if (tableDesc.schema.isEmpty) {
    -        columnNames.mkString(", ")
    -      } else {
    -        columnNames.zip(tableDesc.schema.map(f => quote(f.name))).map {
    -          case (name, alias) => s"$name AS $alias"
    -        }.mkString(", ")
    -      }
    -    }
    -
    -    val viewText = tableDesc.viewText.get
    -    val viewName = quote(tableDesc.identifier.table)
    -    s"SELECT $viewOutput FROM ($viewText) $viewName"
    -  }
    -
    -  private def rebuildViewQueryString(sqlContext: SQLContext, analzyedPlan: 
LogicalPlan): String = {
    -    val logicalPlan = if (tableDesc.schema.isEmpty) {
    -      analzyedPlan
    -    } else {
    -      val projectList = analzyedPlan.output.zip(tableDesc.schema).map {
    -        case (attr, col) => Alias(attr, col.name)()
    -      }
    -      sqlContext.executePlan(Project(projectList, analzyedPlan)).analyzed
    -    }
    -    new SQLBuilder(logicalPlan).toSQL
    +    tableDesc.copy(schema = viewSchema, viewText = Some(viewSQL))
    --- End diff --
    
    Everything looks great to me. I only have a comment that is not directly 
related to this PR.
    
    It looks a little bit strange to generate a partial `CatalogTable` for a 
`view` in 
[`createView`](https://github.com/apache/spark/blob/c06110187b3e41405fc13aba367abdd4183ed9a6/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala#L1164).
 When we describe this parameter `tableDesc` in `CreateViewCommand`, we say it 
is `the catalog table`. Actually, this is not right. Then, here, we generate 
the final `CatalogTable` by updating two of its fields, `schema` and `viewText`, 
with the correct values.
    
    Anyway, this is not a big deal at all. : )


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to