Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22514#discussion_r237753433
  
    --- Diff: sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala ---
    @@ -95,9 +77,98 @@ case class CreateHiveTableAsSelectCommand(
         Seq.empty[Row]
       }
     
    +  def getDataWritingCommand(
    +    catalog: SessionCatalog,
    +    tableDesc: CatalogTable,
    +    tableExists: Boolean): DataWritingCommand
    +
       override def argString: String = {
         s"[Database:${tableDesc.database}, " +
         s"TableName: ${tableDesc.identifier.table}, " +
         s"InsertIntoHiveTable]"
       }
     }
    +
    +/**
    + * Create table and insert the query result into it.
    + *
    + * @param tableDesc the table description, which may contain serde, storage handler, etc.
    + * @param query the query whose result will be inserted into the new relation
    + * @param mode SaveMode
    + */
    +case class CreateHiveTableAsSelectCommand(
    +    tableDesc: CatalogTable,
    +    query: LogicalPlan,
    +    outputColumnNames: Seq[String],
    +    mode: SaveMode)
    +  extends CreateHiveTableAsSelectBase {
    +
    +  override def getDataWritingCommand(
    +      catalog: SessionCatalog,
    +      tableDesc: CatalogTable,
    +      tableExists: Boolean): DataWritingCommand = {
    +    if (tableExists) {
    +      InsertIntoHiveTable(
    +        tableDesc,
    +        Map.empty,
    +        query,
    +        overwrite = false,
    +        ifPartitionNotExists = false,
    +        outputColumnNames = outputColumnNames)
    +    } else {
    +      // For CTAS, there are no static partition values to insert.
    +      val partition = tableDesc.partitionColumnNames.map(_ -> None).toMap
    +      InsertIntoHiveTable(
    +        tableDesc,
    +        partition,
    +        query,
    +        overwrite = true,
    +        ifPartitionNotExists = false,
    +        outputColumnNames = outputColumnNames)
    +    }
    +  }
    +}
    +
    +/**
    + * Create table and insert the query result into it. This creates the Hive table but inserts
    + * the query result into it using a data source.
    + *
    + * @param tableDesc the table description, which may contain serde, storage handler, etc.
    + * @param query the query whose result will be inserted into the new relation
    + * @param mode SaveMode
    + */
    +case class CreateHiveTableAsSelectWithDataSourceCommand(
    --- End diff --
    
    `OptimizedCreateHiveTableAsSelectCommand`?
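    
    If that rename is adopted, the class would presumably keep the shape shown in the diff above and only change its name. A minimal sketch of the suggested naming (the body is stubbed out, since the concrete data-source write it would return is not part of this diff):
    
    ```scala
    case class OptimizedCreateHiveTableAsSelectCommand(
        tableDesc: CatalogTable,
        query: LogicalPlan,
        outputColumnNames: Seq[String],
        mode: SaveMode)
      extends CreateHiveTableAsSelectBase {
    
      override def getDataWritingCommand(
          catalog: SessionCatalog,
          tableDesc: CatalogTable,
          tableExists: Boolean): DataWritingCommand = {
        // Hypothetical stub: return the data-source write (rather than
        // InsertIntoHiveTable) for the converted table.
        ???
      }
    }
    ```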

