mccheah commented on a change in pull request #24570: [SPARK-24923][SQL]
Implement v2 CreateTableAsSelect
URL: https://github.com/apache/spark/pull/24570#discussion_r283592816
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala
##########
@@ -47,6 +51,60 @@ case class WriteToDataSourceV2(batchWrite: BatchWrite,
query: LogicalPlan)
override def output: Seq[Attribute] = Nil
}
+/**
+ * Physical plan node for v2 create table as select.
+ *
+ * A new table will be created using the schema of the query, and rows from the query are appended.
+ * If either table creation or the append fails, the table will be deleted. This implementation does
+ * not provide an atomic CTAS.
+ */
+case class CreateTableAsSelectExec(
+ catalog: TableCatalog,
+ ident: Identifier,
+ partitioning: Seq[Transform],
+ query: SparkPlan,
+ properties: Map[String, String],
+ writeOptions: CaseInsensitiveStringMap,
+ ifNotExists: Boolean) extends V2TableWriteExec {
+
+ import org.apache.spark.sql.catalog.v2.CatalogV2Implicits.IdentifierHelper
+
+ override protected def doExecute(): RDD[InternalRow] = {
+ if (catalog.tableExists(ident)) {
+ if (ifNotExists) {
+ return sparkContext.parallelize(Seq.empty, 1)
+ }
+
+ throw new TableAlreadyExistsException(ident)
+ }
+
+ Utils.tryWithSafeFinallyAndFailureCallbacks({
+ catalog.createTable(ident, query.schema, partitioning.toArray,
properties.asJava) match {
+ case table: SupportsWrite =>
+ val builder = table.newWriteBuilder(writeOptions)
+ .withInputDataSchema(query.schema)
+ .withQueryId(UUID.randomUUID().toString)
+ val batchWrite = builder match {
+ case supportsSaveMode: SupportsSaveMode =>
+ supportsSaveMode.mode(SaveMode.Append).buildForBatch()
Review comment:
I'm not sure why we have to specifically tell the writer to use append mode.
Can you elaborate? I think I'm missing something. It would be simpler to remove
this branch entirely if possible.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]