pengzhiwei2018 commented on a change in pull request #2645:
URL: https://github.com/apache/hudi/pull/2645#discussion_r607441239
##########
File path:
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala
##########
@@ -56,13 +68,46 @@ case class CreateHoodieTableCommand(table: CatalogTable,
ignoreIfExists: Boolean
return Seq.empty[Row]
// scalastyle:on
} else {
- throw new IllegalArgumentException(s"Table
${table.identifier.unquotedString} already exists.")
+ throw new IllegalArgumentException(s"Table $tableName already exists.")
}
}
- // Add the meta fields to the schema,
- val newSchema = addMetaFields(table.schema)
+
var path = getTableLocation(table, sparkSession)
.getOrElse(s"Missing path for table ${table.identifier}")
+ val conf = sparkSession.sessionState.newHadoopConf()
+ val isTableExists = tableExists(path, conf)
+ // Get the schema & table options
+ val (newSchema, tableOptions) = if (table.tableType ==
CatalogTableType.EXTERNAL &&
+ isTableExists) {
+ // If this is an external table & the table has already exists in the
location,
+ // load the schema from the table meta.
+ assert(table.schema.isEmpty,
+ s"Should not specified table schema for an exists hoodie external " +
+ s"table: ${table.identifier.unquotedString}")
+ // Get Schema from the external table
+ val metaClient = HoodieTableMetaClient.builder()
+ .setBasePath(path)
+ .setConf(conf)
+ .build()
+ val schemaResolver = new TableSchemaResolver(metaClient)
+ val avroSchema = schemaResolver.getTableAvroSchema(true)
+ val tableSchema =
SchemaConverters.toSqlType(avroSchema).dataType.asInstanceOf[StructType]
+ // Get options from the external table
+ val options = HoodieOptionConfig.mappingTableConfigToSqlOption(
+ metaClient.getTableConfig.getProps.asScala.toMap)
+ (tableSchema, options)
+ } else {
Review comment:
Yes, we should support both managed and unmanaged (external) tables for Hudi,
just like other Spark datasource tables. I think this complies with the SQL
specification.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org