This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new c51a8cfd5b2 [HUDI-6073] Table create schema should not include metadata fields (#8450)
c51a8cfd5b2 is described below

commit c51a8cfd5b2dad181039d2a1d1af59ba4f0e737a
Author: Danny Chan <[email protected]>
AuthorDate: Fri Apr 14 17:11:28 2023 +0800

    [HUDI-6073] Table create schema should not include metadata fields (#8450)
---
 .../apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala   | 9 +++++++--
 .../test/scala/org/apache/spark/sql/hudi/TestCreateTable.scala   | 8 ++++++--
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala
index 09eb9f2944f..eebe3db0be2 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.catalog
 
 import org.apache.hudi.DataSourceWriteOptions.OPERATION
 import org.apache.hudi.HoodieWriterUtils._
+import org.apache.hudi.avro.HoodieAvroUtils
 import org.apache.hudi.common.config.DFSPropertiesConfiguration
 import org.apache.hudi.common.model.HoodieTableType
 import org.apache.hudi.common.table.HoodieTableConfig.URL_ENCODE_PARTITIONING
@@ -171,6 +172,10 @@ class HoodieCatalogTable(val spark: SparkSession, var table: CatalogTable) exten
   def initHoodieTable(): Unit = {
     logInfo(s"Init hoodie.properties for ${table.identifier.unquotedString}")
     val (finalSchema, tableConfigs) = parseSchemaAndConfigs()
+    // The TableSchemaResolver#getTableAvroSchemaInternal has a premise that
+    // the table create schema does not include the metadata fields.
+    // When flag includeMetadataFields is false, no metadata fields should be included in the resolved schema.
+    val dataSchema = removeMetaFields(finalSchema)
 
     table = table.copy(schema = finalSchema)
 
@@ -187,11 +192,11 @@ class HoodieCatalogTable(val spark: SparkSession, var table: CatalogTable) exten
       HoodieTableMetaClient.withPropertyBuilder()
         .fromProperties(properties)
         .setDatabaseName(catalogDatabaseName)
-        .setTableCreateSchema(SchemaConverters.toAvroType(finalSchema).toString())
+        .setTableCreateSchema(SchemaConverters.toAvroType(dataSchema).toString())
         .initTable(hadoopConf, tableLocation)
     } else {
       val (recordName, namespace) = AvroConversionUtils.getAvroRecordNameAndNamespace(table.identifier.table)
-      val schema = SchemaConverters.toAvroType(finalSchema, false, recordName, namespace)
+      val schema = SchemaConverters.toAvroType(dataSchema, nullable = false, recordName, namespace)
       val partitionColumns = if (table.partitionColumnNames.isEmpty) {
         null
       } else {
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCreateTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCreateTable.scala
index e975ca8a4b7..4ffcfa062e8 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCreateTable.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCreateTable.scala
@@ -30,10 +30,9 @@ import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.CatalogTableType
 import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.getLastCommitMetadata
 import org.apache.spark.sql.types._
-import org.junit.jupiter.api.Assertions.assertFalse
+import org.junit.jupiter.api.Assertions.{assertFalse, assertTrue}
 
 import scala.collection.JavaConverters._
-import scala.collection.Seq
 
 class TestCreateTable extends HoodieSparkSqlTestBase {
 
@@ -86,6 +85,11 @@ class TestCreateTable extends HoodieSparkSqlTestBase {
     assertResult(tableName)(tableConfig.getTableName)
     assertFalse(tableConfig.contains(OPERATION.key()))
 
+    val schemaOpt = tableConfig.getTableCreateSchema
+    assertTrue(schemaOpt.isPresent, "Table create schema should be persisted")
+    assertFalse(schemaOpt.get().getFields.asScala.exists(f => HoodieRecord.HOODIE_META_COLUMNS.contains(f.name())),
+      "Table create schema should not include metadata fields")
+
     spark.sql("use default")
   }
 

Reply via email to