This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 2823d78213c [HUDI-7314] Hudi Create table support index type check 
(#10536)
2823d78213c is described below

commit 2823d78213c479fdc54d014c38ea3f0e2dcb14ca
Author: xuzifu666 <[email protected]>
AuthorDate: Sat Jan 20 07:33:35 2024 +0800

    [HUDI-7314] Hudi Create table support index type check (#10536)
    
    Co-authored-by: xuyu <[email protected]>
    Co-authored-by: Y Ethan Guo <[email protected]>
---
 .../apache/spark/sql/hudi/HoodieOptionConfig.scala |  7 +++++
 .../apache/spark/sql/hudi/TestInsertTable.scala    | 32 ++++++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git 
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala
 
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala
index f98608176be..cc2d8903a16 100644
--- 
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala
+++ 
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala
@@ -22,6 +22,7 @@ import 
org.apache.hudi.avro.HoodieAvroUtils.getRootLevelFieldName
 import org.apache.hudi.common.model.{HoodieRecordMerger, HoodieTableType}
 import org.apache.hudi.common.table.HoodieTableConfig
 import org.apache.hudi.common.util.ValidationUtils
+import org.apache.hudi.config.HoodieIndexConfig
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.types.StructType
 
@@ -232,6 +233,12 @@ object HoodieOptionConfig {
       tableType.get.equalsIgnoreCase(SQL_VALUE_TABLE_TYPE_COW) ||
       tableType.get.equalsIgnoreCase(SQL_VALUE_TABLE_TYPE_MOR),
       s"'type' must be '$SQL_VALUE_TABLE_TYPE_COW' or 
'$SQL_VALUE_TABLE_TYPE_MOR'")
+
+    // validate table index type
+    val indexType = sqlOptions.get(HoodieIndexConfig.INDEX_TYPE.key())
+    if (!indexType.isEmpty) {
+      HoodieIndexConfig.INDEX_TYPE.checkValues(indexType.get)
+    }
   }
 
   def buildConf[T](): HoodieSQLOptionBuilder[T] = {
diff --git 
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
 
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
index 05a04daf417..eb6e20ee931 100644
--- 
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
+++ 
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
@@ -2173,6 +2173,38 @@ class TestInsertTable extends HoodieSparkSqlTestBase {
     })
   }
 
+  test("Test inaccurate index type") {
+    withRecordType()(withTempDir { tmp =>
+      val targetTable = generateTableName
+
+      assertThrows[IllegalArgumentException] {
+        try {
+          spark.sql(
+            s"""
+               |create table ${targetTable} (
+               |  `id` string,
+               |  `name` string,
+               |  `dt` bigint,
+               |  `day` STRING,
+               |  `hour` INT
+               |) using hudi
+               |OPTIONS ('hoodie.datasource.write.hive_style_partitioning' 
'false', 'hoodie.datasource.meta.sync.enable' 'false', 
'hoodie.datasource.hive_sync.enable' 'false')
+               |tblproperties (
+               |  'primaryKey' = 'id',
+               |  'type' = 'mor',
+               |  'preCombineField'='dt',
+               |  'hoodie.index.type' = 'BUCKET_aa',
+               |  'hoodie.bucket.index.hash.field' = 'id',
+               |  'hoodie.bucket.index.num.buckets'=512
+               | )
+               |partitioned by (`day`,`hour`)
+               |location '${tmp.getCanonicalPath}'
+               |""".stripMargin)
+        }
+      }
+    })
+  }
+
   test("Test vectorized read nested columns for 
LegacyHoodieParquetFileFormat") {
     withSQLConf(
       "hoodie.datasource.read.use.new.parquet.file.format" -> "false",

Reply via email to the mailing list.