This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 2823d78213c [HUDI-7314] Hudi Create table support index type check
(#10536)
2823d78213c is described below
commit 2823d78213c479fdc54d014c38ea3f0e2dcb14ca
Author: xuzifu666 <[email protected]>
AuthorDate: Sat Jan 20 07:33:35 2024 +0800
[HUDI-7314] Hudi Create table support index type check (#10536)
Co-authored-by: xuyu <[email protected]>
Co-authored-by: Y Ethan Guo <[email protected]>
---
.../apache/spark/sql/hudi/HoodieOptionConfig.scala | 7 +++++
.../apache/spark/sql/hudi/TestInsertTable.scala | 32 ++++++++++++++++++++++
2 files changed, 39 insertions(+)
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala
index f98608176be..cc2d8903a16 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala
@@ -22,6 +22,7 @@ import org.apache.hudi.avro.HoodieAvroUtils.getRootLevelFieldName
import org.apache.hudi.common.model.{HoodieRecordMerger, HoodieTableType}
import org.apache.hudi.common.table.HoodieTableConfig
import org.apache.hudi.common.util.ValidationUtils
+import org.apache.hudi.config.HoodieIndexConfig
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.StructType
@@ -232,6 +233,12 @@ object HoodieOptionConfig {
tableType.get.equalsIgnoreCase(SQL_VALUE_TABLE_TYPE_COW) ||
tableType.get.equalsIgnoreCase(SQL_VALUE_TABLE_TYPE_MOR),
s"'type' must be '$SQL_VALUE_TABLE_TYPE_COW' or '$SQL_VALUE_TABLE_TYPE_MOR'")
+
+ // validate table index type
+ val indexType = sqlOptions.get(HoodieIndexConfig.INDEX_TYPE.key())
+ if (!indexType.isEmpty) {
+ HoodieIndexConfig.INDEX_TYPE.checkValues(indexType.get)
+ }
}
def buildConf[T](): HoodieSQLOptionBuilder[T] = {
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
index 05a04daf417..eb6e20ee931 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
@@ -2173,6 +2173,38 @@ class TestInsertTable extends HoodieSparkSqlTestBase {
})
}
+ test("Test inaccurate index type") {
+ withRecordType()(withTempDir { tmp =>
+ val targetTable = generateTableName
+
+ assertThrows[IllegalArgumentException] {
+ try {
+ spark.sql(
+ s"""
+ |create table ${targetTable} (
+ | `id` string,
+ | `name` string,
+ | `dt` bigint,
+ | `day` STRING,
+ | `hour` INT
+ |) using hudi
+ |OPTIONS ('hoodie.datasource.write.hive_style_partitioning' 'false', 'hoodie.datasource.meta.sync.enable' 'false', 'hoodie.datasource.hive_sync.enable' 'false')
+ |tblproperties (
+ | 'primaryKey' = 'id',
+ | 'type' = 'mor',
+ | 'preCombineField'='dt',
+ | 'hoodie.index.type' = 'BUCKET_aa',
+ | 'hoodie.bucket.index.hash.field' = 'id',
+ | 'hoodie.bucket.index.num.buckets'=512
+ | )
+ |partitioned by (`day`,`hour`)
+ |location '${tmp.getCanonicalPath}'
+ |""".stripMargin)
+ }
+ }
+ })
+ }
+
test("Test vectorized read nested columns for LegacyHoodieParquetFileFormat") {
withSQLConf(
"hoodie.datasource.read.use.new.parquet.file.format" -> "false",