yihua commented on code in PR #11816:
URL: https://github.com/apache/hudi/pull/11816#discussion_r1751101788


##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java:
##########
@@ -66,29 +66,25 @@ public class KeyGenUtils {
   public static KeyGeneratorType inferKeyGeneratorType(
       Option<String> recordsKeyFields, String partitionFields) {
     boolean autoGenerateRecordKeys = !recordsKeyFields.isPresent();
-    if (autoGenerateRecordKeys) {
-      return inferKeyGeneratorTypeForAutoKeyGen(partitionFields);
-    } else {
-      if (!StringUtils.isNullOrEmpty(partitionFields)) {
-        int numPartFields = partitionFields.split(",").length;
-        int numRecordKeyFields = recordsKeyFields.get().split(",").length;
-        if (numPartFields == 1 && numRecordKeyFields == 1) {
-          return KeyGeneratorType.SIMPLE;
-        }
-        return KeyGeneratorType.COMPLEX;
-      }
-      return KeyGeneratorType.NON_PARTITION;
+    KeyGeneratorType partitionKeyGeneratorType = 
inferKeyGeneratorTypeFromPartitionFields(partitionFields);
+    if (autoGenerateRecordKeys || partitionKeyGeneratorType != 
KeyGeneratorType.SIMPLE) {
+      return partitionKeyGeneratorType;
     }
+    int numRecordKeyFields = recordsKeyFields.get().split(",").length;
+    return numRecordKeyFields == 1 ? KeyGeneratorType.SIMPLE : 
KeyGeneratorType.COMPLEX;
   }
 
   // When auto record key gen is enabled, our inference will be based on 
partition path only.
-  private static KeyGeneratorType inferKeyGeneratorTypeForAutoKeyGen(String 
partitionFields) {
+  static KeyGeneratorType inferKeyGeneratorTypeFromPartitionFields(String 
partitionFields) {
     if (!StringUtils.isNullOrEmpty(partitionFields)) {
-      int numPartFields = partitionFields.split(",").length;
-      if (numPartFields == 1) {
+      String[] partitonFields = partitionFields.split(",");
+      if 
(partitonFields[0].contains(BaseKeyGenerator.CUSTOM_KEY_GENERATOR_SPLIT_REGEX)) 
{
+        return KeyGeneratorType.CUSTOM;

Review Comment:
   What about other engines?  Do they have similar inference logic that needs 
to be fixed?



##########
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/keygen/TestKeyGenUtils.java:
##########
@@ -45,12 +46,68 @@ public void testInferKeyGeneratorType() {
     assertEquals(
         KeyGeneratorType.COMPLEX,
         KeyGenUtils.inferKeyGeneratorType(Option.of("col1,col2"), 
"partition1,partition2"));
+
+    assertEquals(
+        KeyGeneratorType.CUSTOM,
+        KeyGenUtils.inferKeyGeneratorType(Option.of("col1"), 
"partition1:simple,partition2:timestamp"));
+    assertEquals(
+        KeyGeneratorType.CUSTOM,
+        KeyGenUtils.inferKeyGeneratorType(Option.of("col1,col2"), 
"partition1:simple"));
+    assertEquals(
+        KeyGeneratorType.CUSTOM,
+        KeyGenUtils.inferKeyGeneratorType(Option.of("col1,col2"), 
"partition1:simple,partition2:timestamp"));
+
     assertEquals(
         KeyGeneratorType.NON_PARTITION,
         KeyGenUtils.inferKeyGeneratorType(Option.of("col1,col2"), ""));
     assertEquals(
         KeyGeneratorType.NON_PARTITION,
         KeyGenUtils.inferKeyGeneratorType(Option.of("col1,col2"), null));
+
+    // Test key generator type with auto generation of record keys
+    assertEquals(
+        KeyGeneratorType.SIMPLE,
+        KeyGenUtils.inferKeyGeneratorType(Option.empty(), "partition1"));
+    assertEquals(
+        KeyGeneratorType.COMPLEX,
+        KeyGenUtils.inferKeyGeneratorType(Option.empty(), 
"partition1,partition2"));
+    assertEquals(
+        KeyGeneratorType.CUSTOM,
+        KeyGenUtils.inferKeyGeneratorType(Option.empty(), 
"partition1:simple"));
+    assertEquals(
+        KeyGeneratorType.CUSTOM,
+        KeyGenUtils.inferKeyGeneratorType(Option.empty(), 
"partition1:simple,partition2:timestamp"));
+    assertEquals(
+        KeyGeneratorType.NON_PARTITION,
+        KeyGenUtils.inferKeyGeneratorType(Option.empty(), ""));
+    assertEquals(
+        KeyGeneratorType.NON_PARTITION,
+        KeyGenUtils.inferKeyGeneratorType(Option.empty(), null));
+  }
+
+  @Test
+  public void testInferKeyGeneratorTypeFromPartitionFields() {
+    assertEquals(
+        KeyGeneratorType.SIMPLE,
+        KeyGenUtils.inferKeyGeneratorTypeFromPartitionFields("partition1"));
+    assertEquals(
+        KeyGeneratorType.COMPLEX,
+        
KeyGenUtils.inferKeyGeneratorTypeFromPartitionFields("partition1,partition1"));

Review Comment:
   ```suggestion
           
KeyGenUtils.inferKeyGeneratorTypeFromPartitionFields("partition1,partition2"));
   ```



##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala:
##########
@@ -313,7 +315,9 @@ class HoodieCatalogTable(val spark: SparkSession, var 
table: CatalogTable) exten
           
KeyGeneratorType.valueOf(originTableConfig(HoodieTableConfig.KEY_GENERATOR_TYPE.key)).getClassName)
     } else {
       val primaryKeys = 
table.properties.getOrElse(SQL_KEY_TABLE_PRIMARY_KEY.sqlKeyName, 
table.storage.properties.get(SQL_KEY_TABLE_PRIMARY_KEY.sqlKeyName)).toString
-      val partitions = table.partitionColumnNames.mkString(",")
+      val partitionFieldsOpt = 
originTableConfig.get(HoodieTableConfig.PARTITION_FIELDS.key)
+        
.orElse(sqlOptions.get(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()))

Review Comment:
   Avoid using `.key` and use `ConfigUtils` instead so alternative keys are 
considered.



##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkSqlWithCustomKeyGenerator.scala:
##########
@@ -442,13 +442,92 @@ class TestSparkSqlWithCustomKeyGenerator extends 
HoodieSparkSqlTestBase {
     }
   }
 
+  test("Test create table with custom key generator") {
+    withTempDir { tmp => {
+      val tableName = generateTableName
+      val tablePath = tmp.getCanonicalPath + "/" + tableName
+      val writePartitionFields = "ts:timestamp"
+      val dateFormat = "yyyy/MM/dd"
+      val tsGenFunc = (ts: Integer) => TS_FORMATTER_FUNC_WITH_FORMAT.apply(ts, 
dateFormat)
+      val customPartitionFunc = (ts: Integer, _: String) => "ts=" + 
tsGenFunc.apply(ts)
+
+      spark.sql(
+        s"""
+           |create table ${tableName} (
+           |  `id` INT,
+           |  `name` STRING,
+           |  `price` DECIMAL(5, 1),
+           |  `ts` INT,
+           |  `segment` STRING
+           |) using hudi
+           |tblproperties (
+           |  'primaryKey' = 'id,name',
+           |  'type' = 'mor',
+           |  'preCombineField'='name',
+           |  'hoodie.datasource.write.keygenerator.class' = 
'$CUSTOM_KEY_GEN_CLASS_NAME',
+           |  'hoodie.datasource.write.partitionpath.field' = 
'$writePartitionFields',

Review Comment:
   Can we write a test that does not use 
`hoodie.datasource.write.partitionpath.field` in `tblproperties`?  It would 
create a table using the Spark datasource with a custom keygen and a 
timestamp-based partition field, and then use Spark SQL without passing in any 
write configs, to make sure the partition path value is generated correctly.



##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java:
##########
@@ -66,29 +66,25 @@ public class KeyGenUtils {
   public static KeyGeneratorType inferKeyGeneratorType(
       Option<String> recordsKeyFields, String partitionFields) {
     boolean autoGenerateRecordKeys = !recordsKeyFields.isPresent();
-    if (autoGenerateRecordKeys) {
-      return inferKeyGeneratorTypeForAutoKeyGen(partitionFields);
-    } else {
-      if (!StringUtils.isNullOrEmpty(partitionFields)) {
-        int numPartFields = partitionFields.split(",").length;
-        int numRecordKeyFields = recordsKeyFields.get().split(",").length;
-        if (numPartFields == 1 && numRecordKeyFields == 1) {
-          return KeyGeneratorType.SIMPLE;
-        }
-        return KeyGeneratorType.COMPLEX;
-      }
-      return KeyGeneratorType.NON_PARTITION;
+    KeyGeneratorType partitionKeyGeneratorType = 
inferKeyGeneratorTypeFromPartitionFields(partitionFields);
+    if (autoGenerateRecordKeys || partitionKeyGeneratorType != 
KeyGeneratorType.SIMPLE) {

Review Comment:
   Should `KeyGeneratorType.COMPLEX` be included too, based on the logic below?



##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala:
##########
@@ -212,7 +211,9 @@ class HoodieCatalogTable(val spark: SparkSession, var 
table: CatalogTable) exten
     } else {
       val (recordName, namespace) = 
AvroConversionUtils.getAvroRecordNameAndNamespace(table.identifier.table)
       val schema = SchemaConverters.toAvroType(dataSchema, nullable = false, 
recordName, namespace)
-      val partitionColumns = if (table.partitionColumnNames.isEmpty) {
+      val partitionColumns = if 
(tableConfigs.contains(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key())) {
+        tableConfigs(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key())
+      } else if (table.partitionColumnNames.isEmpty) {

Review Comment:
   Same here: avoid the `.key` calls.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to