nsivabalan commented on code in PR #13656: URL: https://github.com/apache/hudi/pull/13656#discussion_r2248022283
########## hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/UserProvidedKeyGenerator.java: ########## @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.keygen; + +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.keygen.constant.KeyGeneratorOptions; + +import org.apache.avro.generic.GenericRecord; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.unsafe.types.UTF8String; + +import java.util.Collections; + +import static org.apache.hudi.common.util.ValidationUtils.checkArgument; + +/** + * This class is used for test purpose, and should never be used + * in other ways. + */ +public class UserProvidedKeyGenerator extends BuiltinKeyGenerator { Review Comment: oh man, I thought this is source code. after reading the java docs, realized. can you name it differently. `UserProvidedKeyGeneratorTest` or `MockedUserProvidedKeyGenerator` ########## hudi-common/src/main/java/org/apache/hudi/keygen/constant/KeyGeneratorType.java: ########## @@ -97,9 +99,12 @@ public enum KeyGeneratorType { SPARK_SQL_MERGE_INTO("org.apache.spark.sql.hudi.command.MergeIntoKeyGenerator"), @EnumFieldDescription("A test KeyGenerator for deltastreamer tests.") - STREAMER_TEST("org.apache.hudi.utilities.deltastreamer.TestHoodieDeltaStreamer$TestGenerator"); + STREAMER_TEST("org.apache.hudi.utilities.deltastreamer.TestHoodieDeltaStreamer$TestGenerator"), - private final String className; + @EnumFieldDescription("A KeyGenerator specified from the configuration.") + USER_PROVIDED("USER_PROVIDED_CLASS"); Review Comment: why the ENUM name and value are differing. lets align them. `USER_PROVIDED` ########## hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java: ########## @@ -1551,9 +1551,17 @@ public Properties build() { tableConfig.setValue(HoodieTableConfig.POPULATE_META_FIELDS, Boolean.toString(populateMetaFields)); } if (null != keyGeneratorClassProp) { - tableConfig.setValue(HoodieTableConfig.KEY_GENERATOR_TYPE, KeyGeneratorType.fromClassName(keyGeneratorClassProp).name()); + KeyGeneratorType type = KeyGeneratorType.fromClassName(keyGeneratorClassProp); + tableConfig.setValue(HoodieTableConfig.KEY_GENERATOR_TYPE, type.name()); + // For USER_PROVIDED type, key generator class is recorded as well. + if (type == KeyGeneratorType.USER_PROVIDED) { + tableConfig.setValue(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME, keyGeneratorClassProp); + } } else if (null != keyGeneratorType) { tableConfig.setValue(HoodieTableConfig.KEY_GENERATOR_TYPE, keyGeneratorType); + checkArgument(!keyGeneratorType.equals(KeyGeneratorType.USER_PROVIDED.name()), + String.format("When key generator type is %s, the key generator class must be set properly", Review Comment: not sure what are we trying to validate here. both type and class name could be set even for other key gen types. not just USER_PROVIDED. just that we may not serialize the class to table property for other cases. So, we should remove this validation. ########## hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala: ########## @@ -2038,4 +2098,21 @@ object TestCOWDataSource { df.withColumn(c, when(col(c).isNotNull, col(c)).otherwise(lit(null))) } } + + def provideParamsForKeyGenTest(): java.util.List[Arguments] = { + java.util.Arrays.asList( + Arguments.of( + "org.apache.hudi.keygen.UserProvidedKeyGenerator", + KeyGeneratorType.USER_PROVIDED.name()), + Arguments.of( Review Comment: we should also support, just setting the key gen class name w/o setting the type. thats the out of the box usage of UserProvided key gen. can we test that case as well -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
