satishkotha commented on a change in pull request #2627:
URL: https://github.com/apache/hudi/pull/2627#discussion_r587802720
##########
File path:
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java
##########
@@ -20,20 +20,32 @@
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
import org.apache.spark.sql.Row;
+import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
+import java.util.stream.Collectors;
/**
* Simple Key generator for unpartitioned Hive Tables.
*/
-public class NonpartitionedKeyGenerator extends SimpleKeyGenerator {
+public class NonpartitionedKeyGenerator extends BuiltinKeyGenerator {
private final NonpartitionedAvroKeyGenerator nonpartitionedAvroKeyGenerator;
- public NonpartitionedKeyGenerator(TypedProperties config) {
- super(config);
- nonpartitionedAvroKeyGenerator = new
NonpartitionedAvroKeyGenerator(config);
+ public NonpartitionedKeyGenerator(TypedProperties props) {
+ super(props);
+ this.recordKeyFields =
Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY)
Review comment:
Do you need these initialization here? Can we override
getPartitionPathFields and getRecordKeyFields and call
nonPartitionedAvroKeyGenerator instead to avoid code repetition?
##########
File path:
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java
##########
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.keygen;
+
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
+import org.apache.hudi.exception.HoodieKeyException;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
+import org.apache.hudi.testutils.KeyGeneratorTestUtilities;
+import org.apache.spark.sql.Row;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import static junit.framework.TestCase.assertEquals;
+
+public class TestNonpartitionedKeyGenerator extends KeyGeneratorTestUtilities {
+
+ private TypedProperties getCommonProps(boolean getComplexRecordKey) {
+ TypedProperties properties = new TypedProperties();
+ if (getComplexRecordKey) {
+ properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key,
pii_col");
+ } else {
+ properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key");
+ }
+ properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY,
"true");
+ return properties;
+ }
+
+ private TypedProperties getPropertiesWithoutPartitionPathProp() {
+ return getCommonProps(false);
+ }
+
+ private TypedProperties getPropertiesWithPartitionPathProp() {
+ TypedProperties properties = getCommonProps(true);
+ properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY,
"timestamp,ts_ms");
+ return properties;
+ }
+
+ private TypedProperties getPropertiesWithoutRecordKeyProp() {
+ TypedProperties properties = new TypedProperties();
+ properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY,
"timestamp");
+ return properties;
+ }
+
+ private TypedProperties getWrongRecordKeyFieldProps() {
+ TypedProperties properties = new TypedProperties();
+ properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY,
"timestamp");
+ properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_wrong_key");
+ return properties;
+ }
+
+ @Test
+ public void testNullRecordKeyFields() {
+ Assertions.assertThrows(IllegalArgumentException.class, () -> new
NonpartitionedKeyGenerator(getPropertiesWithoutRecordKeyProp()));
+ }
+
+ @Test
+ public void testNullPartitionPathFields() {
+ TypedProperties properties = getPropertiesWithoutPartitionPathProp();
+ NonpartitionedKeyGenerator keyGenerator = new
NonpartitionedKeyGenerator(properties);
+ GenericRecord record = getRecord();
+ Row row = KeyGeneratorTestUtilities.getRow(record);
+ Assertions.assertEquals(keyGenerator.getPartitionPath(row), "");
+ }
+
+ @Test
+ public void testNonNullPartitionPathFields() {
+ TypedProperties properties = getPropertiesWithPartitionPathProp();
+ NonpartitionedKeyGenerator keyGenerator = new
NonpartitionedKeyGenerator(properties);
+ GenericRecord record = getRecord();
+ Row row = KeyGeneratorTestUtilities.getRow(record);
+
Assertions.assertEquals(properties.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY),
"timestamp,ts_ms");
+ Assertions.assertEquals(keyGenerator.getPartitionPath(row), "");
+ }
+
+ @Test
+ public void testWrongRecordKeyField() {
+ NonpartitionedKeyGenerator keyGenerator = new
NonpartitionedKeyGenerator(getWrongRecordKeyFieldProps());
+ Assertions.assertThrows(HoodieKeyException.class, () ->
keyGenerator.getRecordKey(getRecord()));
+ Assertions.assertThrows(HoodieKeyException.class, () ->
keyGenerator.buildFieldPositionMapIfNeeded(KeyGeneratorTestUtilities.structType));
+ }
+
+ @Test
+ public void testSingleValueKeyGeneratorNonPartitioned() {
+ TypedProperties properties = new TypedProperties();
+ properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY,
"_row_key");
+ properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY,
"");
+ NonpartitionedKeyGenerator keyGenerator = new
NonpartitionedKeyGenerator(properties);
+ assertEquals(keyGenerator.getRecordKeyFields().size(), 1);
+ assertEquals(keyGenerator.getPartitionPathFields().size(), 0);
+
+ HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
+ GenericRecord record = dataGenerator.generateGenericRecords(1).get(0);
+ String rowKey = record.get("_row_key").toString();
+ HoodieKey hoodieKey = keyGenerator.getKey(record);
+ assertEquals(rowKey, hoodieKey.getRecordKey());
+ assertEquals("", hoodieKey.getPartitionPath());
+ }
+
+ @Test
+ public void testMultipleValueKeyGeneratorNonPartitioned1() {
+ TypedProperties properties = new TypedProperties();
+ properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY,
"_row_key,timestamp");
Review comment:
minor: can you use some other field in place of _row_key here? Its a bit
confusing to have row_key as one part of record key
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/NonpartitionedAvroKeyGenerator.java
##########
@@ -45,6 +51,18 @@ public String getPartitionPath(GenericRecord record) {
return EMPTY_PARTITION_FIELD_LIST;
}
+ @Override
+ public String getRecordKey(GenericRecord record) {
+ // the format of record key varies due to the number of record key fields
+ // 1. if there is only one record key field, the format of record key is
just "<value>"
+ // 2. if there are multiple record key fields, the format is
"<field1>:<value1>,<field2>:<value2>,..."
+ // for backward compatibility, we need to use the right format according
to the number of record key fields
Review comment:
nit: Can you move this to beginning (line 56. you can remove "the format
of record key varies due to the number of record key fields"). I also prefer
line 63 to be in else block (in some cases, compiler optimization works better
that way. This is a 'nit' comment, so Its fine if you want to leave it as is.)
##########
File path:
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java
##########
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.keygen;
+
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
+import org.apache.hudi.exception.HoodieKeyException;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
+import org.apache.hudi.testutils.KeyGeneratorTestUtilities;
+import org.apache.spark.sql.Row;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import static junit.framework.TestCase.assertEquals;
+
+public class TestNonpartitionedKeyGenerator extends KeyGeneratorTestUtilities {
+
+ private TypedProperties getCommonProps(boolean getComplexRecordKey) {
+ TypedProperties properties = new TypedProperties();
+ if (getComplexRecordKey) {
+ properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key,
pii_col");
+ } else {
+ properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key");
+ }
+ properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY,
"true");
+ return properties;
+ }
+
+ private TypedProperties getPropertiesWithoutPartitionPathProp() {
+ return getCommonProps(false);
+ }
+
+ private TypedProperties getPropertiesWithPartitionPathProp() {
+ TypedProperties properties = getCommonProps(true);
+ properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY,
"timestamp,ts_ms");
+ return properties;
+ }
+
+ private TypedProperties getPropertiesWithoutRecordKeyProp() {
+ TypedProperties properties = new TypedProperties();
+ properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY,
"timestamp");
+ return properties;
+ }
+
+ private TypedProperties getWrongRecordKeyFieldProps() {
+ TypedProperties properties = new TypedProperties();
+ properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY,
"timestamp");
+ properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY, "_wrong_key");
+ return properties;
+ }
+
+ @Test
+ public void testNullRecordKeyFields() {
+ Assertions.assertThrows(IllegalArgumentException.class, () -> new
NonpartitionedKeyGenerator(getPropertiesWithoutRecordKeyProp()));
+ }
+
+ @Test
+ public void testNullPartitionPathFields() {
+ TypedProperties properties = getPropertiesWithoutPartitionPathProp();
+ NonpartitionedKeyGenerator keyGenerator = new
NonpartitionedKeyGenerator(properties);
+ GenericRecord record = getRecord();
+ Row row = KeyGeneratorTestUtilities.getRow(record);
+ Assertions.assertEquals(keyGenerator.getPartitionPath(row), "");
+ }
+
+ @Test
+ public void testNonNullPartitionPathFields() {
+ TypedProperties properties = getPropertiesWithPartitionPathProp();
Review comment:
i think this case should throw an error because using
NonPartitionedKeyGenerator and setting PARTITIONPATH_FIELD_OPT_KEY property is
not expected. wdyt?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]