This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 5c444ff710c [HUDI-7432] Fix excessive object creation in KeyGenUtils
(#10721)
5c444ff710c is described below
commit 5c444ff710cfef83825b7cb9a2d5c5a3b85a9087
Author: wombatu-kun <[email protected]>
AuthorDate: Thu Feb 22 09:47:14 2024 +0700
[HUDI-7432] Fix excessive object creation in KeyGenUtils (#10721)
Co-authored-by: Vova Kolmakov <[email protected]>
---
.../java/org/apache/hudi/keygen/KeyGenUtils.java | 34 ++++++++++++++--------
.../hudi/keygen/TestComplexKeyGenerator.java | 2 +-
.../keygen/TestGlobalDeleteRecordGenerator.java | 2 +-
.../keygen/TestNonpartitionedKeyGenerator.java | 2 +-
.../org/apache/hudi/TestDataSourceDefaults.scala | 4 +--
5 files changed, 27 insertions(+), 17 deletions(-)
diff --git
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java
index 7b88a0ab979..6266d965fd4 100644
---
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java
+++
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java
@@ -146,21 +146,24 @@ public class KeyGenUtils {
public static String getRecordKey(GenericRecord record, List<String>
recordKeyFields, boolean consistentLogicalTimestampEnabled) {
boolean keyIsNullEmpty = true;
StringBuilder recordKey = new StringBuilder();
- for (String recordKeyField : recordKeyFields) {
+ for (int i = 0; i < recordKeyFields.size(); i++) {
+ String recordKeyField = recordKeyFields.get(i);
String recordKeyValue =
HoodieAvroUtils.getNestedFieldValAsString(record, recordKeyField, true,
consistentLogicalTimestampEnabled);
if (recordKeyValue == null) {
- recordKey.append(recordKeyField + DEFAULT_COMPOSITE_KEY_FILED_VALUE +
NULL_RECORDKEY_PLACEHOLDER + DEFAULT_RECORD_KEY_PARTS_SEPARATOR);
+
recordKey.append(recordKeyField).append(DEFAULT_COMPOSITE_KEY_FILED_VALUE).append(NULL_RECORDKEY_PLACEHOLDER);
} else if (recordKeyValue.isEmpty()) {
- recordKey.append(recordKeyField + DEFAULT_COMPOSITE_KEY_FILED_VALUE +
EMPTY_RECORDKEY_PLACEHOLDER + DEFAULT_RECORD_KEY_PARTS_SEPARATOR);
+
recordKey.append(recordKeyField).append(DEFAULT_COMPOSITE_KEY_FILED_VALUE).append(EMPTY_RECORDKEY_PLACEHOLDER);
} else {
- recordKey.append(recordKeyField + DEFAULT_COMPOSITE_KEY_FILED_VALUE +
recordKeyValue + DEFAULT_RECORD_KEY_PARTS_SEPARATOR);
+
recordKey.append(recordKeyField).append(DEFAULT_COMPOSITE_KEY_FILED_VALUE).append(recordKeyValue);
keyIsNullEmpty = false;
}
+ if (i != recordKeyFields.size() - 1) {
+ recordKey.append(DEFAULT_RECORD_KEY_PARTS_SEPARATOR);
+ }
}
- recordKey.deleteCharAt(recordKey.length() - 1);
if (keyIsNullEmpty) {
throw new HoodieKeyException("recordKey values: \"" + recordKey + "\"
for fields: "
- + recordKeyFields.toString() + " cannot be entirely null or empty.");
+ + recordKeyFields + " cannot be entirely null or empty.");
}
return recordKey.toString();
}
@@ -172,20 +175,27 @@ public class KeyGenUtils {
}
StringBuilder partitionPath = new StringBuilder();
- for (String partitionPathField : partitionPathFields) {
+ for (int i = 0; i < partitionPathFields.size(); i++) {
+ String partitionPathField = partitionPathFields.get(i);
String fieldVal = HoodieAvroUtils.getNestedFieldValAsString(record,
partitionPathField, true, consistentLogicalTimestampEnabled);
if (fieldVal == null || fieldVal.isEmpty()) {
- partitionPath.append(hiveStylePartitioning ? partitionPathField + "="
+ HUDI_DEFAULT_PARTITION_PATH
- : HUDI_DEFAULT_PARTITION_PATH);
+ if (hiveStylePartitioning) {
+ partitionPath.append(partitionPathField).append("=");
+ }
+ partitionPath.append(HUDI_DEFAULT_PARTITION_PATH);
} else {
if (encodePartitionPath) {
fieldVal = PartitionPathEncodeUtils.escapePathName(fieldVal);
}
- partitionPath.append(hiveStylePartitioning ? partitionPathField + "="
+ fieldVal : fieldVal);
+ if (hiveStylePartitioning) {
+ partitionPath.append(partitionPathField).append("=");
+ }
+ partitionPath.append(fieldVal);
+ }
+ if (i != partitionPathFields.size() - 1) {
+ partitionPath.append(DEFAULT_PARTITION_PATH_SEPARATOR);
}
- partitionPath.append(DEFAULT_PARTITION_PATH_SEPARATOR);
}
- partitionPath.deleteCharAt(partitionPath.length() - 1);
return partitionPath.toString();
}
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java
index 296cf3d6e0d..2fa09861d25 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java
+++
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java
@@ -78,7 +78,7 @@ public class TestComplexKeyGenerator extends
KeyGeneratorTestUtilities {
@Test
public void testNullRecordKeyFields() {
GenericRecord record = getRecord();
- Assertions.assertThrows(StringIndexOutOfBoundsException.class, () -> {
+ Assertions.assertThrows(HoodieKeyException.class, () -> {
ComplexKeyGenerator keyGenerator = new
ComplexKeyGenerator(getPropertiesWithoutRecordKeyProp());
keyGenerator.getRecordKey(record);
});
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteRecordGenerator.java
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteRecordGenerator.java
index df69279cc89..4c9fc1c9dda 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteRecordGenerator.java
+++
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteRecordGenerator.java
@@ -62,7 +62,7 @@ public class TestGlobalDeleteRecordGenerator extends
KeyGeneratorTestUtilities {
@Test
public void testNullRecordKeyFields() {
GenericRecord record = getRecord();
- Assertions.assertThrows(StringIndexOutOfBoundsException.class, () -> {
+ Assertions.assertThrows(HoodieKeyException.class, () -> {
BaseKeyGenerator keyGenerator = new
GlobalDeleteKeyGenerator(getPropertiesWithoutRecordKeyProp());
keyGenerator.getRecordKey(record);
});
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java
index fb740d00e2a..187f96197b1 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java
+++
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java
@@ -69,7 +69,7 @@ public class TestNonpartitionedKeyGenerator extends
KeyGeneratorTestUtilities {
@Test
public void testNullRecordKeyFields() {
GenericRecord record = getRecord();
- Assertions.assertThrows(StringIndexOutOfBoundsException.class, () -> {
+ Assertions.assertThrows(HoodieKeyException.class, () -> {
BaseKeyGenerator keyGenerator = new
NonpartitionedKeyGenerator(getPropertiesWithoutRecordKeyProp());
keyGenerator.getRecordKey(record);
});
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala
index a2598c766b1..784ddd6c883 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala
+++
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala
@@ -262,7 +262,7 @@ class TestDataSourceDefaults extends ScalaAssertionSupport {
}
// Record's key field not specified
- assertThrows(classOf[StringIndexOutOfBoundsException]) {
+ assertThrows(classOf[HoodieKeyException]) {
val props = new TypedProperties()
props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key,
"partitionField")
val keyGen = new ComplexKeyGenerator(props)
@@ -494,7 +494,7 @@ class TestDataSourceDefaults extends ScalaAssertionSupport {
val props = new TypedProperties()
props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key,
"partitionField")
- assertThrows(classOf[StringIndexOutOfBoundsException]) {
+ assertThrows(classOf[HoodieKeyException]) {
new GlobalDeleteKeyGenerator(props).getRecordKey(baseRecord)
}
}