This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new f8e559f5e6f [HUDI-7660] Fix excessive object creation in RowDataKeyGen
(#11084)
f8e559f5e6f is described below
commit f8e559f5e6f75ba5967c96efbcb39dd53365624f
Author: Vova Kolmakov <[email protected]>
AuthorDate: Thu Apr 25 07:21:46 2024 +0700
[HUDI-7660] Fix excessive object creation in RowDataKeyGen (#11084)
---
.../org/apache/hudi/sink/bulk/RowDataKeyGen.java | 45 ++++++++++++----------
1 file changed, 24 insertions(+), 21 deletions(-)
diff --git
a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/RowDataKeyGen.java
b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/RowDataKeyGen.java
index a9f34b36d27..c377575db5e 100644
---
a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/RowDataKeyGen.java
+++
b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/RowDataKeyGen.java
@@ -56,6 +56,8 @@ public class RowDataKeyGen implements Serializable {
private static final String EMPTY_RECORDKEY_PLACEHOLDER = "__empty__";
private static final String DEFAULT_PARTITION_PATH_SEPARATOR = "/";
+ private static final String HIVE_PARTITION_TEMPLATE = "%s=%s";
+ private static final String DEFAULT_FIELD_SEPARATOR = ",";
private final String[] recordKeyFields;
private final String[] partitionPathFields;
@@ -86,7 +88,7 @@ public class RowDataKeyGen implements Serializable {
boolean encodePartitionPath,
boolean consistentLogicalTimestampEnabled,
Option<TimestampBasedAvroKeyGenerator> keyGenOpt) {
- this.partitionPathFields = partitionFields.split(",");
+ this.partitionPathFields = partitionFields.split(DEFAULT_FIELD_SEPARATOR);
this.hiveStylePartitioning = hiveStylePartitioning;
this.encodePartitionPath = encodePartitionPath;
this.consistentLogicalTimestampEnabled = consistentLogicalTimestampEnabled;
@@ -98,7 +100,7 @@ public class RowDataKeyGen implements Serializable {
this.recordKeyFields = null;
this.recordKeyProjection = null;
} else {
- this.recordKeyFields = recordKeys.get().split(",");
+ this.recordKeyFields = recordKeys.get().split(DEFAULT_FIELD_SEPARATOR);
if (this.recordKeyFields.length == 1) {
// efficient code path
this.simpleRecordKey = true;
@@ -166,7 +168,7 @@ public class RowDataKeyGen implements Serializable {
}
}
- // reference: org.apache.hudi.keygen.KeyGenUtils.getRecordPartitionPath
+ // reference: org.apache.hudi.keygen.KeyGenUtils.getRecordKey
private static String getRecordKey(Object[] keyValues, String[] keyFields,
boolean consistentLogicalTimestampEnabled) {
boolean keyIsNullEmpty = true;
StringBuilder recordKey = new StringBuilder();
@@ -176,28 +178,28 @@ public class RowDataKeyGen implements Serializable {
value = getTimestampValue(consistentLogicalTimestampEnabled, value);
String recordKeyValue = StringUtils.objToString(value);
if (recordKeyValue == null) {
-
recordKey.append(recordKeyField).append(":").append(NULL_RECORDKEY_PLACEHOLDER).append(",");
+
recordKey.append(recordKeyField).append(":").append(NULL_RECORDKEY_PLACEHOLDER);
} else if (recordKeyValue.isEmpty()) {
-
recordKey.append(recordKeyField).append(":").append(EMPTY_RECORDKEY_PLACEHOLDER).append(",");
+
recordKey.append(recordKeyField).append(":").append(EMPTY_RECORDKEY_PLACEHOLDER);
} else {
-
recordKey.append(recordKeyField).append(":").append(recordKeyValue).append(",");
+ recordKey.append(recordKeyField).append(":").append(recordKeyValue);
keyIsNullEmpty = false;
}
+ if (i != keyValues.length - 1) {
+ recordKey.append(DEFAULT_FIELD_SEPARATOR);
+ }
}
- recordKey.deleteCharAt(recordKey.length() - 1);
if (keyIsNullEmpty) {
- throw new HoodieKeyException("recordKey values: \"" + recordKey + "\"
for fields: "
- + Arrays.toString(keyFields) + " cannot be entirely null or empty.");
+ throw new HoodieKeyException(String.format("recordKey values: \"%s\" for
fields: %s cannot be entirely null or empty.",
+ recordKey, Arrays.toString(keyFields)));
}
return recordKey.toString();
}
private static Object getTimestampValue(boolean
consistentLogicalTimestampEnabled, Object value) {
- if (!consistentLogicalTimestampEnabled) {
- if (value instanceof TimestampData) {
- TimestampData timestampData = (TimestampData) value;
- value = timestampData.toTimestamp().toInstant().toEpochMilli();
- }
+ if (!consistentLogicalTimestampEnabled && (value instanceof
TimestampData)) {
+ TimestampData timestampData = (TimestampData) value;
+ value = timestampData.toTimestamp().toInstant().toEpochMilli();
}
return value;
}
@@ -213,17 +215,17 @@ public class RowDataKeyGen implements Serializable {
String partField = partFields[i];
String partValue = StringUtils.objToString(partValues[i]);
if (partValue == null || partValue.isEmpty()) {
- partitionPath.append(hiveStylePartitioning ? partField + "=" +
DEFAULT_PARTITION_PATH
- : DEFAULT_PARTITION_PATH);
+ partitionPath.append(hiveStylePartitioning ?
String.format(HIVE_PARTITION_TEMPLATE, partField, DEFAULT_PARTITION_PATH) :
DEFAULT_PARTITION_PATH);
} else {
if (encodePartitionPath) {
partValue = escapePathName(partValue);
}
- partitionPath.append(hiveStylePartitioning ? partField + "=" +
partValue : partValue);
+ partitionPath.append(hiveStylePartitioning ?
String.format(HIVE_PARTITION_TEMPLATE, partField, partValue) : partValue);
+ }
+ if (i != partFields.length - 1) {
+ partitionPath.append(DEFAULT_PARTITION_PATH_SEPARATOR);
}
- partitionPath.append(DEFAULT_PARTITION_PATH_SEPARATOR);
}
- partitionPath.deleteCharAt(partitionPath.length() - 1);
return partitionPath.toString();
}
@@ -232,7 +234,8 @@ public class RowDataKeyGen implements Serializable {
recordKeyValue = getTimestampValue(consistentLogicalTimestampEnabled,
recordKeyValue);
String recordKey = StringUtils.objToString(recordKeyValue);
if (recordKey == null || recordKey.isEmpty()) {
- throw new HoodieKeyException("recordKey value: \"" + recordKey + "\" for
field: \"" + recordKeyField + "\" cannot be null or empty.");
+ throw new HoodieKeyException(String.format("recordKey value: \"%s\" for
field: \"%s\" cannot be null or empty.",
+ recordKey, recordKeyField));
}
return recordKey;
}
@@ -256,7 +259,7 @@ public class RowDataKeyGen implements Serializable {
partitionPath = escapePathName(partitionPath);
}
if (hiveStylePartitioning) {
- partitionPath = partField + "=" + partitionPath;
+ partitionPath = String.format(HIVE_PARTITION_TEMPLATE, partField,
partitionPath);
}
return partitionPath;
}