This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 60eaefd1eb0 [HUDI-9450] Optimizing the hot path using stringformat
leads to performance loss (#13350)
60eaefd1eb0 is described below
commit 60eaefd1eb08bd29a0d6dd01af3ff7b9eeb5f3dc
Author: TheR1sing3un <[email protected]>
AuthorDate: Mon May 26 11:55:02 2025 +0800
[HUDI-9450] Optimizing the hot path using stringformat leads to performance
loss (#13350)
1. Optimizing the hot path using stringformat leads to performance loss
Signed-off-by: TheR1sing3un <[email protected]>
---
.../apache/hudi/index/bucket/BucketIdentifier.java | 19 +++++++++++++++++--
.../main/java/org/apache/hudi/common/fs/FSUtils.java | 7 ++++++-
.../main/java/org/apache/hudi/util/StreamerUtil.java | 6 +++++-
3 files changed, 28 insertions(+), 4 deletions(-)
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/BucketIdentifier.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/BucketIdentifier.java
index 1f7b141061c..76248d89640 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/BucketIdentifier.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/BucketIdentifier.java
@@ -54,7 +54,11 @@ public class BucketIdentifier implements Serializable {
}
public static String partitionBucketIdStr(String partition, int bucketId) {
- return String.format("%s_%s", partition, bucketIdStr(bucketId));
+ // format: {partition}_{bucket_id}, bucket id should be 8 digits long, padded with leading zeros
+ StringBuilder sb = new StringBuilder()
+ .append(partition)
+ .append('_');
+ return appendWithPadZero(bucketId, 8, sb).toString();
}
public static int bucketIdFromFileId(String fileId) {
@@ -62,7 +66,18 @@ public class BucketIdentifier implements Serializable {
}
public static String bucketIdStr(int n) {
- return String.format("%08d", n);
+ // bucket str should be 8 digits long, padded with leading zeros, format like: "00000001" for bucket 1
+ return appendWithPadZero(n, 8, new StringBuilder()).toString();
+ }
+
+ private static StringBuilder appendWithPadZero(int num, int targetLength,
StringBuilder sb) {
+ String numStr = Integer.toString(num);
+ int zerosNeeded = targetLength - numStr.length();
+ for (int i = 0; i < zerosNeeded; i++) {
+ sb.append('0');
+ }
+ sb.append(numStr);
+ return sb;
}
public static String newBucketFileIdPrefix(int bucketId, boolean fixed) {
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
index 0d78d03b029..3ad7645f42c 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
@@ -322,7 +322,12 @@ public class FSUtils {
}
public static String createNewFileId(String idPfx, int id) {
- return String.format("%s-%d", idPfx, id);
+ // format: {idPrefix}-{id}
+ return new StringBuilder()
+ .append(idPfx)
+ .append('-')
+ .append(id)
+ .toString();
}
/**
diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java
index c7f1fe8c6fc..e2db2555d2c 100644
--- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java
@@ -423,7 +423,11 @@ public class StreamerUtil {
* Generates the bucket ID using format {partition path}_{fileID}.
*/
public static String generateBucketKey(String partitionPath, String fileId) {
- return partitionPath + "_" + fileId;
+ return new StringBuilder()
+ .append(partitionPath)
+ .append('_')
+ .append(fileId)
+ .toString();
}
/**