This is an automated email from the ASF dual-hosted git repository.

wombatukun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 760ecb51e9f [HUDI-9329] avoid obtaining an incorrect partition id due to int type overflow in `BucketIndexUtil` (#13189)
760ecb51e9f is described below

commit 760ecb51e9f493cd69bfaf47ae0f06d2efd66260
Author: TheR1sing3un <[email protected]>
AuthorDate: Tue Apr 22 15:28:40 2025 +0800

    [HUDI-9329] avoid obtaining an incorrect partition id due to int type overflow in `BucketIndexUtil` (#13189)
---
 .../java/org/apache/hudi/common/util/hash/BucketIndexUtil.java | 10 +++++++---
 .../org/apache/hudi/common/util/hash/TestBucketIndexUtil.java  |  2 ++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/BucketIndexUtil.java b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/BucketIndexUtil.java
index 8081e65871b..837439b5fcc 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/BucketIndexUtil.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/BucketIndexUtil.java
@@ -19,6 +19,7 @@
 package org.apache.hudi.common.util.hash;
 
 import org.apache.hudi.common.util.Functions;
+import org.apache.hudi.common.util.ValidationUtils;
 
 /**
  * Utility class for bucket index.
@@ -37,9 +38,12 @@ public class BucketIndexUtil {
    */
   public static Functions.Function3<Integer, String, Integer, Integer> 
getPartitionIndexFunc(int parallelism) {
     return (bucketNum, partition, curBucket) -> {
-      int partitionIndex = (partition.hashCode() & Integer.MAX_VALUE) % 
parallelism * bucketNum;
-      int globalIndex = partitionIndex + curBucket;
-      return globalIndex % parallelism;
+      long partitionIndex = (partition.hashCode() & Integer.MAX_VALUE) % 
parallelism * (long) bucketNum;
+      long globalIndex = partitionIndex + curBucket;
+      int partitionId = (int) (globalIndex % parallelism);
+      ValidationUtils.checkArgument(partitionId >= 0 && partitionId < 
parallelism,
+          () -> "Partition id should be in range [0, " + parallelism + "), but 
got " + partitionId);
+      return partitionId;
     };
   }
 }
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/hash/TestBucketIndexUtil.java b/hudi-common/src/test/java/org/apache/hudi/common/util/hash/TestBucketIndexUtil.java
index 482227393eb..ea2f637b938 100644
--- a/hudi-common/src/test/java/org/apache/hudi/common/util/hash/TestBucketIndexUtil.java
+++ b/hudi-common/src/test/java/org/apache/hudi/common/util/hash/TestBucketIndexUtil.java
@@ -55,6 +55,7 @@ public class TestBucketIndexUtil {
     argsList.add(Arguments.of(201, 100, true));
     argsList.add(Arguments.of(400, 1000, true));
     argsList.add(Arguments.of(401, 1000, true));
+    argsList.add(Arguments.of(65536, 65536, true));
 
     return argsList.stream();
   }
@@ -144,6 +145,7 @@ public class TestBucketIndexUtil {
 
   private void checkResult(Map<Integer, Integer> parallelism2TaskCount, int 
parallelism, int bucketNumber, boolean partitioned, boolean needExpand) {
     int sum = 0;
+    assertTrue(parallelism2TaskCount.keySet().stream().allMatch(id -> id >= 0 
&& id < parallelism));
     for (int v : parallelism2TaskCount.values()) {
       sum = sum + v;
     }

Reply via email to