This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 7a44d53c2 [core] add initial map size to solve load hash index slowly 
(#3051)
7a44d53c2 is described below

commit 7a44d53c2ff963e6291b493df78b054484b88a13
Author: Stephen0421 <[email protected]>
AuthorDate: Wed Mar 20 13:38:00 2024 +0800

    [core] add initial map size to solve load hash index slowly (#3051)
---
 .../main/java/org/apache/paimon/utils/Int2ShortHashMap.java |  4 ++++
 .../main/java/org/apache/paimon/index/PartitionIndex.java   | 13 ++++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git 
a/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java 
b/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java
index a271bee66..453f04974 100644
--- a/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java
+++ b/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java
@@ -29,6 +29,10 @@ public class Int2ShortHashMap {
         this.map = new Int2ShortOpenHashMap();
     }
 
+    /**
+     * Creates a map whose backing {@link Int2ShortOpenHashMap} is constructed
+     * with the given size argument instead of its no-arg default.
+     *
+     * @param capacity value handed to the {@code Int2ShortOpenHashMap}
+     *     constructor (presumably the expected number of entries; confirm
+     *     against the fastutil documentation)
+     */
+    public Int2ShortHashMap(int capacity) {
+        map = new Int2ShortOpenHashMap(capacity);
+    }
+
     public void put(int key, short value) {
         map.put(key, value);
     }
diff --git 
a/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java 
b/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java
index 28428aa2d..e9667cfe3 100644
--- a/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java
+++ b/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java
@@ -22,6 +22,7 @@ import org.apache.paimon.data.BinaryRow;
 import org.apache.paimon.manifest.IndexManifestEntry;
 import org.apache.paimon.utils.Int2ShortHashMap;
 import org.apache.paimon.utils.IntIterator;
+import org.apache.paimon.utils.MathUtils;
 
 import java.io.EOFException;
 import java.io.IOException;
@@ -111,8 +112,8 @@ public class PartitionIndex {
             long targetBucketRowNumber,
             IntPredicate loadFilter,
             IntPredicate bucketFilter) {
-        Int2ShortHashMap map = new Int2ShortHashMap();
         List<IndexManifestEntry> files = indexFileHandler.scan(HASH_INDEX, 
partition);
+        Int2ShortHashMap map = new 
Int2ShortHashMap(calculateInitialMapSize(files));
         Map<Integer, Long> buckets = new HashMap<>();
         for (IndexManifestEntry file : files) {
             try (IntIterator iterator = 
indexFileHandler.readHashIndex(file.indexFile())) {
@@ -137,4 +138,14 @@ public class PartitionIndex {
         }
         return new PartitionIndex(map, buckets, targetBucketRowNumber);
     }
+
+    private static int calculateInitialMapSize(List<IndexManifestEntry> files) 
{
+        long size = 16;
+        for (IndexManifestEntry file : files) {
+            size = Math.max(size, file.indexFile().rowCount());
+        }
+        return MathUtils.isPowerOf2(size)
+                ? (int) size
+                : MathUtils.roundDownToPowerOf2((int) size) * 2;
+    }
 }

Reply via email to