This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new e1afe3cce [core] Introduce Int2ShortHashMap.Builder to accelerate 
init
e1afe3cce is described below

commit e1afe3cce720af98a5196297f8e8d331bdb1986a
Author: Jingsong <[email protected]>
AuthorDate: Wed Mar 20 13:48:25 2024 +0800

    [core] Introduce Int2ShortHashMap.Builder to accelerate init
---
 .../org/apache/paimon/utils/Int2ShortHashMap.java  | 26 ++++++++++++++++++++++
 .../org/apache/paimon/index/PartitionIndex.java    | 17 +++-----------
 2 files changed, 29 insertions(+), 14 deletions(-)

diff --git 
a/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java 
b/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java
index 453f04974..0338d6d61 100644
--- a/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java
+++ b/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java
@@ -19,6 +19,8 @@
 package org.apache.paimon.utils;
 
 import it.unimi.dsi.fastutil.ints.Int2ShortOpenHashMap;
+import it.unimi.dsi.fastutil.ints.IntArrayList;
+import it.unimi.dsi.fastutil.shorts.ShortArrayList;
 
 /** Int to short hash map. */
 public class Int2ShortHashMap {
@@ -48,4 +50,28 @@ public class Int2ShortHashMap {
     public int size() {
         return map.size();
     }
+
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    /** Builder of {@link Int2ShortHashMap}. */
+    public static class Builder {
+
+        private final IntArrayList keyList = new IntArrayList();
+        private final ShortArrayList valueList = new ShortArrayList();
+
+        public void put(int key, short value) {
+            keyList.add(key);
+            valueList.add(value);
+        }
+
+        public Int2ShortHashMap build() {
+            Int2ShortHashMap map = new Int2ShortHashMap(keyList.size());
+            for (int i = 0; i < keyList.size(); i++) {
+                map.put(keyList.getInt(i), valueList.getShort(i));
+            }
+            return map;
+        }
+    }
 }
diff --git 
a/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java 
b/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java
index e9667cfe3..79ff72656 100644
--- a/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java
+++ b/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java
@@ -22,7 +22,6 @@ import org.apache.paimon.data.BinaryRow;
 import org.apache.paimon.manifest.IndexManifestEntry;
 import org.apache.paimon.utils.Int2ShortHashMap;
 import org.apache.paimon.utils.IntIterator;
-import org.apache.paimon.utils.MathUtils;
 
 import java.io.EOFException;
 import java.io.IOException;
@@ -113,7 +112,7 @@ public class PartitionIndex {
             IntPredicate loadFilter,
             IntPredicate bucketFilter) {
         List<IndexManifestEntry> files = indexFileHandler.scan(HASH_INDEX, 
partition);
-        Int2ShortHashMap map = new 
Int2ShortHashMap(calculateInitialMapSize(files));
+        Int2ShortHashMap.Builder mapBuilder = Int2ShortHashMap.builder();
         Map<Integer, Long> buckets = new HashMap<>();
         for (IndexManifestEntry file : files) {
             try (IntIterator iterator = 
indexFileHandler.readHashIndex(file.indexFile())) {
@@ -121,7 +120,7 @@ public class PartitionIndex {
                     try {
                         int hash = iterator.next();
                         if (loadFilter.test(hash)) {
-                            map.put(hash, (short) file.bucket());
+                            mapBuilder.put(hash, (short) file.bucket());
                         }
                         if (bucketFilter.test(file.bucket())) {
                             buckets.compute(
@@ -136,16 +135,6 @@ public class PartitionIndex {
                 throw new UncheckedIOException(e);
             }
         }
-        return new PartitionIndex(map, buckets, targetBucketRowNumber);
-    }
-
-    private static int calculateInitialMapSize(List<IndexManifestEntry> files) 
{
-        long size = 16;
-        for (IndexManifestEntry file : files) {
-            size = Math.max(size, file.indexFile().rowCount());
-        }
-        return MathUtils.isPowerOf2(size)
-                ? (int) size
-                : MathUtils.roundDownToPowerOf2((int) size) * 2;
+        return new PartitionIndex(mapBuilder.build(), buckets, 
targetBucketRowNumber);
     }
 }

Reply via email to