This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 7a44d53c2 [core] add initial map size to solve load hash index slowly
(#3051)
7a44d53c2 is described below
commit 7a44d53c2ff963e6291b493df78b054484b88a13
Author: Stephen0421 <[email protected]>
AuthorDate: Wed Mar 20 13:38:00 2024 +0800
[core] add initial map size to solve load hash index slowly (#3051)
---
.../main/java/org/apache/paimon/utils/Int2ShortHashMap.java | 4 ++++
.../main/java/org/apache/paimon/index/PartitionIndex.java | 13 ++++++++++++-
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java b/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java
index a271bee66..453f04974 100644
--- a/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java
+++ b/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java
@@ -29,6 +29,10 @@ public class Int2ShortHashMap {
this.map = new Int2ShortOpenHashMap();
}
+ public Int2ShortHashMap(int capacity) { // variant of the no-arg constructor that takes an initial-size hint
+ this.map = new Int2ShortOpenHashMap(capacity); // hint is passed through unchanged; NOTE(review): fastutil appears to treat this argument as an expected element count rather than a table length - confirm the name `capacity` is intended
+ }
+
public void put(int key, short value) { // associates value with key
map.put(key, value); // plain delegation to the wrapped map; its return value is discarded
}
diff --git a/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java b/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java
index 28428aa2d..e9667cfe3 100644
--- a/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java
+++ b/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java
@@ -22,6 +22,7 @@ import org.apache.paimon.data.BinaryRow;
import org.apache.paimon.manifest.IndexManifestEntry;
import org.apache.paimon.utils.Int2ShortHashMap;
import org.apache.paimon.utils.IntIterator;
+import org.apache.paimon.utils.MathUtils;
import java.io.EOFException;
import java.io.IOException;
@@ -111,8 +112,8 @@ public class PartitionIndex {
long targetBucketRowNumber,
IntPredicate loadFilter,
IntPredicate bucketFilter) {
- Int2ShortHashMap map = new Int2ShortHashMap();
List<IndexManifestEntry> files = indexFileHandler.scan(HASH_INDEX,
partition);
+ Int2ShortHashMap map = new
Int2ShortHashMap(calculateInitialMapSize(files));
Map<Integer, Long> buckets = new HashMap<>();
for (IndexManifestEntry file : files) {
try (IntIterator iterator =
indexFileHandler.readHashIndex(file.indexFile())) {
@@ -137,4 +138,14 @@ public class PartitionIndex {
}
return new PartitionIndex(map, buckets, targetBucketRowNumber);
}
+
+    /**
+     * Chooses the initial size passed to the hash map before the index files are loaded.
+     *
+     * <p>Starts from a floor of 16 and raises it to the largest {@code rowCount()} reported by
+     * any single file in {@code files} (the maximum, not the sum across files). A value that
+     * {@code MathUtils.isPowerOf2} accepts is returned as is; any other value becomes
+     * {@code MathUtils.roundDownToPowerOf2(size) * 2}, i.e. presumably the next power of two
+     * above it.
+     *
+     * <p>The result is capped at {@code 1 << 30}, the largest power of two an {@code int} can
+     * hold. Without the cap, a row count above that bound would be truncated by the
+     * {@code int} cast, or the doubling would wrap around to a negative number.
+     *
+     * @param files hash index files about to be loaded; an empty list yields 16
+     * @return the size hint, between 16 and {@code 1 << 30} inclusive
+     */
+    private static int calculateInitialMapSize(List<IndexManifestEntry> files) {
+        long size = 16;
+        for (IndexManifestEntry file : files) {
+            size = Math.max(size, file.indexFile().rowCount());
+        }
+        // Clamp while the value is still a long: past 2^30 the narrowing cast and the "* 2"
+        // below no longer fit in an int.
+        // NOTE(review): the backing map may enforce a lower limit of its own on the expected
+        // size - confirm against the fastutil version in use.
+        int maxSize = 1 << 30;
+        if (size >= maxSize) {
+            return maxSize;
+        }
+        return MathUtils.isPowerOf2(size)
+                ? (int) size
+                : MathUtils.roundDownToPowerOf2((int) size) * 2;
+    }
}