This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git
The following commit(s) were added to refs/heads/master by this push:
new e1afe3cce [core] Introduce Int2ShortHashMap.Builder to accelerate init
e1afe3cce is described below
commit e1afe3cce720af98a5196297f8e8d331bdb1986a
Author: Jingsong <[email protected]>
AuthorDate: Wed Mar 20 13:48:25 2024 +0800
[core] Introduce Int2ShortHashMap.Builder to accelerate init
---
.../org/apache/paimon/utils/Int2ShortHashMap.java | 26 ++++++++++++++++++++++
.../org/apache/paimon/index/PartitionIndex.java | 17 +++-----------
2 files changed, 29 insertions(+), 14 deletions(-)
diff --git a/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java b/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java
index 453f04974..0338d6d61 100644
--- a/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java
+++ b/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java
@@ -19,6 +19,8 @@
package org.apache.paimon.utils;
import it.unimi.dsi.fastutil.ints.Int2ShortOpenHashMap;
+import it.unimi.dsi.fastutil.ints.IntArrayList;
+import it.unimi.dsi.fastutil.shorts.ShortArrayList;
/** Int to short hash map. */
public class Int2ShortHashMap {
@@ -48,4 +50,28 @@ public class Int2ShortHashMap {
public int size() {
return map.size();
}
+
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ /** Builder of {@link Int2ShortHashMap}. */
+ public static class Builder {
+
+ private final IntArrayList keyList = new IntArrayList();
+ private final ShortArrayList valueList = new ShortArrayList();
+
+ public void put(int key, short value) {
+ keyList.add(key);
+ valueList.add(value);
+ }
+
+ public Int2ShortHashMap build() {
+ Int2ShortHashMap map = new Int2ShortHashMap(keyList.size());
+ for (int i = 0; i < keyList.size(); i++) {
+ map.put(keyList.getInt(i), valueList.getShort(i));
+ }
+ return map;
+ }
+ }
}
diff --git a/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java b/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java
index e9667cfe3..79ff72656 100644
--- a/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java
+++ b/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java
@@ -22,7 +22,6 @@ import org.apache.paimon.data.BinaryRow;
import org.apache.paimon.manifest.IndexManifestEntry;
import org.apache.paimon.utils.Int2ShortHashMap;
import org.apache.paimon.utils.IntIterator;
-import org.apache.paimon.utils.MathUtils;
import java.io.EOFException;
import java.io.IOException;
@@ -113,7 +112,7 @@ public class PartitionIndex {
IntPredicate loadFilter,
IntPredicate bucketFilter) {
List<IndexManifestEntry> files = indexFileHandler.scan(HASH_INDEX,
partition);
- Int2ShortHashMap map = new Int2ShortHashMap(calculateInitialMapSize(files));
+ Int2ShortHashMap.Builder mapBuilder = Int2ShortHashMap.builder();
Map<Integer, Long> buckets = new HashMap<>();
for (IndexManifestEntry file : files) {
try (IntIterator iterator =
indexFileHandler.readHashIndex(file.indexFile())) {
@@ -121,7 +120,7 @@ public class PartitionIndex {
try {
int hash = iterator.next();
if (loadFilter.test(hash)) {
- map.put(hash, (short) file.bucket());
+ mapBuilder.put(hash, (short) file.bucket());
}
if (bucketFilter.test(file.bucket())) {
buckets.compute(
@@ -136,16 +135,6 @@ public class PartitionIndex {
throw new UncheckedIOException(e);
}
}
- return new PartitionIndex(map, buckets, targetBucketRowNumber);
- }
-
- private static int calculateInitialMapSize(List<IndexManifestEntry> files) {
- long size = 16;
- for (IndexManifestEntry file : files) {
- size = Math.max(size, file.indexFile().rowCount());
- }
- return MathUtils.isPowerOf2(size)
- ? (int) size
- : MathUtils.roundDownToPowerOf2((int) size) * 2;
+ return new PartitionIndex(mapBuilder.build(), buckets, targetBucketRowNumber);
}
}