This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch release-1.0
in repository https://gitbox.apache.org/repos/asf/paimon.git

commit 005fb1442f3a8138d52b16fe14901ee25fc31797
Author: Tan-JiaLiang <[email protected]>
AuthorDate: Sat Dec 28 17:38:25 2024 +0800

    [common] Using a faster deserialization method in RoaringBitmap32 (#4765)
---
 .../benchmark/bitmap/RoaringBitmapBenchmark.java   | 111 +++++++++++++++++++++
 .../org/apache/paimon/utils/RoaringBitmap32.java   |  24 ++---
 .../deletionvectors/BitmapDeletionVector.java      |  10 +-
 .../paimon/deletionvectors/DeletionVector.java     |  27 +++--
 .../compact/aggregate/FieldRoaringBitmap32Agg.java |   5 +-
 5 files changed, 144 insertions(+), 33 deletions(-)

diff --git a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/bitmap/RoaringBitmapBenchmark.java b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/bitmap/RoaringBitmapBenchmark.java
new file mode 100644
index 0000000000..4b989e96e5
--- /dev/null
+++ b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/bitmap/RoaringBitmapBenchmark.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.benchmark.bitmap;
+
+import org.apache.paimon.benchmark.Benchmark;
+import org.apache.paimon.fs.local.LocalFileIO;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.roaringbitmap.RoaringBitmap;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.file.Path;
+import java.util.Random;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/** Benchmark for {@link RoaringBitmap}. */
+public class RoaringBitmapBenchmark {
+
+    public static final int ROW_COUNT = 10000000;
+
+    @TempDir Path tempDir;
+
+    @Test
+    public void testDeserialize() throws Exception {
+        Random random = new Random();
+        RoaringBitmap bitmap = new RoaringBitmap();
+        for (int i = 0; i < ROW_COUNT; i++) {
+            if (random.nextBoolean()) {
+                bitmap.add(i);
+            }
+        }
+
+        File file = new File(tempDir.toFile(), "bitmap32-deserialize-benchmark");
+        assertThat(file.createNewFile()).isTrue();
+        try (FileOutputStream output = new FileOutputStream(file);
+                DataOutputStream dos = new DataOutputStream(output)) {
+            bitmap.serialize(dos);
+        }
+
+        Benchmark benchmark =
+                new Benchmark("bitmap32-deserialize-benchmark", 100)
+                        .setNumWarmupIters(1)
+                        .setOutputPerIteration(true);
+
+        benchmark.addCase(
+                "deserialize(DataInput)",
+                10,
+                () -> {
+                    try (LocalFileIO.LocalSeekableInputStream seekableStream =
+                                    new LocalFileIO.LocalSeekableInputStream(file);
+                            DataInputStream input = new DataInputStream(seekableStream)) {
+                        new RoaringBitmap().deserialize(input);
+                    } catch (IOException e) {
+                        throw new RuntimeException(e);
+                    }
+                });
+
+        benchmark.addCase(
+                "deserialize(DataInput, byte[])",
+                10,
+                () -> {
+                    try (LocalFileIO.LocalSeekableInputStream seekableStream =
+                                    new LocalFileIO.LocalSeekableInputStream(file);
+                            DataInputStream input = new DataInputStream(seekableStream)) {
+                        new RoaringBitmap().deserialize(input, null);
+                    } catch (IOException e) {
+                        throw new RuntimeException(e);
+                    }
+                });
+
+        benchmark.addCase(
+                "deserialize(ByteBuffer)",
+                10,
+                () -> {
+                    try (LocalFileIO.LocalSeekableInputStream seekableStream =
+                                    new LocalFileIO.LocalSeekableInputStream(file);
+                            DataInputStream input = new DataInputStream(seekableStream)) {
+                        byte[] bytes = new byte[(int) file.length()];
+                        input.readFully(bytes);
+                        new RoaringBitmap().deserialize(ByteBuffer.wrap(bytes));
+                    } catch (IOException e) {
+                        throw new RuntimeException(e);
+                    }
+                });
+
+        benchmark.run();
+    }
+}
diff --git a/paimon-common/src/main/java/org/apache/paimon/utils/RoaringBitmap32.java b/paimon-common/src/main/java/org/apache/paimon/utils/RoaringBitmap32.java
index 5f352f61cd..6496b7003e 100644
--- a/paimon-common/src/main/java/org/apache/paimon/utils/RoaringBitmap32.java
+++ b/paimon-common/src/main/java/org/apache/paimon/utils/RoaringBitmap32.java
@@ -85,8 +85,19 @@ public class RoaringBitmap32 {
         roaringBitmap.serialize(out);
     }
 
+    public byte[] serialize() {
+        roaringBitmap.runOptimize();
+        ByteBuffer buffer = ByteBuffer.allocate(roaringBitmap.serializedSizeInBytes());
+        roaringBitmap.serialize(buffer);
+        return buffer.array();
+    }
+
     public void deserialize(DataInput in) throws IOException {
-        roaringBitmap.deserialize(in);
+        roaringBitmap.deserialize(in, null);
+    }
+
+    public void deserialize(ByteBuffer buffer) throws IOException {
+        roaringBitmap.deserialize(buffer);
     }
 
     @Override
@@ -105,17 +116,6 @@ public class RoaringBitmap32 {
         roaringBitmap.clear();
     }
 
-    public byte[] serialize() {
-        roaringBitmap.runOptimize();
-        ByteBuffer buffer = ByteBuffer.allocate(roaringBitmap.serializedSizeInBytes());
-        roaringBitmap.serialize(buffer);
-        return buffer.array();
-    }
-
-    public void deserialize(byte[] rbmBytes) throws IOException {
-        roaringBitmap.deserialize(ByteBuffer.wrap(rbmBytes));
-    }
-
     public void flip(final long rangeStart, final long rangeEnd) {
         roaringBitmap.flip(rangeStart, rangeEnd);
     }
diff --git a/paimon-core/src/main/java/org/apache/paimon/deletionvectors/BitmapDeletionVector.java b/paimon-core/src/main/java/org/apache/paimon/deletionvectors/BitmapDeletionVector.java
index 51ae729c21..55e0a975e3 100644
--- a/paimon-core/src/main/java/org/apache/paimon/deletionvectors/BitmapDeletionVector.java
+++ b/paimon-core/src/main/java/org/apache/paimon/deletionvectors/BitmapDeletionVector.java
@@ -21,9 +21,9 @@ package org.apache.paimon.deletionvectors;
 import org.apache.paimon.utils.RoaringBitmap32;
 
 import java.io.ByteArrayOutputStream;
-import java.io.DataInput;
 import java.io.DataOutputStream;
 import java.io.IOException;
+import java.nio.ByteBuffer;
 import java.util.Objects;
 
 /**
@@ -93,10 +93,10 @@ public class BitmapDeletionVector implements DeletionVector {
         }
     }
 
-    public static DeletionVector deserializeFromDataInput(DataInput bis) throws IOException {
-        RoaringBitmap32 roaringBitmap = new RoaringBitmap32();
-        roaringBitmap.deserialize(bis);
-        return new BitmapDeletionVector(roaringBitmap);
+    public static DeletionVector deserializeFromByteBuffer(ByteBuffer buffer) throws IOException {
+        RoaringBitmap32 bitmap = new RoaringBitmap32();
+        bitmap.deserialize(buffer);
+        return new BitmapDeletionVector(bitmap);
     }
 
     private void checkPosition(long position) {
diff --git a/paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java b/paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java
index 8feeac63f7..967e80b0ba 100644
--- a/paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java
+++ b/paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java
@@ -26,9 +26,9 @@ import org.apache.paimon.table.source.DeletionFile;
 
 import javax.annotation.Nullable;
 
-import java.io.ByteArrayInputStream;
 import java.io.DataInputStream;
 import java.io.IOException;
+import java.nio.ByteBuffer;
 import java.util.List;
 import java.util.Optional;
 
@@ -99,11 +99,11 @@ public interface DeletionVector {
      * @return A DeletionVector instance that represents the deserialized data.
      */
     static DeletionVector deserializeFromBytes(byte[] bytes) {
-        try (ByteArrayInputStream bis = new ByteArrayInputStream(bytes);
-                DataInputStream dis = new DataInputStream(bis)) {
-            int magicNum = dis.readInt();
+        try {
+            ByteBuffer buffer = ByteBuffer.wrap(bytes);
+            int magicNum = buffer.getInt();
             if (magicNum == BitmapDeletionVector.MAGIC_NUMBER) {
-                return BitmapDeletionVector.deserializeFromDataInput(dis);
+                return BitmapDeletionVector.deserializeFromByteBuffer(buffer);
             } else {
                 throw new RuntimeException("Invalid magic number: " + magicNum);
             }
@@ -117,22 +117,21 @@ public interface DeletionVector {
         try (SeekableInputStream input = fileIO.newInputStream(path)) {
             input.seek(deletionFile.offset());
             DataInputStream dis = new DataInputStream(input);
-            int actualLength = dis.readInt();
-            if (actualLength != deletionFile.length()) {
+            int actualSize = dis.readInt();
+            if (actualSize != deletionFile.length()) {
                 throw new RuntimeException(
                         "Size not match, actual size: "
-                                + actualLength
+                                + actualSize
                                 + ", expert size: "
                                 + deletionFile.length()
                                 + ", file path: "
                                 + path);
             }
-            int magicNum = dis.readInt();
-            if (magicNum == BitmapDeletionVector.MAGIC_NUMBER) {
-                return BitmapDeletionVector.deserializeFromDataInput(dis);
-            } else {
-                throw new RuntimeException("Invalid magic number: " + magicNum);
-            }
+
+            // read DeletionVector bytes
+            byte[] bytes = new byte[actualSize];
+            dis.readFully(bytes);
+            return deserializeFromBytes(bytes);
         }
     }
 
diff --git a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldRoaringBitmap32Agg.java b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldRoaringBitmap32Agg.java
index ef7ac20e83..436a88a3cc 100644
--- a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldRoaringBitmap32Agg.java
+++ b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldRoaringBitmap32Agg.java
@@ -22,6 +22,7 @@ import org.apache.paimon.types.VarBinaryType;
 import org.apache.paimon.utils.RoaringBitmap32;
 
 import java.io.IOException;
+import java.nio.ByteBuffer;
 
 /** roaring bitmap aggregate a field of a row. */
 public class FieldRoaringBitmap32Agg extends FieldAggregator {
@@ -43,8 +44,8 @@ public class FieldRoaringBitmap32Agg extends FieldAggregator {
         }
 
         try {
-            roaringBitmapAcc.deserialize((byte[]) accumulator);
-            roaringBitmapInput.deserialize((byte[]) inputField);
+            roaringBitmapAcc.deserialize(ByteBuffer.wrap((byte[]) accumulator));
+            roaringBitmapInput.deserialize(ByteBuffer.wrap((byte[]) inputField));
             roaringBitmapAcc.or(roaringBitmapInput);
             return roaringBitmapAcc.serialize();
         } catch (IOException e) {

Reply via email to