JingsongLi commented on code in PR #5546:
URL: https://github.com/apache/paimon/pull/5546#discussion_r2062914754


##########
paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java:
##########
@@ -85,6 +84,9 @@ default boolean checkedDelete(long position) {
     /** @return the number of distinct integers added to the DeletionVector. */
     long getCardinality();
 
+    /** @return the version of the deletion vector. */
+    int dvVersion();

Review Comment:
   version



##########
paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVectorIndexFileWriter.java:
##########
@@ -127,9 +125,25 @@ private long writtenSizeInBytes() {
 
         private void write(String key, DeletionVector deletionVector) throws 
IOException {
             Preconditions.checkNotNull(dataOutputStream);
-            if (versionId == VERSION_ID_V1) {
+            if (writeVersionId == BitmapDeletionVector.VERSION) {
+                Preconditions.checkArgument(
+                        deletionVector instanceof BitmapDeletionVector,
+                        String.format(
+                                "write version id is %s, but deletionVector is 
not an instance of %s, actual class is %s.",
+                                writeVersionId,
+                                BitmapDeletionVector.class.getName(),
+                                deletionVector.getClass().getName()));
+
                 writeV1(key, deletionVector);
-            } else if (versionId == VERSION_ID_V2) {
+            } else if (writeVersionId == Bitmap64DeletionVector.VERSION) {
+                Preconditions.checkArgument(

Review Comment:
   Please use checkArgument(bool, string, args...)



##########
paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java:
##########
@@ -98,40 +100,55 @@ default boolean checkedDelete(long position) {
      * @param bytes The byte array containing the serialized deletion vector.
      * @return A DeletionVector instance that represents the deserialized data.
      */
-    static DeletionVector deserializeFromBytes(byte[] bytes) {
-        try {
-            ByteBuffer buffer = ByteBuffer.wrap(bytes);
-            int magicNum = buffer.getInt();
-            if (magicNum == BitmapDeletionVector.MAGIC_NUMBER) {
-                return BitmapDeletionVector.deserializeFromByteBuffer(buffer);
-            } else {
-                throw new RuntimeException("Invalid magic number: " + 
magicNum);
-            }
-        } catch (IOException e) {
-            throw new RuntimeException("Unable to deserialize deletion 
vector", e);
+    static DeletionVector deserializeFromBytes(byte[] bytes, int version) {
+        if (version == BitmapDeletionVector.VERSION) {
+            return BitmapDeletionVector.deserializeFromBytes(bytes);
+        } else if (version == Bitmap64DeletionVector.VERSION) {
+            return Bitmap64DeletionVector.deserializeFromBytes(bytes);
+        } else {
+            throw new RuntimeException("Invalid deletion vector version: " + 
version);
         }
     }
 
     static DeletionVector read(FileIO fileIO, DeletionFile deletionFile) 
throws IOException {
         Path path = new Path(deletionFile.path());
         try (SeekableInputStream input = fileIO.newInputStream(path)) {
+            // read dv version
+            int version = input.read();
             input.seek(deletionFile.offset());
             DataInputStream dis = new DataInputStream(input);
-            int actualSize = dis.readInt();
-            if (actualSize != deletionFile.length()) {
+
+            if (version == BitmapDeletionVector.VERSION) {
+                // read v1 deletion vector
+                int actualSize = dis.readInt();
+                if (actualSize != deletionFile.length()) {
+                    throw new RuntimeException(
+                            "Size not match, actual size: "
+                                    + actualSize
+                                    + ", expected size: "
+                                    + deletionFile.length()
+                                    + ", file path: "
+                                    + path);
+                }
+
+                byte[] bytes = new byte[actualSize];
+                dis.readFully(bytes);
+                return BitmapDeletionVector.deserializeFromBytes(bytes);
+            } else if (version == Bitmap64DeletionVector.VERSION) {
+                // read v2 deletion vector
+                byte[] bytes = new byte[(int) deletionFile.length()];
+                dis.readFully(bytes);
+
+                return Bitmap64DeletionVector.deserializeFromBytes(bytes);

Review Comment:
   Why v2 doesn't need to skip 4 bytes?



##########
paimon-common/src/main/java/org/apache/paimon/CoreOptions.java:
##########
@@ -1510,6 +1510,13 @@ public class CoreOptions implements Serializable {
                     .defaultValue(MemorySize.ofMebiBytes(2))
                     .withDescription("The target size of deletion vector index 
file.");
 
+    public static final ConfigOption<Integer> DELETION_VECTOR_VERSION =
+            key("deletion-vector.version")

Review Comment:
   deletion-vectors.version



##########
paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVectorIndexFileWriter.java:
##########
@@ -127,9 +125,25 @@ private long writtenSizeInBytes() {
 
         private void write(String key, DeletionVector deletionVector) throws 
IOException {
             Preconditions.checkNotNull(dataOutputStream);
-            if (versionId == VERSION_ID_V1) {
+            if (writeVersionId == BitmapDeletionVector.VERSION) {
+                Preconditions.checkArgument(
+                        deletionVector instanceof BitmapDeletionVector,
+                        String.format(

Review Comment:
   Please use checkArgument(bool, string, args...)



##########
paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java:
##########
@@ -98,40 +100,55 @@ default boolean checkedDelete(long position) {
      * @param bytes The byte array containing the serialized deletion vector.
      * @return A DeletionVector instance that represents the deserialized data.
      */
-    static DeletionVector deserializeFromBytes(byte[] bytes) {
-        try {
-            ByteBuffer buffer = ByteBuffer.wrap(bytes);
-            int magicNum = buffer.getInt();
-            if (magicNum == BitmapDeletionVector.MAGIC_NUMBER) {
-                return BitmapDeletionVector.deserializeFromByteBuffer(buffer);
-            } else {
-                throw new RuntimeException("Invalid magic number: " + 
magicNum);
-            }
-        } catch (IOException e) {
-            throw new RuntimeException("Unable to deserialize deletion 
vector", e);
+    static DeletionVector deserializeFromBytes(byte[] bytes, int version) {
+        if (version == BitmapDeletionVector.VERSION) {
+            return BitmapDeletionVector.deserializeFromBytes(bytes);
+        } else if (version == Bitmap64DeletionVector.VERSION) {
+            return Bitmap64DeletionVector.deserializeFromBytes(bytes);
+        } else {
+            throw new RuntimeException("Invalid deletion vector version: " + 
version);
         }
     }
 
     static DeletionVector read(FileIO fileIO, DeletionFile deletionFile) 
throws IOException {
         Path path = new Path(deletionFile.path());
         try (SeekableInputStream input = fileIO.newInputStream(path)) {
+            // read dv version
+            int version = input.read();

Review Comment:
   Can we avoid this reading? Can we just use magic number to decide which 
version?



##########
paimon-core/src/main/java/org/apache/paimon/deletionvectors/BitmapDeletionVector.java:
##########
@@ -31,6 +31,7 @@
  * not exceeding {@link RoaringBitmap32#MAX_VALUE}.
  */
 public class BitmapDeletionVector implements DeletionVector {
+    public static final int VERSION = 1;

Review Comment:
   keep an empty line up



##########
paimon-common/src/main/java/org/apache/paimon/CoreOptions.java:
##########
@@ -1510,6 +1510,13 @@ public class CoreOptions implements Serializable {
                     .defaultValue(MemorySize.ofMebiBytes(2))
                     .withDescription("The target size of deletion vector index 
file.");
 
+    public static final ConfigOption<Integer> DELETION_VECTOR_VERSION =
+            key("deletion-vector.version")
+                    .intType()
+                    .defaultValue(2)

Review Comment:
   default is 1



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@paimon.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to