Copilot commented on code in PR #69:
URL: https://github.com/apache/paimon-cpp/pull/69#discussion_r3385148583


##########
src/paimon/core/deletionvectors/bitmap_deletion_vector.cpp:
##########
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "paimon/core/deletionvectors/bitmap_deletion_vector.h"
+
+#include "arrow/util/crc32.h"
+#include "fmt/format.h"
+#include "paimon/common/io/memory_segment_output_stream.h"
+#include "paimon/io/byte_array_input_stream.h"
+#include "paimon/io/data_input_stream.h"
+
+namespace paimon {
+
+Result<int32_t> BitmapDeletionVector::SerializeTo(const 
std::shared_ptr<MemoryPool>& pool,
+                                                  DataOutputStream* out) {
+    PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<Bytes> data, 
SerializeToBytes(pool));
+    int64_t size = data->size();
+    if (size < 0 || size > std::numeric_limits<int32_t>::max()) {
+        return Status::Invalid("BitmapDeletionVector serialize size out of 
range: ", size);
+    }

Review Comment:
   `Status::Invalid` is used here with two arguments, while elsewhere in this 
PR errors are constructed via `fmt::format(...)`. If `Status::Invalid` doesn’t 
have an overload that accepts `(const char*, int64_t)`, this won’t compile (or 
will produce an unintended message). Construct the message with `fmt::format` 
(or equivalent) so the size value is reliably included.



##########
src/paimon/core/deletionvectors/bitmap_deletion_vector.cpp:
##########
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "paimon/core/deletionvectors/bitmap_deletion_vector.h"
+
+#include "arrow/util/crc32.h"
+#include "fmt/format.h"
+#include "paimon/common/io/memory_segment_output_stream.h"
+#include "paimon/io/byte_array_input_stream.h"
+#include "paimon/io/data_input_stream.h"
+
+namespace paimon {
+
+Result<int32_t> BitmapDeletionVector::SerializeTo(const 
std::shared_ptr<MemoryPool>& pool,
+                                                  DataOutputStream* out) {
+    PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<Bytes> data, 
SerializeToBytes(pool));
+    int64_t size = data->size();
+    if (size < 0 || size > std::numeric_limits<int32_t>::max()) {
+        return Status::Invalid("BitmapDeletionVector serialize size out of 
range: ", size);
+    }
+    PAIMON_RETURN_NOT_OK(out->WriteValue<int32_t>(static_cast<int32_t>(size)));
+    PAIMON_RETURN_NOT_OK(out->WriteBytes(data));
+    uint32_t crc32 = 0;
+    crc32 = arrow::internal::crc32(crc32, data->data(), size);
+    
PAIMON_RETURN_NOT_OK(out->WriteValue<int32_t>(static_cast<int32_t>(crc32)));
+    return static_cast<int32_t>(size);
+}
+
+Result<PAIMON_UNIQUE_PTR<Bytes>> BitmapDeletionVector::SerializeToBytes(
+    const std::shared_ptr<MemoryPool>& pool) {
+    std::shared_ptr<Bytes> bitmap_bytes = 
roaring_bitmap_.Serialize(pool.get());
+    if (bitmap_bytes == nullptr) {
+        assert(bitmap_bytes);
+        return Status::Invalid("roaring bitmap serialize failed");
+    }

Review Comment:
   This `assert(bitmap_bytes);` will always fail when `bitmap_bytes == 
nullptr`, causing debug builds to abort instead of returning the intended error 
`Status`. Remove the assert (or invert it outside the error path) so failure is 
handled consistently in all build modes.



##########
src/paimon/core/deletionvectors/bucketed_dv_maintainer.h:
##########
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#pragma once
+
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+
+#include "paimon/core/deletionvectors/bitmap_deletion_vector.h"
+#include "paimon/core/deletionvectors/deletion_vector.h"
+#include "paimon/core/deletionvectors/deletion_vectors_index_file.h"
+#include "paimon/core/index/index_file_handler.h"
+#include "paimon/core/index/index_file_meta.h"
+
+namespace paimon {
+
+// Maintainer of deletionVectors index.
+class BucketedDvMaintainer {
+ public:
+    BucketedDvMaintainer(
+        const std::shared_ptr<DeletionVectorsIndexFile>& dv_index_file,
+        const std::map<std::string, std::shared_ptr<DeletionVector>>& 
deletion_vectors)
+        : dv_index_file_(dv_index_file),
+          deletion_vectors_(deletion_vectors),
+          bitmap64_(dv_index_file->Bitmap64()) {}
+
+    Status NotifyNewDeletion(const std::string& file_name, int64_t position) {
+        std::shared_ptr<DeletionVector> dv;
+        if (auto it = deletion_vectors_.find(file_name); it == 
deletion_vectors_.end()) {
+            if (bitmap64_) {
+                return Status::NotImplemented("not support bitmap 64 deletion 
vectors");
+            }
+            RoaringBitmap32 roaring_bitmap;
+            auto inserted = 
std::make_shared<BitmapDeletionVector>(roaring_bitmap);
+            deletion_vectors_[file_name] = inserted;
+            dv = std::move(inserted);
+        } else {
+            dv = it->second;
+        }
+
+        PAIMON_ASSIGN_OR_RAISE(bool updated, dv->CheckedDelete(position));
+        if (updated) {
+            modified_ = true;
+        }
+        return Status::OK();
+    }
+
+    std::optional<std::shared_ptr<DeletionVector>> DeletionVectorOf(
+        const std::string& file_name) const {
+        if (auto it = deletion_vectors_.find(file_name); it != 
deletion_vectors_.end()) {
+            return it->second;
+        }
+        return std::nullopt;
+    }
+
+    void RemoveDeletionVectorOf(const std::string& file_name) {
+        if (deletion_vectors_.erase(file_name) > 0) {
+            modified_ = true;
+        }
+    }
+
+    Result<std::optional<std::shared_ptr<IndexFileMeta>>> 
WriteDeletionVectorsIndex() {
+        if (modified_) {
+            modified_ = false;
+            PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<IndexFileMeta> result,
+                                   
dv_index_file_->WriteSingleFile(deletion_vectors_));
+            return std::make_optional<std::shared_ptr<IndexFileMeta>>(result);
+        }
+        return std::optional<std::shared_ptr<IndexFileMeta>>();
+    }

Review Comment:
   `modified_` is cleared before attempting the write. If 
`WriteSingleFile(...)` fails, the maintainer will incorrectly consider itself 
“not modified” and will not retry writing on subsequent calls. Only reset 
`modified_` after a successful write (or restore it on error).



##########
src/paimon/core/deletionvectors/bucketed_dv_maintainer.h:
##########
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#pragma once
+
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+
+#include "paimon/core/deletionvectors/bitmap_deletion_vector.h"
+#include "paimon/core/deletionvectors/deletion_vector.h"
+#include "paimon/core/deletionvectors/deletion_vectors_index_file.h"
+#include "paimon/core/index/index_file_handler.h"
+#include "paimon/core/index/index_file_meta.h"
+
+namespace paimon {
+
+// Maintainer of deletionVectors index.
+class BucketedDvMaintainer {
+ public:
+    BucketedDvMaintainer(
+        const std::shared_ptr<DeletionVectorsIndexFile>& dv_index_file,
+        const std::map<std::string, std::shared_ptr<DeletionVector>>& 
deletion_vectors)
+        : dv_index_file_(dv_index_file),
+          deletion_vectors_(deletion_vectors),
+          bitmap64_(dv_index_file->Bitmap64()) {}
+
+    Status NotifyNewDeletion(const std::string& file_name, int64_t position) {
+        std::shared_ptr<DeletionVector> dv;
+        if (auto it = deletion_vectors_.find(file_name); it == 
deletion_vectors_.end()) {
+            if (bitmap64_) {
+                return Status::NotImplemented("not support bitmap 64 deletion 
vectors");
+            }

Review Comment:
   The NotImplemented message is grammatically unclear and inconsistent with 
other messages in this PR. Consider changing it to something like “bitmap64 
deletion vectors are not supported” (and keep “bitmap64” consistently 
formatted).



##########
src/paimon/core/deletionvectors/deletion_vector.cpp:
##########
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "paimon/core/deletionvectors/deletion_vector.h"
+
+#include <cstddef>
+#include <string>
+
+#include "fmt/format.h"
+#include "paimon/core/deletionvectors/bitmap64_deletion_vector.h"
+#include "paimon/core/deletionvectors/bitmap_deletion_vector.h"
+#include "paimon/core/deletionvectors/bucketed_dv_maintainer.h"
+#include "paimon/core/table/source/deletion_file.h"
+#include "paimon/fs/file_system.h"
+#include "paimon/io/data_input_stream.h"
+#include "paimon/memory/memory_pool.h"
+
+namespace paimon {
+
+DeletionVector::Factory DeletionVector::CreateFactory(
+    const std::shared_ptr<FileSystem>& file_system,
+    const std::unordered_map<std::string, DeletionFile>& deletion_file_map,
+    const std::shared_ptr<MemoryPool>& pool) {
+    return [file_system, deletion_file_map,
+            pool](const std::string& file_name) -> 
Result<std::shared_ptr<DeletionVector>> {
+        auto iter = deletion_file_map.find(file_name);
+        if (iter != deletion_file_map.end()) {
+            PAIMON_ASSIGN_OR_RAISE(
+                std::shared_ptr<DeletionVector> dv,
+                DeletionVector::Read(file_system.get(), iter->second, 
pool.get()));
+            return dv;
+        }
+        return std::shared_ptr<DeletionVector>();
+    };
+}
+
+DeletionVector::Factory DeletionVector::CreateFactory(
+    const std::shared_ptr<BucketedDvMaintainer>& dv_maintainer) {
+    return
+        [dv_maintainer](const std::string& file_name) -> 
Result<std::shared_ptr<DeletionVector>> {
+            if (dv_maintainer) {
+                return dv_maintainer->DeletionVectorOf(file_name).value_or(
+                    std::shared_ptr<DeletionVector>());
+            }
+            return std::shared_ptr<DeletionVector>();
+        };
+}
+
+Result<PAIMON_UNIQUE_PTR<DeletionVector>> 
DeletionVector::DeserializeFromBytes(const Bytes* bytes,
+                                                                               
MemoryPool* pool) {
+    return BitmapDeletionVector::Deserialize(bytes->data(), bytes->size(), 
pool);
+}
+
+Result<PAIMON_UNIQUE_PTR<DeletionVector>> DeletionVector::Read(const 
FileSystem* file_system,
+                                                               const 
DeletionFile& deletion_file,
+                                                               MemoryPool* 
pool) {
+    PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<InputStream> input,
+                           file_system->Open(deletion_file.path));
+    DataInputStream file_input_stream(input);
+    PAIMON_RETURN_NOT_OK(file_input_stream.Seek(deletion_file.offset));
+    PAIMON_ASSIGN_OR_RAISE(int32_t actual_length, 
file_input_stream.ReadValue<int32_t>());
+    if (actual_length != deletion_file.length) {
+        return Status::Invalid(
+            fmt::format("Size not match, actual size: {}, expect size: {}, 
file path: {}",
+                        actual_length, deletion_file.length, 
deletion_file.path));
+    }
+    auto bytes = Bytes::AllocateBytes(deletion_file.length, pool);
+    PAIMON_RETURN_NOT_OK(file_input_stream.ReadBytes(bytes.get()));
+    return DeserializeFromBytes(bytes.get(), pool);
+}
+
+Result<PAIMON_UNIQUE_PTR<DeletionVector>> 
DeletionVector::Read(DataInputStream* input_stream,
+                                                               
std::optional<int64_t> length,
+                                                               MemoryPool* 
pool) {
+    PAIMON_ASSIGN_OR_RAISE(int32_t bitmap_length, 
input_stream->ReadValue<int32_t>());
+    PAIMON_ASSIGN_OR_RAISE(int32_t magic_number, 
input_stream->ReadValue<int32_t>());
+
+    if (magic_number == BitmapDeletionVector::MAGIC_NUMBER) {
+        if (length.has_value() && bitmap_length != length.value()) {
+            return Status::Invalid(fmt::format("Size not match, actual size: 
{}, expected size: {}",
+                                               bitmap_length, length.value()));
+        }
+
+        int32_t payload_length = bitmap_length - 
BitmapDeletionVector::MAGIC_NUMBER_SIZE_BYTES;
+        if (payload_length < 0) {
+            return Status::Invalid(fmt::format("Invalid bitmap length: {}", 
bitmap_length));
+        }
+
+        auto bytes = Bytes::AllocateBytes(payload_length, pool);
+        PAIMON_RETURN_NOT_OK(input_stream->ReadBytes(bytes.get()));
+        // skip crc (4 bytes)
+        PAIMON_ASSIGN_OR_RAISE([[maybe_unused]] int32_t unused_crc,
+                               input_stream->ReadValue<int32_t>());
+
+        return 
BitmapDeletionVector::DeserializeWithoutMagicNumber(bytes->data(), 
bytes->size(),
+                                                                   pool);

Review Comment:
   The file format writes a CRC, but the deserializer explicitly skips 
verification. This makes silent corruption much harder to detect (especially 
for remote/object stores). Compute CRC over the same bytes used during 
serialization and compare with the stored CRC; return `Invalid` on mismatch.



##########
src/paimon/core/deletionvectors/deletion_vectors_index_file.cpp:
##########
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "paimon/core/deletionvectors/deletion_vectors_index_file.h"
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "paimon/core/deletionvectors/deletion_vector_index_file_writer.h"
+
+namespace paimon {
+
+Result<std::shared_ptr<IndexFileMeta>> 
DeletionVectorsIndexFile::WriteSingleFile(
+    const std::map<std::string, std::shared_ptr<DeletionVector>>& input) {
+    return CreateWriter()->WriteSingleFile(input);
+}
+
+std::shared_ptr<DeletionVectorIndexFileWriter> 
DeletionVectorsIndexFile::CreateWriter() const {
+    return std::make_shared<DeletionVectorIndexFileWriter>(fs_, path_factory_, 
pool_);
+}
+
+Result<std::map<std::string, std::shared_ptr<DeletionVector>>>
+DeletionVectorsIndexFile::ReadAllDeletionVectors(
+    const std::shared_ptr<IndexFileMeta>& file_meta) const {
+    std::optional<LinkedHashMap<std::string, DeletionVectorMeta>> 
deletion_vector_metas =
+        file_meta->DvRanges();
+    if (deletion_vector_metas == std::nullopt) {
+        return Status::Invalid(
+            fmt::format("Read all deletion vectors failed from IndexFileMeta 
'{}'. Deletion vector "
+                        "metas is null",
+                        file_meta->FileName()));
+    }
+
+    std::map<std::string, std::shared_ptr<DeletionVector>> deletion_vectors;
+    std::string file_path = path_factory_->ToPath(file_meta);
+    PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<InputStream> input_stream, 
fs_->Open(file_path));
+    auto data_input_stream = std::make_shared<DataInputStream>(input_stream);
+    PAIMON_RETURN_NOT_OK(CheckVersion(data_input_stream));
+    for (const auto& [_, deletion_vector_meta] : 
deletion_vector_metas.value()) {
+        PAIMON_ASSIGN_OR_RAISE(
+            std::shared_ptr<DeletionVector> dv,
+            DeletionVector::Read(data_input_stream.get(),
+                                 
static_cast<int64_t>(deletion_vector_meta.GetLength()),
+                                 pool_.get()));
+        deletion_vectors[deletion_vector_meta.GetDataFileName()] = dv;
+    }

Review Comment:
   `DeletionVectorMeta` includes an offset (`GetOffset()`), but the reader 
ignores it and assumes vectors are laid out and iterated in exactly the same 
order as stored in `DvRanges()`. If the metadata order differs (e.g., map 
reordering, future format changes, or any non-sequential layout), reads will 
deserialize from the wrong position. Use `GetOffset()` to `Seek` before reading 
each DV (or sort metas by offset and read sequentially).



##########
src/paimon/core/deletionvectors/bitmap_deletion_vector.cpp:
##########
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "paimon/core/deletionvectors/bitmap_deletion_vector.h"
+
+#include "arrow/util/crc32.h"
+#include "fmt/format.h"
+#include "paimon/common/io/memory_segment_output_stream.h"
+#include "paimon/io/byte_array_input_stream.h"
+#include "paimon/io/data_input_stream.h"
+
+namespace paimon {
+
+Result<int32_t> BitmapDeletionVector::SerializeTo(const 
std::shared_ptr<MemoryPool>& pool,
+                                                  DataOutputStream* out) {
+    PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<Bytes> data, 
SerializeToBytes(pool));
+    int64_t size = data->size();
+    if (size < 0 || size > std::numeric_limits<int32_t>::max()) {
+        return Status::Invalid("BitmapDeletionVector serialize size out of 
range: ", size);
+    }
+    PAIMON_RETURN_NOT_OK(out->WriteValue<int32_t>(static_cast<int32_t>(size)));
+    PAIMON_RETURN_NOT_OK(out->WriteBytes(data));
+    uint32_t crc32 = 0;
+    crc32 = arrow::internal::crc32(crc32, data->data(), size);
+    
PAIMON_RETURN_NOT_OK(out->WriteValue<int32_t>(static_cast<int32_t>(crc32)));
+    return static_cast<int32_t>(size);
+}
+
+Result<PAIMON_UNIQUE_PTR<Bytes>> BitmapDeletionVector::SerializeToBytes(
+    const std::shared_ptr<MemoryPool>& pool) {
+    std::shared_ptr<Bytes> bitmap_bytes = 
roaring_bitmap_.Serialize(pool.get());
+    if (bitmap_bytes == nullptr) {
+        assert(bitmap_bytes);
+        return Status::Invalid("roaring bitmap serialize failed");
+    }
+    MemorySegmentOutputStream 
output(MemorySegmentOutputStream::DEFAULT_SEGMENT_SIZE, pool);
+    output.WriteValue<int32_t>(MAGIC_NUMBER);
+    output.WriteBytes(bitmap_bytes);
+    return MemorySegmentUtils::CopyToBytes(output.Segments(), /*offset=*/0, 
output.CurrentSize(),
+                                           pool.get());
+}
+
+Status BitmapDeletionVector::CheckPosition(int64_t position) const {
+    if (position > RoaringBitmap32::MAX_VALUE) {
+        return Status::Invalid(fmt::format(
+            "The file has too many rows, RoaringBitmap32 only supports files 
with row count "
+            "not exceeding {}.",
+            RoaringBitmap32::MAX_VALUE));
+    }
+    return Status::OK();
+}

Review Comment:
   `CheckPosition` doesn’t reject negative `position` values. A negative row 
position is invalid by definition and would be cast to `int32_t` and accepted 
by the bitmap operations, leading to incorrect DV contents/reads. Add an 
explicit `position < 0` check and return an `Invalid` status.



##########
src/paimon/core/deletionvectors/deletion_vectors_index_file.cpp:
##########
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "paimon/core/deletionvectors/deletion_vectors_index_file.h"
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "paimon/core/deletionvectors/deletion_vector_index_file_writer.h"
+
+namespace paimon {
+
+Result<std::shared_ptr<IndexFileMeta>> 
DeletionVectorsIndexFile::WriteSingleFile(
+    const std::map<std::string, std::shared_ptr<DeletionVector>>& input) {
+    return CreateWriter()->WriteSingleFile(input);
+}
+
+std::shared_ptr<DeletionVectorIndexFileWriter> 
DeletionVectorsIndexFile::CreateWriter() const {
+    return std::make_shared<DeletionVectorIndexFileWriter>(fs_, path_factory_, 
pool_);
+}
+
+Result<std::map<std::string, std::shared_ptr<DeletionVector>>>
+DeletionVectorsIndexFile::ReadAllDeletionVectors(
+    const std::shared_ptr<IndexFileMeta>& file_meta) const {
+    std::optional<LinkedHashMap<std::string, DeletionVectorMeta>> 
deletion_vector_metas =
+        file_meta->DvRanges();
+    if (deletion_vector_metas == std::nullopt) {
+        return Status::Invalid(
+            fmt::format("Read all deletion vectors failed from IndexFileMeta 
'{}'. Deletion vector "
+                        "metas is null",
+                        file_meta->FileName()));

Review Comment:
   “metas is null” is awkward and less actionable. Consider rephrasing to 
clearly state what field is missing, e.g. “IndexFileMeta '<name>' has no 
DvRanges() (missing deletion vector metadata)”.



##########
src/paimon/core/deletionvectors/deletion_vector_index_file_writer.cpp:
##########
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "paimon/core/deletionvectors/deletion_vector_index_file_writer.h"
+
+#include "paimon/common/utils/scope_guard.h"
+#include "paimon/core/deletionvectors/deletion_file_writer.h"
+
+namespace paimon {
+
+Result<std::shared_ptr<IndexFileMeta>> 
DeletionVectorIndexFileWriter::WriteSingleFile(
+    const std::map<std::string, std::shared_ptr<DeletionVector>>& input) {
+    PAIMON_ASSIGN_OR_RAISE(std::unique_ptr<DeletionFileWriter> writer,
+                           DeletionFileWriter::Create(index_path_factory_, 
fs_, pool_));
+    ScopeGuard guard([&]() {
+        if (writer) {
+            (void)writer->Close();
+        }
+    });
+    for (const auto& [key, value] : input) {
+        PAIMON_RETURN_NOT_OK(writer->Write(key, value));
+    }
+    guard.Release();
+    PAIMON_RETURN_NOT_OK(writer->Close());
+    PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<IndexFileMeta> result, 
writer->GetResult());
+    return result;

Review Comment:
   Releasing the scope guard before `Close()` means a `Close()` failure returns 
early without any guaranteed cleanup path from the guard. Consider either (a) 
releasing the guard only after `Close()` succeeds, or (b) keeping the guard and 
tracking a `closed` flag so the guard closes only when needed. This makes 
resource cleanup robust in error paths.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to