Copilot commented on code in PR #69: URL: https://github.com/apache/paimon-cpp/pull/69#discussion_r3385148583
########## src/paimon/core/deletionvectors/bitmap_deletion_vector.cpp: ########## @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include "paimon/core/deletionvectors/bitmap_deletion_vector.h" + +#include "arrow/util/crc32.h" +#include "fmt/format.h" +#include "paimon/common/io/memory_segment_output_stream.h" +#include "paimon/io/byte_array_input_stream.h" +#include "paimon/io/data_input_stream.h" + +namespace paimon { + +Result<int32_t> BitmapDeletionVector::SerializeTo(const std::shared_ptr<MemoryPool>& pool, + DataOutputStream* out) { + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<Bytes> data, SerializeToBytes(pool)); + int64_t size = data->size(); + if (size < 0 || size > std::numeric_limits<int32_t>::max()) { + return Status::Invalid("BitmapDeletionVector serialize size out of range: ", size); + } Review Comment: `Status::Invalid` is used here with two arguments, while elsewhere in this PR errors are constructed via `fmt::format(...)`. If `Status::Invalid` doesn’t have an overload that accepts `(const char*, int64_t)`, this won’t compile (or will produce an unintended message). Construct the message with `fmt::format` (or equivalent) so the size value is reliably included. ########## src/paimon/core/deletionvectors/bitmap_deletion_vector.cpp: ########## @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include "paimon/core/deletionvectors/bitmap_deletion_vector.h" + +#include "arrow/util/crc32.h" +#include "fmt/format.h" +#include "paimon/common/io/memory_segment_output_stream.h" +#include "paimon/io/byte_array_input_stream.h" +#include "paimon/io/data_input_stream.h" + +namespace paimon { + +Result<int32_t> BitmapDeletionVector::SerializeTo(const std::shared_ptr<MemoryPool>& pool, + DataOutputStream* out) { + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<Bytes> data, SerializeToBytes(pool)); + int64_t size = data->size(); + if (size < 0 || size > std::numeric_limits<int32_t>::max()) { + return Status::Invalid("BitmapDeletionVector serialize size out of range: ", size); + } + PAIMON_RETURN_NOT_OK(out->WriteValue<int32_t>(static_cast<int32_t>(size))); + PAIMON_RETURN_NOT_OK(out->WriteBytes(data)); + uint32_t crc32 = 0; + crc32 = arrow::internal::crc32(crc32, data->data(), size); + PAIMON_RETURN_NOT_OK(out->WriteValue<int32_t>(static_cast<int32_t>(crc32))); + return static_cast<int32_t>(size); +} + +Result<PAIMON_UNIQUE_PTR<Bytes>> BitmapDeletionVector::SerializeToBytes( + const std::shared_ptr<MemoryPool>& pool) { + std::shared_ptr<Bytes> bitmap_bytes = roaring_bitmap_.Serialize(pool.get()); + if (bitmap_bytes == nullptr) { + assert(bitmap_bytes); + return Status::Invalid("roaring bitmap serialize failed"); + } Review Comment: This `assert(bitmap_bytes);` will always fail when `bitmap_bytes == nullptr`, causing debug builds to abort instead of returning the intended error `Status`. Remove the assert (or invert it outside the error path) so failure is handled consistently in all build modes. ########## src/paimon/core/deletionvectors/bucketed_dv_maintainer.h: ########## @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include <map> +#include <memory> +#include <optional> +#include <string> + +#include "paimon/core/deletionvectors/bitmap_deletion_vector.h" +#include "paimon/core/deletionvectors/deletion_vector.h" +#include "paimon/core/deletionvectors/deletion_vectors_index_file.h" +#include "paimon/core/index/index_file_handler.h" +#include "paimon/core/index/index_file_meta.h" + +namespace paimon { + +// Maintainer of deletionVectors index. +class BucketedDvMaintainer { + public: + BucketedDvMaintainer( + const std::shared_ptr<DeletionVectorsIndexFile>& dv_index_file, + const std::map<std::string, std::shared_ptr<DeletionVector>>& deletion_vectors) + : dv_index_file_(dv_index_file), + deletion_vectors_(deletion_vectors), + bitmap64_(dv_index_file->Bitmap64()) {} + + Status NotifyNewDeletion(const std::string& file_name, int64_t position) { + std::shared_ptr<DeletionVector> dv; + if (auto it = deletion_vectors_.find(file_name); it == deletion_vectors_.end()) { + if (bitmap64_) { + return Status::NotImplemented("not support bitmap 64 deletion vectors"); + } + RoaringBitmap32 roaring_bitmap; + auto inserted = std::make_shared<BitmapDeletionVector>(roaring_bitmap); + deletion_vectors_[file_name] = inserted; + dv = std::move(inserted); + } else { + dv = it->second; + } + + PAIMON_ASSIGN_OR_RAISE(bool updated, dv->CheckedDelete(position)); + if (updated) { + modified_ = true; + } + return Status::OK(); + } + + std::optional<std::shared_ptr<DeletionVector>> DeletionVectorOf( + const std::string& file_name) const { + if (auto it = deletion_vectors_.find(file_name); it != deletion_vectors_.end()) { + return it->second; + } + return std::nullopt; + } + + void RemoveDeletionVectorOf(const std::string& file_name) { + if (deletion_vectors_.erase(file_name) > 0) { + modified_ = true; + } + } + + Result<std::optional<std::shared_ptr<IndexFileMeta>>> WriteDeletionVectorsIndex() { + if (modified_) { + modified_ = false; + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<IndexFileMeta> result, + dv_index_file_->WriteSingleFile(deletion_vectors_)); + return std::make_optional<std::shared_ptr<IndexFileMeta>>(result); + } + return std::optional<std::shared_ptr<IndexFileMeta>>(); + } Review Comment: `modified_` is cleared before attempting the write. If `WriteSingleFile(...)` fails, the maintainer will incorrectly consider itself “not modified” and will not retry writing on subsequent calls. Only reset `modified_` after a successful write (or restore it on error). ########## src/paimon/core/deletionvectors/bucketed_dv_maintainer.h: ########## @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#pragma once + +#include <map> +#include <memory> +#include <optional> +#include <string> + +#include "paimon/core/deletionvectors/bitmap_deletion_vector.h" +#include "paimon/core/deletionvectors/deletion_vector.h" +#include "paimon/core/deletionvectors/deletion_vectors_index_file.h" +#include "paimon/core/index/index_file_handler.h" +#include "paimon/core/index/index_file_meta.h" + +namespace paimon { + +// Maintainer of deletionVectors index. +class BucketedDvMaintainer { + public: + BucketedDvMaintainer( + const std::shared_ptr<DeletionVectorsIndexFile>& dv_index_file, + const std::map<std::string, std::shared_ptr<DeletionVector>>& deletion_vectors) + : dv_index_file_(dv_index_file), + deletion_vectors_(deletion_vectors), + bitmap64_(dv_index_file->Bitmap64()) {} + + Status NotifyNewDeletion(const std::string& file_name, int64_t position) { + std::shared_ptr<DeletionVector> dv; + if (auto it = deletion_vectors_.find(file_name); it == deletion_vectors_.end()) { + if (bitmap64_) { + return Status::NotImplemented("not support bitmap 64 deletion vectors"); + } Review Comment: The NotImplemented message is grammatically unclear and inconsistent with other messages in this PR. Consider changing it to something like “bitmap64 deletion vectors are not supported” (and keep “bitmap64” consistently formatted). ########## src/paimon/core/deletionvectors/deletion_vector.cpp: ########## @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include "paimon/core/deletionvectors/deletion_vector.h" + +#include <cstddef> +#include <string> + +#include "fmt/format.h" +#include "paimon/core/deletionvectors/bitmap64_deletion_vector.h" +#include "paimon/core/deletionvectors/bitmap_deletion_vector.h" +#include "paimon/core/deletionvectors/bucketed_dv_maintainer.h" +#include "paimon/core/table/source/deletion_file.h" +#include "paimon/fs/file_system.h" +#include "paimon/io/data_input_stream.h" +#include "paimon/memory/memory_pool.h" + +namespace paimon { + +DeletionVector::Factory DeletionVector::CreateFactory( + const std::shared_ptr<FileSystem>& file_system, + const std::unordered_map<std::string, DeletionFile>& deletion_file_map, + const std::shared_ptr<MemoryPool>& pool) { + return [file_system, deletion_file_map, + pool](const std::string& file_name) -> Result<std::shared_ptr<DeletionVector>> { + auto iter = deletion_file_map.find(file_name); + if (iter != deletion_file_map.end()) { + PAIMON_ASSIGN_OR_RAISE( + std::shared_ptr<DeletionVector> dv, + DeletionVector::Read(file_system.get(), iter->second, pool.get())); + return dv; + } + return std::shared_ptr<DeletionVector>(); + }; +} + +DeletionVector::Factory DeletionVector::CreateFactory( + const std::shared_ptr<BucketedDvMaintainer>& dv_maintainer) { + return + [dv_maintainer](const std::string& file_name) -> Result<std::shared_ptr<DeletionVector>> { + if (dv_maintainer) { + return dv_maintainer->DeletionVectorOf(file_name).value_or( + std::shared_ptr<DeletionVector>()); + } + return std::shared_ptr<DeletionVector>(); + }; +} + +Result<PAIMON_UNIQUE_PTR<DeletionVector>> DeletionVector::DeserializeFromBytes(const Bytes* bytes, + MemoryPool* pool) { + return BitmapDeletionVector::Deserialize(bytes->data(), bytes->size(), pool); +} + +Result<PAIMON_UNIQUE_PTR<DeletionVector>> DeletionVector::Read(const FileSystem* file_system, + const DeletionFile& deletion_file, + MemoryPool* pool) { + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<InputStream> input, + file_system->Open(deletion_file.path)); + DataInputStream file_input_stream(input); + PAIMON_RETURN_NOT_OK(file_input_stream.Seek(deletion_file.offset)); + PAIMON_ASSIGN_OR_RAISE(int32_t actual_length, file_input_stream.ReadValue<int32_t>()); + if (actual_length != deletion_file.length) { + return Status::Invalid( + fmt::format("Size not match, actual size: {}, expect size: {}, file path: {}", + actual_length, deletion_file.length, deletion_file.path)); + } + auto bytes = Bytes::AllocateBytes(deletion_file.length, pool); + PAIMON_RETURN_NOT_OK(file_input_stream.ReadBytes(bytes.get())); + return DeserializeFromBytes(bytes.get(), pool); +} + +Result<PAIMON_UNIQUE_PTR<DeletionVector>> DeletionVector::Read(DataInputStream* input_stream, + std::optional<int64_t> length, + MemoryPool* pool) { + PAIMON_ASSIGN_OR_RAISE(int32_t bitmap_length, input_stream->ReadValue<int32_t>()); + PAIMON_ASSIGN_OR_RAISE(int32_t magic_number, input_stream->ReadValue<int32_t>()); + + if (magic_number == BitmapDeletionVector::MAGIC_NUMBER) { + if (length.has_value() && bitmap_length != length.value()) { + return Status::Invalid(fmt::format("Size not match, actual size: {}, expected size: {}", + bitmap_length, length.value())); + } + + int32_t payload_length = bitmap_length - BitmapDeletionVector::MAGIC_NUMBER_SIZE_BYTES; + if (payload_length < 0) { + return Status::Invalid(fmt::format("Invalid bitmap length: {}", bitmap_length)); + } + + auto bytes = Bytes::AllocateBytes(payload_length, pool); + PAIMON_RETURN_NOT_OK(input_stream->ReadBytes(bytes.get())); + // skip crc (4 bytes) + PAIMON_ASSIGN_OR_RAISE([[maybe_unused]] int32_t unused_crc, + input_stream->ReadValue<int32_t>()); + + return BitmapDeletionVector::DeserializeWithoutMagicNumber(bytes->data(), bytes->size(), + pool); Review Comment: The file format writes a CRC, but the deserializer explicitly skips verification. This makes silent corruption much harder to detect (especially for remote/object stores). Compute CRC over the same bytes used during serialization and compare with the stored CRC; return `Invalid` on mismatch. ########## src/paimon/core/deletionvectors/deletion_vectors_index_file.cpp: ########## @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include "paimon/core/deletionvectors/deletion_vectors_index_file.h" + +#include <map> +#include <memory> +#include <string> +#include <vector> + +#include "paimon/core/deletionvectors/deletion_vector_index_file_writer.h" + +namespace paimon { + +Result<std::shared_ptr<IndexFileMeta>> DeletionVectorsIndexFile::WriteSingleFile( + const std::map<std::string, std::shared_ptr<DeletionVector>>& input) { + return CreateWriter()->WriteSingleFile(input); +} + +std::shared_ptr<DeletionVectorIndexFileWriter> DeletionVectorsIndexFile::CreateWriter() const { + return std::make_shared<DeletionVectorIndexFileWriter>(fs_, path_factory_, pool_); +} + +Result<std::map<std::string, std::shared_ptr<DeletionVector>>> +DeletionVectorsIndexFile::ReadAllDeletionVectors( + const std::shared_ptr<IndexFileMeta>& file_meta) const { + std::optional<LinkedHashMap<std::string, DeletionVectorMeta>> deletion_vector_metas = + file_meta->DvRanges(); + if (deletion_vector_metas == std::nullopt) { + return Status::Invalid( + fmt::format("Read all deletion vectors failed from IndexFileMeta '{}'. Deletion vector " + "metas is null", + file_meta->FileName())); + } + + std::map<std::string, std::shared_ptr<DeletionVector>> deletion_vectors; + std::string file_path = path_factory_->ToPath(file_meta); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<InputStream> input_stream, fs_->Open(file_path)); + auto data_input_stream = std::make_shared<DataInputStream>(input_stream); + PAIMON_RETURN_NOT_OK(CheckVersion(data_input_stream)); + for (const auto& [_, deletion_vector_meta] : deletion_vector_metas.value()) { + PAIMON_ASSIGN_OR_RAISE( + std::shared_ptr<DeletionVector> dv, + DeletionVector::Read(data_input_stream.get(), + static_cast<int64_t>(deletion_vector_meta.GetLength()), + pool_.get())); + deletion_vectors[deletion_vector_meta.GetDataFileName()] = dv; + } Review Comment: `DeletionVectorMeta` includes an offset (`GetOffset()`), but the reader ignores it and assumes vectors are laid out and iterated in exactly the same order as stored in `DvRanges()`. If the metadata order differs (e.g., map reordering, future format changes, or any non-sequential layout), reads will deserialize from the wrong position. Use `GetOffset()` to `Seek` before reading each DV (or sort metas by offset and read sequentially). ########## src/paimon/core/deletionvectors/bitmap_deletion_vector.cpp: ########## @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include "paimon/core/deletionvectors/bitmap_deletion_vector.h" + +#include "arrow/util/crc32.h" +#include "fmt/format.h" +#include "paimon/common/io/memory_segment_output_stream.h" +#include "paimon/io/byte_array_input_stream.h" +#include "paimon/io/data_input_stream.h" + +namespace paimon { + +Result<int32_t> BitmapDeletionVector::SerializeTo(const std::shared_ptr<MemoryPool>& pool, + DataOutputStream* out) { + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<Bytes> data, SerializeToBytes(pool)); + int64_t size = data->size(); + if (size < 0 || size > std::numeric_limits<int32_t>::max()) { + return Status::Invalid("BitmapDeletionVector serialize size out of range: ", size); + } + PAIMON_RETURN_NOT_OK(out->WriteValue<int32_t>(static_cast<int32_t>(size))); + PAIMON_RETURN_NOT_OK(out->WriteBytes(data)); + uint32_t crc32 = 0; + crc32 = arrow::internal::crc32(crc32, data->data(), size); + PAIMON_RETURN_NOT_OK(out->WriteValue<int32_t>(static_cast<int32_t>(crc32))); + return static_cast<int32_t>(size); +} + +Result<PAIMON_UNIQUE_PTR<Bytes>> BitmapDeletionVector::SerializeToBytes( + const std::shared_ptr<MemoryPool>& pool) { + std::shared_ptr<Bytes> bitmap_bytes = roaring_bitmap_.Serialize(pool.get()); + if (bitmap_bytes == nullptr) { + assert(bitmap_bytes); + return Status::Invalid("roaring bitmap serialize failed"); + } + MemorySegmentOutputStream output(MemorySegmentOutputStream::DEFAULT_SEGMENT_SIZE, pool); + output.WriteValue<int32_t>(MAGIC_NUMBER); + output.WriteBytes(bitmap_bytes); + return MemorySegmentUtils::CopyToBytes(output.Segments(), /*offset=*/0, output.CurrentSize(), + pool.get()); +} + +Status BitmapDeletionVector::CheckPosition(int64_t position) const { + if (position > RoaringBitmap32::MAX_VALUE) { + return Status::Invalid(fmt::format( + "The file has too many rows, RoaringBitmap32 only supports files with row count " + "not exceeding {}.", + RoaringBitmap32::MAX_VALUE)); + } + return Status::OK(); +} Review Comment: `CheckPosition` doesn’t reject negative `position` values. A negative row position is invalid by definition and would be cast to `int32_t` and accepted by the bitmap operations, leading to incorrect DV contents/reads. Add an explicit `position < 0` check and return an `Invalid` status. ########## src/paimon/core/deletionvectors/deletion_vectors_index_file.cpp: ########## @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include "paimon/core/deletionvectors/deletion_vectors_index_file.h" + +#include <map> +#include <memory> +#include <string> +#include <vector> + +#include "paimon/core/deletionvectors/deletion_vector_index_file_writer.h" + +namespace paimon { + +Result<std::shared_ptr<IndexFileMeta>> DeletionVectorsIndexFile::WriteSingleFile( + const std::map<std::string, std::shared_ptr<DeletionVector>>& input) { + return CreateWriter()->WriteSingleFile(input); +} + +std::shared_ptr<DeletionVectorIndexFileWriter> DeletionVectorsIndexFile::CreateWriter() const { + return std::make_shared<DeletionVectorIndexFileWriter>(fs_, path_factory_, pool_); +} + +Result<std::map<std::string, std::shared_ptr<DeletionVector>>> +DeletionVectorsIndexFile::ReadAllDeletionVectors( + const std::shared_ptr<IndexFileMeta>& file_meta) const { + std::optional<LinkedHashMap<std::string, DeletionVectorMeta>> deletion_vector_metas = + file_meta->DvRanges(); + if (deletion_vector_metas == std::nullopt) { + return Status::Invalid( + fmt::format("Read all deletion vectors failed from IndexFileMeta '{}'. Deletion vector " + "metas is null", + file_meta->FileName())); Review Comment: “metas is null” is awkward and less actionable. Consider rephrasing to clearly state what field is missing, e.g. “IndexFileMeta '<name>' has no DvRanges() (missing deletion vector metadata)”. ########## src/paimon/core/deletionvectors/deletion_vector_index_file_writer.cpp: ########## @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include "paimon/core/deletionvectors/deletion_vector_index_file_writer.h" + +#include "paimon/common/utils/scope_guard.h" +#include "paimon/core/deletionvectors/deletion_file_writer.h" + +namespace paimon { + +Result<std::shared_ptr<IndexFileMeta>> DeletionVectorIndexFileWriter::WriteSingleFile( + const std::map<std::string, std::shared_ptr<DeletionVector>>& input) { + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr<DeletionFileWriter> writer, + DeletionFileWriter::Create(index_path_factory_, fs_, pool_)); + ScopeGuard guard([&]() { + if (writer) { + (void)writer->Close(); + } + }); + for (const auto& [key, value] : input) { + PAIMON_RETURN_NOT_OK(writer->Write(key, value)); + } + guard.Release(); + PAIMON_RETURN_NOT_OK(writer->Close()); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<IndexFileMeta> result, writer->GetResult()); + return result; Review Comment: Releasing the scope guard before `Close()` means a `Close()` failure returns early without any guaranteed cleanup path from the guard. Consider either (a) releasing the guard only after `Close()` succeeds, or (b) keeping the guard and tracking a `closed` flag so the guard closes only when needed. This makes resource cleanup robust in error paths. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
