Copilot commented on code in PR #61007:
URL: https://github.com/apache/doris/pull/61007#discussion_r2879052338
##########
cloud/src/recycler/recycler.cpp:
##########
@@ -3224,6 +3238,225 @@ int
InstanceRecycler::decrement_packed_file_ref_counts(const doris::RowsetMetaCl
return ret;
}
+int InstanceRecycler::decrement_delete_bitmap_packed_file_ref_counts(int64_t
tablet_id,
+ const
std::string& rowset_id,
+ bool*
out_is_packed) {
+ if (out_is_packed) {
+ *out_is_packed = false;
+ }
+
+ // Get delete bitmap storage info from FDB
+ std::string dbm_key = versioned::meta_delete_bitmap_key({instance_id_,
tablet_id, rowset_id});
+ std::unique_ptr<Transaction> txn;
+ TxnErrorCode err = txn_kv_->create_txn(&txn);
+ if (err != TxnErrorCode::TXN_OK) {
+ LOG_WARNING("failed to create txn when getting delete bitmap storage")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("err", err);
+ return -1;
+ }
+
+ std::string dbm_val;
+ err = txn->get(dbm_key, &dbm_val);
+ if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
+ // No delete bitmap for this rowset, nothing to do
+ LOG_INFO("delete bitmap not found, skip packed file ref count
decrement")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id);
+ return 0;
+ }
+ if (err != TxnErrorCode::TXN_OK) {
+ LOG_WARNING("failed to get delete bitmap storage")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("err", err);
+ return -1;
+ }
+
+ DeleteBitmapStoragePB storage;
+ if (!storage.ParseFromString(dbm_val)) {
+ LOG_WARNING("failed to parse delete bitmap storage")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id);
+ return -1;
+ }
+
+ // Check if delete bitmap is stored in packed file
+ if (!storage.has_packed_slice_location() ||
+ storage.packed_slice_location().packed_file_path().empty()) {
+ // Not stored in packed file, nothing to do
+ return 0;
+ }
+
+ if (out_is_packed) {
+ *out_is_packed = true;
+ }
+
+ const auto& packed_loc = storage.packed_slice_location();
+ const std::string& packed_file_path = packed_loc.packed_file_path();
+
+ LOG_INFO("decrementing delete bitmap packed file ref count")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("packed_file_path", packed_file_path);
+
+ const int max_retry_times = std::max(1,
config::decrement_packed_file_ref_counts_retry_times);
+ for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
+ std::unique_ptr<Transaction> update_txn;
+ err = txn_kv_->create_txn(&update_txn);
+ if (err != TxnErrorCode::TXN_OK) {
+ LOG_WARNING("failed to create txn for delete bitmap packed file
update")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("err", err);
+ return -1;
+ }
+
+ std::string packed_key = packed_file_key({instance_id_,
packed_file_path});
+ std::string packed_val;
+ err = update_txn->get(packed_key, &packed_val);
+ if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
+ LOG_WARNING("packed file info not found for delete bitmap")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("packed_file_path", packed_file_path);
+ return 0;
+ }
+ if (err != TxnErrorCode::TXN_OK) {
+ LOG_WARNING("failed to get packed file info for delete bitmap")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("packed_file_path", packed_file_path)
+ .tag("err", err);
+ return -1;
+ }
+
+ cloud::PackedFileInfoPB packed_info;
+ if (!packed_info.ParseFromString(packed_val)) {
+ LOG_WARNING("failed to parse packed file info for delete bitmap")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("packed_file_path", packed_file_path);
+ return -1;
+ }
+
+ // Find and mark the small file entry as deleted
+ // Use tablet_id and rowset_id to match entry instead of path,
+ // because path format may vary with path_version (with or without
shard prefix)
+ auto* entries = packed_info.mutable_slices();
+ bool found = false;
+ bool already_deleted = false;
+ for (auto& entry : *entries) {
+ if (entry.tablet_id() == tablet_id && entry.rowset_id() ==
rowset_id) {
+ if (!entry.deleted()) {
+ entry.set_deleted(true);
+ if (!entry.corrected()) {
+ entry.set_corrected(true);
+ }
+ } else {
+ already_deleted = true;
+ }
+ found = true;
+ break;
+ }
+ }
+
Review Comment:
`decrement_delete_bitmap_packed_file_ref_counts()` looks up the slice to
mark deleted using only `tablet_id` + `rowset_id`. A packed file can contain
multiple slices for the same rowset (all segments + delete-bitmap), and segment
slices are already marked deleted by `decrement_packed_file_ref_counts()`
earlier in `delete_rowset_data()`. This can cause this function to match an
unrelated (already-deleted) segment entry and return early, leaving the
delete-bitmap slice undeleted and the packed file `ref_cnt` incorrect. Consider
matching the slice using the persisted `packed_slice_location` (e.g., `offset`
+ `size`, optionally also `tablet_id/rowset_id`) instead of only
`tablet_id/rowset_id`.
```suggestion
// Prefer an undeleted slice for this tablet/rowset pair. There may
be
// multiple slices (segments + delete-bitmap) for the same rowset;
// segment slices are expected to be marked deleted earlier, so here
// we look for a remaining undeleted slice to mark as deleted.
cloud::PackedFileSlicePB* target_slice = nullptr;
for (auto& entry : *entries) {
if (entry.tablet_id() == tablet_id && entry.rowset_id() ==
rowset_id) {
found = true;
if (!entry.deleted()) {
target_slice = &entry;
break; // prefer the first undeleted matching slice
} else {
// Remember that at least one matching slice is already
deleted
already_deleted = true;
}
}
}
if (target_slice != nullptr) {
target_slice->set_deleted(true);
if (!target_slice->corrected()) {
target_slice->set_corrected(true);
}
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]