gavinchou commented on code in PR #54516:
URL: https://github.com/apache/doris/pull/54516#discussion_r2272408384
##########
cloud/src/recycler/recycler.cpp:
##########
@@ -2901,55 +2908,85 @@ int InstanceRecycler::recycle_restore_jobs() {
LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" <<
instance_id_
<< " restore_job_pb=" << restore_job_pb.DebugString();
- std::string restore_job_rs_key0 =
job_restore_rowset_key({instance_id_, tablet_id, 0});
- std::string restore_job_rs_key1 =
job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
-
std::unique_ptr<Transaction> txn;
- std::string msg;
- MetaServiceCode code = MetaServiceCode::OK;
- if (code != MetaServiceCode::OK) {
- LOG_WARNING("scan restore job rowsets failed when recycle restore
jobs")
+ TxnErrorCode err = txn_kv_->create_txn(&txn);
+ if (err != TxnErrorCode::TXN_OK) {
+ LOG_WARNING("failed to recycle restore job")
+ .tag("err", err)
.tag("tablet id", tablet_id)
- .tag("msg", msg)
- .tag("code", code)
- .tag("instance id", instance_id_);
+ .tag("instance_id", instance_id_)
+ .tag("reason", "failed to create txn");
return -1;
}
- // Recycle all data and KV associated with the tablet.
- // This includes rowsets, segments, and related resources.
- if (recycle_tablet(tablet_id, metrics_context) != 0) {
- LOG_WARNING("failed to recycle tablet")
- .tag("tablet_id", tablet_id)
- .tag("instance_id", instance_id_);
+ std::string val;
+ err = txn->get(k, &val);
+ if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip
it
+ LOG_INFO("restore job {} has been recycled", tablet_id);
+ return 0;
+ }
+ if (err != TxnErrorCode::TXN_OK) {
+ LOG_WARNING("failed to get kv");
return -1;
- } else {
- // Delete restore job rowsets kv only if tablet recycling succeeded
- // to prevent data leak.
- TxnErrorCode err = txn_kv_->create_txn(&txn);
+ }
+ restore_job_pb.Clear();
+ if (!restore_job_pb.ParseFromString(val)) {
+ LOG_WARNING("malformed recycle restore job value").tag("key",
hex(k));
+ return -1;
+ }
+
+ // PREPARED or COMMITTED, change state to DROPPED and return
+ if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
+ restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
+ restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
+ restore_job_pb.set_need_recycle_data(true);
+ txn->put(k, restore_job_pb.SerializeAsString());
+ err = txn->commit();
if (err != TxnErrorCode::TXN_OK) {
- LOG_WARNING("failed to recycle restore job")
- .tag("err", err)
- .tag("tablet id", tablet_id)
- .tag("instance_id", instance_id_)
- .tag("reason", "failed to create txn");
+ LOG_WARNING("failed to commit txn: {}", err);
return -1;
}
+ num_aborted++;
+ return 0;
+ }
- // delete all restore job rowset kv
- txn->remove(restore_job_rs_key0, restore_job_rs_key1);
-
+ // Change state to RECYCLING
+ if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
+ restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
+ txn->put(k, restore_job_pb.SerializeAsString());
err = txn->commit();
if (err != TxnErrorCode::TXN_OK) {
- LOG_WARNING("failed to recycle tablet restore job rowset kv")
- .tag("err", err)
- .tag("tablet id", tablet_id)
- .tag("instance_id", instance_id_)
- .tag("reason", "failed to commit txn");
+ LOG_WARNING("failed to commit txn: {}", err);
return -1;
}
}
+ std::string restore_job_rs_key0 =
job_restore_rowset_key({instance_id_, tablet_id, 0});
+ std::string restore_job_rs_key1 =
job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
+
+ // Recycle all data associated with the restore job.
+ // This includes rowsets, segments, and related resources.
+ bool need_recycle_data = restore_job_pb.need_recycle_data();
+ if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) !=
0) {
+ LOG_WARNING("failed to recycle tablet")
+ .tag("tablet_id", tablet_id)
+ .tag("instance_id", instance_id_);
+ return -1;
+ }
+
+ // delete all restore job rowset kv
+ txn->remove(restore_job_rs_key0, restore_job_rs_key1);
+
+ err = txn->commit();
Review Comment:
L2957 commit 过了, 这里不应该再 commit
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]