gavinchou commented on code in PR #46798:
URL: https://github.com/apache/doris/pull/46798#discussion_r1915348412


##########
cloud/src/recycler/recycler.cpp:
##########
@@ -1588,59 +1610,93 @@ int InstanceRecycler::recycle_tablet(int64_t tablet_id) 
{
             .tag("instance_id", instance_id_)
             .tag("tablet_id", tablet_id);
 
+    int ret = 0;
     auto start_time = steady_clock::now();
 
-    std::unique_ptr<int, std::function<void(int*)>> 
defer_log_statistics((int*)0x01, [&](int*) {
-        auto cost = duration<float>(steady_clock::now() - start_time).count();
-        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", 
cost)
-                .tag("instance_id", instance_id_)
-                .tag("tablet_id", tablet_id);
-    });
+    std::unique_ptr<Transaction> txn;
+    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
+        LOG_WARNING("failed to delete rowset kv of tablet ")
+                .tag("tablet id", tablet_id)
+                .tag("reason", "failed to create txn");
+        ret = -1;
+    }
 
-    // delete all rowset kv in this tablet
+    // collect resource ids
     std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
     std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
     std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, 
""});
     std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 
1, ""});
 
-    int ret = 0;
-    std::unique_ptr<Transaction> txn;
-    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
-        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id;
-        ret = -1;
-    }
-    txn->remove(rs_key0, rs_key1);
-    txn->remove(recyc_rs_key0, recyc_rs_key1);
+    std::set<std::string> resource_ids;
 
-    // remove delete bitmap for MoW table
-    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, 
tablet_id});
-    txn->remove(pending_key);
-    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, 
tablet_id, "", 0, 0});
-    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, 
tablet_id + 1, "", 0, 0});
-    txn->remove(delete_bitmap_start, delete_bitmap_end);
+    std::unique_ptr<int, std::function<void(int*)>> 
defer_log_statistics((int*)0x01, [&](int*) {
+        auto cost = duration<float>(steady_clock::now() - start_time).count();
+        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", 
cost)
+                .tag("instance_id", instance_id_)
+                .tag("tablet_id", tablet_id)
+                .tag("ret", ret);
+    });
 
-    TxnErrorCode err = txn->commit();
-    if (err != TxnErrorCode::TXN_OK) {
-        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id 
<< ", err=" << err;
+    GetRowsetResponse resp;
+    std::string msg;
+    MetaServiceCode code = MetaServiceCode::OK;
+    // get rowsets in tablet
+    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, 
instance_id_,

Review Comment:
   this action may not be done in a single transaction.



##########
cloud/src/recycler/recycler.cpp:
##########
@@ -1588,59 +1610,93 @@ int InstanceRecycler::recycle_tablet(int64_t tablet_id) 
{
             .tag("instance_id", instance_id_)
             .tag("tablet_id", tablet_id);
 
+    int ret = 0;
     auto start_time = steady_clock::now();
 
-    std::unique_ptr<int, std::function<void(int*)>> 
defer_log_statistics((int*)0x01, [&](int*) {
-        auto cost = duration<float>(steady_clock::now() - start_time).count();
-        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", 
cost)
-                .tag("instance_id", instance_id_)
-                .tag("tablet_id", tablet_id);
-    });
+    std::unique_ptr<Transaction> txn;
+    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
+        LOG_WARNING("failed to delete rowset kv of tablet ")
+                .tag("tablet id", tablet_id)
+                .tag("reason", "failed to create txn");
+        ret = -1;
+    }
 
-    // delete all rowset kv in this tablet
+    // collect resource ids
     std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
     std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
     std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, 
""});
     std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 
1, ""});
 
-    int ret = 0;
-    std::unique_ptr<Transaction> txn;
-    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
-        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id;
-        ret = -1;
-    }
-    txn->remove(rs_key0, rs_key1);
-    txn->remove(recyc_rs_key0, recyc_rs_key1);
+    std::set<std::string> resource_ids;
 
-    // remove delete bitmap for MoW table
-    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, 
tablet_id});
-    txn->remove(pending_key);
-    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, 
tablet_id, "", 0, 0});
-    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, 
tablet_id + 1, "", 0, 0});
-    txn->remove(delete_bitmap_start, delete_bitmap_end);
+    std::unique_ptr<int, std::function<void(int*)>> 
defer_log_statistics((int*)0x01, [&](int*) {
+        auto cost = duration<float>(steady_clock::now() - start_time).count();
+        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", 
cost)
+                .tag("instance_id", instance_id_)
+                .tag("tablet_id", tablet_id)
+                .tag("ret", ret);
+    });
 
-    TxnErrorCode err = txn->commit();
-    if (err != TxnErrorCode::TXN_OK) {
-        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id 
<< ", err=" << err;
+    GetRowsetResponse resp;
+    std::string msg;
+    MetaServiceCode code = MetaServiceCode::OK;
+    // get rowsets in tablet
+    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, 
instance_id_,
+                        tablet_id, code, msg, &resp);
+    if (code != MetaServiceCode::OK) {
+        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
+                .tag("tablet id", tablet_id)
+                .tag("msg", msg)
+                .tag("code", code);
         ret = -1;
     }
+    
TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", 
&resp);
+
+    for (const auto& rs_meta : resp.rowset_meta()) {
+        if (!rs_meta.has_resource_id()) {
+            continue;

Review Comment:
   This should be impossible; log here.



##########
cloud/src/recycler/recycler.cpp:
##########
@@ -1588,59 +1610,93 @@ int InstanceRecycler::recycle_tablet(int64_t tablet_id) 
{
             .tag("instance_id", instance_id_)
             .tag("tablet_id", tablet_id);
 
+    int ret = 0;
     auto start_time = steady_clock::now();
 
-    std::unique_ptr<int, std::function<void(int*)>> 
defer_log_statistics((int*)0x01, [&](int*) {
-        auto cost = duration<float>(steady_clock::now() - start_time).count();
-        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", 
cost)
-                .tag("instance_id", instance_id_)
-                .tag("tablet_id", tablet_id);
-    });
+    std::unique_ptr<Transaction> txn;
+    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
+        LOG_WARNING("failed to delete rowset kv of tablet ")
+                .tag("tablet id", tablet_id)
+                .tag("reason", "failed to create txn");
+        ret = -1;
+    }
 
-    // delete all rowset kv in this tablet
+    // collect resource ids
     std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
     std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
     std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, 
""});
     std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 
1, ""});
 
-    int ret = 0;
-    std::unique_ptr<Transaction> txn;
-    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
-        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id;
-        ret = -1;
-    }
-    txn->remove(rs_key0, rs_key1);
-    txn->remove(recyc_rs_key0, recyc_rs_key1);
+    std::set<std::string> resource_ids;
 
-    // remove delete bitmap for MoW table
-    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, 
tablet_id});
-    txn->remove(pending_key);
-    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, 
tablet_id, "", 0, 0});
-    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, 
tablet_id + 1, "", 0, 0});
-    txn->remove(delete_bitmap_start, delete_bitmap_end);
+    std::unique_ptr<int, std::function<void(int*)>> 
defer_log_statistics((int*)0x01, [&](int*) {
+        auto cost = duration<float>(steady_clock::now() - start_time).count();
+        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", 
cost)
+                .tag("instance_id", instance_id_)
+                .tag("tablet_id", tablet_id)
+                .tag("ret", ret);
+    });
 
-    TxnErrorCode err = txn->commit();
-    if (err != TxnErrorCode::TXN_OK) {
-        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id 
<< ", err=" << err;
+    GetRowsetResponse resp;
+    std::string msg;
+    MetaServiceCode code = MetaServiceCode::OK;
+    // get rowsets in tablet
+    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, 
instance_id_,
+                        tablet_id, code, msg, &resp);
+    if (code != MetaServiceCode::OK) {
+        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
+                .tag("tablet id", tablet_id)
+                .tag("msg", msg)
+                .tag("code", code);
         ret = -1;
     }
+    
TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", 
&resp);
+
+    for (const auto& rs_meta : resp.rowset_meta()) {

Review Comment:
   also make some stats about
   1. number of rowsets, number of segments 
   2. sum of size of all rowsets, index size (if there is any)
   3. max version
   4. min and max creation time, min and max expiration time (if any)



##########
cloud/src/recycler/recycler.cpp:
##########
@@ -1588,59 +1610,93 @@ int InstanceRecycler::recycle_tablet(int64_t tablet_id) 
{
             .tag("instance_id", instance_id_)
             .tag("tablet_id", tablet_id);
 
+    int ret = 0;
     auto start_time = steady_clock::now();
 
-    std::unique_ptr<int, std::function<void(int*)>> 
defer_log_statistics((int*)0x01, [&](int*) {
-        auto cost = duration<float>(steady_clock::now() - start_time).count();
-        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", 
cost)
-                .tag("instance_id", instance_id_)
-                .tag("tablet_id", tablet_id);
-    });
+    std::unique_ptr<Transaction> txn;
+    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
+        LOG_WARNING("failed to delete rowset kv of tablet ")
+                .tag("tablet id", tablet_id)
+                .tag("reason", "failed to create txn");
+        ret = -1;
+    }
 
-    // delete all rowset kv in this tablet
+    // collect resource ids
     std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
     std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
     std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, 
""});
     std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 
1, ""});
 
-    int ret = 0;
-    std::unique_ptr<Transaction> txn;
-    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
-        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id;
-        ret = -1;
-    }
-    txn->remove(rs_key0, rs_key1);
-    txn->remove(recyc_rs_key0, recyc_rs_key1);
+    std::set<std::string> resource_ids;
 
-    // remove delete bitmap for MoW table
-    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, 
tablet_id});
-    txn->remove(pending_key);
-    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, 
tablet_id, "", 0, 0});
-    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, 
tablet_id + 1, "", 0, 0});
-    txn->remove(delete_bitmap_start, delete_bitmap_end);
+    std::unique_ptr<int, std::function<void(int*)>> 
defer_log_statistics((int*)0x01, [&](int*) {
+        auto cost = duration<float>(steady_clock::now() - start_time).count();
+        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", 
cost)
+                .tag("instance_id", instance_id_)
+                .tag("tablet_id", tablet_id)
+                .tag("ret", ret);
+    });
 
-    TxnErrorCode err = txn->commit();
-    if (err != TxnErrorCode::TXN_OK) {
-        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id 
<< ", err=" << err;
+    GetRowsetResponse resp;
+    std::string msg;
+    MetaServiceCode code = MetaServiceCode::OK;
+    // get rowsets in tablet
+    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, 
instance_id_,
+                        tablet_id, code, msg, &resp);
+    if (code != MetaServiceCode::OK) {
+        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")

Review Comment:
   also log instance id



##########
cloud/src/recycler/recycler.cpp:
##########
@@ -30,6 +30,7 @@
 #include <string_view>
 
 #include "common/stopwatch.h"
+#include "meta-service/meta_service.h"

Review Comment:
   what is this header for?



##########
cloud/src/recycler/recycler.cpp:
##########
@@ -276,7 +280,12 @@ void Recycler::recycle_callback() {
             std::lock_guard lock(mtx_);
             recycling_instance_map_.erase(instance_id);
         }
-        LOG_INFO("finish recycle instance").tag("instance_id", instance_id);
+        auto elpased_ms =

Review Comment:
   strange indentation



##########
cloud/src/recycler/recycler.cpp:
##########
@@ -1588,59 +1610,93 @@ int InstanceRecycler::recycle_tablet(int64_t tablet_id) 
{
             .tag("instance_id", instance_id_)
             .tag("tablet_id", tablet_id);
 
+    int ret = 0;
     auto start_time = steady_clock::now();
 
-    std::unique_ptr<int, std::function<void(int*)>> 
defer_log_statistics((int*)0x01, [&](int*) {
-        auto cost = duration<float>(steady_clock::now() - start_time).count();
-        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", 
cost)
-                .tag("instance_id", instance_id_)
-                .tag("tablet_id", tablet_id);
-    });
+    std::unique_ptr<Transaction> txn;
+    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
+        LOG_WARNING("failed to delete rowset kv of tablet ")
+                .tag("tablet id", tablet_id)
+                .tag("reason", "failed to create txn");
+        ret = -1;
+    }
 
-    // delete all rowset kv in this tablet
+    // collect resource ids
     std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
     std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
     std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, 
""});
     std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 
1, ""});
 
-    int ret = 0;
-    std::unique_ptr<Transaction> txn;
-    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
-        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id;
-        ret = -1;
-    }
-    txn->remove(rs_key0, rs_key1);
-    txn->remove(recyc_rs_key0, recyc_rs_key1);
+    std::set<std::string> resource_ids;
 
-    // remove delete bitmap for MoW table
-    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, 
tablet_id});
-    txn->remove(pending_key);
-    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, 
tablet_id, "", 0, 0});
-    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, 
tablet_id + 1, "", 0, 0});
-    txn->remove(delete_bitmap_start, delete_bitmap_end);
+    std::unique_ptr<int, std::function<void(int*)>> 
defer_log_statistics((int*)0x01, [&](int*) {
+        auto cost = duration<float>(steady_clock::now() - start_time).count();
+        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", 
cost)
+                .tag("instance_id", instance_id_)
+                .tag("tablet_id", tablet_id)
+                .tag("ret", ret);
+    });
 
-    TxnErrorCode err = txn->commit();
-    if (err != TxnErrorCode::TXN_OK) {
-        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id 
<< ", err=" << err;
+    GetRowsetResponse resp;
+    std::string msg;
+    MetaServiceCode code = MetaServiceCode::OK;
+    // get rowsets in tablet
+    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, 
instance_id_,
+                        tablet_id, code, msg, &resp);
+    if (code != MetaServiceCode::OK) {
+        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
+                .tag("tablet id", tablet_id)
+                .tag("msg", msg)
+                .tag("code", code);
         ret = -1;
     }
+    
TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", 
&resp);
+
+    for (const auto& rs_meta : resp.rowset_meta()) {
+        if (!rs_meta.has_resource_id()) {
+            continue;
+        }
+        auto it = accessor_map_.find(rs_meta.resource_id());
+        // possible if the accessor is not initilized correctly
+        if (it == accessor_map_.end()) [[unlikely]] {
+            LOG_WARNING(
+                    "failed to find resource id when recycle tablet, skip this 
vault accessor "
+                    "recycle process")
+                    .tag("tablet id", tablet_id)
+                    .tag("instance_id", instance_id_)
+                    .tag("resource_id", rs_meta.resource_id())
+                    .tag("rowset meta pb", rs_meta.DebugString());
+            continue;
+        }
+        resource_ids.emplace(rs_meta.resource_id());
+    }
+
+    LOG_INFO("recycle tablet resource ids are")

Review Comment:
   also log instance id, tablet id



##########
cloud/src/recycler/recycler.cpp:
##########
@@ -1588,59 +1610,93 @@ int InstanceRecycler::recycle_tablet(int64_t tablet_id) 
{
             .tag("instance_id", instance_id_)
             .tag("tablet_id", tablet_id);
 
+    int ret = 0;
     auto start_time = steady_clock::now();
 
-    std::unique_ptr<int, std::function<void(int*)>> 
defer_log_statistics((int*)0x01, [&](int*) {
-        auto cost = duration<float>(steady_clock::now() - start_time).count();
-        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", 
cost)
-                .tag("instance_id", instance_id_)
-                .tag("tablet_id", tablet_id);
-    });
+    std::unique_ptr<Transaction> txn;
+    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
+        LOG_WARNING("failed to delete rowset kv of tablet ")
+                .tag("tablet id", tablet_id)
+                .tag("reason", "failed to create txn");
+        ret = -1;
+    }
 
-    // delete all rowset kv in this tablet
+    // collect resource ids
     std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
     std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
     std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, 
""});
     std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 
1, ""});
 
-    int ret = 0;
-    std::unique_ptr<Transaction> txn;
-    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
-        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id;
-        ret = -1;
-    }
-    txn->remove(rs_key0, rs_key1);
-    txn->remove(recyc_rs_key0, recyc_rs_key1);
+    std::set<std::string> resource_ids;
 
-    // remove delete bitmap for MoW table
-    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, 
tablet_id});
-    txn->remove(pending_key);
-    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, 
tablet_id, "", 0, 0});
-    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, 
tablet_id + 1, "", 0, 0});
-    txn->remove(delete_bitmap_start, delete_bitmap_end);
+    std::unique_ptr<int, std::function<void(int*)>> 
defer_log_statistics((int*)0x01, [&](int*) {
+        auto cost = duration<float>(steady_clock::now() - start_time).count();
+        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", 
cost)
+                .tag("instance_id", instance_id_)
+                .tag("tablet_id", tablet_id)
+                .tag("ret", ret);
+    });
 
-    TxnErrorCode err = txn->commit();
-    if (err != TxnErrorCode::TXN_OK) {
-        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id 
<< ", err=" << err;
+    GetRowsetResponse resp;
+    std::string msg;
+    MetaServiceCode code = MetaServiceCode::OK;
+    // get rowsets in tablet
+    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, 
instance_id_,
+                        tablet_id, code, msg, &resp);
+    if (code != MetaServiceCode::OK) {
+        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
+                .tag("tablet id", tablet_id)
+                .tag("msg", msg)
+                .tag("code", code);
         ret = -1;
     }
+    
TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", 
&resp);
+
+    for (const auto& rs_meta : resp.rowset_meta()) {
+        if (!rs_meta.has_resource_id()) {
+            continue;
+        }
+        auto it = accessor_map_.find(rs_meta.resource_id());
+        // possible if the accessor is not initilized correctly
+        if (it == accessor_map_.end()) [[unlikely]] {
+            LOG_WARNING(
+                    "failed to find resource id when recycle tablet, skip this 
vault accessor "
+                    "recycle process")
+                    .tag("tablet id", tablet_id)
+                    .tag("instance_id", instance_id_)
+                    .tag("resource_id", rs_meta.resource_id())
+                    .tag("rowset meta pb", rs_meta.DebugString());

Review Comment:
   use rs_meta.ShortDebugString() instead.



##########
cloud/src/recycler/recycler.cpp:
##########
@@ -1588,59 +1610,93 @@ int InstanceRecycler::recycle_tablet(int64_t tablet_id) 
{
             .tag("instance_id", instance_id_)
             .tag("tablet_id", tablet_id);
 
+    int ret = 0;
     auto start_time = steady_clock::now();
 
-    std::unique_ptr<int, std::function<void(int*)>> 
defer_log_statistics((int*)0x01, [&](int*) {
-        auto cost = duration<float>(steady_clock::now() - start_time).count();
-        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", 
cost)
-                .tag("instance_id", instance_id_)
-                .tag("tablet_id", tablet_id);
-    });
+    std::unique_ptr<Transaction> txn;
+    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
+        LOG_WARNING("failed to delete rowset kv of tablet ")
+                .tag("tablet id", tablet_id)
+                .tag("reason", "failed to create txn");
+        ret = -1;
+    }
 
-    // delete all rowset kv in this tablet
+    // collect resource ids
     std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
     std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
     std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, 
""});
     std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 
1, ""});
 
-    int ret = 0;
-    std::unique_ptr<Transaction> txn;
-    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
-        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id;
-        ret = -1;
-    }
-    txn->remove(rs_key0, rs_key1);
-    txn->remove(recyc_rs_key0, recyc_rs_key1);
+    std::set<std::string> resource_ids;
 
-    // remove delete bitmap for MoW table
-    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, 
tablet_id});
-    txn->remove(pending_key);
-    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, 
tablet_id, "", 0, 0});
-    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, 
tablet_id + 1, "", 0, 0});
-    txn->remove(delete_bitmap_start, delete_bitmap_end);
+    std::unique_ptr<int, std::function<void(int*)>> 
defer_log_statistics((int*)0x01, [&](int*) {
+        auto cost = duration<float>(steady_clock::now() - start_time).count();
+        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", 
cost)
+                .tag("instance_id", instance_id_)
+                .tag("tablet_id", tablet_id)
+                .tag("ret", ret);

Review Comment:
   log more stats from line 1655



##########
cloud/src/recycler/recycler.cpp:
##########
@@ -1588,59 +1610,93 @@ int InstanceRecycler::recycle_tablet(int64_t tablet_id) 
{
             .tag("instance_id", instance_id_)
             .tag("tablet_id", tablet_id);
 
+    int ret = 0;
     auto start_time = steady_clock::now();
 
-    std::unique_ptr<int, std::function<void(int*)>> 
defer_log_statistics((int*)0x01, [&](int*) {
-        auto cost = duration<float>(steady_clock::now() - start_time).count();
-        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", 
cost)
-                .tag("instance_id", instance_id_)
-                .tag("tablet_id", tablet_id);
-    });
+    std::unique_ptr<Transaction> txn;
+    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
+        LOG_WARNING("failed to delete rowset kv of tablet ")
+                .tag("tablet id", tablet_id)
+                .tag("reason", "failed to create txn");
+        ret = -1;
+    }
 
-    // delete all rowset kv in this tablet
+    // collect resource ids
     std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
     std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
     std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, 
""});
     std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 
1, ""});
 
-    int ret = 0;
-    std::unique_ptr<Transaction> txn;
-    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
-        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id;
-        ret = -1;
-    }
-    txn->remove(rs_key0, rs_key1);
-    txn->remove(recyc_rs_key0, recyc_rs_key1);
+    std::set<std::string> resource_ids;
 
-    // remove delete bitmap for MoW table
-    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, 
tablet_id});
-    txn->remove(pending_key);
-    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, 
tablet_id, "", 0, 0});
-    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, 
tablet_id + 1, "", 0, 0});
-    txn->remove(delete_bitmap_start, delete_bitmap_end);
+    std::unique_ptr<int, std::function<void(int*)>> 
defer_log_statistics((int*)0x01, [&](int*) {
+        auto cost = duration<float>(steady_clock::now() - start_time).count();
+        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", 
cost)
+                .tag("instance_id", instance_id_)
+                .tag("tablet_id", tablet_id)
+                .tag("ret", ret);
+    });
 
-    TxnErrorCode err = txn->commit();
-    if (err != TxnErrorCode::TXN_OK) {
-        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id 
<< ", err=" << err;
+    GetRowsetResponse resp;
+    std::string msg;
+    MetaServiceCode code = MetaServiceCode::OK;
+    // get rowsets in tablet
+    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, 
instance_id_,
+                        tablet_id, code, msg, &resp);
+    if (code != MetaServiceCode::OK) {
+        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
+                .tag("tablet id", tablet_id)
+                .tag("msg", msg)
+                .tag("code", code);
         ret = -1;
     }
+    
TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", 
&resp);
+
+    for (const auto& rs_meta : resp.rowset_meta()) {
+        if (!rs_meta.has_resource_id()) {
+            continue;
+        }
+        auto it = accessor_map_.find(rs_meta.resource_id());
+        // possible if the accessor is not initilized correctly
+        if (it == accessor_map_.end()) [[unlikely]] {

Review Comment:
   We should not consider this a successful tablet recycle if we fail to find the vault.
   Return an error if we encounter this situation; we should not continue.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to