This is an automated email from the ASF dual-hosted git repository.

liaoxin01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d1377414429 [fix](cloud) Prevent tablet KV leak after partial recycle 
failure (#63377)
d1377414429 is described below

commit d13774144295d478665d28dfdbcfe2c519aa6ff9
Author: Yixuan Wang <[email protected]>
AuthorDate: Tue Jun 9 17:47:44 2026 +0800

    [fix](cloud) Prevent tablet KV leak after partial recycle failure (#63377)
    
    ### What problem does this PR solve?
    
    recycle_tablets now deletes KV entries only for tablets whose object
    cleanup succeeded.
    On partial deletion failure, it returns -1 while preserving KV
    consistency for failed tablets, so retry can succeed later.
    Added a regression/unit test for partial recycle failure, plus a test
    sync point in SyncExecutor.
---
 cloud/src/common/config.h                  |   1 +
 cloud/src/common/sync_executor.h           |   6 +-
 cloud/src/recycler/recycler.cpp            | 139 +++++++-------
 cloud/test/mock_accessor.h                 |   2 +-
 cloud/test/recycler_operation_log_test.cpp |  12 +-
 cloud/test/recycler_test.cpp               | 297 +++++++++++++++++++++++++++--
 6 files changed, 358 insertions(+), 99 deletions(-)

diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h
index 3b51947e700..25b8e0101e6 100644
--- a/cloud/src/common/config.h
+++ b/cloud/src/common/config.h
@@ -359,6 +359,7 @@ CONF_Int64(txn_lazy_commit_shuffle_seed, "0"); // 0 means 
generate a random seed
 // When enabled, defer deleting pending delete bitmaps until lazy commit 
completes.
 // This reduces contention during transaction commit by extending delete 
bitmap locks.
 CONF_mBool(txn_lazy_commit_defer_deleting_pending_delete_bitmaps, "false");
+CONF_mBool(enable_recycler_check_lazy_txn_finished, "true");
 // max TabletIndexPB num for batch get
 CONF_Int32(max_tablet_index_num_per_batch, "1000");
 CONF_Int32(max_restore_job_rowsets_per_batch, "1000");
diff --git a/cloud/src/common/sync_executor.h b/cloud/src/common/sync_executor.h
index 95650e5316a..7785a8a9881 100644
--- a/cloud/src/common/sync_executor.h
+++ b/cloud/src/common/sync_executor.h
@@ -116,7 +116,9 @@ private:
             T t = _callback();
             // We'll return this task result to user even if this task return 
error
             // So we don't set _valid to false here
-            if (_cancel(t)) {
+            bool bypass_cancel = false;
+            TEST_SYNC_POINT_CALLBACK("SyncExecutor::Task::bypass_cancel", 
&bypass_cancel);
+            if (!bypass_cancel && _cancel(t)) {
                 stop_token = true;
             }
             _pro.set_value(std::move(t));
@@ -146,4 +148,4 @@ private:
     std::function<bool(const T&)> _cancel;
     std::string _name_tag;
 };
-} // namespace doris::cloud
\ No newline at end of file
+} // namespace doris::cloud
diff --git a/cloud/src/recycler/recycler.cpp b/cloud/src/recycler/recycler.cpp
index d1f0bbd9589..f2cea7904d8 100644
--- a/cloud/src/recycler/recycler.cpp
+++ b/cloud/src/recycler/recycler.cpp
@@ -2927,135 +2927,125 @@ int InstanceRecycler::recycle_tablets(int64_t 
table_id, int64_t index_id,
                 .tag("num_recycled", num_recycled);
     };
 
-    // The first string_view represents the tablet key which has been recycled
-    // The second bool represents whether the following fdb's tablet key 
deletion could be done using range move or not
-    using TabletKeyPair = std::pair<std::string_view, bool>;
-    SyncExecutor<TabletKeyPair> sync_executor(
+    // The tablet key and id which have been recycled.
+    struct TabletInfo {
+        std::string_view tablet_meta_key;
+        int64_t tablet_id;
+    };
+    SyncExecutor<TabletInfo> sync_executor(
             _thread_pool_group.recycle_tablet_pool,
             fmt::format("recycle tablets, tablet id {}, index id {}, partition 
id {}", table_id,
                         index_id, partition_id),
-            [](const TabletKeyPair& k) { return k.first.empty(); });
+            [](const TabletInfo& k) { return k.tablet_meta_key.empty(); });
 
-    // Elements in `tablet_keys` has the same lifetime as `it` in 
`scan_and_recycle`
-    std::vector<std::string> tablet_idx_keys;
-    std::vector<std::string> restore_job_keys;
+    // Elements in `tablets_info` has the same lifetime as `it` in 
`scan_and_recycle`
     std::vector<std::string> init_rs_keys;
-    std::vector<std::string> tablet_compact_stats_keys;
-    std::vector<std::string> tablet_load_stats_keys;
-    std::vector<std::string> versioned_meta_tablet_keys;
+    bool has_failure = false;
     auto recycle_func = [&, this](std::string_view k, std::string_view v) -> 
int {
-        bool use_range_remove = true;
         ++num_scanned;
         doris::TabletMetaCloudPB tablet_meta_pb;
         if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
             LOG_WARNING("malformed tablet meta").tag("key", hex(k));
-            use_range_remove = false;
+            has_failure = true;
             return -1;
         }
         int64_t tablet_id = tablet_meta_pb.tablet_id();
 
-        if (!check_lazy_txn_finished(txn_kv_, instance_id_, 
tablet_meta_pb.tablet_id())) {
+        if (config::enable_recycler_check_lazy_txn_finished &&
+            !check_lazy_txn_finished(txn_kv_, instance_id_, 
tablet_meta_pb.tablet_id())) {
             LOG(WARNING) << "lazy txn not finished tablet_id=" << 
tablet_meta_pb.tablet_id();
+            has_failure = true;
             return -1;
         }
 
-        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, 
tablet_id}));
-        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, 
tablet_id}));
-        if (is_multi_version) {
-            // The tablet index/inverted index are recycled in 
recycle_versioned_tablet.
-            tablet_compact_stats_keys.push_back(
-                    versioned::tablet_compact_stats_key({instance_id_, 
tablet_id}));
-            tablet_load_stats_keys.push_back(
-                    versioned::tablet_load_stats_key({instance_id_, 
tablet_id}));
-            versioned_meta_tablet_keys.push_back(
-                    versioned::meta_tablet_key({instance_id_, tablet_id}));
-        }
         TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", 
false);
-        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = 
use_range_remove,
-                           &metrics_context, k]() mutable -> TabletKeyPair {
-            if (recycle_tablet(tid, metrics_context) != 0) {
-                LOG_WARNING("failed to recycle tablet")
-                        .tag("instance_id", instance_id_)
-                        .tag("tablet_id", tid);
-                range_move = false;
-                return {std::string_view(), range_move};
-            }
-            ++num_recycled;
-            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? 
"(empty)" : hex(k));
-            return {k, range_move};
-        });
+        sync_executor.add(
+                [this, &num_recycled, tid = tablet_id, &metrics_context, k]() 
-> TabletInfo {
+                    if (recycle_tablet(tid, metrics_context) != 0) {
+                        LOG_WARNING("failed to recycle tablet")
+                                .tag("instance_id", instance_id_)
+                                .tag("tablet_id", tid);
+                        return {.tablet_meta_key = std::string_view(), 
.tablet_id = tid};
+                    }
+                    ++num_recycled;
+                    LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? 
"(empty)" : hex(k));
+                    return {.tablet_meta_key = k, .tablet_id = tid};
+                });
         return 0;
     };
 
-    // TODO(AlexYue): Add one ut to cover use_range_remove = false
     auto loop_done = [&, this]() -> int {
+        int ret = 0;
         bool finished = true;
-        auto tablet_keys = sync_executor.when_all(&finished);
+        bool has_empty_key = false;
+        DORIS_CLOUD_DEFER {
+            init_rs_keys.clear();
+            has_failure = false;
+        };
+        auto tablets_info = sync_executor.when_all(&finished);
         if (!finished) {
             LOG_WARNING("failed to recycle tablet").tag("instance_id", 
instance_id_);
             return -1;
         }
-        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
-        if (!tablet_keys.empty() &&
-            std::ranges::all_of(tablet_keys, [](const auto& k) { return 
k.first.empty(); })) {
-            return -1;
+
+        size_t size_before_erase = tablets_info.size();
+        std::erase_if(tablets_info, [](const TabletInfo& t) { return 
t.tablet_meta_key.empty(); });
+        if (tablets_info.empty()) {
+            return size_before_erase == 0 ? 0 : -1;
+        } else if (size_before_erase != tablets_info.size()) {
+            has_empty_key = true;
         }
+
+        ret = has_empty_key ? -1 : 0;
         // sort the vector using key's order
-        std::sort(tablet_keys.begin(), tablet_keys.end(),
-                  [](const auto& prev, const auto& last) { return prev.first < 
last.first; });
-        bool use_range_remove = true;
-        for (auto& [_, remove] : tablet_keys) {
-            if (!remove) {
-                use_range_remove = remove;
-                break;
-            }
-        }
-        DORIS_CLOUD_DEFER {
-            tablet_idx_keys.clear();
-            restore_job_keys.clear();
-            init_rs_keys.clear();
-            tablet_compact_stats_keys.clear();
-            tablet_load_stats_keys.clear();
-            versioned_meta_tablet_keys.clear();
-        };
+        std::ranges::sort(tablets_info, [](const auto& prev, const auto& last) 
{
+            return prev.tablet_meta_key < last.tablet_meta_key;
+        });
         std::unique_ptr<Transaction> txn;
         if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
             LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << 
instance_id_;
             return -1;
         }
         std::string tablet_key_end;
-        if (!tablet_keys.empty()) {
-            if (use_range_remove) {
-                tablet_key_end = std::string(tablet_keys.back().first) + 
'\x00';
-                txn->remove(tablet_keys.front().first, tablet_key_end);
+        if (!tablets_info.empty()) {
+            if (!has_empty_key && !has_failure) {
+                tablet_key_end = 
std::string(tablets_info.back().tablet_meta_key) + '\x00';
+                txn->remove(tablets_info.front().tablet_meta_key, 
tablet_key_end);
             } else {
-                for (auto& [k, _] : tablet_keys) {
-                    txn->remove(k);
+                for (auto& tablet_info : tablets_info) {
+                    txn->remove(tablet_info.tablet_meta_key);
                 }
             }
         }
         if (is_multi_version) {
-            for (auto& k : tablet_compact_stats_keys) {
+            for (auto& tablet_info : tablets_info) {
                 // Remove all versions of tablet compact stats for recycled 
tablet
+                auto k = versioned::tablet_compact_stats_key({instance_id_, 
tablet_info.tablet_id});
                 LOG_INFO("remove versioned tablet compact stats key")
                         .tag("compact_stats_key", hex(k));
                 versioned_remove_all(txn.get(), k);
             }
-            for (auto& k : tablet_load_stats_keys) {
+            for (auto& tablet_info : tablets_info) {
                 // Remove all versions of tablet load stats for recycled tablet
+                auto k = versioned::tablet_load_stats_key({instance_id_, 
tablet_info.tablet_id});
                 LOG_INFO("remove versioned tablet load stats 
key").tag("load_stats_key", hex(k));
                 versioned_remove_all(txn.get(), k);
             }
-            for (auto& k : versioned_meta_tablet_keys) {
+            for (auto& tablet_info : tablets_info) {
                 // Remove all versions of meta tablet for recycled tablet
+                auto k = versioned::meta_tablet_key({instance_id_, 
tablet_info.tablet_id});
                 LOG_INFO("remove versioned meta tablet 
key").tag("meta_tablet_key", hex(k));
                 versioned_remove_all(txn.get(), k);
             }
         }
-        for (auto& k : tablet_idx_keys) {
+        for (auto& tablet_info : tablets_info) {
+            std::string k;
+            meta_tablet_idx_key({instance_id_, tablet_info.tablet_id}, &k);
             txn->remove(k);
         }
-        for (auto& k : restore_job_keys) {
+        for (auto& tablet_info : tablets_info) {
+            std::string k;
+            job_restore_tablet_key({instance_id_, tablet_info.tablet_id}, &k);
             txn->remove(k);
         }
         for (auto& k : init_rs_keys) {
@@ -3066,7 +3056,7 @@ int InstanceRecycler::recycle_tablets(int64_t table_id, 
int64_t index_id,
                          << ", err=" << err;
             return -1;
         }
-        return 0;
+        return ret;
     };
 
     int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, 
std::move(recycle_func),
@@ -4176,7 +4166,8 @@ int InstanceRecycler::scan_tablets_and_statistics(int64_t 
table_id, int64_t inde
         }
         int64_t tablet_id = tablet_meta_pb.tablet_id();
 
-        if (!check_lazy_txn_finished(txn_kv_, instance_id_, 
tablet_meta_pb.tablet_id())) {
+        if (config::enable_recycler_check_lazy_txn_finished &&
+            !check_lazy_txn_finished(txn_kv_, instance_id_, 
tablet_meta_pb.tablet_id())) {
             return 0;
         }
 
diff --git a/cloud/test/mock_accessor.h b/cloud/test/mock_accessor.h
index 0e29c383899..03fed7caeaf 100644
--- a/cloud/test/mock_accessor.h
+++ b/cloud/test/mock_accessor.h
@@ -109,7 +109,7 @@ inline auto MockAccessor::get_prefix_range(const 
std::string& path_prefix) {
 }
 
 inline int MockAccessor::delete_prefix_impl(const std::string& path_prefix) {
-    TEST_SYNC_POINT_RETURN_WITH_VALUE("MockAccessor::delete_prefix", (int)0);
+    TEST_SYNC_POINT_RETURN_WITH_VALUE("MockAccessor::delete_prefix", (int)0, 
&path_prefix);
     LOG(INFO) << "delete object of prefix=" << path_prefix;
     std::lock_guard lock(mtx_);
 
diff --git a/cloud/test/recycler_operation_log_test.cpp 
b/cloud/test/recycler_operation_log_test.cpp
index 1d3c3e65065..2e829ddde4e 100644
--- a/cloud/test/recycler_operation_log_test.cpp
+++ b/cloud/test/recycler_operation_log_test.cpp
@@ -1099,9 +1099,9 @@ TEST(RecycleOperationLogTest, RecycleCompactionLog) {
         ret->first = true;
         ret->second = true;
     });
-    sp->set_call_back("recycle_tablet::bypass_check", [&](auto&& args) {
-        auto* ret = doris::try_any_cast_ret<bool>(args);
-        ret->first = false;
+    sp->set_call_back("recycle_tablet::begin", [&](auto&& args) {
+        auto* ret = doris::try_any_cast_ret<int>(args);
+        ret->first = 0;
         ret->second = true;
     });
     sp->enable_processing();
@@ -1622,9 +1622,9 @@ TEST(RecycleOperationLogTest, RecycleSchemaChangeLog) {
         ret->first = true;
         ret->second = true;
     });
-    sp->set_call_back("recycle_tablet::bypass_check", [&](auto&& args) {
-        auto* ret = doris::try_any_cast_ret<bool>(args);
-        ret->first = false;
+    sp->set_call_back("recycle_tablet::begin", [&](auto&& args) {
+        auto* ret = doris::try_any_cast_ret<int>(args);
+        ret->first = 0;
         ret->second = true;
     });
     sp->enable_processing();
diff --git a/cloud/test/recycler_test.cpp b/cloud/test/recycler_test.cpp
index 5858ecca9d0..1bfbcc62a72 100644
--- a/cloud/test/recycler_test.cpp
+++ b/cloud/test/recycler_test.cpp
@@ -932,6 +932,24 @@ static int create_partition_version_kv(TxnKv* txn_kv, 
int64_t table_id, int64_t
     return 0;
 }
 
+static int create_partition_version_with_pending_txn_kv(TxnKv* txn_kv, int64_t 
table_id,
+                                                        int64_t partition_id, 
int64_t txn_id) {
+    auto key = partition_version_key({instance_id, db_id, table_id, 
partition_id});
+    VersionPB version;
+    version.set_version(1);
+    version.add_pending_txn_ids(txn_id);
+    auto val = version.SerializeAsString();
+    std::unique_ptr<Transaction> txn;
+    if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) {
+        return -1;
+    }
+    txn->put(key, val);
+    if (txn->commit() != TxnErrorCode::TXN_OK) {
+        return -1;
+    }
+    return 0;
+}
+
 static int create_delete_bitmap_update_lock_kv(TxnKv* txn_kv, int64_t 
table_id, int64_t lock_id,
                                                int64_t initiator, int64_t 
expiration) {
     auto key = meta_delete_bitmap_update_lock_key({instance_id, table_id, -1});
@@ -8977,19 +8995,7 @@ TEST(RecyclerTest, 
abort_job_for_related_rowset_when_tablet_recycled) {
 }
 
 TEST(RecyclerTest, recycle_tablet_with_delete_file_failure) {
-    // Test case: recycle_tablet with delete_files failure should not cause
-    // the next recycle to hang due to improperly deleted KV entries.
-    //
-    // Bug scenario:
-    // 1. recycle_tablet calls accessor->delete_files, but it fails (returns 
-1)
-    // 2. recycle_tablet returns empty key (line 2784)
-    // 3. The empty key is filtered out by SyncExecutor (line 2740)
-    // 4. Old bug: tablet_keys with empty keys caused use_range_remove logic 
issue (line 2801-2810)
-    // 5. restore_job_keys were still deleted (line 2857-2859) despite tablet 
not being recycled
-    // 6. Next recycle attempt would hang at check_lazy_txn_finished (line 
2760-2763)
-    //
-    // After fix: Empty keys are filtered out by SyncExecutor, so tablet meta 
keys
-    // won't be deleted if recycle_tablet fails.
+    // If object deletion fails, recycle_tablets should report failure and 
keep the tablet KV.
 
     auto* sp = SyncPoint::get_instance();
     DORIS_CLOUD_DEFER {
@@ -9053,11 +9059,9 @@ TEST(RecyclerTest, 
recycle_tablet_with_delete_file_failure) {
     });
     sp->enable_processing();
 
-    // First recycle attempt - should fail to recycle tablet due to 
delete_directory failure
+    // First recycle attempt should fail due to delete_directory failure.
     int ret = recycler.recycle_tablets(table_id, index_id, ctx);
     EXPECT_EQ(ret, -1) << "First recycle attempt should failed";
-    // recycle_tablets may return -1 or 0 depending on implementation,
-    // but the key point is that tablet should NOT be fully recycled
     EXPECT_GT(delete_directory_call_count.load(), 0) << "delete_directory 
should have been called";
 
     // Verify tablet metadata still exists (because recycle_tablet failed)
@@ -9122,6 +9126,267 @@ TEST(RecyclerTest, 
recycle_tablet_with_delete_file_failure) {
     }
 }
 
+TEST(RecyclerTest, recycle_tablet_with_delete_file_partial_failure) {
+    // If part of object deletion fails, recycle_tablets should delete KV only 
for tablets whose
+    // objects were deleted successfully, but still return failure for the 
whole round.
+
+    auto* sp = SyncPoint::get_instance();
+    DORIS_CLOUD_DEFER {
+        sp->clear_all_call_backs();
+        sp->clear_trace();
+        sp->disable_processing();
+    };
+
+    auto txn_kv = std::make_shared<MemTxnKv>();
+    ASSERT_EQ(txn_kv->init(), 0);
+
+    InstanceInfoPB instance;
+    instance.set_instance_id(::instance_id);
+    auto obj_info = instance.add_obj_info();
+    obj_info->set_id("recycle_tablet_with_delete_file_failure");
+    obj_info->set_ak(config::test_s3_ak);
+    obj_info->set_sk(config::test_s3_sk);
+    obj_info->set_endpoint(config::test_s3_endpoint);
+    obj_info->set_region(config::test_s3_region);
+    obj_info->set_bucket(config::test_s3_bucket);
+    obj_info->set_prefix("recycle_tablet_with_delete_file_failure");
+
+    InstanceRecycler recycler(txn_kv, instance, thread_group,
+                              std::make_shared<TxnLazyCommitter>(txn_kv));
+    ASSERT_EQ(recycler.init(), 0);
+
+    doris::TabletSchemaCloudPB schema;
+    schema.set_schema_version(1);
+
+    constexpr int64_t table_id = 21000;
+    constexpr int64_t index_id = 21001;
+    constexpr int64_t partition_id = 21002;
+    constexpr int64_t tablet_id = 21003;
+
+    auto accessor = recycler.accessor_map_.begin()->second;
+
+    // Create 5 tablet with its metadata and index keys
+    for (int i = 0; i < 5; ++i) {
+        ASSERT_EQ(create_tablet(txn_kv.get(), table_id, index_id, 
partition_id, tablet_id + i), 0);
+    }
+
+    // Create some committed rowsets for the tablet (in meta_rowset_key)
+    // These will be picked up by recycle_tablet when it scans for rowsets
+    for (int i = 0; i < 5; ++i) {
+        for (int j = 0; j < 5; j++) {
+            create_committed_rowset(txn_kv.get(), accessor.get(),
+                                    "recycle_tablet_with_delete_file_failure", 
tablet_id + i, j,
+                                    index_id, 2, 1, true);
+        }
+    }
+
+    // Create partition version kv (required for lazy txn check)
+    ASSERT_EQ(create_partition_version_kv(txn_kv.get(), table_id, 
partition_id), 0);
+
+    // Inject failure for a tablet in the middle of the scanned key range. 
This verifies that
+    // successful tablet KV deletion does not use range remove across the 
failed tablet.
+    std::atomic<int> delete_directory_call_count {0};
+    const std::string failed_tablet_path = tablet_path_prefix(tablet_id + 2);
+    sp->set_call_back("SyncExecutor::Task::bypass_cancel",
+                      [](auto&& args) { *try_any_cast<bool*>(args[0]) = true; 
});
+    sp->set_call_back("MockAccessor::delete_prefix",
+                      [&delete_directory_call_count, 
&failed_tablet_path](auto&& args) {
+                          auto* path_prefix = try_any_cast<const 
std::string*>(args[0]);
+                          if (*path_prefix == failed_tablet_path) {
+                              auto* ret = try_any_cast_ret<int>(args);
+                              delete_directory_call_count++;
+                              ret->first = -1;    // Return error
+                              ret->second = true; // Override return value
+                          }
+                      });
+    sp->enable_processing();
+
+    // First recycle attempt should fail due to partial delete_directory 
failure.
+    int ret = recycler.recycle_tablets(table_id, index_id, ctx);
+    EXPECT_EQ(ret, -1) << "First recycle attempt should failed";
+    EXPECT_GT(delete_directory_call_count.load(), 0) << "delete_directory 
should have been called";
+
+    // Verify only the tablet that failed object deletion still keeps its KV.
+    {
+        int remaining_tablet_count = 0;
+        int recycled_tablet_count = 0;
+        for (int i = 0; i < 5; i++) {
+            std::unique_ptr<Transaction> txn;
+            ASSERT_EQ(txn_kv->create_txn(&txn), TxnErrorCode::TXN_OK);
+            std::string tablet_key = meta_tablet_key(
+                    {::instance_id, table_id, index_id, partition_id, 
tablet_id + i});
+            std::string val;
+            TxnErrorCode err = txn->get(tablet_key, &val);
+            std::string tablet_idx_key = meta_tablet_idx_key({::instance_id, 
tablet_id + i});
+            TxnErrorCode idx_err = txn->get(tablet_idx_key, &val);
+            if (err == TxnErrorCode::TXN_OK) {
+                EXPECT_EQ(i, 2) << "Only the middle failed tablet should keep 
tablet meta";
+                EXPECT_EQ(idx_err, TxnErrorCode::TXN_OK)
+                        << "Tablet index key should remain with failed tablet 
meta";
+                ++remaining_tablet_count;
+            } else {
+                EXPECT_NE(i, 2) << "The failed middle tablet should not be 
range removed";
+                EXPECT_EQ(err, TxnErrorCode::TXN_KEY_NOT_FOUND)
+                        << "Unexpected tablet meta get error";
+                EXPECT_EQ(idx_err, TxnErrorCode::TXN_KEY_NOT_FOUND)
+                        << "Tablet index key should be deleted with recycled 
tablet meta";
+                ++recycled_tablet_count;
+            }
+        }
+        EXPECT_EQ(remaining_tablet_count, 1);
+        EXPECT_EQ(recycled_tablet_count, 4);
+    }
+
+    // Clear the sync point to allow delete_directory to succeed
+    sp->clear_all_call_backs();
+    sp->disable_processing();
+
+    // Second recycle attempt - should succeed without hanging at 
check_lazy_txn_finished
+    // This verifies the fix: empty keys are properly filtered, so KV entries 
are consistent
+    ret = recycler.recycle_tablets(table_id, index_id, ctx);
+    EXPECT_EQ(ret, 0) << "Second recycle attempt should succeed";
+
+    // Verify all related kv have been deleted
+    {
+        for (int i = 0; i < 5; i++) {
+            std::unique_ptr<Transaction> txn;
+            ASSERT_EQ(txn_kv->create_txn(&txn), TxnErrorCode::TXN_OK);
+
+            // Check tablet meta key is deleted
+            std::string tablet_key = meta_tablet_key(
+                    {::instance_id, table_id, index_id, partition_id, 
tablet_id + i});
+            std::string val;
+            TxnErrorCode err = txn->get(tablet_key, &val);
+            EXPECT_EQ(err, TxnErrorCode::TXN_KEY_NOT_FOUND)
+                    << "Tablet key should be deleted after successful recycle";
+
+            // Check tablet index key is deleted
+            std::string tablet_idx_key = meta_tablet_idx_key({::instance_id, 
tablet_id + i});
+            err = txn->get(tablet_idx_key, &val);
+            EXPECT_EQ(err, TxnErrorCode::TXN_KEY_NOT_FOUND)
+                    << "Tablet index key should be deleted after successful 
recycle";
+
+            // Check restore job key is deleted
+            std::string restore_job_key = 
job_restore_tablet_key({::instance_id, tablet_id + i});
+            err = txn->get(restore_job_key, &val);
+            EXPECT_EQ(err, TxnErrorCode::TXN_KEY_NOT_FOUND)
+                    << "Restore job key should be deleted after successful 
recycle";
+
+            // Check no recycle rowset keys remain
+            std::string recyc_rs_key_begin = 
recycle_rowset_key({::instance_id, tablet_id + i, ""});
+            std::string recyc_rs_key_end =
+                    recycle_rowset_key({::instance_id, tablet_id + i + 1, ""});
+            std::unique_ptr<RangeGetIterator> it;
+            ASSERT_EQ(txn->get(recyc_rs_key_begin, recyc_rs_key_end, &it), 
TxnErrorCode::TXN_OK);
+            EXPECT_EQ(it->size(), 0) << "All recycle rowset keys should be 
deleted";
+        }
+    }
+}
+
+TEST(RecyclerTest, recycle_tablet_with_lazy_txn_partial_failure) {
+    // If check_lazy_txn_finished fails before scheduling a tablet recycle 
task, successful tablet KV
+    // deletion must not use range remove across the failed tablet.
+
+    auto* sp = SyncPoint::get_instance();
+    DORIS_CLOUD_DEFER {
+        sp->clear_all_call_backs();
+        sp->clear_trace();
+        sp->disable_processing();
+    };
+
+    bool old_enable_check = config::enable_recycler_check_lazy_txn_finished;
+    config::enable_recycler_check_lazy_txn_finished = true;
+    DORIS_CLOUD_DEFER {
+        config::enable_recycler_check_lazy_txn_finished = old_enable_check;
+    };
+
+    auto txn_kv = std::make_shared<MemTxnKv>();
+    ASSERT_EQ(txn_kv->init(), 0);
+
+    InstanceInfoPB instance;
+    instance.set_instance_id(::instance_id);
+    auto obj_info = instance.add_obj_info();
+    obj_info->set_id("recycle_tablet_with_lazy_txn_partial_failure");
+    obj_info->set_ak(config::test_s3_ak);
+    obj_info->set_sk(config::test_s3_sk);
+    obj_info->set_endpoint(config::test_s3_endpoint);
+    obj_info->set_region(config::test_s3_region);
+    obj_info->set_bucket(config::test_s3_bucket);
+    obj_info->set_prefix("recycle_tablet_with_lazy_txn_partial_failure");
+
+    InstanceRecycler recycler(txn_kv, instance, thread_group,
+                              std::make_shared<TxnLazyCommitter>(txn_kv));
+    ASSERT_EQ(recycler.init(), 0);
+
+    constexpr int64_t table_id = 22000;
+    constexpr int64_t index_id = 22001;
+    constexpr int64_t partition_id = 22002;
+    constexpr int64_t tablet_id = 22003;
+    constexpr int64_t failed_partition_id = 22008;
+
+    auto accessor = recycler.accessor_map_.begin()->second;
+
+    for (int i = 0; i < 5; ++i) {
+        ASSERT_EQ(create_tablet(txn_kv.get(), table_id, index_id, 
partition_id, tablet_id + i), 0);
+        create_committed_rowset(txn_kv.get(), accessor.get(),
+                                
"recycle_tablet_with_lazy_txn_partial_failure", tablet_id + i, i,
+                                index_id, 2, 1, true);
+    }
+
+    {
+        std::unique_ptr<Transaction> txn;
+        ASSERT_EQ(txn_kv->create_txn(&txn), TxnErrorCode::TXN_OK);
+        TabletIndexPB tablet_idx_pb;
+        tablet_idx_pb.set_db_id(db_id);
+        tablet_idx_pb.set_table_id(table_id);
+        tablet_idx_pb.set_index_id(index_id);
+        tablet_idx_pb.set_partition_id(failed_partition_id);
+        tablet_idx_pb.set_tablet_id(tablet_id + 2);
+        txn->put(meta_tablet_idx_key({::instance_id, tablet_id + 2}),
+                 tablet_idx_pb.SerializeAsString());
+        ASSERT_EQ(txn->commit(), TxnErrorCode::TXN_OK);
+    }
+
+    ASSERT_EQ(create_partition_version_kv(txn_kv.get(), table_id, 
partition_id), 0);
+    ASSERT_EQ(create_partition_version_with_pending_txn_kv(txn_kv.get(), 
table_id,
+                                                           
failed_partition_id, 12345),
+              0);
+
+    std::atomic<int> lazy_txn_not_finished_count {0};
+    sp->set_call_back("SyncExecutor::Task::bypass_cancel",
+                      [](auto&& args) { *try_any_cast<bool*>(args[0]) = true; 
});
+    sp->set_call_back("check_lazy_txn_finished::txn_not_finished",
+                      [&lazy_txn_not_finished_count](auto&&) { 
++lazy_txn_not_finished_count; });
+    sp->enable_processing();
+
+    int ret = recycler.recycle_tablets(table_id, index_id, ctx);
+    EXPECT_EQ(ret, -1) << "Recycle should fail while a lazy txn is still 
pending";
+    EXPECT_EQ(lazy_txn_not_finished_count.load(), 1)
+            << "Only the middle tablet should hit the lazy txn pre-add failure 
path";
+
+    for (int i = 0; i < 5; ++i) {
+        std::unique_ptr<Transaction> txn;
+        ASSERT_EQ(txn_kv->create_txn(&txn), TxnErrorCode::TXN_OK);
+        std::string val;
+        std::string tablet_key =
+                meta_tablet_key({::instance_id, table_id, index_id, 
partition_id, tablet_id + i});
+        TxnErrorCode err = txn->get(tablet_key, &val);
+
+        std::string tablet_idx_key = meta_tablet_idx_key({::instance_id, 
tablet_id + i});
+        TxnErrorCode idx_err = txn->get(tablet_idx_key, &val);
+        if (i == 2) {
+            EXPECT_EQ(err, TxnErrorCode::TXN_OK)
+                    << "Failed middle tablet meta should not be range removed";
+            EXPECT_EQ(idx_err, TxnErrorCode::TXN_OK) << "Failed middle tablet 
index should remain";
+        } else {
+            EXPECT_EQ(err, TxnErrorCode::TXN_KEY_NOT_FOUND)
+                    << "Successful tablet meta should be recycled";
+            EXPECT_EQ(idx_err, TxnErrorCode::TXN_KEY_NOT_FOUND)
+                    << "Successful tablet index should be recycled";
+        }
+    }
+}
+
 TEST(RecyclerTest, enable_recycler_default_true) {
     EXPECT_TRUE(config::enable_recycler);
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to