This is an automated email from the ASF dual-hosted git repository.
liaoxin01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new d1377414429 [fix](cloud) Prevent tablet KV leak after partial recycle
failure (#63377)
d1377414429 is described below
commit d13774144295d478665d28dfdbcfe2c519aa6ff9
Author: Yixuan Wang <[email protected]>
AuthorDate: Tue Jun 9 17:47:44 2026 +0800
[fix](cloud) Prevent tablet KV leak after partial recycle failure (#63377)
### What problem does this PR solve?
recycle_tablets now deletes KV entries only for tablets whose object
cleanup succeeded.
On partial deletion failure, it returns -1 while preserving KV
consistency for failed tablets, so retry can succeed later.
Added a regression/unit test for partial recycle failure, plus a test
sync point in SyncExecutor.
---
cloud/src/common/config.h | 1 +
cloud/src/common/sync_executor.h | 6 +-
cloud/src/recycler/recycler.cpp | 139 +++++++-------
cloud/test/mock_accessor.h | 2 +-
cloud/test/recycler_operation_log_test.cpp | 12 +-
cloud/test/recycler_test.cpp | 297 +++++++++++++++++++++++++++--
6 files changed, 358 insertions(+), 99 deletions(-)
diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h
index 3b51947e700..25b8e0101e6 100644
--- a/cloud/src/common/config.h
+++ b/cloud/src/common/config.h
@@ -359,6 +359,7 @@ CONF_Int64(txn_lazy_commit_shuffle_seed, "0"); // 0 means
generate a random seed
// When enabled, defer deleting pending delete bitmaps until lazy commit
completes.
// This reduces contention during transaction commit by extending delete
bitmap locks.
CONF_mBool(txn_lazy_commit_defer_deleting_pending_delete_bitmaps, "false");
+CONF_mBool(enable_recycler_check_lazy_txn_finished, "true");
// max TabletIndexPB num for batch get
CONF_Int32(max_tablet_index_num_per_batch, "1000");
CONF_Int32(max_restore_job_rowsets_per_batch, "1000");
diff --git a/cloud/src/common/sync_executor.h b/cloud/src/common/sync_executor.h
index 95650e5316a..7785a8a9881 100644
--- a/cloud/src/common/sync_executor.h
+++ b/cloud/src/common/sync_executor.h
@@ -116,7 +116,9 @@ private:
T t = _callback();
// We'll return this task result to user even if this task return
error
// So we don't set _valid to false here
- if (_cancel(t)) {
+ bool bypass_cancel = false;
+ TEST_SYNC_POINT_CALLBACK("SyncExecutor::Task::bypass_cancel",
&bypass_cancel);
+ if (!bypass_cancel && _cancel(t)) {
stop_token = true;
}
_pro.set_value(std::move(t));
@@ -146,4 +148,4 @@ private:
std::function<bool(const T&)> _cancel;
std::string _name_tag;
};
-} // namespace doris::cloud
\ No newline at end of file
+} // namespace doris::cloud
diff --git a/cloud/src/recycler/recycler.cpp b/cloud/src/recycler/recycler.cpp
index d1f0bbd9589..f2cea7904d8 100644
--- a/cloud/src/recycler/recycler.cpp
+++ b/cloud/src/recycler/recycler.cpp
@@ -2927,135 +2927,125 @@ int InstanceRecycler::recycle_tablets(int64_t
table_id, int64_t index_id,
.tag("num_recycled", num_recycled);
};
- // The first string_view represents the tablet key which has been recycled
- // The second bool represents whether the following fdb's tablet key
deletion could be done using range move or not
- using TabletKeyPair = std::pair<std::string_view, bool>;
- SyncExecutor<TabletKeyPair> sync_executor(
+ // The tablet key and id which have been recycled.
+ struct TabletInfo {
+ std::string_view tablet_meta_key;
+ int64_t tablet_id;
+ };
+ SyncExecutor<TabletInfo> sync_executor(
_thread_pool_group.recycle_tablet_pool,
fmt::format("recycle tablets, tablet id {}, index id {}, partition
id {}", table_id,
index_id, partition_id),
- [](const TabletKeyPair& k) { return k.first.empty(); });
+ [](const TabletInfo& k) { return k.tablet_meta_key.empty(); });
- // Elements in `tablet_keys` has the same lifetime as `it` in
`scan_and_recycle`
- std::vector<std::string> tablet_idx_keys;
- std::vector<std::string> restore_job_keys;
+ // Elements in `tablets_info` has the same lifetime as `it` in
`scan_and_recycle`
std::vector<std::string> init_rs_keys;
- std::vector<std::string> tablet_compact_stats_keys;
- std::vector<std::string> tablet_load_stats_keys;
- std::vector<std::string> versioned_meta_tablet_keys;
+ bool has_failure = false;
auto recycle_func = [&, this](std::string_view k, std::string_view v) ->
int {
- bool use_range_remove = true;
++num_scanned;
doris::TabletMetaCloudPB tablet_meta_pb;
if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
LOG_WARNING("malformed tablet meta").tag("key", hex(k));
- use_range_remove = false;
+ has_failure = true;
return -1;
}
int64_t tablet_id = tablet_meta_pb.tablet_id();
- if (!check_lazy_txn_finished(txn_kv_, instance_id_,
tablet_meta_pb.tablet_id())) {
+ if (config::enable_recycler_check_lazy_txn_finished &&
+ !check_lazy_txn_finished(txn_kv_, instance_id_,
tablet_meta_pb.tablet_id())) {
LOG(WARNING) << "lazy txn not finished tablet_id=" <<
tablet_meta_pb.tablet_id();
+ has_failure = true;
return -1;
}
- tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_,
tablet_id}));
- restore_job_keys.push_back(job_restore_tablet_key({instance_id_,
tablet_id}));
- if (is_multi_version) {
- // The tablet index/inverted index are recycled in
recycle_versioned_tablet.
- tablet_compact_stats_keys.push_back(
- versioned::tablet_compact_stats_key({instance_id_,
tablet_id}));
- tablet_load_stats_keys.push_back(
- versioned::tablet_load_stats_key({instance_id_,
tablet_id}));
- versioned_meta_tablet_keys.push_back(
- versioned::meta_tablet_key({instance_id_, tablet_id}));
- }
TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check",
false);
- sync_executor.add([this, &num_recycled, tid = tablet_id, range_move =
use_range_remove,
- &metrics_context, k]() mutable -> TabletKeyPair {
- if (recycle_tablet(tid, metrics_context) != 0) {
- LOG_WARNING("failed to recycle tablet")
- .tag("instance_id", instance_id_)
- .tag("tablet_id", tid);
- range_move = false;
- return {std::string_view(), range_move};
- }
- ++num_recycled;
- LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ?
"(empty)" : hex(k));
- return {k, range_move};
- });
+ sync_executor.add(
+ [this, &num_recycled, tid = tablet_id, &metrics_context, k]()
-> TabletInfo {
+ if (recycle_tablet(tid, metrics_context) != 0) {
+ LOG_WARNING("failed to recycle tablet")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tid);
+ return {.tablet_meta_key = std::string_view(),
.tablet_id = tid};
+ }
+ ++num_recycled;
+ LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ?
"(empty)" : hex(k));
+ return {.tablet_meta_key = k, .tablet_id = tid};
+ });
return 0;
};
- // TODO(AlexYue): Add one ut to cover use_range_remove = false
auto loop_done = [&, this]() -> int {
+ int ret = 0;
bool finished = true;
- auto tablet_keys = sync_executor.when_all(&finished);
+ bool has_empty_key = false;
+ DORIS_CLOUD_DEFER {
+ init_rs_keys.clear();
+ has_failure = false;
+ };
+ auto tablets_info = sync_executor.when_all(&finished);
if (!finished) {
LOG_WARNING("failed to recycle tablet").tag("instance_id",
instance_id_);
return -1;
}
- if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
- if (!tablet_keys.empty() &&
- std::ranges::all_of(tablet_keys, [](const auto& k) { return
k.first.empty(); })) {
- return -1;
+
+ size_t size_before_erase = tablets_info.size();
+ std::erase_if(tablets_info, [](const TabletInfo& t) { return
t.tablet_meta_key.empty(); });
+ if (tablets_info.empty()) {
+ return size_before_erase == 0 ? 0 : -1;
+ } else if (size_before_erase != tablets_info.size()) {
+ has_empty_key = true;
}
+
+ ret = has_empty_key ? -1 : 0;
// sort the vector using key's order
- std::sort(tablet_keys.begin(), tablet_keys.end(),
- [](const auto& prev, const auto& last) { return prev.first <
last.first; });
- bool use_range_remove = true;
- for (auto& [_, remove] : tablet_keys) {
- if (!remove) {
- use_range_remove = remove;
- break;
- }
- }
- DORIS_CLOUD_DEFER {
- tablet_idx_keys.clear();
- restore_job_keys.clear();
- init_rs_keys.clear();
- tablet_compact_stats_keys.clear();
- tablet_load_stats_keys.clear();
- versioned_meta_tablet_keys.clear();
- };
+ std::ranges::sort(tablets_info, [](const auto& prev, const auto& last)
{
+ return prev.tablet_meta_key < last.tablet_meta_key;
+ });
std::unique_ptr<Transaction> txn;
if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" <<
instance_id_;
return -1;
}
std::string tablet_key_end;
- if (!tablet_keys.empty()) {
- if (use_range_remove) {
- tablet_key_end = std::string(tablet_keys.back().first) +
'\x00';
- txn->remove(tablet_keys.front().first, tablet_key_end);
+ if (!tablets_info.empty()) {
+ if (!has_empty_key && !has_failure) {
+ tablet_key_end =
std::string(tablets_info.back().tablet_meta_key) + '\x00';
+ txn->remove(tablets_info.front().tablet_meta_key,
tablet_key_end);
} else {
- for (auto& [k, _] : tablet_keys) {
- txn->remove(k);
+ for (auto& tablet_info : tablets_info) {
+ txn->remove(tablet_info.tablet_meta_key);
}
}
}
if (is_multi_version) {
- for (auto& k : tablet_compact_stats_keys) {
+ for (auto& tablet_info : tablets_info) {
// Remove all versions of tablet compact stats for recycled
tablet
+ auto k = versioned::tablet_compact_stats_key({instance_id_,
tablet_info.tablet_id});
LOG_INFO("remove versioned tablet compact stats key")
.tag("compact_stats_key", hex(k));
versioned_remove_all(txn.get(), k);
}
- for (auto& k : tablet_load_stats_keys) {
+ for (auto& tablet_info : tablets_info) {
// Remove all versions of tablet load stats for recycled tablet
+ auto k = versioned::tablet_load_stats_key({instance_id_,
tablet_info.tablet_id});
LOG_INFO("remove versioned tablet load stats
key").tag("load_stats_key", hex(k));
versioned_remove_all(txn.get(), k);
}
- for (auto& k : versioned_meta_tablet_keys) {
+ for (auto& tablet_info : tablets_info) {
// Remove all versions of meta tablet for recycled tablet
+ auto k = versioned::meta_tablet_key({instance_id_,
tablet_info.tablet_id});
LOG_INFO("remove versioned meta tablet
key").tag("meta_tablet_key", hex(k));
versioned_remove_all(txn.get(), k);
}
}
- for (auto& k : tablet_idx_keys) {
+ for (auto& tablet_info : tablets_info) {
+ std::string k;
+ meta_tablet_idx_key({instance_id_, tablet_info.tablet_id}, &k);
txn->remove(k);
}
- for (auto& k : restore_job_keys) {
+ for (auto& tablet_info : tablets_info) {
+ std::string k;
+ job_restore_tablet_key({instance_id_, tablet_info.tablet_id}, &k);
txn->remove(k);
}
for (auto& k : init_rs_keys) {
@@ -3066,7 +3056,7 @@ int InstanceRecycler::recycle_tablets(int64_t table_id,
int64_t index_id,
<< ", err=" << err;
return -1;
}
- return 0;
+ return ret;
};
int ret = scan_and_recycle(tablet_key_begin, tablet_key_end,
std::move(recycle_func),
@@ -4176,7 +4166,8 @@ int InstanceRecycler::scan_tablets_and_statistics(int64_t
table_id, int64_t inde
}
int64_t tablet_id = tablet_meta_pb.tablet_id();
- if (!check_lazy_txn_finished(txn_kv_, instance_id_,
tablet_meta_pb.tablet_id())) {
+ if (config::enable_recycler_check_lazy_txn_finished &&
+ !check_lazy_txn_finished(txn_kv_, instance_id_,
tablet_meta_pb.tablet_id())) {
return 0;
}
diff --git a/cloud/test/mock_accessor.h b/cloud/test/mock_accessor.h
index 0e29c383899..03fed7caeaf 100644
--- a/cloud/test/mock_accessor.h
+++ b/cloud/test/mock_accessor.h
@@ -109,7 +109,7 @@ inline auto MockAccessor::get_prefix_range(const
std::string& path_prefix) {
}
inline int MockAccessor::delete_prefix_impl(const std::string& path_prefix) {
- TEST_SYNC_POINT_RETURN_WITH_VALUE("MockAccessor::delete_prefix", (int)0);
+ TEST_SYNC_POINT_RETURN_WITH_VALUE("MockAccessor::delete_prefix", (int)0,
&path_prefix);
LOG(INFO) << "delete object of prefix=" << path_prefix;
std::lock_guard lock(mtx_);
diff --git a/cloud/test/recycler_operation_log_test.cpp
b/cloud/test/recycler_operation_log_test.cpp
index 1d3c3e65065..2e829ddde4e 100644
--- a/cloud/test/recycler_operation_log_test.cpp
+++ b/cloud/test/recycler_operation_log_test.cpp
@@ -1099,9 +1099,9 @@ TEST(RecycleOperationLogTest, RecycleCompactionLog) {
ret->first = true;
ret->second = true;
});
- sp->set_call_back("recycle_tablet::bypass_check", [&](auto&& args) {
- auto* ret = doris::try_any_cast_ret<bool>(args);
- ret->first = false;
+ sp->set_call_back("recycle_tablet::begin", [&](auto&& args) {
+ auto* ret = doris::try_any_cast_ret<int>(args);
+ ret->first = 0;
ret->second = true;
});
sp->enable_processing();
@@ -1622,9 +1622,9 @@ TEST(RecycleOperationLogTest, RecycleSchemaChangeLog) {
ret->first = true;
ret->second = true;
});
- sp->set_call_back("recycle_tablet::bypass_check", [&](auto&& args) {
- auto* ret = doris::try_any_cast_ret<bool>(args);
- ret->first = false;
+ sp->set_call_back("recycle_tablet::begin", [&](auto&& args) {
+ auto* ret = doris::try_any_cast_ret<int>(args);
+ ret->first = 0;
ret->second = true;
});
sp->enable_processing();
diff --git a/cloud/test/recycler_test.cpp b/cloud/test/recycler_test.cpp
index 5858ecca9d0..1bfbcc62a72 100644
--- a/cloud/test/recycler_test.cpp
+++ b/cloud/test/recycler_test.cpp
@@ -932,6 +932,24 @@ static int create_partition_version_kv(TxnKv* txn_kv,
int64_t table_id, int64_t
return 0;
}
+static int create_partition_version_with_pending_txn_kv(TxnKv* txn_kv, int64_t
table_id,
+ int64_t partition_id,
int64_t txn_id) {
+ auto key = partition_version_key({instance_id, db_id, table_id,
partition_id});
+ VersionPB version;
+ version.set_version(1);
+ version.add_pending_txn_ids(txn_id);
+ auto val = version.SerializeAsString();
+ std::unique_ptr<Transaction> txn;
+ if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) {
+ return -1;
+ }
+ txn->put(key, val);
+ if (txn->commit() != TxnErrorCode::TXN_OK) {
+ return -1;
+ }
+ return 0;
+}
+
static int create_delete_bitmap_update_lock_kv(TxnKv* txn_kv, int64_t
table_id, int64_t lock_id,
int64_t initiator, int64_t
expiration) {
auto key = meta_delete_bitmap_update_lock_key({instance_id, table_id, -1});
@@ -8977,19 +8995,7 @@ TEST(RecyclerTest,
abort_job_for_related_rowset_when_tablet_recycled) {
}
TEST(RecyclerTest, recycle_tablet_with_delete_file_failure) {
- // Test case: recycle_tablet with delete_files failure should not cause
- // the next recycle to hang due to improperly deleted KV entries.
- //
- // Bug scenario:
- // 1. recycle_tablet calls accessor->delete_files, but it fails (returns
-1)
- // 2. recycle_tablet returns empty key (line 2784)
- // 3. The empty key is filtered out by SyncExecutor (line 2740)
- // 4. Old bug: tablet_keys with empty keys caused use_range_remove logic
issue (line 2801-2810)
- // 5. restore_job_keys were still deleted (line 2857-2859) despite tablet
not being recycled
- // 6. Next recycle attempt would hang at check_lazy_txn_finished (line
2760-2763)
- //
- // After fix: Empty keys are filtered out by SyncExecutor, so tablet meta
keys
- // won't be deleted if recycle_tablet fails.
+ // If object deletion fails, recycle_tablets should report failure and
keep the tablet KV.
auto* sp = SyncPoint::get_instance();
DORIS_CLOUD_DEFER {
@@ -9053,11 +9059,9 @@ TEST(RecyclerTest,
recycle_tablet_with_delete_file_failure) {
});
sp->enable_processing();
- // First recycle attempt - should fail to recycle tablet due to
delete_directory failure
+ // First recycle attempt should fail due to delete_directory failure.
int ret = recycler.recycle_tablets(table_id, index_id, ctx);
EXPECT_EQ(ret, -1) << "First recycle attempt should failed";
- // recycle_tablets may return -1 or 0 depending on implementation,
- // but the key point is that tablet should NOT be fully recycled
EXPECT_GT(delete_directory_call_count.load(), 0) << "delete_directory
should have been called";
// Verify tablet metadata still exists (because recycle_tablet failed)
@@ -9122,6 +9126,267 @@ TEST(RecyclerTest,
recycle_tablet_with_delete_file_failure) {
}
}
+TEST(RecyclerTest, recycle_tablet_with_delete_file_partial_failure) {
+ // If part of object deletion fails, recycle_tablets should delete KV only
for tablets whose
+ // objects were deleted successfully, but still return failure for the
whole round.
+
+ auto* sp = SyncPoint::get_instance();
+ DORIS_CLOUD_DEFER {
+ sp->clear_all_call_backs();
+ sp->clear_trace();
+ sp->disable_processing();
+ };
+
+ auto txn_kv = std::make_shared<MemTxnKv>();
+ ASSERT_EQ(txn_kv->init(), 0);
+
+ InstanceInfoPB instance;
+ instance.set_instance_id(::instance_id);
+ auto obj_info = instance.add_obj_info();
+ obj_info->set_id("recycle_tablet_with_delete_file_failure");
+ obj_info->set_ak(config::test_s3_ak);
+ obj_info->set_sk(config::test_s3_sk);
+ obj_info->set_endpoint(config::test_s3_endpoint);
+ obj_info->set_region(config::test_s3_region);
+ obj_info->set_bucket(config::test_s3_bucket);
+ obj_info->set_prefix("recycle_tablet_with_delete_file_failure");
+
+ InstanceRecycler recycler(txn_kv, instance, thread_group,
+ std::make_shared<TxnLazyCommitter>(txn_kv));
+ ASSERT_EQ(recycler.init(), 0);
+
+ doris::TabletSchemaCloudPB schema;
+ schema.set_schema_version(1);
+
+ constexpr int64_t table_id = 21000;
+ constexpr int64_t index_id = 21001;
+ constexpr int64_t partition_id = 21002;
+ constexpr int64_t tablet_id = 21003;
+
+ auto accessor = recycler.accessor_map_.begin()->second;
+
+ // Create 5 tablet with its metadata and index keys
+ for (int i = 0; i < 5; ++i) {
+ ASSERT_EQ(create_tablet(txn_kv.get(), table_id, index_id,
partition_id, tablet_id + i), 0);
+ }
+
+ // Create some committed rowsets for the tablet (in meta_rowset_key)
+ // These will be picked up by recycle_tablet when it scans for rowsets
+ for (int i = 0; i < 5; ++i) {
+ for (int j = 0; j < 5; j++) {
+ create_committed_rowset(txn_kv.get(), accessor.get(),
+ "recycle_tablet_with_delete_file_failure",
tablet_id + i, j,
+ index_id, 2, 1, true);
+ }
+ }
+
+ // Create partition version kv (required for lazy txn check)
+ ASSERT_EQ(create_partition_version_kv(txn_kv.get(), table_id,
partition_id), 0);
+
+ // Inject failure for a tablet in the middle of the scanned key range.
This verifies that
+ // successful tablet KV deletion does not use range remove across the
failed tablet.
+ std::atomic<int> delete_directory_call_count {0};
+ const std::string failed_tablet_path = tablet_path_prefix(tablet_id + 2);
+ sp->set_call_back("SyncExecutor::Task::bypass_cancel",
+ [](auto&& args) { *try_any_cast<bool*>(args[0]) = true;
});
+ sp->set_call_back("MockAccessor::delete_prefix",
+ [&delete_directory_call_count,
&failed_tablet_path](auto&& args) {
+ auto* path_prefix = try_any_cast<const
std::string*>(args[0]);
+ if (*path_prefix == failed_tablet_path) {
+ auto* ret = try_any_cast_ret<int>(args);
+ delete_directory_call_count++;
+ ret->first = -1; // Return error
+ ret->second = true; // Override return value
+ }
+ });
+ sp->enable_processing();
+
+ // First recycle attempt should fail due to partial delete_directory
failure.
+ int ret = recycler.recycle_tablets(table_id, index_id, ctx);
+ EXPECT_EQ(ret, -1) << "First recycle attempt should failed";
+ EXPECT_GT(delete_directory_call_count.load(), 0) << "delete_directory
should have been called";
+
+ // Verify only the tablet that failed object deletion still keeps its KV.
+ {
+ int remaining_tablet_count = 0;
+ int recycled_tablet_count = 0;
+ for (int i = 0; i < 5; i++) {
+ std::unique_ptr<Transaction> txn;
+ ASSERT_EQ(txn_kv->create_txn(&txn), TxnErrorCode::TXN_OK);
+ std::string tablet_key = meta_tablet_key(
+ {::instance_id, table_id, index_id, partition_id,
tablet_id + i});
+ std::string val;
+ TxnErrorCode err = txn->get(tablet_key, &val);
+ std::string tablet_idx_key = meta_tablet_idx_key({::instance_id,
tablet_id + i});
+ TxnErrorCode idx_err = txn->get(tablet_idx_key, &val);
+ if (err == TxnErrorCode::TXN_OK) {
+ EXPECT_EQ(i, 2) << "Only the middle failed tablet should keep
tablet meta";
+ EXPECT_EQ(idx_err, TxnErrorCode::TXN_OK)
+ << "Tablet index key should remain with failed tablet
meta";
+ ++remaining_tablet_count;
+ } else {
+ EXPECT_NE(i, 2) << "The failed middle tablet should not be
range removed";
+ EXPECT_EQ(err, TxnErrorCode::TXN_KEY_NOT_FOUND)
+ << "Unexpected tablet meta get error";
+ EXPECT_EQ(idx_err, TxnErrorCode::TXN_KEY_NOT_FOUND)
+ << "Tablet index key should be deleted with recycled
tablet meta";
+ ++recycled_tablet_count;
+ }
+ }
+ EXPECT_EQ(remaining_tablet_count, 1);
+ EXPECT_EQ(recycled_tablet_count, 4);
+ }
+
+ // Clear the sync point to allow delete_directory to succeed
+ sp->clear_all_call_backs();
+ sp->disable_processing();
+
+ // Second recycle attempt - should succeed without hanging at
check_lazy_txn_finished
+ // This verifies the fix: empty keys are properly filtered, so KV entries
are consistent
+ ret = recycler.recycle_tablets(table_id, index_id, ctx);
+ EXPECT_EQ(ret, 0) << "Second recycle attempt should succeed";
+
+ // Verify all related kv have been deleted
+ {
+ for (int i = 0; i < 5; i++) {
+ std::unique_ptr<Transaction> txn;
+ ASSERT_EQ(txn_kv->create_txn(&txn), TxnErrorCode::TXN_OK);
+
+ // Check tablet meta key is deleted
+ std::string tablet_key = meta_tablet_key(
+ {::instance_id, table_id, index_id, partition_id,
tablet_id + i});
+ std::string val;
+ TxnErrorCode err = txn->get(tablet_key, &val);
+ EXPECT_EQ(err, TxnErrorCode::TXN_KEY_NOT_FOUND)
+ << "Tablet key should be deleted after successful recycle";
+
+ // Check tablet index key is deleted
+ std::string tablet_idx_key = meta_tablet_idx_key({::instance_id,
tablet_id + i});
+ err = txn->get(tablet_idx_key, &val);
+ EXPECT_EQ(err, TxnErrorCode::TXN_KEY_NOT_FOUND)
+ << "Tablet index key should be deleted after successful
recycle";
+
+ // Check restore job key is deleted
+ std::string restore_job_key =
job_restore_tablet_key({::instance_id, tablet_id + i});
+ err = txn->get(restore_job_key, &val);
+ EXPECT_EQ(err, TxnErrorCode::TXN_KEY_NOT_FOUND)
+ << "Restore job key should be deleted after successful
recycle";
+
+ // Check no recycle rowset keys remain
+ std::string recyc_rs_key_begin =
recycle_rowset_key({::instance_id, tablet_id + i, ""});
+ std::string recyc_rs_key_end =
+ recycle_rowset_key({::instance_id, tablet_id + i + 1, ""});
+ std::unique_ptr<RangeGetIterator> it;
+ ASSERT_EQ(txn->get(recyc_rs_key_begin, recyc_rs_key_end, &it),
TxnErrorCode::TXN_OK);
+ EXPECT_EQ(it->size(), 0) << "All recycle rowset keys should be
deleted";
+ }
+ }
+}
+
+TEST(RecyclerTest, recycle_tablet_with_lazy_txn_partial_failure) {
+ // If check_lazy_txn_finished fails before scheduling a tablet recycle
task, successful tablet KV
+ // deletion must not use range remove across the failed tablet.
+
+ auto* sp = SyncPoint::get_instance();
+ DORIS_CLOUD_DEFER {
+ sp->clear_all_call_backs();
+ sp->clear_trace();
+ sp->disable_processing();
+ };
+
+ bool old_enable_check = config::enable_recycler_check_lazy_txn_finished;
+ config::enable_recycler_check_lazy_txn_finished = true;
+ DORIS_CLOUD_DEFER {
+ config::enable_recycler_check_lazy_txn_finished = old_enable_check;
+ };
+
+ auto txn_kv = std::make_shared<MemTxnKv>();
+ ASSERT_EQ(txn_kv->init(), 0);
+
+ InstanceInfoPB instance;
+ instance.set_instance_id(::instance_id);
+ auto obj_info = instance.add_obj_info();
+ obj_info->set_id("recycle_tablet_with_lazy_txn_partial_failure");
+ obj_info->set_ak(config::test_s3_ak);
+ obj_info->set_sk(config::test_s3_sk);
+ obj_info->set_endpoint(config::test_s3_endpoint);
+ obj_info->set_region(config::test_s3_region);
+ obj_info->set_bucket(config::test_s3_bucket);
+ obj_info->set_prefix("recycle_tablet_with_lazy_txn_partial_failure");
+
+ InstanceRecycler recycler(txn_kv, instance, thread_group,
+ std::make_shared<TxnLazyCommitter>(txn_kv));
+ ASSERT_EQ(recycler.init(), 0);
+
+ constexpr int64_t table_id = 22000;
+ constexpr int64_t index_id = 22001;
+ constexpr int64_t partition_id = 22002;
+ constexpr int64_t tablet_id = 22003;
+ constexpr int64_t failed_partition_id = 22008;
+
+ auto accessor = recycler.accessor_map_.begin()->second;
+
+ for (int i = 0; i < 5; ++i) {
+ ASSERT_EQ(create_tablet(txn_kv.get(), table_id, index_id,
partition_id, tablet_id + i), 0);
+ create_committed_rowset(txn_kv.get(), accessor.get(),
+
"recycle_tablet_with_lazy_txn_partial_failure", tablet_id + i, i,
+ index_id, 2, 1, true);
+ }
+
+ {
+ std::unique_ptr<Transaction> txn;
+ ASSERT_EQ(txn_kv->create_txn(&txn), TxnErrorCode::TXN_OK);
+ TabletIndexPB tablet_idx_pb;
+ tablet_idx_pb.set_db_id(db_id);
+ tablet_idx_pb.set_table_id(table_id);
+ tablet_idx_pb.set_index_id(index_id);
+ tablet_idx_pb.set_partition_id(failed_partition_id);
+ tablet_idx_pb.set_tablet_id(tablet_id + 2);
+ txn->put(meta_tablet_idx_key({::instance_id, tablet_id + 2}),
+ tablet_idx_pb.SerializeAsString());
+ ASSERT_EQ(txn->commit(), TxnErrorCode::TXN_OK);
+ }
+
+ ASSERT_EQ(create_partition_version_kv(txn_kv.get(), table_id,
partition_id), 0);
+ ASSERT_EQ(create_partition_version_with_pending_txn_kv(txn_kv.get(),
table_id,
+
failed_partition_id, 12345),
+ 0);
+
+ std::atomic<int> lazy_txn_not_finished_count {0};
+ sp->set_call_back("SyncExecutor::Task::bypass_cancel",
+ [](auto&& args) { *try_any_cast<bool*>(args[0]) = true;
});
+ sp->set_call_back("check_lazy_txn_finished::txn_not_finished",
+ [&lazy_txn_not_finished_count](auto&&) {
++lazy_txn_not_finished_count; });
+ sp->enable_processing();
+
+ int ret = recycler.recycle_tablets(table_id, index_id, ctx);
+ EXPECT_EQ(ret, -1) << "Recycle should fail while a lazy txn is still
pending";
+ EXPECT_EQ(lazy_txn_not_finished_count.load(), 1)
+ << "Only the middle tablet should hit the lazy txn pre-add failure
path";
+
+ for (int i = 0; i < 5; ++i) {
+ std::unique_ptr<Transaction> txn;
+ ASSERT_EQ(txn_kv->create_txn(&txn), TxnErrorCode::TXN_OK);
+ std::string val;
+ std::string tablet_key =
+ meta_tablet_key({::instance_id, table_id, index_id,
partition_id, tablet_id + i});
+ TxnErrorCode err = txn->get(tablet_key, &val);
+
+ std::string tablet_idx_key = meta_tablet_idx_key({::instance_id,
tablet_id + i});
+ TxnErrorCode idx_err = txn->get(tablet_idx_key, &val);
+ if (i == 2) {
+ EXPECT_EQ(err, TxnErrorCode::TXN_OK)
+ << "Failed middle tablet meta should not be range removed";
+ EXPECT_EQ(idx_err, TxnErrorCode::TXN_OK) << "Failed middle tablet
index should remain";
+ } else {
+ EXPECT_EQ(err, TxnErrorCode::TXN_KEY_NOT_FOUND)
+ << "Successful tablet meta should be recycled";
+ EXPECT_EQ(idx_err, TxnErrorCode::TXN_KEY_NOT_FOUND)
+ << "Successful tablet index should be recycled";
+ }
+ }
+}
+
TEST(RecyclerTest, enable_recycler_default_true) {
EXPECT_TRUE(config::enable_recycler);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]