This is an automated email from the ASF dual-hosted git repository.
laiyingchun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new d7fb77f4c Leader rebalance ignores soft deleted tables
d7fb77f4c is described below
commit d7fb77f4cfbf6d638c6d11009ce3728ae30e01e3
Author: xinghuayu007 <[email protected]>
AuthorDate: Wed May 8 14:51:20 2024 +0800
Leader rebalance ignores soft deleted tables
Soft deleted tables no longer provide read/write service.
Therefore there is no need to run leader rebalancing for soft
deleted tables, which may take a long time.
This patch adds a flag,
'leader_rebalancing_ignore_soft_deleted_tables' (default: false),
that controls whether soft deleted tables are skipped during leader rebalancing.
Change-Id: I1e2f37b004ed7d394e18d21cce97a2c9702adba3
Reviewed-on: http://gerrit.cloudera.org:8080/21411
Tested-by: Kudu Jenkins
Reviewed-by: Marton Greber <[email protected]>
Reviewed-by: Yingchun Lai <[email protected]>
---
src/kudu/master/auto_leader_rebalancer-test.cc | 114 +++++++++++++++++++++----
src/kudu/master/auto_leader_rebalancer.cc | 11 ++-
src/kudu/master/catalog_manager.cc | 8 ++
src/kudu/master/catalog_manager.h | 8 ++
4 files changed, 122 insertions(+), 19 deletions(-)
diff --git a/src/kudu/master/auto_leader_rebalancer-test.cc
b/src/kudu/master/auto_leader_rebalancer-test.cc
index 234a6a8da..d9f5949c0 100644
--- a/src/kudu/master/auto_leader_rebalancer-test.cc
+++ b/src/kudu/master/auto_leader_rebalancer-test.cc
@@ -60,6 +60,7 @@ class AutoRebalancerTask;
} // namespace master
} // namespace kudu
+using kudu::client::KuduTable;
using kudu::cluster::InternalMiniCluster;
using kudu::cluster::InternalMiniClusterOptions;
using kudu::tserver::ListTabletsResponsePB;
@@ -69,10 +70,12 @@ using kudu::consensus::LeaderStepDownResponsePB;
using kudu::rpc::RpcController;
using std::string;
using std::unique_ptr;
+using kudu::client::sp::shared_ptr;
using std::vector;
DECLARE_bool(auto_leader_rebalancing_enabled);
DECLARE_bool(auto_rebalancing_enabled);
+DECLARE_bool(leader_rebalancing_ignore_soft_deleted_tables);
DECLARE_int32(heartbeat_interval_ms);
DECLARE_uint32(auto_leader_rebalancing_interval_seconds);
DECLARE_uint32(auto_rebalancing_interval_seconds);
@@ -140,17 +143,21 @@ class LeaderRebalancerTest : public KuduTest {
}
// Get the leader numbers of each tablet server.
- void GetLeaderDistribution(std::map<string, int32_t>* leader_map) {
+ void GetLeaderDistribution(std::map<string, int32_t>* leader_map,
+ const string& table_name) {
leader_map->clear();
- scoped_refptr<TableInfo> table;
+ shared_ptr<KuduTable> table;
+ workload_->client()->OpenTable(table_name, &table);
+
+ scoped_refptr<TableInfo> table_info;
master::Master* master = cluster_->mini_master()->master();
master::CatalogManager* catalog_manager = master->catalog_manager();
{
CatalogManager::ScopedLeaderSharedLock leaderlock(catalog_manager);
- catalog_manager->GetTableInfoByName(table_name(), &table);
+ catalog_manager->GetTableInfo(table->id(), &table_info);
}
std::vector<string> leader_list;
- for (const auto& tablet : table->tablet_map()) {
+ for (const auto& tablet : table_info->tablet_map()) {
client::KuduTablet* ptr;
workload_->client()->GetTablet(tablet.second->id(), &ptr);
unique_ptr<client::KuduTablet> tablet_ptr(ptr);
@@ -172,7 +179,8 @@ class LeaderRebalancerTest : public KuduTest {
}
// Make the leader distribution as the vector passed in.
- Status MakeLeaderDistribution(std::vector<int32_t> leader_distribution) {
+ Status MakeLeaderDistribution(std::vector<int32_t> leader_distribution,
+ const string table_name) {
master::Master* master = cluster_->mini_master()->master();
TSDescriptorVector descriptors;
master->ts_manager()->GetAllDescriptors(&descriptors);
@@ -185,7 +193,7 @@ class LeaderRebalancerTest : public KuduTest {
master::CatalogManager* catalog_manager = master->catalog_manager();
{
CatalogManager::ScopedLeaderSharedLock leaderlock(catalog_manager);
- catalog_manager->GetTableInfoByName(table_name(), &table);
+ catalog_manager->GetTableInfoByName(table_name, &table);
}
if (std::accumulate(leader_distribution.begin(),
leader_distribution.end(), 0) !=
@@ -253,11 +261,11 @@ TEST_F(LeaderRebalancerTest, FunctionalTestForDivided) {
// Simulate the leader distribution.
std::vector<int32_t> leader_distribution = {4, 4, 1};
- MakeLeaderDistribution(leader_distribution);
+ MakeLeaderDistribution(leader_distribution, table_name());
SleepFor(MonoDelta::FromMilliseconds(3000));
std::map<string, int32_t> leader_map;
- GetLeaderDistribution(&leader_map);
+ GetLeaderDistribution(&leader_map, table_name());
LOG(INFO) << "The leader distribution is " << '\n';
for (const auto& leader : leader_map) {
std::cout << leader.first << " " << leader.second << '\n';
@@ -276,7 +284,7 @@ TEST_F(LeaderRebalancerTest, FunctionalTestForDivided) {
// Check the leader numbers of each tablet server. It should always be
floor(avg)
// or ceil(avg), where the parameter avg is (tablet num) / (tablet server
num).
double expected_leader_num = static_cast<double>(kNumTablets) / 3;
- GetLeaderDistribution(&leader_map);
+ GetLeaderDistribution(&leader_map, table_name());
LOG(INFO) << "The leader distribution is " << '\n';
for (const auto& leader : leader_map) {
std::cout << leader.first << " " << leader.second << '\n';
@@ -288,10 +296,10 @@ TEST_F(LeaderRebalancerTest, FunctionalTestForDivided) {
// Try different leader distribution.
std::vector<int32_t> leader_distribution2 = {0, 8, 1};
- MakeLeaderDistribution(leader_distribution2);
+ MakeLeaderDistribution(leader_distribution2, table_name());
SleepFor(MonoDelta::FromMilliseconds(3000));
- GetLeaderDistribution(&leader_map);
+ GetLeaderDistribution(&leader_map, table_name());
LOG(INFO) << "The leader distribution is " << '\n';
for (const auto& leader : leader_map) {
std::cout << leader.first << " " << leader.second << '\n';
@@ -302,7 +310,7 @@ TEST_F(LeaderRebalancerTest, FunctionalTestForDivided) {
SleepFor(MonoDelta::FromMilliseconds(FLAGS_heartbeat_interval_ms));
}
- GetLeaderDistribution(&leader_map);
+ GetLeaderDistribution(&leader_map, table_name());
LOG(INFO) << "The leader distribution is " << '\n';
for (const auto& leader : leader_map) {
std::cout << leader.first << " " << leader.second << '\n';
@@ -323,11 +331,11 @@ TEST_F(LeaderRebalancerTest, FunctionalTestForNotDivided)
{
// Simulate the leader distribution.
std::vector<int32_t> leader_distribution = {5, 4, 1};
- MakeLeaderDistribution(leader_distribution);
+ MakeLeaderDistribution(leader_distribution, table_name());
SleepFor(MonoDelta::FromMilliseconds(3000));
std::map<string, int32_t> leader_map;
- GetLeaderDistribution(&leader_map);
+ GetLeaderDistribution(&leader_map, table_name());
LOG(INFO) << "The leader distribution is " << '\n';
for (const auto& leader : leader_map) {
std::cout << leader.first << " " << leader.second << '\n';
@@ -346,7 +354,7 @@ TEST_F(LeaderRebalancerTest, FunctionalTestForNotDivided) {
// Check the leader numbers of each tablet server. It should always be
floor(avg)
// or ceil(avg), where the parameter avg is (tablet num) / (tablet server
num).
double expected_leader_num = static_cast<double>(kNumTablets) / 3;
- GetLeaderDistribution(&leader_map);
+ GetLeaderDistribution(&leader_map, table_name());
LOG(INFO) << "The leader distribution is " << '\n';
for (const auto& leader : leader_map) {
std::cout << leader.first << " " << leader.second << '\n';
@@ -358,10 +366,10 @@ TEST_F(LeaderRebalancerTest, FunctionalTestForNotDivided)
{
// Try different leader distribution.
std::vector<int32_t> leader_distribution2 = {8, 1, 1};
- MakeLeaderDistribution(leader_distribution2);
+ MakeLeaderDistribution(leader_distribution2, table_name());
SleepFor(MonoDelta::FromMilliseconds(3000));
- GetLeaderDistribution(&leader_map);
+ GetLeaderDistribution(&leader_map, table_name());
LOG(INFO) << "The leader distribution is " << '\n';
for (const auto& leader : leader_map) {
std::cout << leader.first << " " << leader.second << '\n';
@@ -372,7 +380,7 @@ TEST_F(LeaderRebalancerTest, FunctionalTestForNotDivided) {
SleepFor(MonoDelta::FromMilliseconds(FLAGS_heartbeat_interval_ms));
}
- GetLeaderDistribution(&leader_map);
+ GetLeaderDistribution(&leader_map, table_name());
LOG(INFO) << "The leader distribution is " << '\n';
for (const auto& leader : leader_map) {
std::cout << leader.first << " " << leader.second << '\n';
@@ -524,5 +532,75 @@ TEST_F(LeaderRebalancerTest, TestMaintenanceMode) {
}
}
+class FilterSoftDeletedTableTest :
+ public LeaderRebalancerTest,
+ public ::testing::WithParamInterface<bool> {
+};
+
+INSTANTIATE_TEST_SUITE_P(, FilterSoftDeletedTableTest, ::testing::Bool());
+TEST_P(FilterSoftDeletedTableTest, TestFilterSofteDeletedTable) {
+ FLAGS_leader_rebalancing_ignore_soft_deleted_tables = GetParam();
+
+ constexpr const int kNumTServers = 3;
+ constexpr const int kNumTablets = 9;
+ constexpr const int kNumReplicas = 3;
+ constexpr const char* const soft_deleted_table = "soft_deleted_table";
+
+ cluster_opts_.num_tablet_servers = kNumTServers;
+ ASSERT_OK(CreateAndStartCluster());
+
+ CreateWorkloadTable(kNumTablets, /*num_replicas*/ kNumReplicas);
+
+ // Simulate the leader distribution.
+ std::vector<int32_t> leader_distribution = {4, 4, 1};
+ MakeLeaderDistribution(leader_distribution, table_name());
+ SleepFor(MonoDelta::FromMilliseconds(3000));
+
+ string first_table = table_name();
+
+ // Create a new table.
+ workload_.reset(new TestWorkload(cluster_.get()));
+ workload_->set_table_name(soft_deleted_table);
+ workload_->set_num_tablets(kNumTablets);
+ workload_->set_num_replicas(kNumReplicas);
+ workload_->Setup();
+
+ // Simulate the leader distribution.
+ MakeLeaderDistribution(leader_distribution, soft_deleted_table);
+ SleepFor(MonoDelta::FromMilliseconds(3000));
+
+ // Delete the table 'soft_deleted_table'.
+ workload_->client()->SoftDeleteTable(soft_deleted_table, 3600);
+
+ // Run the leader rebalancer 10 times.
+ int32_t retries = 10;
+ master::Master* master = cluster_->mini_master()->master();
+ master::AutoLeaderRebalancerTask* leader_rebalancer =
+ master->catalog_manager()->auto_leader_rebalancer();
+ for (int i = 0; i < retries; i++) {
+ leader_rebalancer->RunLeaderRebalancer();
+ SleepFor(MonoDelta::FromMilliseconds(FLAGS_heartbeat_interval_ms));
+ }
+
+ std::map<string, int32_t> leader_map;
+ // The first table is leader rebalanced.
+ GetLeaderDistribution(&leader_map, first_table);
+ for (const auto& leader: leader_map) {
+ ASSERT_EQ(leader.second, 3);
+ }
+
+ GetLeaderDistribution(&leader_map, soft_deleted_table);
+ // The soft deleted table is not leader rebalanced.
+ if (FLAGS_leader_rebalancing_ignore_soft_deleted_tables) {
+ for (const auto& leader: leader_map) {
+ ASSERT_NE(leader.second, 3);
+ }
+ } else {
+ for (const auto& leader: leader_map) {
+ ASSERT_EQ(leader.second, 3);
+ }
+ }
+}
+
} // namespace master
} // namespace kudu
diff --git a/src/kudu/master/auto_leader_rebalancer.cc
b/src/kudu/master/auto_leader_rebalancer.cc
index b6bd1268d..6d2c6091f 100644
--- a/src/kudu/master/auto_leader_rebalancer.cc
+++ b/src/kudu/master/auto_leader_rebalancer.cc
@@ -87,6 +87,11 @@ DEFINE_uint32(leader_rebalancing_max_moves_per_round, 10,
TAG_FLAG(leader_rebalancing_max_moves_per_round, advanced);
TAG_FLAG(leader_rebalancing_max_moves_per_round, runtime);
+DEFINE_bool(leader_rebalancing_ignore_soft_deleted_tables, false,
+ "Whether to ignore rebalancing the soft deleted tables");
+TAG_FLAG(leader_rebalancing_ignore_soft_deleted_tables, advanced);
+TAG_FLAG(leader_rebalancing_ignore_soft_deleted_tables, runtime);
+
DECLARE_bool(auto_leader_rebalancing_enabled);
namespace kudu {
@@ -431,7 +436,11 @@ Status AutoLeaderRebalancerTask::RunLeaderRebalancer() {
{
CatalogManager::ScopedLeaderSharedLock leader_lock(catalog_manager_);
RETURN_NOT_OK(leader_lock.first_failed_status());
- catalog_manager_->GetAllTables(&table_infos);
+ if (FLAGS_leader_rebalancing_ignore_soft_deleted_tables) {
+ catalog_manager_->GetNormalizedTables(&table_infos);
+ } else {
+ catalog_manager_->GetAllTables(&table_infos);
+ }
}
for (const auto& table_info : table_infos) {
RunLeaderRebalanceForTable(table_info, tserver_uuids, exclude_dest_uuids);
diff --git a/src/kudu/master/catalog_manager.cc
b/src/kudu/master/catalog_manager.cc
index 1ab7bc85c..da9881c22 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -4275,6 +4275,14 @@ void
CatalogManager::GetAllTables(vector<scoped_refptr<TableInfo>>* tables) {
AppendValuesFromMap(table_ids_map_, tables);
}
+void CatalogManager::GetNormalizedTables(vector<scoped_refptr<TableInfo>>*
tables) {
+ leader_lock_.AssertAcquiredForReading();
+
+ tables->clear();
+ shared_lock<LockType> l(lock_);
+ AppendValuesFromMap(normalized_table_names_map_, tables);
+}
+
void CatalogManager::GetAllTabletsForTests(vector<scoped_refptr<TabletInfo>>*
tablets) {
leader_lock_.AssertAcquiredForReading();
diff --git a/src/kudu/master/catalog_manager.h
b/src/kudu/master/catalog_manager.h
index 136ccbaab..a7bf477df 100644
--- a/src/kudu/master/catalog_manager.h
+++ b/src/kudu/master/catalog_manager.h
@@ -872,6 +872,14 @@ class CatalogManager : public
tserver::TabletReplicaLookupIf {
// NOTE: This should only be used by tests or web-ui
void GetAllTables(std::vector<scoped_refptr<TableInfo>>* tables);
+ // Retrieve all tables from the normalized_table_names_map_.
+ // The normalized_table_names_map_ inherently does not contain soft-deleted
+ // or not running tables, so no manual exclusion is needed. May fail if
+ // the catalog manager is not yet running. Caller must hold leader_lock_.
+ //
+ // NOTE: Used by the auto leader rebalancer when soft-deleted tables are ignored.
+ void GetNormalizedTables(std::vector<scoped_refptr<TableInfo>>* tables);
+
// Check if a table exists by name, setting 'exist' appropriately. May fail
// if the catalog manager is not yet running. Caller must hold leader_lock_.
//