This is an automated email from the ASF dual-hosted git repository.
laiyingchun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 96606dec9 [master] Exclude tservers in MAINTENANCE_MODE when leader
rebalancing
96606dec9 is described below
commit 96606dec9be2ac418747ec8856de4a354f78f581
Author: duyuqi <[email protected]>
AuthorDate: Fri Mar 10 19:44:08 2023 +0800
[master] Exclude tservers in MAINTENANCE_MODE when leader rebalancing
Currently, auto leader rebalancing may transfer new leaders to tservers
which have entered MAINTENANCE_MODE. Because a tserver in MAINTENANCE_MODE
is being maintained by administrators, maybe it is decommissioning.
Since it might be decommissioned/decommissioning, it doesn't make sense
for the leader rebalancer to transfer a leader to a tserver in
MAINTENANCE_MODE
as leaders serve many users' requests. For this reason, we should exclude
such tservers.
This patch fixes the problem and adds an unit test for this.
Change-Id: I2f85a675e69fd02a62e2625881dad2ca5e27acd9
Reviewed-on: http://gerrit.cloudera.org:8080/19608
Tested-by: Kudu Jenkins
Reviewed-by: Yingchun Lai <[email protected]>
Reviewed-by: Yuqi Du <[email protected]>
---
src/kudu/master/auto_leader_rebalancer-test.cc | 75 +++++++++++++++++++++++++-
src/kudu/master/auto_leader_rebalancer.cc | 22 +++++++-
src/kudu/master/auto_leader_rebalancer.h | 2 +
3 files changed, 97 insertions(+), 2 deletions(-)
diff --git a/src/kudu/master/auto_leader_rebalancer-test.cc
b/src/kudu/master/auto_leader_rebalancer-test.cc
index 8ff1d0d0e..5357bdd15 100644
--- a/src/kudu/master/auto_leader_rebalancer-test.cc
+++ b/src/kudu/master/auto_leader_rebalancer-test.cc
@@ -16,6 +16,7 @@
// under the License.
#include "kudu/master/auto_leader_rebalancer.h"
+#include <algorithm>
#include <cstdint>
#include <memory>
#include <ostream>
@@ -27,15 +28,20 @@
#include <glog/logging.h>
#include <gtest/gtest.h>
+#include "kudu/consensus/metadata.pb.h"
#include "kudu/gutil/ref_counted.h"
#include "kudu/integration-tests/test_workload.h"
#include "kudu/master/catalog_manager.h"
#include "kudu/master/master.h"
+#include "kudu/master/master.pb.h"
#include "kudu/master/mini_master.h"
#include "kudu/master/ts_descriptor.h"
#include "kudu/master/ts_manager.h"
#include "kudu/mini-cluster/internal_mini_cluster.h"
+#include "kudu/rpc/rpc_controller.h"
#include "kudu/tserver/mini_tablet_server.h"
+#include "kudu/tserver/tserver.pb.h"
+#include "kudu/tserver/tserver_service.proxy.h" // IWYU pragma: keep
#include "kudu/util/monotime.h"
#include "kudu/util/status.h"
#include "kudu/util/test_macros.h"
@@ -50,8 +56,10 @@ class AutoRebalancerTask;
using kudu::cluster::InternalMiniCluster;
using kudu::cluster::InternalMiniClusterOptions;
+using kudu::tserver::ListTabletsResponsePB;
using std::string;
using std::unique_ptr;
+using std::vector;
DECLARE_bool(auto_leader_rebalancing_enabled);
DECLARE_bool(auto_rebalancing_enabled);
@@ -118,7 +126,7 @@ class LeaderRebalancerTest : public KuduTest {
}
return
catalog_manager->auto_leader_rebalancer()->RunLeaderRebalanceForTable(
- table_info, tserver_uuids,
AutoLeaderRebalancerTask::ExecuteMode::TEST);
+ table_info, tserver_uuids, {},
AutoLeaderRebalancerTask::ExecuteMode::TEST);
}
protected:
@@ -203,5 +211,70 @@ TEST_F(LeaderRebalancerTest, RestartTserver) {
ASSERT_OK(CheckLeaderBalance());
}
+TEST_F(LeaderRebalancerTest, TestMaintenanceMode) {
+ SKIP_IF_SLOW_NOT_ALLOWED();
+ constexpr const int kNumTServers = 3;
+ constexpr const int kNumTablets = 10;
+ cluster_opts_.num_tablet_servers = kNumTServers;
+ FLAGS_leader_rebalancing_max_moves_per_round = 5;
+ ASSERT_OK(CreateAndStartCluster());
+
+ CreateWorkloadTable(kNumTablets, /*num_replicas*/ 3);
+
+ // Leader master
+ master::Master* master = cluster_->mini_master()->master();
+
+ master::AutoLeaderRebalancerTask* leader_rebalancer =
+ master->catalog_manager()->auto_leader_rebalancer();
+ ASSERT_NE(leader_rebalancer, nullptr);
+
+ constexpr const int kCurrentTserverIndex = 0;
+ tserver::MiniTabletServer* mini_tserver =
cluster_->mini_tablet_server(kCurrentTserverIndex);
+ // Sets the tserver state for a tserver to 'MAINTENANCE_MODE'.
+ ASSERT_OK(
+ master->ts_manager()->SetTServerState(mini_tserver->uuid(),
+ TServerStatePB::MAINTENANCE_MODE,
+
ChangeTServerStateRequestPB::ALLOW_MISSING_TSERVER,
+
master->catalog_manager()->sys_catalog()));
+ ASSERT_EQ(TServerStatePB::MAINTENANCE_MODE,
+ master->ts_manager()->GetTServerState(mini_tserver->uuid()));
+ // Restart the tserver to force transferring all leaders on it to make
leaders not balanced.
+ mini_tserver->Shutdown();
+ SleepFor(MonoDelta::FromMilliseconds(5 * FLAGS_heartbeat_interval_ms));
+ ASSERT_OK(mini_tserver->Start());
+
+ // Try to run some 'leader rebalance' iterations. If mini_tserver is not in
MAINTENANCE_MODE,
+ // it's enough to reach leader balanced, more tries is not necessary and
less tries
+ // may not reach leader rebalanced.
+ constexpr const int32_t retries = std::max(kNumTablets / 2, 3);
+ for (int i = 0; i < retries; i++) {
+ ASSERT_OK(leader_rebalancer->RunLeaderRebalancer());
+ if (CheckLeaderBalance().ok()) {
+ break;
+ }
+ SleepFor(MonoDelta::FromMilliseconds(FLAGS_heartbeat_interval_ms));
+ }
+ // This cluster cannot reach the state of rebalanced leadership distribution
since 1 of the 3
+ // tservers is in maintenance mode.
+ Status status = CheckLeaderBalance();
+ ASSERT_TRUE(status.IsIllegalState()) << status.ToString();
+
+ {
+ // The tserver in maintenance mode should have no leaders.
+ std::shared_ptr<tserver::TabletServerServiceProxy> proxy =
+ cluster_->tserver_proxy(kCurrentTserverIndex);
+ tserver::ListTabletsRequestPB req;
+ ListTabletsResponsePB resp;
+ rpc::RpcController rpc;
+ rpc.set_timeout(MonoDelta::FromMilliseconds(1000));
+ ASSERT_OK(proxy->ListTablets(req, &resp, &rpc));
+ ASSERT_FALSE(resp.has_error());
+ ASSERT_FALSE(resp.status_and_schema().empty());
+ for (const auto& replica : resp.status_and_schema()) {
+ ASSERT_NE(consensus::RaftPeerPB::LEADER, replica.role());
+ }
+ }
+}
+
} // namespace master
} // namespace kudu
diff --git a/src/kudu/master/auto_leader_rebalancer.cc
b/src/kudu/master/auto_leader_rebalancer.cc
index 1a5adf143..3f9030c9c 100644
--- a/src/kudu/master/auto_leader_rebalancer.cc
+++ b/src/kudu/master/auto_leader_rebalancer.cc
@@ -25,6 +25,8 @@
#include <ostream>
#include <string>
#include <type_traits>
+#include <unordered_map>
+#include <unordered_set>
#include <utility>
#include <vector>
@@ -37,6 +39,7 @@
#include "kudu/consensus/metadata.pb.h"
#include "kudu/gutil/macros.h"
#include "kudu/gutil/map-util.h"
+#include "kudu/gutil/port.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/master/catalog_manager.h"
#include "kudu/master/master.pb.h"
@@ -62,6 +65,7 @@ using kudu::rpc::RpcController;
using std::map;
using std::nullopt;
using std::string;
+using std::unordered_set;
using std::vector;
using strings::Substitute;
@@ -116,6 +120,7 @@ void AutoLeaderRebalancerTask::Shutdown() {
Status AutoLeaderRebalancerTask::RunLeaderRebalanceForTable(
const scoped_refptr<TableInfo>& table_info,
const vector<string>& tserver_uuids,
+ const unordered_set<string>& exclude_dest_uuids,
AutoLeaderRebalancerTask::ExecuteMode mode) {
LOG(INFO) << Substitute("leader rebalance for table $0",
table_info->table_name());
TableMetadataLock table_l(table_info.get(), LockMode::READ);
@@ -243,6 +248,9 @@ Status AutoLeaderRebalancerTask::RunLeaderRebalanceForTable(
}
double min_score = 1;
for (int j = 0; j < uuid_followers.size(); j++) {
+ if (ContainsKey(exclude_dest_uuids, uuid_followers[j])) {
+ continue;
+ }
std::pair<int32_t, int32_t>& replica_and_leader_count =
replica_and_leader_count_by_ts_uuid[uuid_followers[j]];
int32_t replica_count = replica_and_leader_count.first;
@@ -328,6 +336,10 @@ Status
AutoLeaderRebalancerTask::RunLeaderRebalanceForTable(
if (!ts_manager_->LookupTSByUUID(leader_uuid, &leader_desc)) {
continue;
}
+ if (PREDICT_FALSE(TServerStatePB::MAINTENANCE_MODE ==
+ ts_manager_->GetTServerState(task.second.second))) {
+ continue;
+ }
vector<Sockaddr> resolved;
RETURN_NOT_OK(host_port->ResolveAddresses(&resolved));
@@ -380,6 +392,14 @@ Status AutoLeaderRebalancerTask::RunLeaderRebalancer() {
tserver_uuids.emplace_back(e->permanent_uuid());
}
+ // Avoid transferring leaders to tservers that are in MAINTENANCE_MODE.
+ auto tserver_state_by_uuid = ts_manager_->GetTServerStates();
+ unordered_set<string> exclude_dest_uuids;
+ for (const auto& uuid_state : tserver_state_by_uuid) {
+ if (uuid_state.second.first == TServerStatePB::MAINTENANCE_MODE) {
+ exclude_dest_uuids.insert(uuid_state.first);
+ }
+ }
vector<scoped_refptr<TableInfo>> table_infos;
{
CatalogManager::ScopedLeaderSharedLock leader_lock(catalog_manager_);
@@ -387,7 +407,7 @@ Status AutoLeaderRebalancerTask::RunLeaderRebalancer() {
catalog_manager_->GetAllTables(&table_infos);
}
for (const auto& table_info : table_infos) {
- RunLeaderRebalanceForTable(table_info, tserver_uuids);
+ RunLeaderRebalanceForTable(table_info, tserver_uuids, exclude_dest_uuids);
}
// @TODO(duyuqi)
// Enrich the log and add metrics for leader rebalancer.
diff --git a/src/kudu/master/auto_leader_rebalancer.h
b/src/kudu/master/auto_leader_rebalancer.h
index 004e19b1d..4dd19ab64 100644
--- a/src/kudu/master/auto_leader_rebalancer.h
+++ b/src/kudu/master/auto_leader_rebalancer.h
@@ -21,6 +21,7 @@
#include <memory>
#include <random>
#include <string>
+#include <unordered_set>
#include <vector>
#include "kudu/gutil/ref_counted.h"
@@ -75,6 +76,7 @@ class AutoLeaderRebalancerTask {
Status RunLeaderRebalanceForTable(
const scoped_refptr<TableInfo>& table_info,
const std::vector<std::string>& tserver_uuids,
+ const std::unordered_set<std::string>& exclude_dest_uuids,
AutoLeaderRebalancerTask::ExecuteMode mode =
AutoLeaderRebalancerTask::ExecuteMode::NORMAL);
// Only one task can be scheduled at a time.