This is an automated email from the ASF dual-hosted git repository.
mgreber pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 2f16608ab Auto-rebalancer: clear replace marker on failed moves
2f16608ab is described below
commit 2f16608ab4490867a98faa4a08b43d08f5851162
Author: 宋家成 <[email protected]>
AuthorDate: Mon Feb 26 17:29:21 2024 +0800
Auto-rebalancer: clear replace marker on failed moves
When CheckMoveCompleted fails, best-effort clear the replace marker on
the source replica and drop the move so rebalancing can continue.
Add a test that forces move failure and verifies no replica remains
marked for replacement.
Change-Id: I99dafa654878b9d6d8162d84500913ae0655692b
Reviewed-on: http://gerrit.cloudera.org:8080/21073
Reviewed-by: Zoltan Chovan <[email protected]>
Tested-by: Zoltan Chovan <[email protected]>
Reviewed-by: Marton Greber <[email protected]>
---
src/kudu/master/auto_rebalancer-test.cc | 62 +++++++++++++++++++++++++++++++++
src/kudu/master/auto_rebalancer.cc | 51 +++++++++++++++++++++++++++
src/kudu/master/auto_rebalancer.h | 3 ++
3 files changed, 116 insertions(+)
diff --git a/src/kudu/master/auto_rebalancer-test.cc
b/src/kudu/master/auto_rebalancer-test.cc
index 584688194..927a3651d 100644
--- a/src/kudu/master/auto_rebalancer-test.cc
+++ b/src/kudu/master/auto_rebalancer-test.cc
@@ -28,9 +28,12 @@
#include <vector>
#include <gflags/gflags_declare.h>
+#include "kudu/util/scoped_cleanup.h"
#include <glog/logging.h>
#include <gtest/gtest.h>
+#include "kudu/consensus/consensus.pb.h"
+#include "kudu/consensus/metadata.pb.h"
#include "kudu/gutil/map-util.h"
#include "kudu/gutil/ref_counted.h"
#include "kudu/gutil/strings/join.h"
@@ -46,6 +49,7 @@
#include "kudu/master/ts_manager.h"
#include "kudu/mini-cluster/internal_mini_cluster.h"
#include "kudu/tserver/mini_tablet_server.h"
+#include "kudu/rpc/rpc_controller.h"
#include "kudu/tserver/tablet_server.h"
#include "kudu/util/logging_test_util.h"
#include "kudu/util/metrics.h"
@@ -56,9 +60,13 @@
using kudu::cluster::InternalMiniCluster;
using kudu::cluster::InternalMiniClusterOptions;
+using kudu::consensus::EXCLUDE_HEALTH_REPORT;
+using kudu::consensus::GetConsensusStateRequestPB;
+using kudu::consensus::GetConsensusStateResponsePB;
using kudu::itest::GetTableLocations;
using kudu::itest::ListTabletServers;
using kudu::master::GetTableLocationsResponsePB;
+using kudu::rpc::RpcController;
using std::set;
using std::string;
using std::unique_ptr;
@@ -69,6 +77,7 @@ using strings::Substitute;
DECLARE_bool(auto_leader_rebalancing_enabled);
DECLARE_bool(auto_rebalancing_enabled);
+DECLARE_bool(auto_rebalancing_fail_moves_for_test);
DECLARE_int32(consensus_inject_latency_ms_in_notifications);
DECLARE_int32(follower_unavailable_considered_failed_sec);
DECLARE_int32(raft_heartbeat_interval_ms);
@@ -825,5 +834,58 @@ TEST_F(AutoRebalancerTest, TestDeletedTables) {
NO_FATALS(CheckNoLeaderMovesScheduled());
}
+// Test that the replace marker will be cleared if a rebalancing move fails.
+TEST_F(AutoRebalancerTest, TestRemoveReplaceFlagIfMoveFails) {
+ bool auto_leader_rebalancing_enabled = false; // Disable for testing.
+ const bool original_fail_moves = FLAGS_auto_rebalancing_fail_moves_for_test;
+ SCOPED_CLEANUP({ FLAGS_auto_rebalancing_fail_moves_for_test =
original_fail_moves; });
+
+ const int kNumTservers = 3;
+ const int kNumTablets = 4;
+
+ cluster_opts_.num_tablet_servers = kNumTservers;
+ ASSERT_OK(CreateAndStartCluster(auto_leader_rebalancing_enabled));
+
+ NO_FATALS(CheckAutoRebalancerStarted());
+
+ CreateWorkloadTable(kNumTablets, /*num_replicas*/3);
+
+ // Set the test flag to true so that the moves are all regarded as failed.
+ FLAGS_auto_rebalancing_fail_moves_for_test = true;
+
+ // Bring up a new tserver.
+ ASSERT_OK(cluster_->AddTabletServer());
+
+ // The TSManager should still believe the original tservers are available,
+ // so the auto-rebalancer should attempt to schedule replica moves from those
+ // tservers to the new one.
+ NO_FATALS(CheckSomeMovesScheduled());
+
+ // Stop the auto rebalancing after current loop.
+ FLAGS_auto_rebalancing_enabled = false;
+
+ // Check all the replace markers are false.
+ for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
+ const auto& tserver = cluster_->mini_tablet_server(i);
+ const auto& tablet_ids = tserver->ListTablets();
+ for (const auto& tablet_id: tablet_ids) {
+ GetConsensusStateRequestPB req;
+ GetConsensusStateResponsePB resp;
+ RpcController controller;
+ controller.set_timeout(MonoDelta::FromSeconds(60));
+ req.set_dest_uuid(tserver->uuid());
+ req.add_tablet_ids(tablet_id);
+ req.set_report_health(EXCLUDE_HEALTH_REPORT);
+ ASSERT_OK(cluster_->tserver_consensus_proxy(i)->GetConsensusState(req,
&resp, &controller));
+
+ const auto& committed_config =
resp.tablets(0).cstate().committed_config();
+ for (int p = 0; p < committed_config.peers_size(); p++) {
+ const auto& peer = committed_config.peers(p);
+ ASSERT_FALSE(peer.attrs().replace());
+ }
+ }
+ }
+}
+
} // namespace master
} // namespace kudu
diff --git a/src/kudu/master/auto_rebalancer.cc
b/src/kudu/master/auto_rebalancer.cc
index 62516b474..10147a56d 100644
--- a/src/kudu/master/auto_rebalancer.cc
+++ b/src/kudu/master/auto_rebalancer.cc
@@ -41,6 +41,7 @@
#include "kudu/consensus/consensus.pb.h"
#include "kudu/consensus/consensus.proxy.h"
#include "kudu/consensus/metadata.pb.h"
+#include "kudu/gutil/macros.h"
#include "kudu/gutil/map-util.h"
#include "kudu/gutil/ref_counted.h"
#include "kudu/gutil/strings/substitute.h"
@@ -57,10 +58,12 @@
#include "kudu/security/init.h"
#include "kudu/tserver/tserver.pb.h"
#include "kudu/util/cow_object.h"
+#include "kudu/util/flag_tags.h"
#include "kudu/util/monotime.h"
#include "kudu/util/net/net_util.h"
#include "kudu/util/net/sockaddr.h"
#include "kudu/util/pb_util.h"
+#include "kudu/util/slice.h"
#include "kudu/util/status.h"
#include "kudu/util/thread.h"
@@ -139,6 +142,11 @@
DEFINE_uint32(auto_rebalancing_wait_for_replica_moves_seconds, 1,
"How long to wait before checking to see if the scheduled
replica movement "
"in this iteration of auto-rebalancing has completed.");
+DEFINE_bool(auto_rebalancing_fail_moves_for_test, false,
+ "All CheckMoveCompleted will fail with IllegalState if this flag
is true. "
+ "This is only used for test.");
+TAG_FLAG(auto_rebalancing_fail_moves_for_test, unsafe);
+
DECLARE_bool(auto_rebalancing_enabled);
namespace kudu {
@@ -722,6 +730,45 @@ Status AutoRebalancerTask::CheckReplicaMovesCompleted(
moves_per_tserver_[dst_ts_uuid]--;
}
+ // If a move fails, clear the replace marker so the master doesn't keep
+ // trying to replace the replica indefinitely (especially for leaders).
+ BulkChangeConfigRequestPB req;
+ auto* modify_peer = req.add_config_changes();
+ modify_peer->set_type(MODIFY_PEER);
+ *modify_peer->mutable_peer()->mutable_permanent_uuid() =
move.ts_uuid_from;
+ modify_peer->mutable_peer()->mutable_attrs()->set_replace(false);
+ string leader_uuid;
+ HostPort leader_hp;
+ Status clear_replace_status = GetTabletLeader(move.tablet_uuid,
&leader_uuid, &leader_hp);
+ // Best-effort cleanup: failures here should not keep the move queued.
+ if (!clear_replace_status.ok()) {
+ LOG(WARNING) << "Removing replace marker failed: "
+ << clear_replace_status.message().ToString();
+ } else {
+ ChangeConfigResponsePB resp;
+ RpcController rpc;
+
rpc.set_timeout(MonoDelta::FromSeconds(FLAGS_auto_rebalancing_rpc_timeout_seconds));
+ req.set_dest_uuid(leader_uuid);
+ req.set_tablet_id(move.tablet_uuid);
+ vector<Sockaddr> resolved;
+ clear_replace_status = leader_hp.ResolveAddresses(&resolved);
+ if (!clear_replace_status.ok()) {
+ LOG(WARNING) << "Removing replace marker failed: "
+ << clear_replace_status.message().ToString();
+ } else {
+ ConsensusServiceProxy proxy(messenger_, resolved[0],
leader_hp.host());
+ clear_replace_status = proxy.BulkChangeConfig(req, &resp, &rpc);
+ if (clear_replace_status.ok() && resp.has_error()) {
+ clear_replace_status = StatusFromPB(resp.error().status());
+ }
+ if (!clear_replace_status.ok()) {
+ LOG(WARNING) << "Removing replace marker failed: "
+ << clear_replace_status.message().ToString();
+ }
+ }
+ }
+
+ // Always drop the failed move so rebalancing can make progress.
replica_moves->erase(replica_moves->begin() + i);
LOG(WARNING) << Substitute("Could not move replica: $0", s.ToString());
return s;
@@ -769,6 +816,10 @@ Status AutoRebalancerTask::CheckMoveCompleted(
const rebalance::Rebalancer::ReplicaMove& replica_move,
bool* is_complete) {
+ if (PREDICT_FALSE(FLAGS_auto_rebalancing_fail_moves_for_test)) {
+ return Status::IllegalState("Injected artificial test failure.");
+ }
+
DCHECK(is_complete);
*is_complete = false;
diff --git a/src/kudu/master/auto_rebalancer.h
b/src/kudu/master/auto_rebalancer.h
index 64cb5562f..6843fa400 100644
--- a/src/kudu/master/auto_rebalancer.h
+++ b/src/kudu/master/auto_rebalancer.h
@@ -135,6 +135,9 @@ class AutoRebalancerTask {
// to have them moved in order to rebalance the cluster.
// Returns a non-OK status if the replica or the replica's tserver
// cannot be found, or the request to move the replica cannot be completed.
+ //
+ // Some information used to clear the replace marker if moves fail will be
+ // added to the ReplicaMoves in this method.
Status ExecuteMoves(
const std::vector<rebalance::Rebalancer::ReplicaMove>& replica_moves);