This is an automated email from the ASF dual-hosted git repository.

mgreber pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 2f16608ab Auto-rebalancer: clear replace marker on failed moves
2f16608ab is described below

commit 2f16608ab4490867a98faa4a08b43d08f5851162
Author: 宋家成 <[email protected]>
AuthorDate: Mon Feb 26 17:29:21 2024 +0800

    Auto-rebalancer: clear replace marker on failed moves
    
    When CheckMoveCompleted fails, best-effort clear the replace marker on
    the source replica and drop the move so rebalancing can continue.
    Add a test that forces move failure and verifies no replica remains
    marked for replacement.
    
    Change-Id: I99dafa654878b9d6d8162d84500913ae0655692b
    Reviewed-on: http://gerrit.cloudera.org:8080/21073
    Reviewed-by: Zoltan Chovan <[email protected]>
    Tested-by: Zoltan Chovan <[email protected]>
    Reviewed-by: Marton Greber <[email protected]>
---
 src/kudu/master/auto_rebalancer-test.cc | 62 +++++++++++++++++++++++++++++++++
 src/kudu/master/auto_rebalancer.cc      | 51 +++++++++++++++++++++++++++
 src/kudu/master/auto_rebalancer.h       |  3 ++
 3 files changed, 116 insertions(+)

diff --git a/src/kudu/master/auto_rebalancer-test.cc 
b/src/kudu/master/auto_rebalancer-test.cc
index 584688194..927a3651d 100644
--- a/src/kudu/master/auto_rebalancer-test.cc
+++ b/src/kudu/master/auto_rebalancer-test.cc
@@ -28,9 +28,12 @@
 #include <vector>
 
 #include <gflags/gflags_declare.h>
+#include "kudu/util/scoped_cleanup.h"
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 
+#include "kudu/consensus/consensus.pb.h"
+#include "kudu/consensus/metadata.pb.h"
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/strings/join.h"
@@ -46,6 +49,7 @@
 #include "kudu/master/ts_manager.h"
 #include "kudu/mini-cluster/internal_mini_cluster.h"
 #include "kudu/tserver/mini_tablet_server.h"
+#include "kudu/rpc/rpc_controller.h"
 #include "kudu/tserver/tablet_server.h"
 #include "kudu/util/logging_test_util.h"
 #include "kudu/util/metrics.h"
@@ -56,9 +60,13 @@
 
 using kudu::cluster::InternalMiniCluster;
 using kudu::cluster::InternalMiniClusterOptions;
+using kudu::consensus::EXCLUDE_HEALTH_REPORT;
+using kudu::consensus::GetConsensusStateRequestPB;
+using kudu::consensus::GetConsensusStateResponsePB;
 using kudu::itest::GetTableLocations;
 using kudu::itest::ListTabletServers;
 using kudu::master::GetTableLocationsResponsePB;
+using kudu::rpc::RpcController;
 using std::set;
 using std::string;
 using std::unique_ptr;
@@ -69,6 +77,7 @@ using strings::Substitute;
 
 DECLARE_bool(auto_leader_rebalancing_enabled);
 DECLARE_bool(auto_rebalancing_enabled);
+DECLARE_bool(auto_rebalancing_fail_moves_for_test);
 DECLARE_int32(consensus_inject_latency_ms_in_notifications);
 DECLARE_int32(follower_unavailable_considered_failed_sec);
 DECLARE_int32(raft_heartbeat_interval_ms);
@@ -825,5 +834,58 @@ TEST_F(AutoRebalancerTest, TestDeletedTables) {
   NO_FATALS(CheckNoLeaderMovesScheduled());
 }
 
+// Test that the replace marker will be cleared if a rebalancing move fails.
+TEST_F(AutoRebalancerTest, TestRemoveReplaceFlagIfMoveFails) {
+  bool auto_leader_rebalancing_enabled = false; // Disable for testing.
+  const bool original_fail_moves = FLAGS_auto_rebalancing_fail_moves_for_test;
+  SCOPED_CLEANUP({ FLAGS_auto_rebalancing_fail_moves_for_test = 
original_fail_moves; });
+
+  const int kNumTservers = 3;
+  const int kNumTablets = 4;
+
+  cluster_opts_.num_tablet_servers = kNumTservers;
+  ASSERT_OK(CreateAndStartCluster(auto_leader_rebalancing_enabled));
+
+  NO_FATALS(CheckAutoRebalancerStarted());
+
+  CreateWorkloadTable(kNumTablets, /*num_replicas*/3);
+
+  // Set the test flag to true so that the moves are all regarded as failed.
+  FLAGS_auto_rebalancing_fail_moves_for_test = true;
+
+  // Bring up a new tserver.
+  ASSERT_OK(cluster_->AddTabletServer());
+
+  // The TSManager should still believe the original tservers are available,
+  // so the auto-rebalancer should attempt to schedule replica moves from those
+  // tservers to the new one.
+  NO_FATALS(CheckSomeMovesScheduled());
+
+  // Stop the auto rebalancing after current loop.
+  FLAGS_auto_rebalancing_enabled = false;
+
+  // Check all the replace markers are false.
+  for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
+    const auto& tserver = cluster_->mini_tablet_server(i);
+    const auto& tablet_ids = tserver->ListTablets();
+    for (const auto& tablet_id: tablet_ids) {
+      GetConsensusStateRequestPB req;
+      GetConsensusStateResponsePB resp;
+      RpcController controller;
+      controller.set_timeout(MonoDelta::FromSeconds(60));
+      req.set_dest_uuid(tserver->uuid());
+      req.add_tablet_ids(tablet_id);
+      req.set_report_health(EXCLUDE_HEALTH_REPORT);
+      ASSERT_OK(cluster_->tserver_consensus_proxy(i)->GetConsensusState(req, 
&resp, &controller));
+
+      const auto& committed_config = 
resp.tablets(0).cstate().committed_config();
+      for (int p = 0; p < committed_config.peers_size(); p++) {
+        const auto& peer = committed_config.peers(p);
+        ASSERT_FALSE(peer.attrs().replace());
+      }
+    }
+  }
+}
+
 } // namespace master
 } // namespace kudu
diff --git a/src/kudu/master/auto_rebalancer.cc 
b/src/kudu/master/auto_rebalancer.cc
index 62516b474..10147a56d 100644
--- a/src/kudu/master/auto_rebalancer.cc
+++ b/src/kudu/master/auto_rebalancer.cc
@@ -41,6 +41,7 @@
 #include "kudu/consensus/consensus.pb.h"
 #include "kudu/consensus/consensus.proxy.h"
 #include "kudu/consensus/metadata.pb.h"
+#include "kudu/gutil/macros.h"
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/strings/substitute.h"
@@ -57,10 +58,12 @@
 #include "kudu/security/init.h"
 #include "kudu/tserver/tserver.pb.h"
 #include "kudu/util/cow_object.h"
+#include "kudu/util/flag_tags.h"
 #include "kudu/util/monotime.h"
 #include "kudu/util/net/net_util.h"
 #include "kudu/util/net/sockaddr.h"
 #include "kudu/util/pb_util.h"
+#include "kudu/util/slice.h"
 #include "kudu/util/status.h"
 #include "kudu/util/thread.h"
 
@@ -139,6 +142,11 @@ 
DEFINE_uint32(auto_rebalancing_wait_for_replica_moves_seconds, 1,
               "How long to wait before checking to see if the scheduled 
replica movement "
               "in this iteration of auto-rebalancing has completed.");
 
+DEFINE_bool(auto_rebalancing_fail_moves_for_test, false,
+            "All CheckMoveCompleted will fail with IllegalState if this flag 
is true. "
+            "This is only used for test.");
+TAG_FLAG(auto_rebalancing_fail_moves_for_test, unsafe);
+
 DECLARE_bool(auto_rebalancing_enabled);
 
 namespace kudu {
@@ -722,6 +730,45 @@ Status AutoRebalancerTask::CheckReplicaMovesCompleted(
         moves_per_tserver_[dst_ts_uuid]--;
       }
 
+      // If a move fails, clear the replace marker so the master doesn't keep
+      // trying to replace the replica indefinitely (especially for leaders).
+      BulkChangeConfigRequestPB req;
+      auto* modify_peer = req.add_config_changes();
+      modify_peer->set_type(MODIFY_PEER);
+      *modify_peer->mutable_peer()->mutable_permanent_uuid() = 
move.ts_uuid_from;
+      modify_peer->mutable_peer()->mutable_attrs()->set_replace(false);
+      string leader_uuid;
+      HostPort leader_hp;
+      Status clear_replace_status = GetTabletLeader(move.tablet_uuid, 
&leader_uuid, &leader_hp);
+      // Best-effort cleanup: failures here should not keep the move queued.
+      if (!clear_replace_status.ok()) {
+        LOG(WARNING) << "Removing replace marker failed: "
+                     << clear_replace_status.message().ToString();
+      } else {
+        ChangeConfigResponsePB resp;
+        RpcController rpc;
+        
rpc.set_timeout(MonoDelta::FromSeconds(FLAGS_auto_rebalancing_rpc_timeout_seconds));
+        req.set_dest_uuid(leader_uuid);
+        req.set_tablet_id(move.tablet_uuid);
+        vector<Sockaddr> resolved;
+        clear_replace_status = leader_hp.ResolveAddresses(&resolved);
+        if (!clear_replace_status.ok()) {
+          LOG(WARNING) << "Removing replace marker failed: "
+                       << clear_replace_status.message().ToString();
+        } else {
+          ConsensusServiceProxy proxy(messenger_, resolved[0], 
leader_hp.host());
+          clear_replace_status = proxy.BulkChangeConfig(req, &resp, &rpc);
+          if (clear_replace_status.ok() && resp.has_error()) {
+            clear_replace_status = StatusFromPB(resp.error().status());
+          }
+          if (!clear_replace_status.ok()) {
+            LOG(WARNING) << "Removing replace marker failed: "
+                         << clear_replace_status.message().ToString();
+          }
+        }
+      }
+
+      // Always drop the failed move so rebalancing can make progress.
       replica_moves->erase(replica_moves->begin() + i);
       LOG(WARNING) << Substitute("Could not move replica: $0", s.ToString());
       return s;
@@ -769,6 +816,10 @@ Status AutoRebalancerTask::CheckMoveCompleted(
     const rebalance::Rebalancer::ReplicaMove& replica_move,
     bool* is_complete) {
 
+  if (PREDICT_FALSE(FLAGS_auto_rebalancing_fail_moves_for_test)) {
+    return Status::IllegalState("Injected artificial test failure.");
+  }
+
   DCHECK(is_complete);
   *is_complete = false;
 
diff --git a/src/kudu/master/auto_rebalancer.h 
b/src/kudu/master/auto_rebalancer.h
index 64cb5562f..6843fa400 100644
--- a/src/kudu/master/auto_rebalancer.h
+++ b/src/kudu/master/auto_rebalancer.h
@@ -135,6 +135,9 @@ class AutoRebalancerTask {
   // to have them moved in order to rebalance the cluster.
   // Returns a non-OK status if the replica or the replica's tserver
   // cannot be found, or the request to move the replica cannot be completed.
+  //
+  // Some information used to clear the replace marker if moves fail will be
+  // added to the ReplicaMoves in this method.
   Status ExecuteMoves(
       const std::vector<rebalance::Rebalancer::ReplicaMove>& replica_moves);
 

Reply via email to