Repository: kudu
Updated Branches:
  refs/heads/master cee17c03b -> 2692ac4ad


[tools] sanity check for ScheduleReplicaMove()

This change introduces a couple of improvements into the implementation
of the kudu::tools::ScheduleReplicaMove() function, used by the
kudu CLI tool's 'kudu tablet change_config move_replica' and
'kudu cluster rebalance' sub-commands:

  * use CAS semantics when changing tablet Raft configuration
  * avoid setting the REPLACE attribute if it's set already

The former protects against unexpected Raft configuration changes
in the middle of the replica movement process.  The latter helps in
situations when a configuration change initiated by a prior run of
the rebalancing tool failed at later stages -- with this change,
there is no need to reset the source replica's attributes separately
if such situation would not resolve on its own (e.g., when the REPLACE
attribute is set on a leader replica).

This changelist does not add corresponding tests since the existing
ConcurrentRebalancersTest.TwoConcurrentRebalancers scenario provides
enough coverage.  As a result of this change, the test scenario
became more stable when running with --stress_cpu_threads=16 flag.

before (24 out of 256 failed):
  http://dist-test.cloudera.org/job?job_id=aserbin.1531287387.25723

after (none of 256 failed):
  http://dist-test.cloudera.org/job?job_id=aserbin.1531288786.42701

Change-Id: Ie311b4bb2dbe3e5f1e86cb1364039b71d7c08019
Reviewed-on: http://gerrit.cloudera.org:8080/10920
Tested-by: Kudu Jenkins
Reviewed-by: Will Berkeley <wdberke...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/2692ac4a
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/2692ac4a
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/2692ac4a

Branch: refs/heads/master
Commit: 2692ac4ad750e29e9ca146df3b378a72a1e84a0b
Parents: cee17c0
Author: Alexey Serbin <aser...@cloudera.com>
Authored: Tue Jul 10 22:04:32 2018 -0700
Committer: Alexey Serbin <aser...@cloudera.com>
Committed: Thu Jul 12 03:07:42 2018 +0000

----------------------------------------------------------------------
 src/kudu/tools/kudu-admin-test.cc   |  7 -------
 src/kudu/tools/tool_replica_util.cc | 30 ++++++++++++++++++++++++++----
 2 files changed, 26 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/2692ac4a/src/kudu/tools/kudu-admin-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/kudu-admin-test.cc 
b/src/kudu/tools/kudu-admin-test.cc
index e084753..21bae3b 100644
--- a/src/kudu/tools/kudu-admin-test.cc
+++ b/src/kudu/tools/kudu-admin-test.cc
@@ -1867,13 +1867,6 @@ TEST_P(ConcurrentRebalancersTest, 
TwoConcurrentRebalancers) {
   {
     string out;
     string err;
-    // TODO(aserbin): sometimes, when replica movement fails because of
-    //   concurrent rebalancers or other reasons, the REPLACE attribute is left
-    //   in replica's Raft config. In such cases, rebalancing fails because
-    //   it cannot make progress due to the semantics of the ChangeConfig()
-    //   method, returning error
-    //     'Invalid argument: must modify a field when calling MODIFY_PEER'
-    //   in attempt to set REPLACE attribute again.
     const auto s = RunKuduTool(tool_args, &out, &err);
     ASSERT_TRUE(s.ok()) << s.ToString() << ": " << err;
     ASSERT_STR_CONTAINS(out, "rebalancing is complete: cluster is balanced")

http://git-wip-us.apache.org/repos/asf/kudu/blob/2692ac4a/src/kudu/tools/tool_replica_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_replica_util.cc 
b/src/kudu/tools/tool_replica_util.cc
index 3001350..e06fbcd 100644
--- a/src/kudu/tools/tool_replica_util.cc
+++ b/src/kudu/tools/tool_replica_util.cc
@@ -347,23 +347,44 @@ Status ScheduleReplicaMove(const vector<string>& 
master_addresses,
   // Get information on current replication scheme: the move scenario depends
   // on the replication scheme used.
   bool is_343_scheme;
+  ConsensusStatePB cstate;
   RETURN_NOT_OK(GetConsensusState(proxy, tablet_id, leader_uuid,
                                   client->default_admin_operation_timeout(),
-                                  nullptr, &is_343_scheme));
+                                  &cstate, &is_343_scheme));
+  // Sanity check: the target replica should not be present in the config.
+  // Anyway, ChangeConfig() RPC would return an error in that case, but this
+  // pre-condition allows us to short-circuit that.
+  for (const auto& peer : cstate.committed_config().peers()) {
+    if (peer.permanent_uuid() == to_ts_uuid) {
+      return Status::IllegalState(Substitute(
+          "tablet $0: replica $1 is already present", tablet_id, to_ts_uuid));
+    }
+  }
+
+  const auto cas_opid_idx = cstate.committed_config().opid_index();
+
   // The pre- KUDU-1097 way of moving a replica involves first adding a new
   // replica and then evicting the old one.
   if (!is_343_scheme) {
     return DoChangeConfig(master_addresses, tablet_id, to_ts_uuid,
-                          RaftPeerPB::VOTER, ADD_PEER);
+                          RaftPeerPB::VOTER, ADD_PEER, cas_opid_idx);
+  }
+
+  // Check whether the REPLACE attribute is already set for the source replica.
+  bool is_from_ts_uuid_replace_attr_set = false;
+  for (const auto& peer : cstate.committed_config().peers()) {
+    if (peer.permanent_uuid() == from_ts_uuid) {
+      is_from_ts_uuid_replace_attr_set = peer.attrs().replace();
+      break;
+    }
   }
 
   // In a post-KUDU-1097 world, the procedure to move a replica is to add the
   // replace=true attribute to the replica to remove while simultaneously
   // adding the replacement as a non-voter with promote=true.
   // The following code implements tablet movement in that paradigm.
-
   BulkChangeConfigRequestPB req;
-  {
+  if (!is_from_ts_uuid_replace_attr_set) {
     auto* change = req.add_config_changes();
     change->set_type(MODIFY_PEER);
     *change->mutable_peer()->mutable_permanent_uuid() = from_ts_uuid;
@@ -379,6 +400,7 @@ Status ScheduleReplicaMove(const vector<string>& 
master_addresses,
     RETURN_NOT_OK(GetRpcAddressForTS(client, to_ts_uuid, &hp));
     RETURN_NOT_OK(HostPortToPB(hp, 
change->mutable_peer()->mutable_last_known_addr()));
   }
+  req.set_cas_config_opid_index(cas_opid_idx);
 
   consensus::ChangeConfigResponsePB resp;
   RpcController rpc;

Reply via email to