This is an automated email from the ASF dual-hosted git repository.

bankim pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 645f9dc72f62bc4cca8f59f9e043c37586e87f63
Author: Bankim Bhavsar <[email protected]>
AuthorDate: Wed Jan 27 15:44:02 2021 -0800

    [tool] KUDU-2181 CLI to remove master
    
    This change adds CLI to remove master from the cluster
    which in turn invokes the RemoveMaster ChangeConfig RPC.
    
    This CLI will be part of the workflow to remove master
    from an existing cluster or recover dead master at the
    same HostPort.
    
    Change-Id: I5c97b03475b0ffc7b387d7dfc17acc4b13858fb7
    Reviewed-on: http://gerrit.cloudera.org:8080/17010
    Reviewed-by: Alexey Serbin <[email protected]>
    Tested-by: Bankim Bhavsar <[email protected]>
    Reviewed-by: Andrew Wong <[email protected]>
---
 src/kudu/client/master_proxy_rpc.cc          |   3 +
 src/kudu/master/dynamic_multi_master-test.cc | 125 +++++++++++++++------------
 src/kudu/tools/kudu-tool-test.cc             |   7 +-
 src/kudu/tools/tool_action_common.cc         |  12 +++
 src/kudu/tools/tool_action_master.cc         |  50 +++++++++++
 5 files changed, 141 insertions(+), 56 deletions(-)

diff --git a/src/kudu/client/master_proxy_rpc.cc 
b/src/kudu/client/master_proxy_rpc.cc
index 9d54f0a..92c7bda 100644
--- a/src/kudu/client/master_proxy_rpc.cc
+++ b/src/kudu/client/master_proxy_rpc.cc
@@ -76,6 +76,8 @@ using master::ListTabletServersRequestPB;
 using master::ListTabletServersResponsePB;
 using master::MasterServiceProxy;
 using master::MasterErrorPB;
+using master::RemoveMasterRequestPB;
+using master::RemoveMasterResponsePB;
 using master::ReplaceTabletRequestPB;
 using master::ReplaceTabletResponsePB;
 using rpc::BackoffType;
@@ -299,6 +301,7 @@ template class 
AsyncLeaderMasterRpc<IsCreateTableDoneRequestPB, IsCreateTableDon
 template class AsyncLeaderMasterRpc<ListMastersRequestPB, 
ListMastersResponsePB>;
 template class AsyncLeaderMasterRpc<ListTablesRequestPB, ListTablesResponsePB>;
 template class AsyncLeaderMasterRpc<ListTabletServersRequestPB, 
ListTabletServersResponsePB>;
+template class AsyncLeaderMasterRpc<RemoveMasterRequestPB, 
RemoveMasterResponsePB>;
 template class AsyncLeaderMasterRpc<ReplaceTabletRequestPB, 
ReplaceTabletResponsePB>;
 
 } // namespace internal
diff --git a/src/kudu/master/dynamic_multi_master-test.cc 
b/src/kudu/master/dynamic_multi_master-test.cc
index 9789d98..b6d5593 100644
--- a/src/kudu/master/dynamic_multi_master-test.cc
+++ b/src/kudu/master/dynamic_multi_master-test.cc
@@ -319,29 +319,6 @@ class DynamicMultiMasterTest : public KuduTest {
     ASSERT_EQ(orig_num_masters_, resp.masters_size());
   }
 
-  // Remove the master specified by 'hp' and optional 'master_uuid' from the 
cluster.
-  // Unset 'hp' can be used to indicate to not supply RPC address in the 
RemoveMaster RPC request.
-  Status RemoveMasterFromCluster(const HostPort& hp, const string& master_uuid 
= "") {
-    auto remove_master = [&] (int leader_master_idx) {
-      RemoveMasterRequestPB req;
-      RemoveMasterResponsePB resp;
-      RpcController rpc;
-      if (hp != HostPort()) {
-        *req.mutable_rpc_addr() = HostPortToPB(hp);
-      }
-      if (!master_uuid.empty()) {
-        *req.mutable_master_uuid() = master_uuid;
-      }
-      rpc.RequireServerFeature(MasterFeatures::DYNAMIC_MULTI_MASTER);
-      Status s = cluster_->master_proxy(leader_master_idx)->RemoveMaster(req, 
&resp, &rpc);
-      boost::optional<MasterErrorPB::Code> err_code(resp.has_error(), 
resp.error().code());
-      return std::make_pair(s, err_code);
-    };
-
-    RETURN_NOT_OK(RunLeaderMasterRPC(remove_master));
-    return cluster_->RemoveMaster(hp);
-  }
-
   // Fetch a follower (non-leader) master index from the cluster.
   Status GetFollowerMasterIndex(int* follower_master_idx) {
     int leader_master_idx;
@@ -375,7 +352,7 @@ class DynamicMultiMasterTest : public KuduTest {
     }
 
     vector<string> cmd = {"master", "add", JoinStrings(addresses, ",")};
-    if (master != HostPort()) {
+    if (master.Initialized()) {
       cmd.emplace_back(master.ToString());
     }
     if (wait_secs != 0) {
@@ -390,6 +367,31 @@ class DynamicMultiMasterTest : public KuduTest {
     return cluster_->AddMaster(new_master_);
   }
 
+  // Removes the specified master from the cluster using the CLI tool.
+  // Unset 'master_to_remove' can be used to indicate to not supply master 
address.
+  // Returns generic RuntimeError() on failure with the actual error in the 
optional 'err'
+  // output parameter.
+  Status RemoveMasterFromClusterUsingCLITool(const HostPort& master_to_remove,
+                                             string* err = nullptr,
+                                             const string& master_uuid = "") {
+    auto hps = cluster_->master_rpc_addrs();
+    vector<string> addresses;
+    addresses.reserve(hps.size());
+    for (const auto& hp : hps) {
+      addresses.emplace_back(hp.ToString());
+    }
+
+    vector<string> args = {"master", "remove", JoinStrings(addresses, ",")};
+    if (master_to_remove.Initialized()) {
+      args.push_back(master_to_remove.ToString());
+    }
+    if (!master_uuid.empty()) {
+      args.push_back("--master_uuid=" + master_uuid);
+    }
+    RETURN_NOT_OK(tools::RunKuduTool(args, nullptr, err));
+    return cluster_->RemoveMaster(master_to_remove);
+  }
+
   // Verify one of the 'expected_roles' and 'expected_member_type' of the new 
master by
   // making RPC to it directly.
   void VerifyNewMasterDirectly(const set<consensus::RaftPeerPB::Role>& 
expected_roles,
@@ -853,7 +855,7 @@ TEST_P(ParameterizedRemoveMasterTest, TestRemoveMaster) {
 
   // Verify the master to be removed is part of the list of masters.
   ASSERT_NE(std::find(master_hps.begin(), master_hps.end(), master_to_remove), 
master_hps.end());
-  ASSERT_OK(RemoveMasterFromCluster(master_to_remove));
+  ASSERT_OK(RemoveMasterFromClusterUsingCLITool(master_to_remove, nullptr, 
master_to_remove_uuid));
 
   // Verify we have one master less and the desired master was removed.
   vector<HostPort> updated_master_hps;
@@ -868,9 +870,10 @@ TEST_P(ParameterizedRemoveMasterTest, TestRemoveMaster) {
   NO_FATALS(cv.CheckRowCount(kTableName, ClusterVerifier::EXACTLY, 0));
 
   // Removing the same master again should result in an error
-  Status s = RemoveMasterFromCluster(master_to_remove, master_to_remove_uuid);
-  ASSERT_TRUE(s.IsRemoteError()) << s.ToString();
-  ASSERT_STR_CONTAINS(s.ToString(), Substitute("Master $0 not found", 
master_to_remove.ToString()));
+  string err;
+  Status s = RemoveMasterFromClusterUsingCLITool(master_to_remove, &err, 
master_to_remove_uuid);
+  ASSERT_TRUE(s.IsRuntimeError()) << s.ToString();
+  ASSERT_STR_CONTAINS(err, Substitute("Master $0 not found", 
master_to_remove.ToString()));
 
   // Attempt transferring leadership to the removed master
   s = TransferMasterLeadershipAsync(cluster_.get(), master_to_remove_uuid);
@@ -964,20 +967,26 @@ TEST_F(DynamicMultiMasterTest, 
TestRemoveMasterFeatureFlagNotSpecified) {
   NO_FATALS(VerifyNumMastersAndGetAddresses(orig_num_masters_, &master_hps));
 
   // Try removing non-leader master.
-  int non_leader_master_idx = -1;
-  ASSERT_OK(GetFollowerMasterIndex(&non_leader_master_idx));
-  auto master_to_remove = 
cluster_->master(non_leader_master_idx)->bound_rpc_hostport();
-  Status s = RemoveMasterFromCluster(master_to_remove);
-  ASSERT_TRUE(s.IsRemoteError()) << s.ToString();
-  ASSERT_STR_MATCHES(s.ToString(), "unsupported feature flags");
+  {
+    int non_leader_master_idx = -1;
+    ASSERT_OK(GetFollowerMasterIndex(&non_leader_master_idx));
+    auto master_to_remove = 
cluster_->master(non_leader_master_idx)->bound_rpc_hostport();
+    string err;
+    Status s = RemoveMasterFromClusterUsingCLITool(master_to_remove, &err);
+    ASSERT_TRUE(s.IsRuntimeError()) << s.ToString();
+    ASSERT_STR_MATCHES(err, "Cluster does not support RemoveMaster");
+  }
 
   // Try removing leader master
-  int leader_master_idx = -1;
-  ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_master_idx));
-  master_to_remove = cluster_->master(leader_master_idx)->bound_rpc_hostport();
-  s = RemoveMasterFromCluster(master_to_remove);
-  ASSERT_TRUE(s.IsRemoteError());
-  ASSERT_STR_MATCHES(s.ToString(), "unsupported feature flags");
+  {
+    int leader_master_idx = -1;
+    ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_master_idx));
+    auto master_to_remove = 
cluster_->master(leader_master_idx)->bound_rpc_hostport();
+    string err;
+    Status s = RemoveMasterFromClusterUsingCLITool(master_to_remove, &err);
+    ASSERT_TRUE(s.IsRuntimeError()) << s.ToString();
+    ASSERT_STR_MATCHES(err, "Cluster does not support RemoveMaster");
+  }
 
   // Verify no change in number of masters.
   NO_FATALS(VerifyNumMastersAndGetAddresses(orig_num_masters_, &master_hps));
@@ -1045,6 +1054,8 @@ TEST_F(DynamicMultiMasterTest, 
TestRemoveMasterToNonLeader) {
   int non_leader_master_idx = !leader_master_idx;
   *req.mutable_rpc_addr() = 
HostPortToPB(cluster_->master(leader_master_idx)->bound_rpc_hostport());
   rpc.RequireServerFeature(MasterFeatures::DYNAMIC_MULTI_MASTER);
+  // Using the master proxy directly instead of using CLI as this test wants 
to test
+  // invoking RemoveMaster RPC to non-leader master.
   ASSERT_OK(cluster_->master_proxy(non_leader_master_idx)->RemoveMaster(req, 
&resp, &rpc));
   ASSERT_TRUE(resp.has_error());
   ASSERT_EQ(MasterErrorPB::NOT_THE_LEADER, resp.error().code());
@@ -1102,17 +1113,19 @@ TEST_F(DynamicMultiMasterTest, 
TestRemoveMasterMissingAndIncorrectHostname) {
 
   // Empty HostPort.
   {
-    Status actual = RemoveMasterFromCluster(HostPort(), string() /* 
unspecified master */);
-    ASSERT_TRUE(actual.IsRemoteError()) << actual.ToString();
-    ASSERT_STR_CONTAINS(actual.ToString(), "RPC address of the master to be 
removed not supplied");
+    string err;
+    Status actual = RemoveMasterFromClusterUsingCLITool(HostPort(), &err);
+    ASSERT_TRUE(actual.IsRuntimeError()) << actual.ToString();
+    ASSERT_STR_CONTAINS(err, "must provide positional argument 
master_address");
   }
 
   // Non-existent hostname.
   {
     HostPort dummy_hp = HostPort("non-existent-path.local", 
Master::kDefaultPort);
-    Status actual = RemoveMasterFromCluster(dummy_hp);
-    ASSERT_TRUE(actual.IsRemoteError()) << actual.ToString();
-    ASSERT_STR_CONTAINS(actual.ToString(), Substitute("Master $0 not found", 
dummy_hp.ToString()));
+    string err;
+    Status actual = RemoveMasterFromClusterUsingCLITool(dummy_hp, &err);
+    ASSERT_TRUE(actual.IsRuntimeError()) << actual.ToString();
+    ASSERT_STR_CONTAINS(err, Substitute("Master $0 not found", 
dummy_hp.ToString()));
   }
 
   // Verify no change in number of masters.
@@ -1133,9 +1146,10 @@ TEST_F(DynamicMultiMasterTest, 
TestRemoveMasterMismatchHostnameAndUuid) {
   auto random_uuid = std::to_string(rng.Next64());
   auto master_to_remove = cluster_->master(0)->bound_rpc_hostport();
   ASSERT_NE(random_uuid, cluster_->master(0)->uuid());
-  Status actual = RemoveMasterFromCluster(master_to_remove, random_uuid);
-  ASSERT_TRUE(actual.IsRemoteError()) << actual.ToString();
-  ASSERT_STR_CONTAINS(actual.ToString(),
+  string err;
+  Status actual = RemoveMasterFromClusterUsingCLITool(master_to_remove, &err, 
random_uuid);
+  ASSERT_TRUE(actual.IsRuntimeError()) << actual.ToString();
+  ASSERT_STR_CONTAINS(err,
                       Substitute("Mismatch in UUID of the master $0 to be 
removed. "
                                  "Expected: $1, supplied: $2.", 
master_to_remove.ToString(),
                                  cluster_->master(0)->uuid(), random_uuid));
@@ -1171,15 +1185,16 @@ TEST_P(ParameterizedRemoveLeaderMasterTest, 
TestRemoveLeaderMaster) {
   int leader_master_idx;
   ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_master_idx));
   const auto master_to_remove = 
cluster_->master(leader_master_idx)->bound_rpc_hostport();
-  Status s = RemoveMasterFromCluster(master_to_remove);
-  ASSERT_TRUE(s.IsRemoteError()) << s.ToString();
+  string err;
+  Status s = RemoveMasterFromClusterUsingCLITool(master_to_remove, &err);
+  ASSERT_TRUE(s.IsRuntimeError()) << s.ToString();
   if (orig_num_masters_ == 1) {
-    ASSERT_STR_CONTAINS(s.ToString(), Substitute("Can't remove master $0 in a 
single master "
-                                                 "configuration", 
master_to_remove.ToString()));
+    ASSERT_STR_CONTAINS(err, Substitute("Can't remove master $0 in a single 
master "
+                                        "configuration", 
master_to_remove.ToString()));
   } else {
     ASSERT_GT(orig_num_masters_, 1);
-    ASSERT_STR_CONTAINS(s.ToString(), Substitute("Can't remove the leader 
master $0",
-                                                 master_to_remove.ToString()));
+    ASSERT_STR_CONTAINS(err, Substitute("Can't remove the leader master $0",
+                                        master_to_remove.ToString()));
   }
 
   // Verify no change in number of masters.
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 2b7b316..14541fb 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -1107,7 +1107,8 @@ TEST_F(ToolTest, TestModeHelp) {
         "status.*Get the status",
         "timestamp.*Get the current timestamp",
         "list.*List masters in a Kudu cluster",
-        "add.*Add a master to the Raft configuration"
+        "add.*Add a master to the Raft configuration",
+        "remove.*Remove a master from the Raft configuration"
     };
     NO_FATALS(RunTestHelp(kCmd, kMasterModeRegexes));
     NO_FATALS(RunTestHelpRpcFlags(kCmd,
@@ -1130,6 +1131,8 @@ TEST_F(ToolTest, TestModeHelp) {
   {
     NO_FATALS(RunTestHelp("master add --help",
                           {"-wait_secs \\(Timeout in seconds to wait for the 
newly added master"}));
+    NO_FATALS(RunTestHelp("master remove --help",
+                          {"-master_uuid \\(Permanent UUID of the master"}));
   }
   {
     const vector<string> kPbcModeRegexes = {
@@ -1321,6 +1324,8 @@ TEST_F(ToolTest, TestActionMissingRequiredArg) {
   NO_FATALS(RunActionMissingRequiredArg("master list", "master_addresses"));
   NO_FATALS(RunActionMissingRequiredArg("master add", "master_addresses"));
   NO_FATALS(RunActionMissingRequiredArg("master add master.example.com", 
"master_address"));
+  NO_FATALS(RunActionMissingRequiredArg("master remove", "master_addresses"));
+  NO_FATALS(RunActionMissingRequiredArg("master remove master.example.com", 
"master_address"));
   NO_FATALS(RunActionMissingRequiredArg("cluster ksck 
--master_addresses=master.example.com",
                                         "master_addresses"));
   NO_FATALS(RunActionMissingRequiredArg("local_replica cmeta 
rewrite_raft_config fake_id",
diff --git a/src/kudu/tools/tool_action_common.cc 
b/src/kudu/tools/tool_action_common.cc
index 2ad498f..661ded3 100644
--- a/src/kudu/tools/tool_action_common.cc
+++ b/src/kudu/tools/tool_action_common.cc
@@ -984,6 +984,18 @@ Status LeaderMasterProxy::SyncRpc(
 
 template
 Status LeaderMasterProxy::SyncRpc(
+    const master::RemoveMasterRequestPB& req,
+    master::RemoveMasterResponsePB* resp,
+    string func_name,
+    const std::function<void(MasterServiceProxy*,
+                             const master::RemoveMasterRequestPB&,
+                             master::RemoveMasterResponsePB*,
+                             RpcController*,
+                             const ResponseCallback&)>& func,
+    std::vector<uint32_t> required_feature_flags);
+
+template
+Status LeaderMasterProxy::SyncRpc(
     const master::ReplaceTabletRequestPB& req,
     master::ReplaceTabletResponsePB* resp,
     string func_name,
diff --git a/src/kudu/tools/tool_action_master.cc 
b/src/kudu/tools/tool_action_master.cc
index 7216d38..e3da814 100644
--- a/src/kudu/tools/tool_action_master.cc
+++ b/src/kudu/tools/tool_action_master.cc
@@ -59,6 +59,8 @@ DECLARE_bool(force);
 DECLARE_int64(timeout_ms);
 DECLARE_string(columns);
 
+DEFINE_string(master_uuid, "", "Permanent UUID of the master. Only needed to 
disambiguate in case "
+                               "of multiple masters with same RPC address");
 DEFINE_int64(wait_secs, 8,
              "Timeout in seconds to wait for the newly added master to be 
promoted as VOTER.");
 
@@ -72,6 +74,8 @@ using kudu::master::Master;
 using kudu::master::MasterServiceProxy;
 using kudu::master::RefreshAuthzCacheRequestPB;
 using kudu::master::RefreshAuthzCacheResponsePB;
+using kudu::master::RemoveMasterRequestPB;
+using kudu::master::RemoveMasterResponsePB;
 using kudu::consensus::RaftPeerPB;
 using kudu::rpc::RpcController;
 using std::cout;
@@ -218,6 +222,38 @@ Status AddMasterChangeConfig(const RunnerContext& context) 
{
   return Status::OK();
 }
 
+Status RemoveMasterChangeConfig(const RunnerContext& context) {
+  const string& master_hp_str = FindOrDie(context.required_args, 
kMasterAddressArg);
+  HostPort hp;
+  RETURN_NOT_OK(hp.ParseString(master_hp_str, Master::kDefaultPort));
+
+  LeaderMasterProxy proxy;
+  RETURN_NOT_OK(proxy.Init(context));
+
+  RemoveMasterRequestPB req;
+  RemoveMasterResponsePB resp;
+  *req.mutable_rpc_addr() = HostPortToPB(hp);
+  if (!FLAGS_master_uuid.empty()) {
+    *req.mutable_master_uuid() = FLAGS_master_uuid;
+  }
+
+  RETURN_NOT_OK((proxy.SyncRpc<RemoveMasterRequestPB, RemoveMasterResponsePB>(
+                 req, &resp, "RemoveMaster", 
&MasterServiceProxy::RemoveMasterAsync,
+                 { master::MasterFeatures::DYNAMIC_MULTI_MASTER })));
+
+  if (resp.has_error()) {
+    return StatusFromPB(resp.error().status());
+  }
+
+  LOG(INFO) << Substitute("Successfully removed master $0 from the cluster. 
Please follow the next "
+                          "steps from the Kudu administration documentation on 
"
+                          "\"Removing Kudu Masters from a Multi-Master 
Deployment\" or "
+                          "\"Recovering from a dead Kudu Master in a 
Multi-Master Deployment\" "
+                          "as appropriate.",
+                          hp.ToString());
+  return Status::OK();
+}
+
 Status ListMasters(const RunnerContext& context) {
   LeaderMasterProxy proxy;
   RETURN_NOT_OK(proxy.Init(context));
@@ -534,6 +570,20 @@ unique_ptr<Mode> BuildMasterMode() {
         .Build();
     builder.AddAction(std::move(add_master));
   }
+  {
+    unique_ptr<Action> remove_master =
+        ActionBuilder("remove", &RemoveMasterChangeConfig)
+        .Description("Remove a master from the Raft configuration of the Kudu 
cluster. "
+                     "Please refer to the Kudu administration documentation on 
"
+                     "\"Removing Kudu Masters from a Multi-Master Deployment\" 
or "
+                     "\"Recovering from a dead Kudu Master in a Multi-Master 
Deployment\" "
+                     "as appropriate.")
+        .AddRequiredParameter({ kMasterAddressesArg, kMasterAddressesArgDesc })
+        .AddRequiredParameter({ kMasterAddressArg, kMasterAddressDesc })
+        .AddOptionalParameter("master_uuid")
+        .Build();
+    builder.AddAction(std::move(remove_master));
+  }
 
   return builder.Build();
 }

Reply via email to