This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 053d2e1c94ab1ae79b9c17a475a0e3d9a25fd0d0
Author: Andrew Wong <[email protected]>
AuthorDate: Wed Sep 11 18:09:58 2019 -0700

    KUDU-2069 p3: add RPC endpoint for maintenance mode
    
    This patch adds an RPC endpoint to the master to allow entering and
    exiting maintenance mode. The endpoint requires superuser privileges, as
    this is meant to be used for administrative purposes.
    
    The endpoint is currently hidden behind a flag, as maintenance mode
    behavior is still in progress. I'm introducing the endpoint now to
    facilitate testing and will remove the flag once finished.
    
    The requests are represented as explicit edges in a (currently simple)
    state machine transitioning a given tserver between different tserver
    states.
    
    Basic checking is done to validate requests, and a test is added for
    this checking.
    
    Change-Id: I9d565bd745507f2511b91a96d2d446240c5406b5
    Reviewed-on: http://gerrit.cloudera.org:8080/14221
    Reviewed-by: Andrew Wong <[email protected]>
    Tested-by: Kudu Jenkins
---
 src/kudu/master/catalog_manager.cc |  1 +
 src/kudu/master/master.proto       | 35 ++++++++++++++++++
 src/kudu/master/master_service.cc  | 76 ++++++++++++++++++++++++++++++++++++++
 src/kudu/master/master_service.h   |  6 +++
 src/kudu/master/ts_state-test.cc   | 40 ++++++++++++++++++++
 5 files changed, 158 insertions(+)

diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index 3bad2cb..34a41ed 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -5281,6 +5281,7 @@ INITTED_OR_RESPOND(ConnectToMasterResponsePB);
 INITTED_OR_RESPOND(GetMasterRegistrationResponsePB);
 INITTED_OR_RESPOND(TSHeartbeatResponsePB);
 INITTED_AND_LEADER_OR_RESPOND(AlterTableResponsePB);
+INITTED_AND_LEADER_OR_RESPOND(ChangeTServerStateResponsePB);
 INITTED_AND_LEADER_OR_RESPOND(CreateTableResponsePB);
 INITTED_AND_LEADER_OR_RESPOND(DeleteTableResponsePB);
 INITTED_AND_LEADER_OR_RESPOND(IsAlterTableDoneResponsePB);
diff --git a/src/kudu/master/master.proto b/src/kudu/master/master.proto
index a932232..63297cf 100644
--- a/src/kudu/master/master.proto
+++ b/src/kudu/master/master.proto
@@ -833,6 +833,37 @@ enum TServerStatePB {
   MAINTENANCE_MODE = 2;
 }
 
+message TServerStateChangePB {
+  // Representation of a change in tserver state.
+  enum StateChange {
+    // Default value for backwards compatibility.
+    UNKNOWN_STATE_CHANGE = 0;
+
+    // Transitions from having no state to being in maintenance mode. If the
+    // tserver is already in maintenance mode, this is a no-op.
+    ENTER_MAINTENANCE_MODE = 1;
+
+    // Transitions from being in maintenance mode to having no tserver state. If
+    // the tserver already has no state, this is a no-op.
+    EXIT_MAINTENANCE_MODE = 2;
+  }
+
+  // The tserver UUID on which to apply the state change.
+  optional string uuid = 1;
+
+  // The change in tserver state.
+  optional StateChange change = 2;
+}
+
+message ChangeTServerStateRequestPB {
+  // The state change to apply. TODO(awong): consider setting tserver state in batches.
+  optional TServerStateChangePB change = 1;
+}
+
+message ChangeTServerStateResponsePB {
+  optional MasterErrorPB error = 1;
+}
+
 // GetMasterRegistrationRequest/Response: get the instance id and
 // HTTP/RPC addresses for this Master server.
 message GetMasterRegistrationRequestPB {
@@ -983,6 +1014,10 @@ service MasterService {
       (ResetAuthzCacheResponsePB) {
     option (kudu.rpc.authz_method) = "AuthorizeSuperUser";
   }
+  rpc ChangeTServerState(ChangeTServerStateRequestPB) returns
+      (ChangeTServerStateResponsePB) {
+    option (kudu.rpc.authz_method) = "AuthorizeSuperUser";
+  }
 
   // Master->Master RPCs
   // ------------------------------------------------------------
diff --git a/src/kudu/master/master_service.cc b/src/kudu/master/master_service.cc
index 20fa876..6941ed7 100644
--- a/src/kudu/master/master_service.cc
+++ b/src/kudu/master/master_service.cc
@@ -56,6 +56,7 @@
 #include "kudu/util/monotime.h"
 #include "kudu/util/net/sockaddr.h"
 #include "kudu/util/pb_util.h"
+#include "kudu/util/scoped_cleanup.h"
 #include "kudu/util/status.h"
 
 DECLARE_bool(hive_metastore_sasl_enabled);
@@ -93,6 +94,12 @@ DEFINE_bool(master_support_authz_tokens, true,
             "testing version compatibility in the client.");
 TAG_FLAG(master_support_authz_tokens, hidden);
 
+// Gates the ChangeTServerState RPC. TODO(awong): remove once maintenance mode is done.
+DEFINE_bool(master_support_maintenance_mode, false,
+            "Whether the master supports maintenance mode. Used for "
+            "testing while maintenance mode in progress.");
+TAG_FLAG(master_support_maintenance_mode, hidden);
+
 using boost::make_optional;
 using google::protobuf::Message;
 using kudu::consensus::ReplicaManagementInfoPB;
@@ -122,6 +129,22 @@ void CheckRespErrorOrSetUnknown(const Status& s, RespClass* resp) {
   }
 }
 
+// Maps a requested state 'change' onto the tserver state that results from it.
+// On success stores that state in 'to_state' and returns true; for any other
+// 'change' value returns false and leaves 'to_state' untouched.
+bool StateChangeToTServerState(const TServerStateChangePB::StateChange& change,
+                               TServerStatePB* to_state) {
+  if (change == TServerStateChangePB::ENTER_MAINTENANCE_MODE) {
+    *to_state = TServerStatePB::MAINTENANCE_MODE;
+    return true;
+  }
+  if (change == TServerStateChangePB::EXIT_MAINTENANCE_MODE) {
+    *to_state = TServerStatePB::NONE;
+    return true;
+  }
+  return false;
+}
+
 } // anonymous namespace
 
 MasterServiceImpl::MasterServiceImpl(Master* server)
@@ -163,6 +186,59 @@ void MasterServiceImpl::Ping(const PingRequestPB* /*req*/,
   rpc->RespondSuccess();
 }
 
+void MasterServiceImpl::ChangeTServerState(const ChangeTServerStateRequestPB* req,
+                                           ChangeTServerStateResponsePB* resp,
+                                           rpc::RpcContext* rpc) {
+  // Validate the request. A failed check sets 's' and returns; the cleanup then responds.
+  Status s;
+  auto respond_error = MakeScopedCleanup([&] {
+    if (PREDICT_FALSE(!s.ok())) {
+      rpc->RespondFailure(s);
+    }
+  });
+  if (PREDICT_FALSE(!FLAGS_master_support_maintenance_mode)) {
+    s = Status::NotSupported("maintenance mode is not supported");
+    return;
+  }
+  if (!req->has_change()) {
+    s = Status::InvalidArgument("request must contain tserver state change");
+    return;
+  }
+  const auto& ts_state_change = req->change();
+  if (!ts_state_change.has_uuid()) {
+    s = Status::InvalidArgument("uuid not provided");
+    return;
+  }
+  const auto& ts_uuid = ts_state_change.uuid();
+  if (!ts_state_change.has_change()) {
+    s = Status::InvalidArgument(Substitute("state change not provided for $0", ts_uuid));
+    return;
+  }
+  const auto& change = ts_state_change.change();
+  TServerStatePB to_state;
+  if (!StateChangeToTServerState(change, &to_state)) {
+    s = Status::InvalidArgument(Substitute("invalid state change: $0", change));
+    return;
+  }
+  // Validation passed; disarm the cleanup, as the code below responds to the RPC itself.
+  respond_error.cancel();
+
+  // Make sure we're the leader.
+  CatalogManager* catalog_manager = server_->catalog_manager();
+  CatalogManager::ScopedLeaderSharedLock l(catalog_manager);
+  if (!l.CheckIsInitializedAndIsLeaderOrRespond(resp, rpc)) {
+    return;
+  }
+
+  // Set the appropriate state for the given tserver.
+  s = server_->ts_manager()->SetTServerState(ts_uuid, to_state, catalog_manager->sys_catalog());
+  if (PREDICT_FALSE(!s.ok())) {
+    rpc->RespondFailure(s);
+    return;
+  }
+  rpc->RespondSuccess();
+}
+
 void MasterServiceImpl::TSHeartbeat(const TSHeartbeatRequestPB* req,
                                     TSHeartbeatResponsePB* resp,
                                     rpc::RpcContext* rpc) {
diff --git a/src/kudu/master/master_service.h b/src/kudu/master/master_service.h
index 83dee04..4098ce7 100644
--- a/src/kudu/master/master_service.h
+++ b/src/kudu/master/master_service.h
@@ -38,6 +38,8 @@ namespace master {
 
 class AlterTableRequestPB;
 class AlterTableResponsePB;
+class ChangeTServerStateRequestPB;
+class ChangeTServerStateResponsePB;
 class ConnectToMasterRequestPB;
 class ConnectToMasterResponsePB;
 class CreateTableRequestPB;
@@ -98,6 +100,10 @@ class MasterServiceImpl : public MasterServiceIf {
                           google::protobuf::Message* resp,
                           rpc::RpcContext* context) override;
 
+  void ChangeTServerState(const ChangeTServerStateRequestPB* req,
+                          ChangeTServerStateResponsePB* resp,
+                          rpc::RpcContext* rpc) override;
+
   void Ping(const PingRequestPB* req,
             PingResponsePB* resp,
             rpc::RpcContext* rpc) override;
diff --git a/src/kudu/master/ts_state-test.cc b/src/kudu/master/ts_state-test.cc
index cfeba76..387dac5 100644
--- a/src/kudu/master/ts_state-test.cc
+++ b/src/kudu/master/ts_state-test.cc
@@ -22,6 +22,7 @@
 #include <utility>
 #include <vector>
 
+#include <gflags/gflags_declare.h>
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 
@@ -48,6 +49,8 @@
 #include "kudu/util/test_macros.h"
 #include "kudu/util/test_util.h"
 
+DECLARE_bool(master_support_maintenance_mode);
+
 using kudu::consensus::ReplicaManagementInfoPB;
 using kudu::rpc::Messenger;
 using kudu::rpc::MessengerBuilder;
@@ -307,5 +310,42 @@ TEST_F(TServerStateTest, MaintenanceModeTServerDoesntGetNewReplicas) {
   ASSERT_OK(CreateTable("happy-table"));
 }
 
+// Exercises the ChangeTServerState RPC: malformed requests fail, a valid one succeeds.
+TEST_F(TServerStateTest, TestRPCs) {
+  FLAGS_master_support_maintenance_mode = true;
+  ChangeTServerStateRequestPB req;
+  Status s;
+  // Sends a state change RPC and ensures there's an error, matching the
+  // input error string if provided.
+  const auto send_req_check_failed = [&] (const string& error) {
+    RpcController rpc;
+    ChangeTServerStateResponsePB resp;
+    s = proxy_->ChangeTServerState(req, &resp, &rpc);
+    ASSERT_TRUE(s.IsRemoteError()) << s.ToString();
+    if (!error.empty()) {
+      ASSERT_STR_CONTAINS(s.ToString(), error);
+    }
+  };
+  NO_FATALS(send_req_check_failed("must contain tserver state change"));
+  TServerStateChangePB* ts_state_change = req.mutable_change();
+
+  NO_FATALS(send_req_check_failed("uuid not provided"));
+  ts_state_change->set_uuid(kTServer);
+
+  NO_FATALS(send_req_check_failed("state change not provided"));
+
+  // Now send over a correct request. Do this a couple times to sanity check
+  // that repeated calls are just no-ops.
+  ts_state_change->set_change(TServerStateChangePB::ENTER_MAINTENANCE_MODE);
+  const int kNumRepeatedCalls = 2;
+  for (int i = 0; i < kNumRepeatedCalls; i++) {
+    RpcController rpc;
+    ChangeTServerStateResponsePB resp;
+    ASSERT_OK(proxy_->ChangeTServerState(req, &resp, &rpc));
+    ASSERT_FALSE(resp.has_error());
+    ASSERT_EQ(TServerStatePB::MAINTENANCE_MODE, ts_manager_->GetTServerState(kTServer));
+  }
+}
+
 } // namespace master
 } // namespace kudu

Reply via email to