This is an automated email from the ASF dual-hosted git repository. awong pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 053d2e1c94ab1ae79b9c17a475a0e3d9a25fd0d0 Author: Andrew Wong <[email protected]> AuthorDate: Wed Sep 11 18:09:58 2019 -0700 KUDU-2069 p3: add RPC endpoint for maintenance mode This patch adds an RPC endpoint to the master to allow entering and exiting maintenance mode. The endpoint requires superuser privileges, as this is meant to be used for administrative purposes. The endpoint is currently hidden behind a flag, as maintenance mode behavior is still in progress. I'm introducing the endpoint now to facilitate testing and will remove the flag once finished. The requests are represented as explicit edges in a (currently simple) state machine transitioning a given tserver between different tserver states. Basic checking is done to validate requests, and a test is added for this checking. Change-Id: I9d565bd745507f2511b91a96d2d446240c5406b5 Reviewed-on: http://gerrit.cloudera.org:8080/14221 Reviewed-by: Andrew Wong <[email protected]> Tested-by: Kudu Jenkins --- src/kudu/master/catalog_manager.cc | 1 + src/kudu/master/master.proto | 35 ++++++++++++++++++ src/kudu/master/master_service.cc | 76 ++++++++++++++++++++++++++++++++++++++ src/kudu/master/master_service.h | 6 +++ src/kudu/master/ts_state-test.cc | 40 ++++++++++++++++++++ 5 files changed, 158 insertions(+) diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc index 3bad2cb..34a41ed 100644 --- a/src/kudu/master/catalog_manager.cc +++ b/src/kudu/master/catalog_manager.cc @@ -5281,6 +5281,7 @@ INITTED_OR_RESPOND(ConnectToMasterResponsePB); INITTED_OR_RESPOND(GetMasterRegistrationResponsePB); INITTED_OR_RESPOND(TSHeartbeatResponsePB); INITTED_AND_LEADER_OR_RESPOND(AlterTableResponsePB); +INITTED_AND_LEADER_OR_RESPOND(ChangeTServerStateResponsePB); INITTED_AND_LEADER_OR_RESPOND(CreateTableResponsePB); INITTED_AND_LEADER_OR_RESPOND(DeleteTableResponsePB); INITTED_AND_LEADER_OR_RESPOND(IsAlterTableDoneResponsePB); diff --git a/src/kudu/master/master.proto b/src/kudu/master/master.proto index a932232..63297cf 100644 --- a/src/kudu/master/master.proto +++ b/src/kudu/master/master.proto @@ -833,6 +833,37 @@ enum TServerStatePB { MAINTENANCE_MODE = 2; } +message TServerStateChangePB { + // Representation of a change in tserver state. + enum StateChange { + // Default value for backwards compatibility. + UNKNOWN_STATE_CHANGE = 0; + + // Transitions from having no state to being in maintenance mode. If the + // tserver is already in maintenance mode, this is a no-op. + ENTER_MAINTENANCE_MODE = 1; + + // Transitions from being in maintenance mode to having no tserver state. If + // the tserver already has no state, this is a no-op. + EXIT_MAINTENANCE_MODE = 2; + } + + // The tserver UUID on which to apply the state change. + optional string uuid = 1; + + // The change in tserver state. + optional StateChange change = 2; +} + +message ChangeTServerStateRequestPB { + // TODO(awong): consider setting tserver state in batches. + optional TServerStateChangePB change = 1; +} + +message ChangeTServerStateResponsePB { + optional MasterErrorPB error = 1; +} + // GetMasterRegistrationRequest/Response: get the instance id and // HTTP/RPC addresses for this Master server. message GetMasterRegistrationRequestPB { @@ -983,6 +1014,10 @@ service MasterService { (ResetAuthzCacheResponsePB) { option (kudu.rpc.authz_method) = "AuthorizeSuperUser"; } + rpc ChangeTServerState(ChangeTServerStateRequestPB) returns + (ChangeTServerStateResponsePB) { + option (kudu.rpc.authz_method) = "AuthorizeSuperUser"; + } // Master->Master RPCs // ------------------------------------------------------------ diff --git a/src/kudu/master/master_service.cc b/src/kudu/master/master_service.cc index 20fa876..6941ed7 100644 --- a/src/kudu/master/master_service.cc +++ b/src/kudu/master/master_service.cc @@ -56,6 +56,7 @@ #include "kudu/util/monotime.h" #include "kudu/util/net/sockaddr.h" #include "kudu/util/pb_util.h" +#include "kudu/util/scoped_cleanup.h" #include "kudu/util/status.h" DECLARE_bool(hive_metastore_sasl_enabled); @@ -93,6 +94,12 @@ DEFINE_bool(master_support_authz_tokens, true, "testing version compatibility in the client."); TAG_FLAG(master_support_authz_tokens, hidden); +// TODO(awong): once maintenance mode is done, remove this. +DEFINE_bool(master_support_maintenance_mode, false, + "Whether the master supports maintenance mode. Used for " + "testing while maintenance mode in progress."); +TAG_FLAG(master_support_maintenance_mode, hidden); + using boost::make_optional; using google::protobuf::Message; using kudu::consensus::ReplicaManagementInfoPB; @@ -122,6 +129,22 @@ void CheckRespErrorOrSetUnknown(const Status& s, RespClass* resp) { } } +// Sets 'to_state' to the end state of the given 'change' and returns true. +// Returns false if the 'change' isn't supported. +bool StateChangeToTServerState(const TServerStateChangePB::StateChange& change, + TServerStatePB* to_state) { + switch (change) { + case TServerStateChangePB::ENTER_MAINTENANCE_MODE: + *to_state = TServerStatePB::MAINTENANCE_MODE; + return true; + case TServerStateChangePB::EXIT_MAINTENANCE_MODE: + *to_state = TServerStatePB::NONE; + return true; + default: + return false; + } +} + } // anonymous namespace MasterServiceImpl::MasterServiceImpl(Master* server) @@ -163,6 +186,59 @@ void MasterServiceImpl::Ping(const PingRequestPB* /*req*/, rpc->RespondSuccess(); } +void MasterServiceImpl::ChangeTServerState(const ChangeTServerStateRequestPB* req, + ChangeTServerStateResponsePB* resp, + rpc::RpcContext* rpc) { + // Do some basic checking on the contents of the request. + Status s; + auto respond_error = MakeScopedCleanup([&] { + if (PREDICT_FALSE(!s.ok())) { + rpc->RespondFailure(s); + } + }); + if (PREDICT_FALSE(!FLAGS_master_support_maintenance_mode)) { + s = Status::NotSupported("maintenance mode is not supported"); + return; + } + if (!req->has_change()) { + s = Status::InvalidArgument("request must contain tserver state change"); + return; + } + const auto& ts_state_change = req->change(); + if (!ts_state_change.has_uuid()) { + s = Status::InvalidArgument("uuid not provided"); + return; + } + const auto& ts_uuid = ts_state_change.uuid(); + if (!ts_state_change.has_change()) { + s = Status::InvalidArgument(Substitute("state change not provided for $0", ts_uuid)); + return; + } + const auto& change = ts_state_change.change(); + TServerStatePB to_state; + if (!StateChangeToTServerState(change, &to_state)) { + s = Status::InvalidArgument(Substitute("invalid state change: $0", change)); + return; + } + respond_error.cancel(); + + // Make sure we're the leader. + CatalogManager* catalog_manager = server_->catalog_manager(); + CatalogManager::ScopedLeaderSharedLock l(catalog_manager); + if (!l.CheckIsInitializedAndIsLeaderOrRespond(resp, rpc)) { + return; + } + + // Set the appropriate state for the given tserver. + s = server_->ts_manager()->SetTServerState(ts_uuid, to_state, + server_->catalog_manager()->sys_catalog()); + if (PREDICT_FALSE(!s.ok())) { + rpc->RespondFailure(s); + return; + } + rpc->RespondSuccess(); +} + void MasterServiceImpl::TSHeartbeat(const TSHeartbeatRequestPB* req, TSHeartbeatResponsePB* resp, rpc::RpcContext* rpc) { diff --git a/src/kudu/master/master_service.h b/src/kudu/master/master_service.h index 83dee04..4098ce7 100644 --- a/src/kudu/master/master_service.h +++ b/src/kudu/master/master_service.h @@ -38,6 +38,8 @@ namespace master { class AlterTableRequestPB; class AlterTableResponsePB; +class ChangeTServerStateRequestPB; +class ChangeTServerStateResponsePB; class ConnectToMasterRequestPB; class ConnectToMasterResponsePB; class CreateTableRequestPB; @@ -98,6 +100,10 @@ class MasterServiceImpl : public MasterServiceIf { google::protobuf::Message* resp, rpc::RpcContext* context) override; + void ChangeTServerState(const ChangeTServerStateRequestPB* req, + ChangeTServerStateResponsePB* resp, + rpc::RpcContext* rpc) override; + void Ping(const PingRequestPB* req, PingResponsePB* resp, rpc::RpcContext* rpc) override; diff --git a/src/kudu/master/ts_state-test.cc b/src/kudu/master/ts_state-test.cc index cfeba76..387dac5 100644 --- a/src/kudu/master/ts_state-test.cc +++ b/src/kudu/master/ts_state-test.cc @@ -22,6 +22,7 @@ #include <utility> #include <vector> +#include <gflags/gflags_declare.h> #include <glog/logging.h> #include <gtest/gtest.h> @@ -48,6 +49,8 @@ #include "kudu/util/test_macros.h" #include "kudu/util/test_util.h" +DECLARE_bool(master_support_maintenance_mode); + using kudu::consensus::ReplicaManagementInfoPB; using kudu::rpc::Messenger; using kudu::rpc::MessengerBuilder; @@ -307,5 +310,42 @@ TEST_F(TServerStateTest, MaintenanceModeTServerDoesntGetNewReplicas) { ASSERT_OK(CreateTable("happy-table")); } +// Test to exercise the RPC endpoint to change the tserver state. +TEST_F(TServerStateTest, TestRPCs) { + FLAGS_master_support_maintenance_mode = true; + ChangeTServerStateRequestPB req; + Status s; + // Sends a state change RPC and ensures there's an error, matching the + // input error string if provided. + const auto send_req_check_failed = [&] (const string& error) { + RpcController rpc; + ChangeTServerStateResponsePB resp; + s = proxy_->ChangeTServerState(req, &resp, &rpc); + ASSERT_TRUE(s.IsRemoteError()) << s.ToString(); + if (!error.empty()) { + ASSERT_STR_CONTAINS(s.ToString(), error); + } + }; + NO_FATALS(send_req_check_failed("must contain tserver state change")); + TServerStateChangePB* ts_state_change = req.mutable_change(); + + NO_FATALS(send_req_check_failed("uuid not provided")); + ts_state_change->set_uuid(kTServer); + + NO_FATALS(send_req_check_failed("state change not provided")); + + // Now send over a correct request. Do this a couple times to sanity check + // that repeated calls are just no-ops. + ts_state_change->set_change(TServerStateChangePB::ENTER_MAINTENANCE_MODE); + const int kNumRepeatedCalls = 2; + for (int i = 0; i < kNumRepeatedCalls; i++) { + RpcController rpc; + ChangeTServerStateResponsePB resp; + ASSERT_OK(proxy_->ChangeTServerState(req, &resp, &rpc)); + ASSERT_FALSE(resp.has_error()); + ASSERT_EQ(TServerStatePB::MAINTENANCE_MODE, ts_manager_->GetTServerState(kTServer)); + } +} + } // namespace master } // namespace kudu
