This is an automated email from the ASF dual-hosted git repository. awong pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 1f4d473c74e7d57eeae3b384a46f4eb088e16223 Author: Andrew Wong <[email protected]> AuthorDate: Tue Oct 8 13:28:11 2019 -0700 KUDU-2069 p8: support listing tserver state through CLI This patch adds support to the tserver list tool to show the tserver states. $ kudu tserver list <master> --columns=uuid,state I opted to have the state be an optional return field because there are cases where a tablet that is unregistered will have a state associated with it (e.g. if the master is restarted), in which case it may be surprising to see info about them, unless explicitly requested. Change-Id: I5917927f3bc4bb6bebd1cfb63555ec20d94d9091 Reviewed-on: http://gerrit.cloudera.org:8080/14369 Reviewed-by: Adar Dembo <[email protected]> Tested-by: Kudu Jenkins --- src/kudu/master/master.proto | 6 +++++ src/kudu/master/master_service.cc | 28 +++++++++++++++++++- src/kudu/master/ts_manager.cc | 5 ++++ src/kudu/master/ts_manager.h | 7 ++++- src/kudu/tools/kudu-tool-test.cc | 49 +++++++++++++++++++++++++++++++++++ src/kudu/tools/tool_action_tserver.cc | 21 ++++++++++----- 6 files changed, 108 insertions(+), 8 deletions(-) diff --git a/src/kudu/master/master.proto b/src/kudu/master/master.proto index 9d37612..e480b6c 100644 --- a/src/kudu/master/master.proto +++ b/src/kudu/master/master.proto @@ -805,6 +805,11 @@ message HiveMetastoreConfig { // ============================================================================ message ListTabletServersRequestPB { + // Whether or not to include the tserver states. + // Note: this may include states of tservers that haven't been registered and + // thus don't have a complete response Entries. In such cases, the Entries + // will be returned with bogus info w.r.t registration and heartbeating. + optional bool include_states = 1; } message ListTabletServersResponsePB { @@ -815,6 +820,7 @@ message ListTabletServersResponsePB { optional ServerRegistrationPB registration = 2; optional int32 millis_since_heartbeat = 3; optional string location = 4; + optional TServerStatePB state = 5; } repeated Entry servers = 2; } diff --git a/src/kudu/master/master_service.cc b/src/kudu/master/master_service.cc index 5c23933..5b20d6b 100644 --- a/src/kudu/master/master_service.cc +++ b/src/kudu/master/master_service.cc @@ -20,6 +20,7 @@ #include <memory> #include <ostream> #include <string> +#include <unordered_map> #include <utility> #include <vector> @@ -33,6 +34,7 @@ #include "kudu/common/wire_protocol.pb.h" #include "kudu/consensus/metadata.pb.h" #include "kudu/consensus/replica_management.pb.h" +#include "kudu/gutil/map-util.h" #include "kudu/gutil/port.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/hms/hms_catalog.h" @@ -547,7 +549,10 @@ void MasterServiceImpl::GetTableSchema(const GetTableSchemaRequestPB* req, void MasterServiceImpl::ListTabletServers(const ListTabletServersRequestPB* req, ListTabletServersResponsePB* resp, rpc::RpcContext* rpc) { - vector<std::shared_ptr<TSDescriptor> > descs; + TSManager* ts_manager = server_->ts_manager(); + TServerStateMap states = req->include_states() ? + ts_manager->GetTServerStates() : TServerStateMap(); + vector<std::shared_ptr<TSDescriptor>> descs; server_->ts_manager()->GetAllDescriptors(&descs); for (const std::shared_ptr<TSDescriptor>& desc : descs) { ListTabletServersResponsePB::Entry* entry = resp->add_servers(); @@ -555,6 +560,27 @@ void MasterServiceImpl::ListTabletServers(const ListTabletServersRequestPB* req, desc->GetRegistration(entry->mutable_registration()); entry->set_millis_since_heartbeat(desc->TimeSinceHeartbeat().ToMilliseconds()); if (desc->location()) entry->set_location(desc->location().get()); + + // If we need to return states, do so. + const auto& uuid = desc->permanent_uuid(); + const auto state = FindWithDefault(states, uuid, TServerStatePB::NONE); + entry->set_state(state); + states.erase(uuid); + } + // If there are any states left (e.g. they don't correspond to a registered + // server), report them. Set a bogus seqno, since the servers have not + // registered. + for (const auto& ts_and_state : states) { + const auto& ts = ts_and_state.first; + const auto& state = ts_and_state.second; + ListTabletServersResponsePB::Entry* entry = resp->add_servers(); + NodeInstancePB* instance = entry->mutable_instance_id(); + instance->set_permanent_uuid(ts); + instance->set_instance_seqno(-1); + entry->set_millis_since_heartbeat(-1); + // Note: The state map should only track non-NONE states. + DCHECK_NE(TServerStatePB::NONE, state); + entry->set_state(state); } rpc->RespondSuccess(); } diff --git a/src/kudu/master/ts_manager.cc b/src/kudu/master/ts_manager.cc index 9caaa32..46f68d9 100644 --- a/src/kudu/master/ts_manager.cc +++ b/src/kudu/master/ts_manager.cc @@ -216,6 +216,11 @@ unordered_set<string> TSManager::GetUuidsToIgnoreForUnderreplication() const { return uuids; } +TServerStateMap TSManager::GetTServerStates() const { + shared_lock<RWMutex> tsl(ts_state_lock_); + return ts_state_by_uuid_; +} + void TSManager::GetDescriptorsAvailableForPlacement(TSDescriptorVector* descs) const { descs->clear(); shared_lock<RWMutex> tsl(ts_state_lock_); diff --git a/src/kudu/master/ts_manager.h b/src/kudu/master/ts_manager.h index 7e3e42f..dc5feb8 100644 --- a/src/kudu/master/ts_manager.h +++ b/src/kudu/master/ts_manager.h @@ -41,6 +41,8 @@ namespace master { class LocationCache; class SysCatalogTable; +typedef std::unordered_map<std::string, TServerStatePB> TServerStateMap; + // Tracks the servers that the master has heard from, along with their // last heartbeat, etc. // @@ -96,6 +98,9 @@ class TSManager { // mode). std::unordered_set<std::string> GetUuidsToIgnoreForUnderreplication() const; + // Return the tablet server states for each tablet server UUID. + TServerStateMap GetTServerStates() const; + // Get the TS count. int GetCount() const; @@ -151,7 +156,7 @@ class TSManager { // Maps from the UUIDs of tablet servers to their tserver state, if any. // Note: the states don't necessarily belong to registered tablet servers. - std::unordered_map<std::string, TServerStatePB> ts_state_by_uuid_; + TServerStateMap ts_state_by_uuid_; LocationCache* location_cache_; diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc index 30a09a0..a5066dc 100644 --- a/src/kudu/tools/kudu-tool-test.cc +++ b/src/kudu/tools/kudu-tool-test.cc @@ -2915,6 +2915,55 @@ TEST_F(ToolTest, TestTserverListLocationNotAssigned) { ASSERT_STR_CONTAINS(out, cluster_->tablet_server(0)->uuid() + ",<none>"); } +TEST_F(ToolTest, TestTServerListState) { + NO_FATALS(StartExternalMiniCluster()); + string master_addr = cluster_->master()->bound_rpc_addr().ToString(); + const string ts_uuid = cluster_->tablet_server(0)->uuid(); + + // First, set some tserver state. + NO_FATALS(RunActionStdoutNone(Substitute("tserver state enter_maintenance $0 $1", + master_addr, ts_uuid))); + + // If the state isn't requested, we shouldn't see any. + string out; + NO_FATALS(RunActionStdoutString( + Substitute("tserver list $0 --columns=uuid --format=csv", master_addr), &out)); + ASSERT_STR_NOT_CONTAINS(out, Substitute("$0,$1", ts_uuid, "MAINTENANCE_MODE")); + + // If it is requested, we should see the state. + NO_FATALS(RunActionStdoutString( + Substitute("tserver list $0 --columns=uuid,state --format=csv", master_addr), &out)); + ASSERT_STR_CONTAINS(out, Substitute("$0,$1", ts_uuid, "MAINTENANCE_MODE")); + + // We should still see the state if the tserver hasn't been registered. + // "Unregister" the server by restarting the cluster and only bringing up the + // master, and verify that we still see the tserver in maintenance mode. + cluster_->Shutdown(); + ASSERT_OK(cluster_->master()->Restart()); + master_addr = cluster_->master()->bound_rpc_addr().ToString(); + NO_FATALS(RunActionStdoutString( + Substitute("tserver list $0 --columns=uuid,state --format=csv", master_addr), &out)); + ASSERT_STR_CONTAINS(out, Substitute("$0,$1", ts_uuid, "MAINTENANCE_MODE")); + + NO_FATALS(RunActionStdoutNone(Substitute("tserver state exit_maintenance $0 $1", + master_addr, ts_uuid))); + + // Once removed, we should see none. + // Since the tserver hasn't registered, there should be no output at all. + NO_FATALS(RunActionStdoutString( + Substitute("tserver list $0 --columns=uuid,state --format=csv", master_addr), &out)); + ASSERT_EQ("", out); + + // And once the tserver has registered, we should see no state. Assert + // eventually to give the tserver some time to register. + ASSERT_OK(cluster_->tablet_server(0)->Restart()); + ASSERT_EVENTUALLY([&] { + NO_FATALS(RunActionStdoutString( + Substitute("tserver list $0 --columns=uuid,state --format=csv", master_addr), &out)); + ASSERT_STR_CONTAINS(out, Substitute("$0,$1", ts_uuid, "NONE")); + }); +} + TEST_F(ToolTest, TestMasterList) { ExternalMiniClusterOptions opts; opts.num_tablet_servers = 0; diff --git a/src/kudu/tools/tool_action_tserver.cc b/src/kudu/tools/tool_action_tserver.cc index 744458c..bf62257 100644 --- a/src/kudu/tools/tool_action_tserver.cc +++ b/src/kudu/tools/tool_action_tserver.cc @@ -32,7 +32,6 @@ #include "kudu/gutil/map-util.h" #include "kudu/gutil/strings/join.h" #include "kudu/gutil/strings/split.h" -#include "kudu/gutil/strings/stringpiece.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/master/master.pb.h" #include "kudu/master/master.proxy.h" @@ -100,11 +99,17 @@ Status ListTServers(const RunnerContext& context) { LeaderMasterProxy proxy; RETURN_NOT_OK(proxy.Init(context)); + const vector<string> cols = strings::Split(FLAGS_columns, ",", strings::SkipEmpty()); ListTabletServersRequestPB req; + for (const auto& col : cols) { + if (boost::iequals(col, "state")) { + req.set_include_states(true); + } + } ListTabletServersResponsePB resp; - proxy.SyncRpc<ListTabletServersRequestPB, ListTabletServersResponsePB>( - req, &resp, "ListTabletServers", &MasterServiceProxy::ListTabletServersAsync); + RETURN_NOT_OK((proxy.SyncRpc<ListTabletServersRequestPB, ListTabletServersResponsePB>( + req, &resp, "ListTabletServers", &MasterServiceProxy::ListTabletServersAsync))); if (resp.has_error()) { return StatusFromPB(resp.error().status()); @@ -117,7 +122,7 @@ Status ListTServers(const RunnerContext& context) { return strings::Substitute("$0:$1", hostport.host(), hostport.port()); }; - for (const auto& column : strings::Split(FLAGS_columns, ",", strings::SkipEmpty())) { + for (const auto& column : cols) { vector<string> values; if (boost::iequals(column, "uuid")) { for (const auto& server : servers) { @@ -156,10 +161,14 @@ Status ListTServers(const RunnerContext& context) { for (const auto& server : servers) { values.emplace_back(StartTimeToString(server.registration())); } + } else if (boost::iequals(column, "state")) { + for (const auto& server : servers) { + values.emplace_back(TServerStatePB_Name(server.state())); + } } else { return Status::InvalidArgument("unknown column (--columns)", column); } - table.AddColumn(column.ToString(), std::move(values)); + table.AddColumn(column, std::move(values)); } RETURN_NOT_OK(table.PrintTo(cout)); @@ -248,7 +257,7 @@ unique_ptr<Mode> BuildTServerMode() { string("Comma-separated list of tserver info fields to " "include in output.\nPossible values: uuid, " "rpc-addresses, http-addresses, version, seqno, " - "heartbeat and start_time")) + "heartbeat, start_time, state")) .AddOptionalParameter("format") .AddOptionalParameter("timeout_ms") .Build();
