This is an automated email from the ASF dual-hosted git repository. awong pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit e8362ca1bb48ed0150cf134d93899d7a2f74caf3 Author: Andrew Wong <[email protected]> AuthorDate: Sat Sep 14 23:28:49 2019 -0700 KUDU-2069 p2: stop placement onto servers in maintenance mode This patch stops placement of replicas on tablet servers that are in maintenance mode. Change-Id: Icc3ca41a3b3c8625bec0470e9604df86e91b4952 Reviewed-on: http://gerrit.cloudera.org:8080/14220 Reviewed-by: Andrew Wong <[email protected]> Tested-by: Andrew Wong <[email protected]> --- src/kudu/master/catalog_manager.cc | 6 +-- src/kudu/master/ts_manager.cc | 34 +++++++++++----- src/kudu/master/ts_manager.h | 16 ++++++-- src/kudu/master/ts_state-test.cc | 82 +++++++++++++++++++++++++++++++++++--- 4 files changed, 117 insertions(+), 21 deletions(-) diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc index 0933d89..3bad2cb 100644 --- a/src/kudu/master/catalog_manager.cc +++ b/src/kudu/master/catalog_manager.cc @@ -1574,7 +1574,7 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req, // Verify that the number of replicas isn't larger than the number of live tablet // servers. TSDescriptorVector ts_descs; - master_->ts_manager()->GetAllLiveDescriptors(&ts_descs); + master_->ts_manager()->GetDescriptorsAvailableForPlacement(&ts_descs); const auto num_live_tservers = ts_descs.size(); if (FLAGS_catalog_manager_check_ts_count_for_create_table && num_replicas > num_live_tservers) { // Note: this error message is matched against in master-stress-test. @@ -3766,7 +3766,7 @@ bool AsyncAddReplicaTask::SendRequest(int attempt) { } TSDescriptorVector ts_descs; - master_->ts_manager()->GetAllLiveDescriptors(&ts_descs); + master_->ts_manager()->GetDescriptorsAvailableForPlacement(&ts_descs); // Get the dimension of the tablet. Otherwise, it will be none. optional<string> dimension = none; @@ -4649,7 +4649,7 @@ Status CatalogManager::ProcessPendingAssignments( // For those tablets which need to be created in this round, assign replicas. { TSDescriptorVector ts_descs; - master_->ts_manager()->GetAllLiveDescriptors(&ts_descs); + master_->ts_manager()->GetDescriptorsAvailableForPlacement(&ts_descs); PlacementPolicy policy(std::move(ts_descs), &rng_); for (auto& tablet : deferred.needs_create_rpc) { // NOTE: if we fail to select replicas on the first pass (due to diff --git a/src/kudu/master/ts_manager.cc b/src/kudu/master/ts_manager.cc index 6eef6c1..7106ed3 100644 --- a/src/kudu/master/ts_manager.cc +++ b/src/kudu/master/ts_manager.cc @@ -197,24 +197,24 @@ void TSManager::GetAllDescriptors(TSDescriptorVector* descs) const { AppendValuesFromMap(servers_by_id_, descs); } -void TSManager::GetAllLiveDescriptors(TSDescriptorVector* descs) const { - descs->clear(); +int TSManager::GetCount() const { + shared_lock<rw_spinlock> l(lock_); + return servers_by_id_.size(); +} +void TSManager::GetDescriptorsAvailableForPlacement(TSDescriptorVector* descs) const { + descs->clear(); shared_lock<rw_spinlock> l(lock_); + shared_lock<RWMutex> tsl(ts_state_lock_); descs->reserve(servers_by_id_.size()); for (const TSDescriptorMap::value_type& entry : servers_by_id_) { const shared_ptr<TSDescriptor>& ts = entry.second; - if (!ts->PresumedDead()) { + if (AvailableForPlacementUnlocked(*ts)) { descs->push_back(ts); } } } -int TSManager::GetCount() const { - shared_lock<rw_spinlock> l(lock_); - return servers_by_id_.size(); -} - Status TSManager::SetTServerState(const string& ts_uuid, TServerStatePB ts_state, SysCatalogTable* sys_catalog) { @@ -238,9 +238,14 @@ Status TSManager::SetTServerState(const string& ts_uuid, return Status::OK(); } +TServerStatePB TSManager::GetTServerStateUnlocked(const string& ts_uuid) const { + ts_state_lock_.AssertAcquired(); + return FindWithDefault(ts_state_by_uuid_, ts_uuid, TServerStatePB::NONE); +} + TServerStatePB TSManager::GetTServerState(const string& ts_uuid) const { shared_lock<RWMutex> l(ts_state_lock_); - return FindWithDefault(ts_state_by_uuid_, ts_uuid, TServerStatePB::NONE); + return GetTServerStateUnlocked(ts_uuid); } Status TSManager::ReloadTServerStates(SysCatalogTable* sys_catalog) { @@ -266,6 +271,17 @@ int TSManager::ClusterSkew() const { return max_count - min_count; } +bool TSManager::AvailableForPlacementUnlocked(const TSDescriptor& ts) const { + DCHECK(lock_.is_locked()); + // TODO(KUDU-1827): this should also be used when decommissioning a server. + if (GetTServerStateUnlocked(ts.permanent_uuid()) == TServerStatePB::MAINTENANCE_MODE) { + return false; + } + // If the tablet server has heartbeated recently enough, it is considered + // alive and available for placement. + return !ts.PresumedDead(); +} + } // namespace master } // namespace kudu diff --git a/src/kudu/master/ts_manager.h b/src/kudu/master/ts_manager.h index cd803fa..7d3603f 100644 --- a/src/kudu/master/ts_manager.h +++ b/src/kudu/master/ts_manager.h @@ -84,9 +84,11 @@ class TSManager { // list. void GetAllDescriptors(TSDescriptorVector* descs) const; - // Return all of the currently registered TS descriptors that have sent a - // heartbeat recently, indicating that they're alive and well. - void GetAllLiveDescriptors(TSDescriptorVector* descs) const; + // Return all of the currently registered TS descriptors that are available + // for replica placement -- they have sent a heartbeat recently, indicating + // that they're alive and well, and they aren't in a mode that would block + // replication to them (e.g. maintenance mode). + void GetDescriptorsAvailableForPlacement(TSDescriptorVector* descs) const; // Get the TS count. int GetCount() const; @@ -108,6 +110,14 @@ class TSManager { int ClusterSkew() const; + // Return the tserver state for the given tablet server UUID, or NONE if one + // doesn't exist. Must hold 'ts_state_lock_' to call. + TServerStatePB GetTServerStateUnlocked(const std::string& ts_uuid) const; + + // Returns whether the given server can have replicas placed on it (e.g. it + // is not dead, not in maintenance mode). + bool AvailableForPlacementUnlocked(const TSDescriptor& ts) const; + mutable rw_spinlock lock_; FunctionGaugeDetacher metric_detacher_; diff --git a/src/kudu/master/ts_state-test.cc b/src/kudu/master/ts_state-test.cc index c0c45ab..cfeba76 100644 --- a/src/kudu/master/ts_state-test.cc +++ b/src/kudu/master/ts_state-test.cc @@ -19,12 +19,15 @@ #include <memory> #include <string> #include <thread> +#include <utility> #include <vector> #include <glog/logging.h> #include <gtest/gtest.h> #include "kudu/common/common.pb.h" +#include "kudu/common/wire_protocol-test-util.h" +#include "kudu/common/wire_protocol.h" #include "kudu/common/wire_protocol.pb.h" #include "kudu/consensus/replica_management.pb.h" #include "kudu/gutil/strings/substitute.h" @@ -33,6 +36,7 @@ #include "kudu/master/master.pb.h" #include "kudu/master/master.proxy.h" #include "kudu/master/mini_master.h" +#include "kudu/master/ts_descriptor.h" #include "kudu/master/ts_manager.h" #include "kudu/rpc/messenger.h" #include "kudu/rpc/rpc_controller.h" @@ -90,10 +94,10 @@ class TServerStateTest : public KuduTest { mini_master_.reset(new MiniMaster(GetTestPath("master"), HostPort("127.0.0.1", 0))); ASSERT_OK(mini_master_->Start()); - Master* master = mini_master_->master(); - ASSERT_OK(master->WaitUntilCatalogManagerIsLeaderAndReadyForTests( + master_ = mini_master_->master(); + ASSERT_OK(master_->WaitUntilCatalogManagerIsLeaderAndReadyForTests( MonoDelta::FromSeconds(30))); - ts_manager_ = master->ts_manager(); + ts_manager_ = master_->ts_manager(); MessengerBuilder builder("client"); ASSERT_OK(builder.Build(&client_messenger_)); proxy_.reset(new MasterServiceProxy(client_messenger_, @@ -103,9 +107,8 @@ class TServerStateTest : public KuduTest { // Sets the tserver state for the given tablet server to 'state'. Status SetTServerState(const string& tserver_uuid, TServerStatePB state) { - Master* master = mini_master_->master(); - return master->ts_manager()->SetTServerState( - tserver_uuid, state, master->catalog_manager()->sys_catalog()); + return master_->ts_manager()->SetTServerState( + tserver_uuid, state, master_->catalog_manager()->sys_catalog()); } // Pretends to be a tserver by sending a heartbeat to the master from the @@ -130,8 +133,28 @@ class TServerStateTest : public KuduTest { return proxy_->TSHeartbeat(req, &resp_pb, &rpc); } + // Creates a table with the given name with a simple schema, a default + // replication factor, and a single partition. + Status CreateTable(const string& table_name) { + RpcController rpc; + CreateTableResponsePB resp; + CreateTableRequestPB req; + req.set_name(table_name); + RETURN_NOT_OK(SchemaToPB(GetSimpleTestSchema(), req.mutable_schema())); + + RETURN_NOT_OK(proxy_->CreateTable(req, &resp, &rpc)); + if (resp.has_error()) { + RETURN_NOT_OK(StatusFromPB(resp.error().status())); + } + if (!resp.has_table_id()) { + return Status::RuntimeError("expected table id"); + } + return Status::OK(); + } + protected: unique_ptr<MiniMaster> mini_master_; + Master* master_; TSManager* ts_manager_; unique_ptr<MasterServiceProxy> proxy_; shared_ptr<Messenger> client_messenger_; @@ -237,5 +260,52 @@ TEST_F(TServerStateTest, TestConcurrentSetTServerState) { } } +// Test that tablet servers that are in maintenance mode don't get tablet +// replicas placed on them. +TEST_F(TServerStateTest, MaintenanceModeTServerDoesntGetNewReplicas) { + const int kNumTServers = 4; + vector<string> tserver_ids; + + // Report to the master from a few tablet servers to register them. + for (int i = 0; i < kNumTServers; i++) { + string tserver_id = Substitute("$0-$1", kTServer, i); + ASSERT_OK(SendHeartbeat(tserver_id)); + tserver_ids.emplace_back(std::move(tserver_id)); + } + + // Put one of the tablet servers in maintenance mode and create a few tables. + const string& first_maintenance_tserver = tserver_ids[0]; + ASSERT_OK(SetTServerState(first_maintenance_tserver, TServerStatePB::MAINTENANCE_MODE)); + const int kNumTables = 10; + for (int i = 0; i < kNumTables; i++) { + ASSERT_OK(CreateTable(Substitute("table-$0", i))); + } + TSDescriptorVector descs; + master_->ts_manager()->GetAllDescriptors(&descs); + for (const auto& desc : descs) { + if (desc->permanent_uuid() == first_maintenance_tserver) { + // The tablet server in maintenance mode should have had no replicas + // placed on it because it's in maintenance mode. + ASSERT_EQ(0, desc->RecentReplicaCreations()); + } else { + // All others should have some. Note that we can't compare an exact + // number because the replica creations has a decay factor built into it. + ASSERT_LT(0, desc->RecentReplicaCreations()); + } + } + // If we put another tserver in maintenance mode, we'll only have two + // remaining tservers, and won't be able to create new tables with the + // default replication factor. + ASSERT_OK(SetTServerState(tserver_ids[1], TServerStatePB::MAINTENANCE_MODE)); + string sad_table_id; + Status s = CreateTable("sad-table"); + ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString(); + ASSERT_STR_CONTAINS(s.ToString(), "not enough live tablet servers"); + + // And if we exit maintenance mode, we should be able to create tables again. + ASSERT_OK(SetTServerState(tserver_ids[1], TServerStatePB::NONE)); + ASSERT_OK(CreateTable("happy-table")); +} + } // namespace master } // namespace kudu
