This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch branch-1.9.x in repository https://gitbox.apache.org/repos/asf/kudu.git
commit b267c9f45fa95c716ec9e525d1f4eeadbd6dd7ed Author: Alexey Serbin <[email protected]> AuthorDate: Tue Feb 26 18:14:30 2019 -0800 [master] use cache for assigned locations This changelist contains changes to incorporate LocationCache into master. With this patch, master caches once assigned location to tablet servers and clients. Corresponding tests have been updated accordingly and a few additional tests are added with this changelist. Conflicts: src/kudu/client/client-test.cc src/kudu/tools/ksck_remote-test.cc Change-Id: I12c8952c43a8ad352acd46c8006824b2ad9d1204 Reviewed-on: http://gerrit.cloudera.org:8080/12619 Tested-by: Kudu Jenkins Reviewed-by: Will Berkeley <[email protected]> (cherry picked from commit 4ace91713ad81e72135cca7679e8a6e63b4382b5) Reviewed-on: http://gerrit.cloudera.org:8080/12784 Reviewed-by: Andrew Wong <[email protected]> --- src/kudu/client/client-test.cc | 44 ++++++++++++- .../integration-tests/location_assignment-itest.cc | 70 +++++++++++++++++++++ src/kudu/master/master-test.cc | 25 ++++++-- src/kudu/master/master.cc | 9 ++- src/kudu/master/master.h | 11 +++- src/kudu/master/master_service.cc | 10 +-- src/kudu/master/ts_descriptor-test.cc | 24 ++++---- src/kudu/master/ts_descriptor.cc | 72 +++------------------- src/kudu/master/ts_descriptor.h | 16 ++--- src/kudu/master/ts_manager.cc | 9 ++- src/kudu/master/ts_manager.h | 10 ++- src/kudu/tools/ksck_remote-test.cc | 36 +++++------ 12 files changed, 209 insertions(+), 127 deletions(-) diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc index 1979645..1693e1a 100644 --- a/src/kudu/client/client-test.cc +++ b/src/kudu/client/client-test.cc @@ -111,6 +111,7 @@ DECLARE_bool(allow_unsafe_replication_factor); DECLARE_bool(fail_dns_resolution); +DECLARE_bool(location_mapping_by_uuid); DECLARE_bool(log_inject_latency); DECLARE_bool(master_support_connect_to_master_rpc); DECLARE_bool(rpc_trace_negotiation); @@ -137,6 +138,8 @@ DEFINE_int32(test_scan_num_rows, 1000, "Number of rows to insert and scan"); METRIC_DECLARE_counter(block_manager_total_bytes_read); METRIC_DECLARE_counter(rpcs_queue_overflow); +METRIC_DECLARE_counter(location_mapping_cache_hits); +METRIC_DECLARE_counter(location_mapping_cache_queries); METRIC_DECLARE_histogram(handler_latency_kudu_master_MasterService_GetMasterRegistration); METRIC_DECLARE_histogram(handler_latency_kudu_master_MasterService_GetTableLocations); METRIC_DECLARE_histogram(handler_latency_kudu_master_MasterService_GetTabletLocations); @@ -5796,6 +5799,10 @@ TEST_F(ClientTest, TestBlockScannerHijackingAttempts) { } } +TEST_F(ClientTest, TestClientLocationNoLocationMappingCmd) { + ASSERT_TRUE(client_->location().empty()); +} + // Client test that assigns locations to clients and tablet servers. // For now, assigns a uniform location to all clients and tablet servers. class ClientWithLocationTest : public ClientTest { @@ -5806,15 +5813,46 @@ class ClientWithLocationTest : public ClientTest { const string location = "/foo"; FLAGS_location_mapping_cmd = strings::Substitute("$0 $1", location_cmd_path, location); + FLAGS_location_mapping_by_uuid = true; } }; -TEST_F(ClientTest, TestClientLocationNoLocationMappingCmd) { - ASSERT_TRUE(client_->location().empty()); +TEST_F(ClientWithLocationTest, TestClientLocation) { + ASSERT_EQ("/foo", client_->location()); } -TEST_F(ClientWithLocationTest, TestClientLocation) { +TEST_F(ClientWithLocationTest, LocationCacheMetricsOnClientConnectToCluster) { ASSERT_EQ("/foo", client_->location()); + + auto& metric_entity = cluster_->mini_master()->master()->metric_entity(); + scoped_refptr<Counter> counter_hits( + METRIC_location_mapping_cache_hits.Instantiate(metric_entity)); + const auto hits_before = counter_hits->value(); + ASSERT_EQ(0, hits_before); + scoped_refptr<Counter> counter_queries( + METRIC_location_mapping_cache_queries.Instantiate(metric_entity)); + const auto queries_before = counter_queries->value(); + // Expecting location assignment queries from all tablet servers and + // the client. + ASSERT_EQ(cluster_->num_tablet_servers() + 1, queries_before); + + static constexpr int kIterNum = 10; + for (auto iter = 0; iter < kIterNum; ++iter) { + shared_ptr<KuduClient> client; + ASSERT_OK(KuduClientBuilder() + .add_master_server_addr(cluster_->mini_master()->bound_rpc_addr().ToString()) + .Build(&client)); + ASSERT_EQ("/foo", client->location()); + } + + // The location mapping cache should be hit every time a client is connecting + // from the same host as the former client. Nothing else should be touching + // the location assignment logic but ConnectToCluster() requests coming from + // the clients instantiated above. + const auto queries_after = counter_queries->value(); + ASSERT_EQ(queries_before + kIterNum, queries_after); + const auto hits_after = counter_hits->value(); + ASSERT_EQ(hits_before + kIterNum, hits_after); } } // namespace client } // namespace kudu diff --git a/src/kudu/integration-tests/location_assignment-itest.cc b/src/kudu/integration-tests/location_assignment-itest.cc index cae1fa8..ce1d891 100644 --- a/src/kudu/integration-tests/location_assignment-itest.cc +++ b/src/kudu/integration-tests/location_assignment-itest.cc @@ -49,6 +49,8 @@ DECLARE_int32(num_replicas); DECLARE_int32(num_tablet_servers); +METRIC_DECLARE_counter(location_mapping_cache_hits); +METRIC_DECLARE_counter(location_mapping_cache_queries); METRIC_DECLARE_counter(scans_started); METRIC_DECLARE_entity(tablet); @@ -225,6 +227,74 @@ TEST_P(TsLocationAssignmentITest, Basic) { NO_FATALS(cluster_->AssertNoCrashes()); } +// Verify the behavior of the location mapping cache upon tablet server +// registrations. +TEST_P(TsLocationAssignmentITest, LocationMappingCacheOnTabletServerRestart) { + if (!AllowSlowTests()) { + LOG(WARNING) << "test is skipped; set KUDU_ALLOW_SLOW_TESTS=1 to run"; + return; + } + + NO_FATALS(StartCluster()); + NO_FATALS(CheckLocationInfo()); + NO_FATALS(cluster_->AssertNoCrashes()); + + const auto num_tablet_servers = cluster_->num_tablet_servers(); + + int64_t hits_before; + ASSERT_OK(itest::GetInt64Metric( + cluster_->leader_master()->bound_http_hostport(), + &METRIC_ENTITY_server, + nullptr, + &METRIC_location_mapping_cache_hits, + "value", + &hits_before)); + ASSERT_EQ(0, hits_before); + + int64_t queries_before; + ASSERT_OK(itest::GetInt64Metric( + cluster_->leader_master()->bound_http_hostport(), + &METRIC_ENTITY_server, + nullptr, + &METRIC_location_mapping_cache_queries, + "value", + &queries_before)); + ASSERT_EQ(num_tablet_servers, queries_before); + + for (auto idx = 0; idx < num_tablet_servers; ++idx) { + auto* ts = cluster_->tablet_server(idx); + ts->Shutdown(); + ASSERT_OK(ts->Restart()); + } + + NO_FATALS(CheckLocationInfo()); + NO_FATALS(cluster_->AssertNoCrashes()); + + ASSERT_EVENTUALLY([&]() { + int64_t hits_after; + ASSERT_OK(itest::GetInt64Metric( + cluster_->leader_master()->bound_http_hostport(), + &METRIC_ENTITY_server, + nullptr, + &METRIC_location_mapping_cache_hits, + "value", + &hits_after)); + ASSERT_EQ(hits_before + num_tablet_servers, hits_after); + }); + + ASSERT_EVENTUALLY([&]() { + int64_t queries_after; + ASSERT_OK(itest::GetInt64Metric( + cluster_->leader_master()->bound_http_hostport(), + &METRIC_ENTITY_server, + nullptr, + &METRIC_location_mapping_cache_queries, + "value", + &queries_after)); + ASSERT_EQ(queries_before + num_tablet_servers, queries_after); + }); +} + INSTANTIATE_TEST_CASE_P(, TsLocationAssignmentITest, ::testing::Combine(::testing::Values(1, 3), ::testing::Values(1, 8, 16, 32))); diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc index 54dac9d..5e41fa4 100644 --- a/src/kudu/master/master-test.cc +++ b/src/kudu/master/master-test.cc @@ -449,6 +449,11 @@ TEST_F(MasterTest, TestRegisterAndHeartbeat) { // Set a command that always fails. FLAGS_location_mapping_cmd = "false"; + // Restarting the master to take into account the new setting for the + // --location_mapping_cmd flag. + mini_master_->Shutdown(); + ASSERT_OK(mini_master_->Restart()); + // Set a new UUID so registration is for the first time. auto new_common = common; new_common.mutable_ts_instance()->set_permanent_uuid("lmc-fail-ts"); @@ -462,7 +467,7 @@ TEST_F(MasterTest, TestRegisterAndHeartbeat) { // Registration should fail. Status s = proxy_->TSHeartbeat(hb_req, &hb_resp, &rpc); - ASSERT_TRUE(s.IsRemoteError()); + ASSERT_TRUE(s.IsRemoteError()) << s.ToString(); ASSERT_STR_CONTAINS(s.ToString(), "failed to run location mapping command"); // Make sure the tablet server isn't returned to clients. @@ -473,11 +478,7 @@ TEST_F(MasterTest, TestRegisterAndHeartbeat) { LOG(INFO) << SecureDebugString(list_ts_resp); ASSERT_FALSE(list_ts_resp.has_error()); - ASSERT_EQ(1, list_ts_resp.servers_size()); - ASSERT_EQ("my-ts-uuid", list_ts_resp.servers(0).instance_id().permanent_uuid()); - - // Reset the flag. - FLAGS_location_mapping_cmd = ""; + ASSERT_EQ(0, list_ts_resp.servers_size()); } } @@ -1632,6 +1633,10 @@ TEST_F(MasterTest, TestConnectToMasterAndAssignLocation) { const string location = "/foo"; FLAGS_location_mapping_cmd = Substitute("$0 $1", kLocationCmdPath, location); { + // Restarting the master to take into account the new setting for the + // --location_mapping_cmd flag. + mini_master_->Shutdown(); + ASSERT_OK(mini_master_->Restart()); ConnectToMasterRequestPB req; ConnectToMasterResponsePB resp; RpcController rpc; @@ -1644,6 +1649,10 @@ TEST_F(MasterTest, TestConnectToMasterAndAssignLocation) { // location should be assigned. FLAGS_location_mapping_cmd = "false"; { + // Restarting the master to take into account the new setting for the + // --location_mapping_cmd flag. + mini_master_->Shutdown(); + ASSERT_OK(mini_master_->Restart()); ConnectToMasterRequestPB req; ConnectToMasterResponsePB resp; RpcController rpc; @@ -1656,6 +1665,10 @@ TEST_F(MasterTest, TestConnectToMasterAndAssignLocation) { const string new_location = "/bar"; FLAGS_location_mapping_cmd = Substitute("$0 $1", kLocationCmdPath, new_location); { + // Restarting the master to take into account the new setting for the + // --location_mapping_cmd flag. + mini_master_->Shutdown(); + ASSERT_OK(mini_master_->Restart()); ConnectToMasterRequestPB req; ConnectToMasterResponsePB resp; RpcController rpc; diff --git a/src/kudu/master/master.cc b/src/kudu/master/master.cc index ae33cd2..f6a46d2 100644 --- a/src/kudu/master/master.cc +++ b/src/kudu/master/master.cc @@ -37,8 +37,10 @@ #include "kudu/fs/fs_manager.h" #include "kudu/gutil/bind.h" #include "kudu/gutil/bind_helpers.h" +#include "kudu/gutil/ref_counted.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/master/catalog_manager.h" +#include "kudu/master/location_cache.h" #include "kudu/master/master.pb.h" #include "kudu/master/master.proxy.h" #include "kudu/master/master_cert_authority.h" @@ -87,6 +89,7 @@ TAG_FLAG(authz_token_validity_seconds, experimental); DECLARE_bool(hive_metastore_sasl_enabled); DECLARE_string(keytab_file); +DECLARE_string(location_mapping_cmd); using std::min; using std::shared_ptr; @@ -124,11 +127,15 @@ GROUP_FLAG_VALIDATOR(hive_metastore_sasl_enabled, ValidateHiveMetastoreSaslEnabl Master::Master(const MasterOptions& opts) : KuduServer("Master", opts, "kudu.master"), state_(kStopped), - ts_manager_(new TSManager(metric_entity_)), catalog_manager_(new CatalogManager(this)), path_handlers_(new MasterPathHandlers(this)), opts_(opts), registration_initialized_(false) { + const auto& location_cmd = FLAGS_location_mapping_cmd; + if (!location_cmd.empty()) { + location_cache_.reset(new LocationCache(location_cmd, metric_entity_.get())); + } + ts_manager_.reset(new TSManager(location_cache_.get(), metric_entity_)); } Master::~Master() { diff --git a/src/kudu/master/master.h b/src/kudu/master/master.h index fda75a1..077aabd 100644 --- a/src/kudu/master/master.h +++ b/src/kudu/master/master.h @@ -38,6 +38,9 @@ class HostPortPB; class MaintenanceManager; class MonoDelta; class ThreadPool; +namespace master { +class LocationCache; +} // namespace master namespace security { class TokenSigner; @@ -83,6 +86,8 @@ class Master : public kserver::KuduServer { const MasterOptions& opts() { return opts_; } + LocationCache* location_cache() { return location_cache_.get(); } + // Get the RPC and HTTP addresses for this master instance. Status GetMasterRegistration(ServerRegistrationPB* registration) const; @@ -131,7 +136,6 @@ class Master : public kserver::KuduServer { std::unique_ptr<MasterCertAuthority> cert_authority_; std::unique_ptr<security::TokenSigner> token_signer_; - gscoped_ptr<TSManager> ts_manager_; gscoped_ptr<CatalogManager> catalog_manager_; gscoped_ptr<MasterPathHandlers> path_handlers_; @@ -151,6 +155,11 @@ class Master : public kserver::KuduServer { // The maintenance manager for this master. std::shared_ptr<MaintenanceManager> maintenance_manager_; + // A simplistic cache to track already assigned locations. + std::unique_ptr<LocationCache> location_cache_; + + gscoped_ptr<TSManager> ts_manager_; + DISALLOW_COPY_AND_ASSIGN(Master); }; diff --git a/src/kudu/master/master_service.cc b/src/kudu/master/master_service.cc index 4a29a61..3866a3d 100644 --- a/src/kudu/master/master_service.cc +++ b/src/kudu/master/master_service.cc @@ -36,6 +36,7 @@ #include "kudu/gutil/strings/substitute.h" #include "kudu/hms/hms_catalog.h" #include "kudu/master/catalog_manager.h" +#include "kudu/master/location_cache.h" #include "kudu/master/master.h" #include "kudu/master/master.pb.h" #include "kudu/master/master_cert_authority.h" @@ -57,7 +58,6 @@ DECLARE_bool(hive_metastore_sasl_enabled); DECLARE_bool(raft_prepare_replacement_before_eviction); DECLARE_string(hive_metastore_uris); -DECLARE_string(location_mapping_cmd); DEFINE_int32(master_inject_latency_on_tablet_lookups_ms, 0, "Number of milliseconds that the master will sleep before responding to " @@ -533,12 +533,12 @@ void MasterServiceImpl::ConnectToMaster(const ConnectToMasterRequestPB* /*req*/, } // Assign a location to the client if needed. - if (!FLAGS_location_mapping_cmd.empty() && + auto* location_cache = server_->location_cache(); + if (location_cache != nullptr && PREDICT_TRUE(FLAGS_master_client_location_assignment_enabled)) { string location; - Status s = GetLocationFromLocationMappingCmd(FLAGS_location_mapping_cmd, - rpc->remote_address().host(), - &location); + const auto s = location_cache->GetLocation( + rpc->remote_address().host(), &location); if (s.ok()) { resp->set_client_location(location); } else { diff --git a/src/kudu/master/ts_descriptor-test.cc b/src/kudu/master/ts_descriptor-test.cc index a7ec824..1593458 100644 --- a/src/kudu/master/ts_descriptor-test.cc +++ b/src/kudu/master/ts_descriptor-test.cc @@ -22,20 +22,18 @@ #include <vector> #include <boost/optional/optional.hpp> -#include <gflags/gflags_declare.h> #include <glog/logging.h> #include <gtest/gtest.h> #include "kudu/common/common.pb.h" #include "kudu/common/wire_protocol.pb.h" #include "kudu/gutil/strings/substitute.h" +#include "kudu/master/location_cache.h" #include "kudu/util/path_util.h" #include "kudu/util/status.h" #include "kudu/util/test_macros.h" #include "kudu/util/test_util.h" -DECLARE_string(location_mapping_cmd); - using std::shared_ptr; using std::string; using std::vector; @@ -75,7 +73,7 @@ TEST(TSDescriptorTest, TestRegistration) { ServerRegistrationPB registration; SetupBasicRegistrationInfo(uuid, &instance, ®istration); shared_ptr<TSDescriptor> desc; - ASSERT_OK(TSDescriptor::RegisterNew(instance, registration, &desc)); + ASSERT_OK(TSDescriptor::RegisterNew(instance, registration, nullptr, &desc)); // Spot check some fields and the ToString value. ASSERT_EQ(uuid, desc->permanent_uuid()); @@ -90,14 +88,15 @@ TEST(TSDescriptorTest, TestLocationCmd) { "testdata/first_argument.sh"); // A happy case, using all allowed special characters. const string location = "/foo-bar0/BAAZ._9-quux"; - FLAGS_location_mapping_cmd = Substitute("$0 $1", kLocationCmdPath, location); + const string location_cmd = Substitute("$0 $1", kLocationCmdPath, location); + LocationCache cache(location_cmd, nullptr); const string uuid = "test"; NodeInstancePB instance; ServerRegistrationPB registration; SetupBasicRegistrationInfo(uuid, &instance, ®istration); shared_ptr<TSDescriptor> desc; - ASSERT_OK(TSDescriptor::RegisterNew(instance, registration, &desc)); + ASSERT_OK(TSDescriptor::RegisterNew(instance, registration, &cache, &desc)); ASSERT_EQ(location, desc->location()); @@ -108,9 +107,10 @@ TEST(TSDescriptorTest, TestLocationCmd) { "foo", // Doesn't begin with /. "/foo$", // Contains the illegal character '$'. }; - for (const auto& bad_location : bad_locations) { - FLAGS_location_mapping_cmd = Substitute("$0 $1", kLocationCmdPath, bad_location); - ASSERT_TRUE(desc->Register(instance, registration).IsRuntimeError()); + for (const auto& location : bad_locations) { + const auto location_cmd = Substitute("$0 $1", kLocationCmdPath, location); + LocationCache cache(location_cmd, nullptr); + ASSERT_TRUE(desc->Register(instance, registration, &cache).IsRuntimeError()); } // Bad cases where the script is invalid. @@ -126,9 +126,9 @@ TEST(TSDescriptorTest, TestLocationCmd) { // Command returns too many locations (i.e. contains illegal ' ' character). Substitute("echo $0 $1", "/foo", "/bar"), }; - for (const auto& bad_cmd : bad_cmds) { - FLAGS_location_mapping_cmd = bad_cmd; - ASSERT_TRUE(desc->Register(instance, registration).IsRuntimeError()); + for (const auto& cmd : bad_cmds) { + LocationCache cache(cmd, nullptr); + ASSERT_TRUE(desc->Register(instance, registration, &cache).IsRuntimeError()); } } } // namespace master diff --git a/src/kudu/master/ts_descriptor.cc b/src/kudu/master/ts_descriptor.cc index 619f927..208adac 100644 --- a/src/kudu/master/ts_descriptor.cc +++ b/src/kudu/master/ts_descriptor.cc @@ -18,7 +18,6 @@ #include "kudu/master/ts_descriptor.h" #include <cmath> -#include <cstdio> #include <mutex> #include <ostream> #include <unordered_set> @@ -32,17 +31,15 @@ #include "kudu/common/wire_protocol.h" #include "kudu/common/wire_protocol.pb.h" #include "kudu/consensus/consensus.proxy.h" -#include "kudu/gutil/strings/charset.h" -#include "kudu/gutil/strings/split.h" -#include "kudu/gutil/strings/strip.h" +#include "kudu/gutil/port.h" #include "kudu/gutil/strings/substitute.h" +#include "kudu/master/location_cache.h" #include "kudu/tserver/tserver_admin.proxy.h" #include "kudu/util/flag_tags.h" #include "kudu/util/logging.h" #include "kudu/util/net/net_util.h" #include "kudu/util/net/sockaddr.h" #include "kudu/util/pb_util.h" -#include "kudu/util/subprocess.h" #include "kudu/util/trace.h" DEFINE_int32(tserver_unresponsive_timeout_ms, 60 * 1000, @@ -75,63 +72,12 @@ using strings::Substitute; namespace kudu { namespace master { -namespace { -// Returns if 'location' is a valid location string, i.e. it begins with / -// and consists of /-separated tokens each of which contains only characters -// from the set [a-zA-Z0-9_-.]. -bool IsValidLocation(const string& location) { - if (location.empty() || location[0] != '/') { - return false; - } - const strings::CharSet charset("abcdefghijklmnopqrstuvwxyz" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "0123456789" - "_-./"); - for (const auto c : location) { - if (!charset.Test(c)) { - return false; - } - } - return true; -} -} // anonymous namespace - -Status GetLocationFromLocationMappingCmd(const string& cmd, - const string& host, - string* location) { - DCHECK(location); - vector<string> argv = strings::Split(cmd, " ", strings::SkipEmpty()); - if (argv.empty()) { - return Status::RuntimeError("invalid empty location mapping command"); - } - argv.push_back(host); - string stderr, location_temp; - Status s = Subprocess::Call(argv, /*stdin=*/"", &location_temp, &stderr); - if (!s.ok()) { - return Status::RuntimeError( - Substitute("failed to run location mapping command: $0", s.ToString()), - stderr); - } - StripWhiteSpace(&location_temp); - // Special case an empty location for a better error. - if (location_temp.empty()) { - return Status::RuntimeError( - "location mapping command returned invalid empty location"); - } - if (!IsValidLocation(location_temp)) { - return Status::RuntimeError( - "location mapping command returned invalid location", - location_temp); - } - *location = std::move(location_temp); - return Status::OK(); -} - Status TSDescriptor::RegisterNew(const NodeInstancePB& instance, const ServerRegistrationPB& registration, + LocationCache* location_cache, shared_ptr<TSDescriptor>* desc) { shared_ptr<TSDescriptor> ret(TSDescriptor::make_shared(instance.permanent_uuid())); - RETURN_NOT_OK(ret->Register(instance, registration)); + RETURN_NOT_OK(ret->Register(instance, registration, location_cache)); desc->swap(ret); return Status::OK(); } @@ -210,7 +156,8 @@ Status TSDescriptor::RegisterUnlocked(const NodeInstancePB& instance, } Status TSDescriptor::Register(const NodeInstancePB& instance, - const ServerRegistrationPB& registration) { + const ServerRegistrationPB& registration, + LocationCache* location_cache) { // Do basic registration work under the lock. { std::lock_guard<simple_spinlock> l(lock_); @@ -219,8 +166,7 @@ Status TSDescriptor::Register(const NodeInstancePB& instance, // Resolve the location outside the lock. This involves calling the location // mapping script. - const string& location_mapping_cmd = FLAGS_location_mapping_cmd; - if (!location_mapping_cmd.empty()) { + if (PREDICT_TRUE(location_cache != nullptr)) { // In some test scenarios the location is assigned per tablet server UUID. // That's the case when multiple (or even all) tablet servers have the same // IP address for their RPC endpoint. @@ -228,9 +174,7 @@ Status TSDescriptor::Register(const NodeInstancePB& instance, ? permanent_uuid() : registration_->rpc_addresses(0).host(); TRACE(Substitute("tablet server $0: assigning location", permanent_uuid())); string location; - Status s = GetLocationFromLocationMappingCmd(location_mapping_cmd, - cmd_arg, - &location); + const auto s = location_cache->GetLocation(cmd_arg, &location); TRACE(Substitute( "tablet server $0: assigned location '$1'", permanent_uuid(), location)); diff --git a/src/kudu/master/ts_descriptor.h b/src/kudu/master/ts_descriptor.h index 65724fe..76a5fa8 100644 --- a/src/kudu/master/ts_descriptor.h +++ b/src/kudu/master/ts_descriptor.h @@ -53,17 +53,7 @@ class TabletServerAdminServiceProxy; namespace master { -// Resolves 'host', which is the IP address or hostname of a tablet server or -// client, into a location using the command 'cmd'. The result will be stored -// in 'location', which must not be null. If there is an error running the -// command or the output is invalid, an error Status will be returned. -// TODO(wdberkeley): Refactor into a separate class and implement a caching -// policy. -// TODO(wdberkeley): Eventually we may want to get multiple locations at once -// by giving the script multiple arguments (like Hadoop). -Status GetLocationFromLocationMappingCmd(const std::string& cmd, - const std::string& host, - std::string* location); +class LocationCache; // Master-side view of a single tablet server. // @@ -73,6 +63,7 @@ class TSDescriptor : public enable_make_shared<TSDescriptor> { public: static Status RegisterNew(const NodeInstancePB& instance, const ServerRegistrationPB& registration, + LocationCache* location_cache, std::shared_ptr<TSDescriptor>* desc); virtual ~TSDescriptor() = default; @@ -89,7 +80,8 @@ class TSDescriptor : public enable_make_shared<TSDescriptor> { // Register this tablet server. Status Register(const NodeInstancePB& instance, - const ServerRegistrationPB& registration); + const ServerRegistrationPB& registration, + LocationCache* location_cache); const std::string &permanent_uuid() const { return permanent_uuid_; } int64_t latest_seqno() const; diff --git a/src/kudu/master/ts_manager.cc b/src/kudu/master/ts_manager.cc index 4e4bebf..6bd5939 100644 --- a/src/kudu/master/ts_manager.cc +++ b/src/kudu/master/ts_manager.cc @@ -49,7 +49,9 @@ using strings::Substitute; namespace kudu { namespace master { -TSManager::TSManager(const scoped_refptr<MetricEntity>& metric_entity) { +TSManager::TSManager(LocationCache* location_cache, + const scoped_refptr<MetricEntity>& metric_entity) + : location_cache_(location_cache) { METRIC_cluster_replica_skew.InstantiateFunctionGauge( metric_entity, Bind(&TSManager::ClusterSkew, Unretained(this))) @@ -93,14 +95,15 @@ Status TSManager::RegisterTS(const NodeInstancePB& instance, if (!ContainsKey(servers_by_id_, uuid)) { shared_ptr<TSDescriptor> new_desc; - RETURN_NOT_OK(TSDescriptor::RegisterNew(instance, registration, &new_desc)); + RETURN_NOT_OK(TSDescriptor::RegisterNew( + instance, registration, location_cache_, &new_desc)); InsertOrDie(&servers_by_id_, uuid, new_desc); LOG(INFO) << Substitute("Registered new tserver with Master: $0", new_desc->ToString()); desc->swap(new_desc); } else { shared_ptr<TSDescriptor> found(FindOrDie(servers_by_id_, uuid)); - RETURN_NOT_OK(found->Register(instance, registration)); + RETURN_NOT_OK(found->Register(instance, registration, location_cache_)); LOG(INFO) << Substitute("Re-registered known tserver with Master: $0", found->ToString()); desc->swap(found); diff --git a/src/kudu/master/ts_manager.h b/src/kudu/master/ts_manager.h index b6dc306..bf52a9c 100644 --- a/src/kudu/master/ts_manager.h +++ b/src/kudu/master/ts_manager.h @@ -35,6 +35,8 @@ class ServerRegistrationPB; namespace master { +class LocationCache; + // Tracks the servers that the master has heard from, along with their // last heartbeat, etc. // @@ -47,7 +49,11 @@ namespace master { // This class is thread-safe. class TSManager { public: - explicit TSManager(const scoped_refptr<MetricEntity>& metric_entity); + // 'location_cache' is a pointer to location mapping cache to use when + // registering tablet servers. The location cache should outlive the + // TSManager. 'metric_entity' is used to register metrics used by TSManager. + TSManager(LocationCache* location_cache, + const scoped_refptr<MetricEntity>& metric_entity); virtual ~TSManager(); // Lookup the tablet server descriptor for the given instance identifier. @@ -92,6 +98,8 @@ class TSManager { std::string, std::shared_ptr<TSDescriptor>> TSDescriptorMap; TSDescriptorMap servers_by_id_; + LocationCache* location_cache_; + DISALLOW_COPY_AND_ASSIGN(TSManager); }; diff --git a/src/kudu/tools/ksck_remote-test.cc b/src/kudu/tools/ksck_remote-test.cc index 1436155..6a42e0b 100644 --- a/src/kudu/tools/ksck_remote-test.cc +++ b/src/kudu/tools/ksck_remote-test.cc @@ -510,13 +510,19 @@ TEST_F(RemoteKsckTest, TestLeaderMasterDown) { } TEST_F(RemoteKsckTest, TestClusterWithLocation) { - // There is no location assigned for the existing three tablet servers. - // With the flag set, the newly added server will be assiged with location '/foo'. const string location_cmd_path = JoinPathSegments(GetTestExecutableDirectory(), "testdata/first_argument.sh"); const string location = "/foo"; FLAGS_location_mapping_cmd = strings::Substitute("$0 $1", location_cmd_path, location); + // After setting the --location_mapping_cmd flag it's necessary to restart + // the masters to pick up the new value. + for (auto idx = 0; idx < mini_cluster_->num_masters(); ++idx) { + auto* master = mini_cluster_->mini_master(idx); + master->Shutdown(); + ASSERT_OK(master->Start()); + } + ASSERT_OK(mini_cluster_->AddTabletServer()); ASSERT_EQ(4, mini_cluster_->num_tablet_servers()); @@ -530,29 +536,21 @@ TEST_F(RemoteKsckTest, TestClusterWithLocation) { ASSERT_OK(ksck_->PrintResults()); string err_string = err_stream_.str(); - // The existing tablet servers should have location '<none>' displayed. - for (int i = 0; i < 3; i++) { - auto *ts = mini_cluster_->mini_tablet_server(i); - ASSERT_STR_CONTAINS(err_string, strings::Substitute("$0 | $1 | HEALTHY | <none>", - ts->uuid(), - ts->bound_rpc_addr().ToString())); + // With the flag set and masters restarted, all tablet servers are assigned + // with location '/foo': both the existing ones and the newly added. + for (int idx = 0; idx < mini_cluster_->num_tablet_servers(); ++idx) { + auto *ts = mini_cluster_->mini_tablet_server(idx); + ASSERT_STR_CONTAINS(err_string, strings::Substitute("$0 | $1 | HEALTHY | $2", + ts->uuid(), + ts->bound_rpc_addr().ToString(), + location)); } - - // The newly added tablet server should have the assigned location displayed. - auto *ts = mini_cluster_->mini_tablet_server(3); - ASSERT_STR_CONTAINS(err_string, strings::Substitute("$0 | $1 | HEALTHY | $2", - ts->uuid(), - ts->bound_rpc_addr().ToString(), - location)); - ASSERT_STR_CONTAINS(err_string, "Tablet Server Location Summary\n" " Location | Count\n" "----------+---------\n"); ASSERT_STR_CONTAINS(err_string, - " <none> | 3\n"); - ASSERT_STR_CONTAINS(err_string, - " /foo | 1\n"); + " /foo | 4\n"); } } // namespace tools
