hms-tool: lookup master addresses config from master The HMS check and fix tools currently use the master addresses configuration provided on the command line to validate and fix metadata in HMS table entries. This only works correctly if the administrator inputs the master addresses exactly as its configured on the masters. If the hostports are reordered or use a slightly different format which resolves to the same addresses, then metadata will be flagged as stale and rewritten unnecessarily. As an example, the administrator could pass 'localhost' as the master address if they ran it on the master node, but this wouldn't be appropriate to write into the HMS.
This commit changes the handling so that the tools use the master addresses returned from the ConnectToCluster mechanism already present in the client, which should match the master addresses config of the master exactly. Change-Id: If170cebe6e5d7fa05fbd7faf18755aa57bdfeeec Reviewed-on: http://gerrit.cloudera.org:8080/11083 Tested-by: Kudu Jenkins Reviewed-by: Dan Burkert <danburk...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b2c64289 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b2c64289 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b2c64289 Branch: refs/heads/master Commit: b2c6428909f468fa7dc106f601ee8c790060ac99 Parents: f847726 Author: Dan Burkert <danburk...@apache.org> Authored: Mon Jul 30 10:56:12 2018 -0700 Committer: Dan Burkert <danburk...@apache.org> Committed: Wed Aug 1 19:47:32 2018 +0000 ---------------------------------------------------------------------- src/kudu/client/client-internal.cc | 12 +++++++++++- src/kudu/client/client-internal.h | 8 +++++++- src/kudu/client/client.h | 24 +++++++++++++----------- src/kudu/tools/tool_action_common.cc | 4 ++++ src/kudu/tools/tool_action_common.h | 3 +++ src/kudu/tools/tool_action_hms.cc | 26 +++++++++++++++++--------- 6 files changed, 55 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/b2c64289/src/kudu/client/client-internal.cc ---------------------------------------------------------------------- diff --git a/src/kudu/client/client-internal.cc b/src/kudu/client/client-internal.cc index 10def55..335a23a 100644 --- a/src/kudu/client/client-internal.cc +++ b/src/kudu/client/client-internal.cc @@ -35,6 +35,7 @@ #include "kudu/client/master_rpc.h" #include "kudu/client/meta_cache.h" #include "kudu/client/schema.h" +#include "kudu/common/common.pb.h" #include "kudu/common/partition.h" #include "kudu/common/schema.h" #include "kudu/common/wire_protocol.h" @@ -46,6 +47,7 @@ #include "kudu/master/master.h" #include "kudu/master/master.pb.h" #include "kudu/master/master.proxy.h" +#include "kudu/rpc/connection.h" #include "kudu/rpc/messenger.h" #include "kudu/rpc/request_tracker.h" #include "kudu/rpc/rpc_controller.h" @@ -60,7 +62,6 @@ #include "kudu/util/net/sockaddr.h" #include "kudu/util/pb_util.h" #include "kudu/util/thread_restrictions.h" -#include "kudu/rpc/connection.h" using std::pair; using std::set; @@ -702,6 +703,10 @@ void KuduClient::Data::ConnectedToClusterCb( if (status.ok()) { leader_master_hostport_ = HostPort(leader_hostname, leader_addr.port()); + master_hostports_.clear(); + for (const auto& hostport : connect_response.master_addrs()) { + master_hostports_.emplace_back(HostPort(hostport.host(), hostport.port())); + } master_proxy_.reset(new MasterServiceProxy(messenger_, leader_addr, leader_hostname)); master_proxy_->set_user_credentials(user_credentials_); } @@ -822,6 +827,11 @@ HostPort KuduClient::Data::leader_master_hostport() const { return leader_master_hostport_; } +const vector<HostPort>& KuduClient::Data::master_hostports() const { + std::lock_guard<simple_spinlock> l(leader_master_lock_); + return master_hostports_; +} + shared_ptr<master::MasterServiceProxy> KuduClient::Data::master_proxy() const { std::lock_guard<simple_spinlock> l(leader_master_lock_); return master_proxy_; http://git-wip-us.apache.org/repos/asf/kudu/blob/b2c64289/src/kudu/client/client-internal.h ---------------------------------------------------------------------- diff --git a/src/kudu/client/client-internal.h b/src/kudu/client/client-internal.h index e77e3bf..467d5ec 100644 --- a/src/kudu/client/client-internal.h +++ b/src/kudu/client/client-internal.h @@ -191,6 +191,8 @@ class KuduClient::Data { HostPort leader_master_hostport() const; + const std::vector<HostPort>& master_hostports() const; + uint64_t GetLatestObservedTimestamp() const; void UpdateLatestObservedTimestamp(uint64_t timestamp); @@ -254,6 +256,10 @@ class KuduClient::Data { // ConnectToClusterAsync. HostPort leader_master_hostport_; + // The master RPC host ports as configured on the most recently connected to + // leader master in ConnectedToClusterCb. + std::vector<HostPort> master_hostports_; + // Proxy to the leader master. std::shared_ptr<master::MasterServiceProxy> master_proxy_; @@ -266,7 +272,7 @@ class KuduClient::Data { std::vector<StatusCallback> leader_master_callbacks_primary_creds_; // Protects 'leader_master_rpc_{any,primary}_creds_', - // 'leader_master_hostport_', and 'master_proxy_'. + // 'leader_master_hostport_', 'master_hostports_', and 'master_proxy_'. // // See: KuduClient::Data::ConnectToClusterAsync for a more // in-depth explanation of why this is needed and how it works. http://git-wip-us.apache.org/repos/asf/kudu/blob/b2c64289/src/kudu/client/client.h ---------------------------------------------------------------------- diff --git a/src/kudu/client/client.h b/src/kudu/client/client.h index cbf3257..cfe62eb 100644 --- a/src/kudu/client/client.h +++ b/src/kudu/client/client.h @@ -58,17 +58,18 @@ class PartitionSchema; class SecurityUnknownTskTest; namespace client { +class KuduClient; class KuduTableAlterer; } namespace tools { class LeaderMasterProxy; +std::string GetMasterAddresses(const client::KuduClient&); void SetAlterExternalCatalogs(client::KuduTableAlterer*, bool); } // namespace tools namespace client { -class KuduClient; class KuduDelete; class KuduInsert; class KuduLoggingCallback; @@ -538,26 +539,27 @@ class KUDU_EXPORT KuduClient : public sp::enable_shared_from_this<KuduClient> { private: class KUDU_NO_EXPORT Data; - friend class internal::Batcher; - friend class internal::GetTableSchemaRpc; - friend class internal::LookupRpc; - friend class internal::MetaCache; - friend class internal::RemoteTablet; - friend class internal::RemoteTabletServer; - friend class internal::WriteRpc; - friend class ConnectToClusterBaseTest; friend class ClientTest; + friend class ConnectToClusterBaseTest; friend class KuduClientBuilder; friend class KuduPartitionerBuilder; - friend class KuduScanner; friend class KuduScanToken; friend class KuduScanTokenBuilder; + friend class KuduScanner; friend class KuduSession; friend class KuduTable; friend class KuduTableAlterer; friend class KuduTableCreator; - friend class ::kudu::SecurityUnknownTskTest; + friend class internal::Batcher; + friend class internal::GetTableSchemaRpc; + friend class internal::LookupRpc; + friend class internal::MetaCache; + friend class internal::RemoteTablet; + friend class internal::RemoteTabletServer; + friend class internal::WriteRpc; + friend class kudu::SecurityUnknownTskTest; friend class tools::LeaderMasterProxy; + friend std::string tools::GetMasterAddresses(const client::KuduClient&); FRIEND_TEST(kudu::ClientStressTest, TestUniqueClientIds); FRIEND_TEST(ClientTest, TestGetSecurityInfoFromMaster); http://git-wip-us.apache.org/repos/asf/kudu/blob/b2c64289/src/kudu/tools/tool_action_common.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tools/tool_action_common.cc b/src/kudu/tools/tool_action_common.cc index 5b61aa2..db86b02 100644 --- a/src/kudu/tools/tool_action_common.cc +++ b/src/kudu/tools/tool_action_common.cc @@ -445,6 +445,10 @@ void SetAlterExternalCatalogs(client::KuduTableAlterer* alterer, bool alter_exte alterer->alter_external_catalogs(alter_external_catalogs); } +string GetMasterAddresses(const client::KuduClient& client) { + return HostPort::ToCommaSeparatedString(client.data_->master_hostports()); +} + Status PrintServerStatus(const string& address, uint16_t default_port) { ServerStatusPB status; RETURN_NOT_OK(GetServerStatus(address, default_port, &status)); http://git-wip-us.apache.org/repos/asf/kudu/blob/b2c64289/src/kudu/tools/tool_action_common.h ---------------------------------------------------------------------- diff --git a/src/kudu/tools/tool_action_common.h b/src/kudu/tools/tool_action_common.h index 8fc2d34..3043bd8 100644 --- a/src/kudu/tools/tool_action_common.h +++ b/src/kudu/tools/tool_action_common.h @@ -137,6 +137,9 @@ Status SetServerFlag(const std::string& address, uint16_t default_port, // Set the non-public 'alter_external_catalogs' option on a KuduTableAlterer. void SetAlterExternalCatalogs(client::KuduTableAlterer* alterer, bool alter_external_catalogs); +// Get the configured master addresses on the most recently connected to leader master. +std::string GetMasterAddresses(const client::KuduClient& client); + // A table of data to present to the user. // // Supports formatting based on the --format flag. http://git-wip-us.apache.org/repos/asf/kudu/blob/b2c64289/src/kudu/tools/tool_action_hms.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tools/tool_action_hms.cc b/src/kudu/tools/tool_action_hms.cc index 7468690..e709c82 100644 --- a/src/kudu/tools/tool_action_hms.cc +++ b/src/kudu/tools/tool_action_hms.cc @@ -102,20 +102,27 @@ Status RenameTableInKuduCatalog(KuduClient* kudu_client, Status Init(const RunnerContext& context, shared_ptr<KuduClient>* kudu_client, - unique_ptr<HmsCatalog>* hms_catalog) { + unique_ptr<HmsCatalog>* hms_catalog, + string* master_addrs) { const string& master_addrs_flag = FindOrDie(context.required_args, kMasterAddressesArg); - vector<string> master_addrs = Split(master_addrs_flag, ","); // Create a Kudu Client. RETURN_NOT_OK(KuduClientBuilder() .default_rpc_timeout(MonoDelta::FromMilliseconds(FLAGS_timeout_ms)) - .master_server_addrs(master_addrs) + .master_server_addrs(Split(master_addrs_flag, ",")) .Build(kudu_client)); + // Get the configured master addresses from the leader master. It's critical + // that the check and fix tools use the exact same master address + // configuration that the masters do, otherwise the HMS table entries will + // disagree on the master addresses property. + *master_addrs = GetMasterAddresses(**kudu_client); + if (FLAGS_hive_metastore_uris.empty()) { // Lookup the HMS URIs and SASL config from the master configuration. vector<GetFlagsResponsePB_Flag> flags; - RETURN_NOT_OK(GetServerFlags(master_addrs[0], master::Master::kDefaultPort, false, {}, &flags)); + RETURN_NOT_OK(GetServerFlags(vector<string>(Split(*master_addrs, ","))[0], + master::Master::kDefaultPort, false, {}, &flags)); auto hms_uris = std::find_if(flags.begin(), flags.end(), [] (const GetFlagsResponsePB_Flag& flag) { @@ -146,7 +153,8 @@ Status Init(const RunnerContext& context, Status HmsDowngrade(const RunnerContext& context) { shared_ptr<KuduClient> kudu_client; unique_ptr<HmsCatalog> hms_catalog; - Init(context, &kudu_client, &hms_catalog); + string master_addrs; + Init(context, &kudu_client, &hms_catalog, &master_addrs); // 1. Identify all Kudu tables in the HMS entries. vector<hive::Table> hms_tables; @@ -377,10 +385,10 @@ Status AnalyzeCatalogs(const string& master_addrs, Status CheckHmsMetadata(const RunnerContext& context) { // TODO(dan): check that the critical HMS configuration flags // (--hive_metastore_uris, --hive_metastore_sasl_enabled) match on all masters. - const string& master_addrs = FindOrDie(context.required_args, kMasterAddressesArg); shared_ptr<KuduClient> kudu_client; unique_ptr<HmsCatalog> hms_catalog; - RETURN_NOT_OK(Init(context, &kudu_client, &hms_catalog)); + string master_addrs; + RETURN_NOT_OK(Init(context, &kudu_client, &hms_catalog, &master_addrs)); CatalogReport report; RETURN_NOT_OK(AnalyzeCatalogs(master_addrs, hms_catalog.get(), kudu_client.get(), &report)); @@ -467,10 +475,10 @@ string TableIdent(const KuduTable& table) { // failing due to duplicate table being present are logged and execution // continues. Status FixHmsMetadata(const RunnerContext& context) { - const string& master_addrs = FindOrDie(context.required_args, kMasterAddressesArg); shared_ptr<KuduClient> kudu_client; unique_ptr<HmsCatalog> hms_catalog; - RETURN_NOT_OK(Init(context, &kudu_client, &hms_catalog)); + string master_addrs; + RETURN_NOT_OK(Init(context, &kudu_client, &hms_catalog, &master_addrs)); CatalogReport report; RETURN_NOT_OK(AnalyzeCatalogs(master_addrs, hms_catalog.get(), kudu_client.get(), &report));