This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit bd966e2b99a2b5f2a9752f61c8fe84a56d5d2307
Author: Grant Henke <[email protected]>
AuthorDate: Fri Sep 4 12:36:58 2020 -0500

    KUDU-2884: [hms] Improve master address matching
    
    This patch improves the master address matching in the
    `kudu hms fix` tool. Instead of expecting a literal match
    of the address strings, it expects the match to be functionally
    the same. This means that the addresses can be missing the
    port (using the default), duplicated, and re-ordered. The
    HMS will still consider them the same if they match the
    clusters masters functionally.
    
    Though the default port is considered when matching master
    addresses, it is not included in the automated hms tool tests
    given the tests use random ephemeral ports. Instead a test
    for MasterAddressesToSet and UnorderedHostPortSet equality
    was added.
    
    Change-Id: I00059ff1b5134f7e5d6ea8be4fdf0701aa56b0c7
    Reviewed-on: http://gerrit.cloudera.org:8080/16420
    Reviewed-by: Andrew Wong <[email protected]>
    Reviewed-by: Alexey Serbin <[email protected]>
    Tested-by: Kudu Jenkins
---
 src/kudu/tools/kudu-tool-test.cc     | 90 +++++++++++++++++++++++-------------
 src/kudu/tools/tool_action-test.cc   | 37 ++++++++++++++-
 src/kudu/tools/tool_action_common.cc | 21 +++++++++
 src/kudu/tools/tool_action_common.h  |  8 ++++
 src/kudu/tools/tool_action_hms.cc    | 12 ++++-
 5 files changed, 134 insertions(+), 34 deletions(-)

diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index cfc4595..b0802eb 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -3961,6 +3961,21 @@ TEST_P(ToolTestKerberosParameterized, TestHmsDowngrade) {
   NO_FATALS(RunActionStdoutNone(Substitute("hms check $0", master_addr)));
 }
 
+namespace {
+
+// Rewrites the specified HMS table, replacing the given parameters with the
+// given value.
+Status AlterHmsWithReplacedParam(HmsClient* hms_client,
+                                 const string& db, const string& table,
+                                 const string& param, const string& val) {
+  hive::Table updated_hms_table;
+  RETURN_NOT_OK(hms_client->GetTable(db, table, &updated_hms_table));
+  updated_hms_table.parameters[param] = val;
+  return hms_client->AlterTable(db, table, updated_hms_table);
+}
+
+} // anonymous namespace
+
 // Test HMS inconsistencies that can be automatically fixed.
 // Kerberos is enabled in order to test the tools work in secure clusters.
 TEST_P(ToolTestKerberosParameterized, TestCheckAndAutomaticFixHmsMetadata) {
@@ -3970,9 +3985,15 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndAutomaticFixHmsMetadata) {
   opts.hms_mode = HmsMode::DISABLE_HIVE_METASTORE;
   opts.enable_kerberos = EnableKerberos();
   opts.extra_master_flags.emplace_back("--allow_empty_owner=true");
+  opts.num_masters = 3;
   NO_FATALS(StartExternalMiniCluster(std::move(opts)));
 
-  string master_addr = cluster_->master()->bound_rpc_addr().ToString();
+  vector<string> master_addrs;
+  for (const auto& hp : cluster_->master_rpc_addrs()) {
+    master_addrs.emplace_back(hp.ToString());
+  }
+  const string master_addrs_str = JoinStrings(master_addrs, ",");
+
   thrift::ClientOptions hms_opts;
   hms_opts.enable_kerberos = EnableKerberos();
   hms_opts.service_principal = "hive";
@@ -3983,7 +4004,7 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndAutomaticFixHmsMetadata) {
 
   FLAGS_hive_metastore_uris = cluster_->hms()->uris();
   FLAGS_hive_metastore_sasl_enabled = EnableKerberos();
-  HmsCatalog hms_catalog(master_addr);
+  HmsCatalog hms_catalog(master_addrs_str);
   ASSERT_OK(hms_catalog.Start());
 
   shared_ptr<KuduClient> kudu_client;
@@ -4067,6 +4088,22 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndAutomaticFixHmsMetadata) {
         "orphan-hms-table-id", "default.orphan_hms_table", kUsername,
         SchemaBuilder().Build()));
 
+  // Test case: orphan table in the HMS with different, but functionally
+  // the same master addresses.
+  ASSERT_OK(hms_catalog.CreateTable(
+      "orphan-hms-masters-id", "default.orphan_hms_table_masters", kUsername,
+      SchemaBuilder().Build()));
+  // Reverse the address order and duplicate the addresses.
+  vector<string> modified_addrs;
+  for (const auto& hp : cluster_->master_rpc_addrs()) {
+    modified_addrs.emplace_back(hp.ToString());
+    modified_addrs.emplace_back(hp.ToString());
+  }
+  std::reverse(modified_addrs.begin(), modified_addrs.end());
+  LOG(INFO) << "Modified Masters: " << JoinStrings(modified_addrs, ",");
+  ASSERT_OK(AlterHmsWithReplacedParam(&hms_client, "default", 
"orphan_hms_table_masters",
+      HmsClient::kKuduMasterAddrsKey, JoinStrings(modified_addrs, ",")));
+
   // Test case: orphan external synchronized table in the HMS.
   ASSERT_OK(hms_catalog.CreateTable(
       "orphan-hms-table-id-external", "default.orphan_hms_table_external", 
kUsername,
@@ -4080,7 +4117,7 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndAutomaticFixHmsMetadata) {
   // Test case: orphan legacy table in the HMS.
   ASSERT_OK(CreateLegacyHmsTable(&hms_client, "default", 
"orphan_hms_table_legacy_managed",
         "impala::default.orphan_hms_table_legacy_managed",
-        master_addr, HmsClient::kManagedTable, kUsername));
+        master_addrs_str, HmsClient::kManagedTable, kUsername));
 
   // Test case: orphan table in Kudu.
   ASSERT_OK(CreateKuduTable(kudu_client, "default.kudu_orphan", 
static_cast<string>("")));
@@ -4090,14 +4127,14 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndAutomaticFixHmsMetadata) {
   ASSERT_OK(CreateKuduTable(kudu_client, "impala::default.legacy_managed", 
kUsername));
   ASSERT_OK(kudu_client->OpenTable("impala::default.legacy_managed", 
&legacy_managed));
   ASSERT_OK(CreateLegacyHmsTable(&hms_client, "default", "legacy_managed",
-      "impala::default.legacy_managed", master_addr, HmsClient::kManagedTable, 
kUsername));
+      "impala::default.legacy_managed", master_addrs_str, 
HmsClient::kManagedTable, kUsername));
 
   // Test case: Legacy external purge table.
   shared_ptr<KuduTable> legacy_purge;
   ASSERT_OK(CreateKuduTable(kudu_client, "impala::default.legacy_purge", 
kUsername));
   ASSERT_OK(kudu_client->OpenTable("impala::default.legacy_purge", 
&legacy_purge));
   ASSERT_OK(CreateLegacyHmsTable(&hms_client, "default", "legacy_purge",
-      "impala::default.legacy_purge", master_addr, HmsClient::kExternalTable, 
kUsername));
+      "impala::default.legacy_purge", master_addrs_str, 
HmsClient::kExternalTable, kUsername));
   hive::Table hms_legacy_purge;
   ASSERT_OK(hms_client.GetTable("default", "legacy_purge", &hms_legacy_purge));
   hms_legacy_purge.parameters[HmsClient::kExternalPurgeKey] = "true";
@@ -4106,7 +4143,7 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndAutomaticFixHmsMetadata) {
 
   // Test case: legacy external table (pointed at the legacy managed table).
   ASSERT_OK(CreateLegacyHmsTable(&hms_client, "default", "legacy_external",
-      "impala::default.legacy_managed", master_addr, 
HmsClient::kExternalTable, kUsername));
+      "impala::default.legacy_managed", master_addrs_str, 
HmsClient::kExternalTable, kUsername));
 
   // Test case: legacy managed table with no owner.
   shared_ptr<KuduTable> legacy_no_owner;
@@ -4114,7 +4151,8 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndAutomaticFixHmsMetadata) {
                             static_cast<string>("")));
   ASSERT_OK(kudu_client->OpenTable("impala::default.legacy_no_owner", 
&legacy_no_owner));
   ASSERT_OK(CreateLegacyHmsTable(&hms_client, "default", "legacy_no_owner",
-        "impala::default.legacy_no_owner", master_addr, 
HmsClient::kManagedTable, boost::none));
+        "impala::default.legacy_no_owner", master_addrs_str, 
HmsClient::kManagedTable,
+        boost::none));
 
   // Test case: legacy managed table with a Hive-incompatible name (no 
database).
   shared_ptr<KuduTable> legacy_hive_incompatible_name;
@@ -4122,7 +4160,7 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndAutomaticFixHmsMetadata) {
   ASSERT_OK(kudu_client->OpenTable("legacy_hive_incompatible_name",
         &legacy_hive_incompatible_name));
   ASSERT_OK(CreateLegacyHmsTable(&hms_client, "default", 
"legacy_hive_incompatible_name",
-        "legacy_hive_incompatible_name", master_addr,
+        "legacy_hive_incompatible_name", master_addrs_str,
         HmsClient::kManagedTable, kUsername));
 
   // Test case: Kudu table in non-default database.
@@ -4170,6 +4208,7 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndAutomaticFixHmsMetadata) {
     "default.no_owner_in_hms",
     "default.no_owner_in_kudu",
     "default.orphan_hms_table",
+    "default.orphan_hms_table_masters",
     "default.orphan_hms_table_external",
     "default.orphan_hms_table_legacy_managed",
     "default.kudu_orphan",
@@ -4197,8 +4236,8 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndAutomaticFixHmsMetadata) {
   auto check = [&] () {
     string out;
     string err;
-    Status s = RunActionStdoutStderrString(Substitute("hms check $0 $1", 
master_addr, hms_flags),
-                                           &out, &err);
+    Status s = RunActionStdoutStderrString(
+        Substitute("hms check $0 $1", master_addrs_str, hms_flags), &out, 
&err);
     SCOPED_TRACE(strings::CUnescapeOrDie(out));
     if (inconsistent_tables.empty()) {
       ASSERT_OK(s);
@@ -4220,16 +4259,18 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndAutomaticFixHmsMetadata) {
 
   // 'hms fix --dryrun should not change the output of 'hms check'.
   NO_FATALS(RunActionStdoutNone(
-        Substitute("hms fix $0 --dryrun --drop_orphan_hms_tables $1", 
master_addr, hms_flags)));
+        Substitute("hms fix $0 --dryrun --drop_orphan_hms_tables $1",
+            master_addrs_str, hms_flags)));
   NO_FATALS(check());
 
   // Drop orphan hms tables.
   NO_FATALS(RunActionStdoutNone(
         Substitute("hms fix $0 --drop_orphan_hms_tables 
--nocreate_missing_hms_tables "
                    "--noupgrade_hms_tables --nofix_inconsistent_tables $1",
-                   master_addr, hms_flags)));
+                   master_addrs_str, hms_flags)));
   make_consistent({
     "default.orphan_hms_table",
+    "default.orphan_hms_table_masters",
     "default.orphan_hms_table_external",
     "default.orphan_hms_table_legacy_managed",
   });
@@ -4238,7 +4279,7 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndAutomaticFixHmsMetadata) {
   // Create missing hms tables.
   NO_FATALS(RunActionStdoutNone(
         Substitute("hms fix $0 --noupgrade_hms_tables 
--nofix_inconsistent_tables $1",
-                   master_addr, hms_flags)));
+                   master_addrs_str, hms_flags)));
   make_consistent({
     "default.kudu_orphan",
     "my_db.table",
@@ -4247,7 +4288,7 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndAutomaticFixHmsMetadata) {
 
   // Upgrade legacy HMS tables.
   NO_FATALS(RunActionStdoutNone(
-        Substitute("hms fix $0 --nofix_inconsistent_tables $1", master_addr, 
hms_flags)));
+        Substitute("hms fix $0 --nofix_inconsistent_tables $1", 
master_addrs_str, hms_flags)));
   make_consistent({
     "default.legacy_managed",
     "default.legacy_purge",
@@ -4258,7 +4299,7 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndAutomaticFixHmsMetadata) {
   NO_FATALS(check());
 
   // Refresh stale HMS tables.
-  NO_FATALS(RunActionStdoutNone(Substitute("hms fix $0 $1", master_addr, 
hms_flags)));
+  NO_FATALS(RunActionStdoutNone(Substitute("hms fix $0 $1", master_addrs_str, 
hms_flags)));
   make_consistent({
     "default.UPPERCASE",
     "default.inconsistent_schema",
@@ -4287,11 +4328,11 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndAutomaticFixHmsMetadata) {
     "legacy_external",
     "legacy_hive_incompatible_name",
   }) {
-    NO_FATALS(ValidateHmsEntries(&hms_client, kudu_client, "default", table, 
master_addr));
+    NO_FATALS(ValidateHmsEntries(&hms_client, kudu_client, "default", table, 
master_addrs_str));
   }
 
   // Validate the tables in the other databases.
-  NO_FATALS(ValidateHmsEntries(&hms_client, kudu_client, "my_db", "table", 
master_addr));
+  NO_FATALS(ValidateHmsEntries(&hms_client, kudu_client, "my_db", "table", 
master_addrs_str));
 
   vector<string> kudu_tables;
   kudu_client->ListTables(&kudu_tables);
@@ -4464,21 +4505,6 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndManualFixHmsMetadata) {
   }), kudu_tables);
 }
 
-namespace {
-
-// Rewrites the specified HMS table, replacing the given parameters with the
-// given value.
-Status AlterHmsWithReplacedParam(HmsClient* hms_client,
-                                 const string& db, const string& table,
-                                 const string& param, const string& val) {
-  hive::Table updated_hms_table;
-  RETURN_NOT_OK(hms_client->GetTable(db, table, &updated_hms_table));
-  updated_hms_table.parameters[param] = val;
-  return hms_client->AlterTable(db, table, updated_hms_table);
-}
-
-} // anonymous namespace
-
 TEST_F(ToolTest, TestHmsIgnoresDifferentMasters) {
   ExternalMiniClusterOptions opts;
   opts.num_masters = 2;
diff --git a/src/kudu/tools/tool_action-test.cc 
b/src/kudu/tools/tool_action-test.cc
index b84c300..13ff2da 100644
--- a/src/kudu/tools/tool_action-test.cc
+++ b/src/kudu/tools/tool_action-test.cc
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include "kudu/tools/tool_action.h"
+
 #include <memory>
 #include <ostream>
 #include <string>
@@ -24,7 +26,8 @@
 #include <gflags/gflags.h>
 #include <gtest/gtest.h>
 
-#include "kudu/tools/tool_action.h"
+#include "kudu/tools/tool_action_common.h"
+#include "kudu/util/net/net_util.h"
 
 // gflags for optional action parameters
 DEFINE_bool(opt_bool, false, "obd");
@@ -115,5 +118,37 @@ TEST(ToolActionTest, TestModeBuildHelpXML) {
   ASSERT_EQ(expected_xml, xml);
 }
 
+TEST(ToolActionTest, TestMasterAddressesToSet) {
+  // A standard master address string ordered and including the default port.
+  string master_addrs_std = "host-1:7051,host-2:7051,host-3:7051";
+  UnorderedHostPortSet std_set;
+  MasterAddressesToSet(master_addrs_std, &std_set);
+  ASSERT_EQ(3, std_set.size());
+
+  // A messy master address string that is unordered, has no port, and 
includes duplicates.
+  string master_addrs_messy = "host-3,host-1,host-2,host-1:7051,host-2";
+  UnorderedHostPortSet messy_set;
+  MasterAddressesToSet(master_addrs_messy, &messy_set);
+  ASSERT_EQ(3, messy_set.size());
+
+  ASSERT_EQ(std_set, messy_set);
+
+  // A master address string that matches all but one port.
+  string master_addrs_bad_port = "host-1:7051,host-2:7050,host-3:7051";
+  UnorderedHostPortSet bad_port_set;
+  MasterAddressesToSet(master_addrs_bad_port, &bad_port_set);
+  ASSERT_EQ(3, bad_port_set.size());
+
+  ASSERT_NE(std_set, bad_port_set);
+
+  // A master address string that matches all but one host.
+  string master_addrs_bad_host = "host-1:7051,host-21:7051,host-3:7051";
+  UnorderedHostPortSet bad_host_set;
+  MasterAddressesToSet(master_addrs_bad_host, &bad_host_set);
+  ASSERT_EQ(3, bad_host_set.size());
+
+  ASSERT_NE(std_set, bad_host_set);
+}
+
 } // namespace tools
 } // namespace kudu
diff --git a/src/kudu/tools/tool_action_common.cc 
b/src/kudu/tools/tool_action_common.cc
index 4663ea8..6086508 100644
--- a/src/kudu/tools/tool_action_common.cc
+++ b/src/kudu/tools/tool_action_common.cc
@@ -28,6 +28,7 @@
 #include <numeric>
 #include <stack>
 #include <string>
+#include <type_traits>
 #include <unordered_map>
 #include <utility>
 #include <vector>
@@ -59,6 +60,7 @@
 #include "kudu/gutil/strings/stringpiece.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/gutil/strings/util.h"
+#include "kudu/master/master.h"
 #include "kudu/master/master.proxy.h" // IWYU pragma: keep
 #include "kudu/rpc/messenger.h"
 #include "kudu/rpc/response_callback.h"
@@ -566,6 +568,25 @@ Status ParseMasterAddresses(
   return ParseMasterAddresses(context, kMasterAddressesArg, master_addresses);
 }
 
+Status MasterAddressesToSet(
+    const string& master_addresses_arg,
+    UnorderedHostPortSet* res) {
+  res->clear();
+  vector<HostPort> hp_vector;
+  RETURN_NOT_OK(HostPort::ParseStrings(master_addresses_arg, 
master::Master::kDefaultPort,
+      &hp_vector));
+  *res = UnorderedHostPortSet(hp_vector.begin(), hp_vector.end());
+
+  // If we deduplicated some masters addresses, log something about it.
+  if (res->size() < hp_vector.size()) {
+    vector<HostPort> addr_list(res->begin(), res->end());
+    LOG(INFO) << "deduplicated master addresses: "
+              << HostPort::ToCommaSeparatedString(addr_list);
+  }
+
+  return Status::OK();
+}
+
 Status PrintServerStatus(const string& address, uint16_t default_port) {
   ServerStatusPB status;
   RETURN_NOT_OK(GetServerStatus(address, default_port, &status));
diff --git a/src/kudu/tools/tool_action_common.h 
b/src/kudu/tools/tool_action_common.h
index 9b7a762..2783af0 100644
--- a/src/kudu/tools/tool_action_common.h
+++ b/src/kudu/tools/tool_action_common.h
@@ -27,6 +27,7 @@
 #include "kudu/gutil/port.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/rpc/response_callback.h"
+#include "kudu/util/net/net_util.h"
 #include "kudu/util/status.h"
 
 namespace kudu {
@@ -187,6 +188,13 @@ Status ParseMasterAddresses(
     const RunnerContext& context,
     std::vector<std::string>* master_addresses);
 
+// Parses a comma separated list of "host:port" pairs into an unordered set of
+// HostPort objects. If no port is specified for an entry in the comma 
separated list,
+// the default master port is used for that entry's pair.
+Status MasterAddressesToSet(
+    const std::string& master_addresses_arg,
+    kudu::UnorderedHostPortSet* res);
+
 // A table of data to present to the user.
 //
 // Supports formatting based on the --format flag.
diff --git a/src/kudu/tools/tool_action_hms.cc 
b/src/kudu/tools/tool_action_hms.cc
index b66a00d..883ceec 100644
--- a/src/kudu/tools/tool_action_hms.cc
+++ b/src/kudu/tools/tool_action_hms.cc
@@ -25,6 +25,7 @@
 #include <string>
 #include <type_traits>
 #include <unordered_map>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 
@@ -47,6 +48,7 @@
 #include "kudu/tools/tool_action.h"
 #include "kudu/tools/tool_action_common.h"
 #include "kudu/util/monotime.h"
+#include "kudu/util/net/net_util.h"
 #include "kudu/util/slice.h"
 #include "kudu/util/status.h"
 
@@ -614,7 +616,15 @@ Status FixHmsMetadata(const RunnerContext& context) {
     for (hive::Table& hms_table : report.orphan_hms_tables) {
       string table_name = Substitute("$0.$1", hms_table.dbName, 
hms_table.tableName);
       const string& master_addrs_param = 
hms_table.parameters[HmsClient::kKuduMasterAddrsKey];
-      if (master_addrs_param != master_addrs && !FLAGS_force) {
+
+      // Normalize the master addresses to allow for an equality check that 
ignores
+      // missing default ports, duplicate addresses, and address order.
+      UnorderedHostPortSet param_set;
+      MasterAddressesToSet(master_addrs_param, &param_set);
+      UnorderedHostPortSet cluster_set;
+      MasterAddressesToSet(master_addrs, &cluster_set);
+
+      if (param_set != cluster_set && !FLAGS_force) {
         LOG(INFO) << "Skipping drop of orphan HMS table " << table_name
                   << " with master addresses parameter " << master_addrs_param
                   << " because it does not match the --" << 
kMasterAddressesArg << " argument"

Reply via email to