hms precheck tool

This commit adds a 'kudu hms precheck' tool that is meant to be used
before enabling the HMS integration on an existing cluster. The tool
checks for tables whose names are not unique when compared with Hive's
case-insensitive identifier rules. These conflicting tables would cause
the master to fail to startup after enabling the Hive Metastore
integration.

Change-Id: Ia7b23cb802b6434eba7e38443d3361ef70e6543e
Reviewed-on: http://gerrit.cloudera.org:8080/11040
Reviewed-by: Alexey Serbin <aser...@cloudera.com>
Tested-by: Kudu Jenkins
Reviewed-by: Hao Hao <hao....@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/f8477267
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/f8477267
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/f8477267

Branch: refs/heads/master
Commit: f847726780498b1a0ef1b9a7d95a50b4f327b36d
Parents: d2afaec
Author: Dan Burkert <danburk...@apache.org>
Authored: Tue Jul 24 14:36:13 2018 -0700
Committer: Dan Burkert <danburk...@apache.org>
Committed: Wed Aug 1 19:47:03 2018 +0000

----------------------------------------------------------------------
 src/kudu/tools/kudu-tool-test.cc  | 79 +++++++++++++++++++++++++++++++--
 src/kudu/tools/tool_action_hms.cc | 80 +++++++++++++++++++++++++++++-----
 2 files changed, 144 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/f8477267/src/kudu/tools/kudu-tool-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 0a5743a..8293ef2 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -2486,7 +2486,7 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndAutomaticFixHmsMetadata) {
   vector<string> kudu_tables;
   kudu_client->ListTables(&kudu_tables);
   std::sort(kudu_tables.begin(), kudu_tables.end());
-  ASSERT_EQ(kudu_tables, vector<string>({
+  ASSERT_EQ(vector<string>({
     "default.control",
     "default.inconsistent_master_addrs",
     "default.inconsistent_name_hms",
@@ -2497,7 +2497,7 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndAutomaticFixHmsMetadata) {
     "default.legacy_managed",
     "default.uppercase",
     "my_db.table",
-  }));
+  }), kudu_tables);
 }
 
 // Test HMS inconsistencies that must be manually fixed.
@@ -2624,14 +2624,85 @@ TEST_P(ToolTestKerberosParameterized, 
TestCheckAndManualFixHmsMetadata) {
   vector<string> kudu_tables;
   kudu_client->ListTables(&kudu_tables);
   std::sort(kudu_tables.begin(), kudu_tables.end());
-  ASSERT_EQ(kudu_tables, vector<string>({
+  ASSERT_EQ(vector<string>({
     "default.conflicting_legacy_table",
     "default.duplicate_hms_tables",
     "default.hive_compatible_name",
     "default.non_conflicting_legacy_table",
     "default.with_database",
     "non_existent_database.table",
-  }));
+  }), kudu_tables);
+}
+
+TEST_F(ToolTest, TestHmsPrecheck) {
+  ExternalMiniClusterOptions opts;
+  opts.hms_mode = HmsMode::ENABLE_HIVE_METASTORE;
+  NO_FATALS(StartExternalMiniCluster(std::move(opts)));
+
+  string master_addr = cluster_->master()->bound_rpc_addr().ToString();
+
+  shared_ptr<KuduClient> client;
+  ASSERT_OK(cluster_->CreateClient(nullptr, &client));
+
+  // Create test tables.
+  for (const string& table_name : {
+      "a.b",
+      "foo.bar",
+      "FOO.bar",
+      "foo.BAR",
+      "fuzz",
+      "FUZZ",
+      "a.b!",
+      "A.B!",
+  }) {
+      ASSERT_OK(CreateKuduTable(client, table_name));
+  }
+
+  // Run the precheck tool. It should complain about the conflicting tables.
+  string out;
+  string err;
+  Status s = RunActionStdoutStderrString(Substitute("hms precheck $0", 
master_addr), &out, &err);
+  ASSERT_FALSE(s.ok());
+  ASSERT_STR_CONTAINS(err, "found tables in Kudu with case-conflicting names");
+  ASSERT_STR_CONTAINS(out, "foo.bar");
+  ASSERT_STR_CONTAINS(out, "FOO.bar");
+  ASSERT_STR_CONTAINS(out, "foo.BAR");
+
+  // It should not complain about tables which don't have conflicting names.
+  ASSERT_STR_NOT_CONTAINS(out, "a.b");
+
+  // It should not complain about tables which have Hive-incompatible names.
+  ASSERT_STR_NOT_CONTAINS(out, "fuzz");
+  ASSERT_STR_NOT_CONTAINS(out, "FUZZ");
+  ASSERT_STR_NOT_CONTAINS(out, "a.b!");
+  ASSERT_STR_NOT_CONTAINS(out, "A.B!");
+
+  // Rename the conflicting tables. Use the rename table tool to match the 
actual workflow.
+  NO_FATALS(RunActionStdoutNone(Substitute("table rename_table $0 FOO.bar 
foo.bar2", master_addr)));
+  NO_FATALS(RunActionStdoutNone(Substitute("table rename_table $0 foo.BAR 
foo.bar3", master_addr)));
+
+  // Precheck should now pass, and the cluster should upgrade succesfully.
+  NO_FATALS(RunActionStdoutNone(Substitute("hms precheck $0", master_addr)));
+
+  // Enable the HMS integration.
+  cluster_->ShutdownNodes(cluster::ClusterNodes::MASTERS_ONLY);
+  cluster_->EnableMetastoreIntegration();
+  ASSERT_OK(cluster_->Restart());
+
+  // Sanity-check the tables.
+  vector<string> tables;
+  ASSERT_OK(client->ListTables(&tables));
+  std::sort(tables.begin(), tables.end());
+  ASSERT_EQ(vector<string>({
+      "A.B!",
+      "FUZZ",
+      "a.b",
+      "a.b!",
+      "foo.bar",
+      "foo.bar2",
+      "foo.bar3",
+      "fuzz",
+  }), tables);
 }
 
 // This test is parameterized on the serialization mode and Kerberos.

http://git-wip-us.apache.org/repos/asf/kudu/blob/f8477267/src/kudu/tools/tool_action_hms.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_hms.cc 
b/src/kudu/tools/tool_action_hms.cc
index f89e677..7468690 100644
--- a/src/kudu/tools/tool_action_hms.cc
+++ b/src/kudu/tools/tool_action_hms.cc
@@ -654,6 +654,59 @@ Status FixHmsMetadata(const RunnerContext& context) {
   return Status::RuntimeError("Failed to fix some catalog metadata 
inconsistencies");
 }
 
+Status Precheck(const RunnerContext& context) {
+  const string& master_addrs = FindOrDie(context.required_args, 
kMasterAddressesArg);
+  shared_ptr<KuduClient> client;
+  RETURN_NOT_OK(KuduClientBuilder()
+      .default_rpc_timeout(MonoDelta::FromMilliseconds(FLAGS_timeout_ms))
+      .master_server_addrs(Split(master_addrs, ","))
+      .Build(&client));
+
+  vector<string> tables;
+  RETURN_NOT_OK(client->ListTables(&tables));
+
+  // Map of normalized table name to table names.
+  bool conflicting_tables = false;
+  unordered_map<string, vector<string>> normalized_tables;
+  for (string& table : tables) {
+    string normalized_table_name(table.data(), table.size());
+    Status s = hms::HmsCatalog::NormalizeTableName(&normalized_table_name);
+    if (!s.ok()) {
+      // This is not a Hive-compatible table name, so there can't be a conflict
+      // among normalized names (see CatalogManager::NormalizeTableName).
+      continue;
+    }
+    vector<string>& tables = normalized_tables[normalized_table_name];
+    tables.emplace_back(std::move(table));
+    conflicting_tables |= tables.size() > 1;
+  }
+
+  if (!conflicting_tables) {
+    return Status::OK();
+  }
+
+  DataTable data_table({ "conflicting table names" });
+  for (auto& table : normalized_tables) {
+    if (table.second.size() > 1) {
+      for (string& kudu_table : table.second) {
+        data_table.AddRow({ std::move(kudu_table) });
+      }
+    }
+  }
+
+  cout << "Found Kudu tables whose names are not unique according to Hive's 
case-insensitive"
+       << endl
+       << "identifier requirements. These conflicting tables will cause master 
startup to" << endl
+       << "fail when the Hive Metastore integration is enabled:";
+  RETURN_NOT_OK(data_table.PrintTo(cout));
+  cout << endl
+       << "Suggestion: rename the conflicting tables to case-insensitive 
unique names:" << endl
+       << "\t$ kudu table rename_table " << master_addrs
+       << " <conflicting_table_name> <new_table_name>" << endl;
+
+  return Status::IllegalState("found tables in Kudu with case-conflicting 
names");
+}
+
 unique_ptr<Mode> BuildHmsMode() {
   const string kHmsUrisDesc =
     "Address of the Hive Metastore instance(s). If not set, the configuration "
@@ -678,6 +731,14 @@ unique_ptr<Mode> BuildHmsMode() {
           .AddOptionalParameter("hive_metastore_uris", boost::none, 
kHmsUrisDesc)
           .Build();
 
+  unique_ptr<Action> hms_downgrade =
+      ActionBuilder("downgrade", &HmsDowngrade)
+          .Description("Downgrade the metadata to legacy format for Kudu and 
the Hive Metastores")
+          .AddRequiredParameter({ kMasterAddressesArg, kMasterAddressesArgDesc 
})
+          .AddOptionalParameter("hive_metastore_sasl_enabled", boost::none, 
kHmsSaslEnabledDesc)
+          .AddOptionalParameter("hive_metastore_uris", boost::none, 
kHmsUrisDesc)
+          .Build();
+
   unique_ptr<Action> hms_fix =
     ActionBuilder("fix", &FixHmsMetadata)
         .Description("Fix automatically-repairable metadata inconsistencies in 
the "
@@ -692,21 +753,18 @@ unique_ptr<Mode> BuildHmsMode() {
         .AddOptionalParameter("hive_metastore_uris", boost::none, kHmsUrisDesc)
         .Build();
 
-  // TODO(dan): add 'hms precheck' tool to check for overlapping normalized 
table names.
-
-  unique_ptr<Action> hms_downgrade =
-      ActionBuilder("downgrade", &HmsDowngrade)
-          .Description("Downgrade the metadata to legacy format for "
-                       "Kudu and the Hive Metastores")
-          .AddRequiredParameter({ kMasterAddressesArg, kMasterAddressesArgDesc 
})
-          .AddOptionalParameter("hive_metastore_sasl_enabled", boost::none, 
kHmsSaslEnabledDesc)
-          .AddOptionalParameter("hive_metastore_uris", boost::none, 
kHmsUrisDesc)
-          .Build();
+  unique_ptr<Action> hms_precheck =
+    ActionBuilder("precheck", &Precheck)
+        .Description("Check that the Kudu cluster is prepared to enable the 
Hive "
+                     "Metastore integration")
+        .AddRequiredParameter({ kMasterAddressesArg, kMasterAddressesArgDesc })
+        .Build();
 
   return ModeBuilder("hms").Description("Operate on remote Hive Metastores")
-                           .AddAction(std::move(hms_downgrade))
                            .AddAction(std::move(hms_check))
+                           .AddAction(std::move(hms_downgrade))
                            .AddAction(std::move(hms_fix))
+                           .AddAction(std::move(hms_precheck))
                            .Build();
 }
 

Reply via email to