Repository: kudu Updated Branches: refs/heads/master 133ef7e9d -> 361ee99f0
HmsCatalog: fix bug in reconnection backoff logic This fixes an accidentally shadowed Status variable. The effect of this bug was that the HmsCatalog would not throttle HMS reconnect attempts. The tests actually took advantage of this, I've had to update them to try ops multiple times after an HMS restart. No tests are included, but I've left a TODO about adding tests when HmsCatalog stats are introduced. Change-Id: I726283ec8332d16942c82c73492709de4323a43d Reviewed-on: http://gerrit.cloudera.org:8080/10028 Tested-by: Kudu Jenkins Reviewed-by: Hao Hao <hao....@cloudera.com> Reviewed-by: Adar Dembo <a...@cloudera.com> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/361ee99f Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/361ee99f Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/361ee99f Branch: refs/heads/master Commit: 361ee99f07cc5fa5219c384539eac3e4e003be08 Parents: 133ef7e Author: Dan Burkert <danburk...@apache.org> Authored: Wed Apr 11 12:13:24 2018 -0700 Committer: Dan Burkert <danburk...@apache.org> Committed: Thu Apr 12 22:06:40 2018 +0000 ---------------------------------------------------------------------- src/kudu/hms/hms_catalog-test.cc | 10 +++++- src/kudu/hms/hms_catalog.cc | 2 +- src/kudu/integration-tests/master_hms-itest.cc | 37 ++++++++++++--------- 3 files changed, 31 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/361ee99f/src/kudu/hms/hms_catalog-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/hms/hms_catalog-test.cc b/src/kudu/hms/hms_catalog-test.cc index af0244b..7326f4c 100644 --- a/src/kudu/hms/hms_catalog-test.cc +++ b/src/kudu/hms/hms_catalog-test.cc @@ -399,6 +399,10 @@ TEST_F(HmsCatalogTest, TestReconnect) { // Shutdown the HMS and try a few operations. ASSERT_OK(StopHms()); + // TODO(dan): once we have HMS catalog stats, assert that repeated attempts + // while the HMS is unavailable results in a non-linear number of reconnect + // attempts. + Status s = hms_catalog_->CreateTable(kTableId, "default.b", schema); EXPECT_TRUE(s.IsNetworkError()) << s.ToString(); @@ -407,7 +411,11 @@ TEST_F(HmsCatalogTest, TestReconnect) { // Start the HMS back up and ensure that the same operations succeed. ASSERT_OK(StartHms()); - EXPECT_OK(hms_catalog_->CreateTable(kTableId, "default.d", schema)); + ASSERT_EVENTUALLY([&] { + // HmsCatalog throttles reconnections, so it's necessary to wait out the backoff. + ASSERT_OK(hms_catalog_->CreateTable(kTableId, "default.d", schema)); + }); + NO_FATALS(CheckTable(kHmsDatabase, "a", kTableId, schema)); NO_FATALS(CheckTable(kHmsDatabase, "d", kTableId, schema)); http://git-wip-us.apache.org/repos/asf/kudu/blob/361ee99f/src/kudu/hms/hms_catalog.cc ---------------------------------------------------------------------- diff --git a/src/kudu/hms/hms_catalog.cc b/src/kudu/hms/hms_catalog.cc index a8b70a2..49048c7 100644 --- a/src/kudu/hms/hms_catalog.cc +++ b/src/kudu/hms/hms_catalog.cc @@ -365,7 +365,7 @@ Status HmsCatalog::Reconnect() { reconnect_idx_ = (reconnect_idx_ + 1) % hms_addresses_.size(); hms_client_ = HmsClient(address, options); - Status s = hms_client_.Start(); + s = hms_client_.Start(); if (s.ok()) { VLOG(1) << "Connected to Hive Metastore " << address.ToString(); return Status::OK(); http://git-wip-us.apache.org/repos/asf/kudu/blob/361ee99f/src/kudu/integration-tests/master_hms-itest.cc ---------------------------------------------------------------------- diff --git a/src/kudu/integration-tests/master_hms-itest.cc b/src/kudu/integration-tests/master_hms-itest.cc index 979d855..a6428aa 100644 --- a/src/kudu/integration-tests/master_hms-itest.cc +++ b/src/kudu/integration-tests/master_hms-itest.cc @@ -39,6 +39,7 @@ #include "kudu/util/net/net_util.h" // IWYU pragma: keep #include "kudu/util/status.h" #include "kudu/util/test_macros.h" +#include "kudu/util/test_util.h" namespace kudu { @@ -91,12 +92,6 @@ class MasterHmsTest : public ExternalMiniClusterITestBase { return Status::OK(); } - Status RestartHms() { - RETURN_NOT_OK(StopHms()); - RETURN_NOT_OK(StartHms()); - return Status::OK(); - } - Status CreateDatabase(const string& database_name) { hive::Database db; db.name = database_name; @@ -218,7 +213,10 @@ TEST_F(MasterHmsTest, TestCreateTable) { // Start the HMS and try again. ASSERT_OK(StartHms()); - ASSERT_OK(CreateKuduTable(hms_database_name, "foo")); + ASSERT_EVENTUALLY([&] { + // HmsCatalog throttles reconnections, so it's necessary to wait out the backoff. + ASSERT_OK(CreateKuduTable(hms_database_name, "foo")); + }); NO_FATALS(CheckTable(hms_database_name, "foo")); } @@ -271,14 +269,16 @@ TEST_F(MasterHmsTest, TestRenameTable) { // Shutdown the HMS and try to rename the table. ASSERT_OK(StopHms()); - table_alterer.reset(client_->NewTableAlterer("db.c")); - s = table_alterer->RenameTo("db.a")->Alter(); + table_alterer.reset(client_->NewTableAlterer("db.c")->RenameTo("db.a")); + s = table_alterer->Alter(); ASSERT_TRUE(s.IsNetworkError()) << s.ToString(); // Start the HMS and rename the table back to the original name. This is the happy path. ASSERT_OK(StartHms()); - table_alterer.reset(client_->NewTableAlterer("db.c")); - ASSERT_OK(table_alterer->RenameTo("db.a")->Alter()); + ASSERT_EVENTUALLY([&] { + // HmsCatalog throttles reconnections, so it's necessary to wait out the backoff. + ASSERT_OK(table_alterer->Alter()); + }); NO_FATALS(CheckTable("db", "a")); NO_FATALS(CheckTableDoesNotExist("db", "c")); @@ -327,14 +327,16 @@ TEST_F(MasterHmsTest, TestAlterTable) { // Shutdown the HMS and try to alter the table. ASSERT_OK(StopHms()); - table_alterer.reset(client_->NewTableAlterer(table_name)); - Status s = table_alterer->DropColumn("int16_val")->Alter(); + table_alterer.reset(client_->NewTableAlterer(table_name)->DropColumn("int16_val")); + Status s = table_alterer->Alter(); ASSERT_TRUE(s.IsNetworkError()) << s.ToString(); // Start the HMS and try again. ASSERT_OK(StartHms()); - table_alterer.reset(client_->NewTableAlterer(table_name)); - ASSERT_OK(table_alterer->DropColumn("int16_val")->Alter()); + ASSERT_EVENTUALLY([&] { + // HmsCatalog throttles reconnections, so it's necessary to wait out the backoff. + ASSERT_OK(table_alterer->Alter()); + }); NO_FATALS(CheckTable(hms_database_name, hms_table_name)); // Drop the table from the HMS, and insert a non-Kudu table entry, then try @@ -388,7 +390,10 @@ TEST_F(MasterHmsTest, TestDeleteTable) { ASSERT_TRUE(s.IsNetworkError()) << s.ToString(); ASSERT_OK(StartHms()); NO_FATALS(CheckTable(hms_database_name, hms_table_name)); - ASSERT_OK(client_->DeleteTable(table_name)); + ASSERT_EVENTUALLY([&] { + // HmsCatalog throttles reconnections, so it's necessary to wait out the backoff. + ASSERT_OK(client_->DeleteTable(table_name)); + }); NO_FATALS(CheckTableDoesNotExist(hms_database_name, hms_table_name)); } } // namespace kudu