Repository: kudu
Updated Branches:
  refs/heads/master 133ef7e9d -> 361ee99f0


HmsCatalog: fix bug in reconnection backoff logic

This fixes an accidentally shadowed Status variable. The effect of this
bug was that the HmsCatalog would not throttle HMS reconnect attempts.
The tests actually took advantage of this, I've had to update them to
try ops multiple times after an HMS restart.

No tests are included, but I've left a TODO about adding tests when
HmsCatalog stats are introduced.

Change-Id: I726283ec8332d16942c82c73492709de4323a43d
Reviewed-on: http://gerrit.cloudera.org:8080/10028
Tested-by: Kudu Jenkins
Reviewed-by: Hao Hao <hao....@cloudera.com>
Reviewed-by: Adar Dembo <a...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/361ee99f
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/361ee99f
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/361ee99f

Branch: refs/heads/master
Commit: 361ee99f07cc5fa5219c384539eac3e4e003be08
Parents: 133ef7e
Author: Dan Burkert <danburk...@apache.org>
Authored: Wed Apr 11 12:13:24 2018 -0700
Committer: Dan Burkert <danburk...@apache.org>
Committed: Thu Apr 12 22:06:40 2018 +0000

----------------------------------------------------------------------
 src/kudu/hms/hms_catalog-test.cc               | 10 +++++-
 src/kudu/hms/hms_catalog.cc                    |  2 +-
 src/kudu/integration-tests/master_hms-itest.cc | 37 ++++++++++++---------
 3 files changed, 31 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/361ee99f/src/kudu/hms/hms_catalog-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_catalog-test.cc b/src/kudu/hms/hms_catalog-test.cc
index af0244b..7326f4c 100644
--- a/src/kudu/hms/hms_catalog-test.cc
+++ b/src/kudu/hms/hms_catalog-test.cc
@@ -399,6 +399,10 @@ TEST_F(HmsCatalogTest, TestReconnect) {
   // Shutdown the HMS and try a few operations.
   ASSERT_OK(StopHms());
 
+  // TODO(dan): once we have HMS catalog stats, assert that repeated attempts
+  // while the HMS is unavailable results in a non-linear number of reconnect
+  // attempts.
+
   Status s = hms_catalog_->CreateTable(kTableId, "default.b", schema);
   EXPECT_TRUE(s.IsNetworkError()) << s.ToString();
 
@@ -407,7 +411,11 @@ TEST_F(HmsCatalogTest, TestReconnect) {
 
   // Start the HMS back up and ensure that the same operations succeed.
   ASSERT_OK(StartHms());
-  EXPECT_OK(hms_catalog_->CreateTable(kTableId, "default.d", schema));
+  ASSERT_EVENTUALLY([&] {
+    // HmsCatalog throttles reconnections, so it's necessary to wait out the 
backoff.
+    ASSERT_OK(hms_catalog_->CreateTable(kTableId, "default.d", schema));
+  });
+
   NO_FATALS(CheckTable(kHmsDatabase, "a", kTableId, schema));
   NO_FATALS(CheckTable(kHmsDatabase, "d", kTableId, schema));
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/361ee99f/src/kudu/hms/hms_catalog.cc
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_catalog.cc b/src/kudu/hms/hms_catalog.cc
index a8b70a2..49048c7 100644
--- a/src/kudu/hms/hms_catalog.cc
+++ b/src/kudu/hms/hms_catalog.cc
@@ -365,7 +365,7 @@ Status HmsCatalog::Reconnect() {
     reconnect_idx_ = (reconnect_idx_ + 1) % hms_addresses_.size();
 
     hms_client_ = HmsClient(address, options);
-    Status s = hms_client_.Start();
+    s = hms_client_.Start();
     if (s.ok()) {
       VLOG(1) << "Connected to Hive Metastore " << address.ToString();
       return Status::OK();

http://git-wip-us.apache.org/repos/asf/kudu/blob/361ee99f/src/kudu/integration-tests/master_hms-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/master_hms-itest.cc 
b/src/kudu/integration-tests/master_hms-itest.cc
index 979d855..a6428aa 100644
--- a/src/kudu/integration-tests/master_hms-itest.cc
+++ b/src/kudu/integration-tests/master_hms-itest.cc
@@ -39,6 +39,7 @@
 #include "kudu/util/net/net_util.h" // IWYU pragma: keep
 #include "kudu/util/status.h"
 #include "kudu/util/test_macros.h"
+#include "kudu/util/test_util.h"
 
 namespace kudu {
 
@@ -91,12 +92,6 @@ class MasterHmsTest : public ExternalMiniClusterITestBase {
     return Status::OK();
   }
 
-  Status RestartHms() {
-    RETURN_NOT_OK(StopHms());
-    RETURN_NOT_OK(StartHms());
-    return Status::OK();
-  }
-
   Status CreateDatabase(const string& database_name) {
     hive::Database db;
     db.name = database_name;
@@ -218,7 +213,10 @@ TEST_F(MasterHmsTest, TestCreateTable) {
 
   // Start the HMS and try again.
   ASSERT_OK(StartHms());
-  ASSERT_OK(CreateKuduTable(hms_database_name, "foo"));
+  ASSERT_EVENTUALLY([&] {
+    // HmsCatalog throttles reconnections, so it's necessary to wait out the 
backoff.
+    ASSERT_OK(CreateKuduTable(hms_database_name, "foo"));
+  });
   NO_FATALS(CheckTable(hms_database_name, "foo"));
 }
 
@@ -271,14 +269,16 @@ TEST_F(MasterHmsTest, TestRenameTable) {
 
   // Shutdown the HMS and try to rename the table.
   ASSERT_OK(StopHms());
-  table_alterer.reset(client_->NewTableAlterer("db.c"));
-  s = table_alterer->RenameTo("db.a")->Alter();
+  table_alterer.reset(client_->NewTableAlterer("db.c")->RenameTo("db.a"));
+  s = table_alterer->Alter();
   ASSERT_TRUE(s.IsNetworkError()) << s.ToString();
 
   // Start the HMS and rename the table back to the original name.  This is 
the happy path.
   ASSERT_OK(StartHms());
-  table_alterer.reset(client_->NewTableAlterer("db.c"));
-  ASSERT_OK(table_alterer->RenameTo("db.a")->Alter());
+  ASSERT_EVENTUALLY([&] {
+    // HmsCatalog throttles reconnections, so it's necessary to wait out the 
backoff.
+    ASSERT_OK(table_alterer->Alter());
+  });
   NO_FATALS(CheckTable("db", "a"));
   NO_FATALS(CheckTableDoesNotExist("db", "c"));
 
@@ -327,14 +327,16 @@ TEST_F(MasterHmsTest, TestAlterTable) {
 
   // Shutdown the HMS and try to alter the table.
   ASSERT_OK(StopHms());
-  table_alterer.reset(client_->NewTableAlterer(table_name));
-  Status s = table_alterer->DropColumn("int16_val")->Alter();
+  
table_alterer.reset(client_->NewTableAlterer(table_name)->DropColumn("int16_val"));
+  Status s = table_alterer->Alter();
   ASSERT_TRUE(s.IsNetworkError()) << s.ToString();
 
   // Start the HMS and try again.
   ASSERT_OK(StartHms());
-  table_alterer.reset(client_->NewTableAlterer(table_name));
-  ASSERT_OK(table_alterer->DropColumn("int16_val")->Alter());
+  ASSERT_EVENTUALLY([&] {
+    // HmsCatalog throttles reconnections, so it's necessary to wait out the 
backoff.
+    ASSERT_OK(table_alterer->Alter());
+  });
   NO_FATALS(CheckTable(hms_database_name, hms_table_name));
 
   // Drop the table from the HMS, and insert a non-Kudu table entry, then try
@@ -388,7 +390,10 @@ TEST_F(MasterHmsTest, TestDeleteTable) {
   ASSERT_TRUE(s.IsNetworkError()) << s.ToString();
   ASSERT_OK(StartHms());
   NO_FATALS(CheckTable(hms_database_name, hms_table_name));
-  ASSERT_OK(client_->DeleteTable(table_name));
+  ASSERT_EVENTUALLY([&] {
+    // HmsCatalog throttles reconnections, so it's necessary to wait out the 
backoff.
+    ASSERT_OK(client_->DeleteTable(table_name));
+  });
   NO_FATALS(CheckTableDoesNotExist(hms_database_name, hms_table_name));
 }
 } // namespace kudu

Reply via email to