This is an automated email from the ASF dual-hosted git repository.

bankim pushed a commit to branch branch-1.15.x
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/branch-1.15.x by this push:
     new 863b337  [master] KUDU-2181 Unhide master change config flag & other 
improvements
863b337 is described below

commit 863b3374000795256b0b73d248f60010dc51b78f
Author: Bankim Bhavsar <[email protected]>
AuthorDate: Tue May 4 15:50:59 2021 -0700

    [master] KUDU-2181 Unhide master change config flag & other improvements
    
    This change:
    - Unhides and turns the master Raft change config flag ON by default
    - Updates the description of flags that are not passed to the new
      master
    - Uses sync logging on bringing up the new master to help
      debug any issues in startup of the new master
    - Updates the wait timeout while bringing up a new master to account
      for the higher ntp wait timeout. This timeout is also used for
      checking whether the new master's system catalog has caught up
      from WAL. So this'll bump up timeout for that case as well. Thought
      about introducing separate wait timeout for that case but felt like
      exposing too many knobs and user shouldn't care whether time
      is being taken for master bringup or catchup from WAL.
    
    Change-Id: Id1fc1dc5985601158ae58d5d190a60c1e542ea1d
    Reviewed-on: http://gerrit.cloudera.org:8080/17401
    Reviewed-by: Alexey Serbin <[email protected]>
    Tested-by: Bankim Bhavsar <[email protected]>
    (cherry picked from commit c5648cf189e0413420a553ad5f51aa8252a1fd6f)
    Reviewed-on: http://gerrit.cloudera.org:8080/17450
    Reviewed-by: Andrew Wong <[email protected]>
---
 src/kudu/master/dynamic_multi_master-test.cc | 31 +++++++++++++---------------
 src/kudu/master/master_service.cc            |  3 +--
 src/kudu/tools/tool_action_master.cc         | 16 ++++++++++----
 3 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/src/kudu/master/dynamic_multi_master-test.cc 
b/src/kudu/master/dynamic_multi_master-test.cc
index 9c3ac29..e473a93 100644
--- a/src/kudu/master/dynamic_multi_master-test.cc
+++ b/src/kudu/master/dynamic_multi_master-test.cc
@@ -138,7 +138,7 @@ class DynamicMultiMasterTest : public KuduTest {
     reserved_hp_ = HostPort(reserved_addr_);
   }
 
-  void StartCluster(const vector<string>& extra_master_flags,
+  void StartCluster(const vector<string>& extra_master_flags = {},
                     bool supply_single_master_addr = true) {
     opts_.num_masters = orig_num_masters_;
     opts_.supply_single_master_addr = supply_single_master_addr;
@@ -158,8 +158,7 @@ class DynamicMultiMasterTest : public KuduTest {
                                       const vector<string>& extra_flags = {}) {
     // Using low values of log flush threshold and segment size to trigger GC 
of the
     // sys catalog WAL
-    vector<string> flags = {"--master_support_change_config", 
"--flush_threshold_secs=0",
-                            "--log_segment_size_mb=1"};
+    vector<string> flags = {"--flush_threshold_secs=0", 
"--log_segment_size_mb=1"};
     flags.insert(flags.end(), extra_flags.begin(), extra_flags.end());
     NO_FATALS(StartCluster(flags));
 
@@ -393,7 +392,7 @@ class DynamicMultiMasterTest : public KuduTest {
   // Returns generic RuntimeError() on failure with the actual error in the 
optional 'err'
   // output parameter.
   Status AddMasterToClusterUsingCLITool(const ExternalDaemonOptions& opts, 
string* err = nullptr,
-                                        EnvVars env_vars = {}, int wait_secs = 
0,
+                                        EnvVars env_vars = {}, int wait_secs = 
10,
                                         const string& kudu_binary = "") {
     auto hps = cluster_->master_rpc_addrs();
     vector<string> addresses;
@@ -760,7 +759,7 @@ INSTANTIATE_TEST_SUITE_P(, ParameterizedAddMasterTest,
 TEST_P(ParameterizedAddMasterTest, TestAddMasterCatchupFromWAL) {
   SKIP_IF_SLOW_NOT_ALLOWED();
 
-  NO_FATALS(StartCluster({"--master_support_change_config"}));
+  NO_FATALS(StartCluster());
 
   // Verify that masters are running as VOTERs and collect their addresses to 
be used
   // for starting the new master.
@@ -871,8 +870,7 @@ INSTANTIATE_TEST_SUITE_P(, ParameterizedRemoveMasterTest,
 
 // Tests removing a non-leader master from the cluster.
 TEST_P(ParameterizedRemoveMasterTest, TestRemoveMaster) {
-  NO_FATALS(StartCluster({"--master_support_change_config",
-                          // Keeping RPC timeouts short to quickly detect 
downed servers.
+  NO_FATALS(StartCluster({// Keeping RPC timeouts short to quickly detect 
downed servers.
                           // This will put the health status into an UNKNOWN 
state until the point
                           // where they are considered FAILED.
                           "--consensus_rpc_timeout_ms=2000",
@@ -988,8 +986,7 @@ INSTANTIATE_TEST_SUITE_P(, ParameterizedRecoverMasterTest,
 TEST_P(ParameterizedRecoverMasterTest, TestRecoverDeadMasterCatchupfromWAL) {
   SKIP_IF_SLOW_NOT_ALLOWED();
 
-  NO_FATALS(StartCluster({"--master_support_change_config",
-                          // Keeping RPC timeouts short to quickly detect 
downed servers.
+  NO_FATALS(StartCluster({// Keeping RPC timeouts short to quickly detect 
downed servers.
                           // This will put the health status into an UNKNOWN 
state until the point
                           // where they are considered FAILED.
                           "--consensus_rpc_timeout_ms=2000",
@@ -1061,7 +1058,7 @@ TEST_P(ParameterizedRecoverMasterTest, 
TestRecoverDeadMasterSysCatalogCopy) {
 // expected to fail due to invalid Raft config.
 TEST_F(DynamicMultiMasterTest, TestAddMasterWithNoLastKnownAddr) {
   NO_FATALS(SetUpWithNumMasters(1));
-  NO_FATALS(StartCluster({"--master_support_change_config"}, false/* 
supply_single_master_addr */));
+  NO_FATALS(StartCluster({}, false/* supply_single_master_addr */));
 
   // Verify that existing masters are running as VOTERs.
   NO_FATALS(VerifyVoterMasters(orig_num_masters_));
@@ -1139,7 +1136,7 @@ TEST_F(DynamicMultiMasterTest, 
TestRemoveMasterFeatureFlagNotSpecified) {
 // Test that attempts to request a non-leader master to add a new master.
 TEST_F(DynamicMultiMasterTest, TestAddMasterToNonLeader) {
   NO_FATALS(SetUpWithNumMasters(2));
-  NO_FATALS(StartCluster({"--master_support_change_config"}));
+  NO_FATALS(StartCluster());
 
   // Verify that existing masters are running as VOTERs and collect their 
addresses to be used
   // for starting the new master.
@@ -1178,7 +1175,7 @@ TEST_F(DynamicMultiMasterTest, TestAddMasterToNonLeader) {
 // Test that attempts to request a non-leader master to remove a master.
 TEST_F(DynamicMultiMasterTest, TestRemoveMasterToNonLeader) {
   NO_FATALS(SetUpWithNumMasters(2));
-  NO_FATALS(StartCluster({"--master_support_change_config"}));
+  NO_FATALS(StartCluster());
 
   // Verify that existing masters are running as VOTERs and collect their 
addresses to be used
   // for starting the new master.
@@ -1214,7 +1211,7 @@ TEST_F(DynamicMultiMasterTest, 
TestRemoveMasterToNonLeader) {
 // address.
 TEST_F(DynamicMultiMasterTest, TestAddMasterMissingAndIncorrectAddress) {
   NO_FATALS(SetUpWithNumMasters(1));
-  NO_FATALS(StartCluster({"--master_support_change_config"}));
+  NO_FATALS(StartCluster());
 
   // Verify that existing masters are running as VOTERs.
   NO_FATALS(VerifyVoterMasters(orig_num_masters_));
@@ -1248,7 +1245,7 @@ TEST_F(DynamicMultiMasterTest, 
TestAddMasterMissingAndIncorrectAddress) {
 // hostname.
 TEST_F(DynamicMultiMasterTest, TestRemoveMasterMissingAndIncorrectHostname) {
   NO_FATALS(SetUpWithNumMasters(2));
-  NO_FATALS(StartCluster({"--master_support_change_config"}));
+  NO_FATALS(StartCluster());
 
   // Verify that existing masters are running as VOTERs.
   vector<HostPort> master_hps;
@@ -1278,7 +1275,7 @@ TEST_F(DynamicMultiMasterTest, 
TestRemoveMasterMissingAndIncorrectHostname) {
 // Test that attempts to remove a master with mismatching hostname and uuid.
 TEST_F(DynamicMultiMasterTest, TestRemoveMasterMismatchHostnameAndUuid) {
   NO_FATALS(SetUpWithNumMasters(2));
-  NO_FATALS(StartCluster({"--master_support_change_config"}));
+  NO_FATALS(StartCluster());
 
   // Verify that existing masters are running as VOTERs.
   vector<HostPort> master_hps;
@@ -1309,7 +1306,7 @@ TEST_F(DynamicMultiMasterTest, 
TestRemoveMasterMismatchHostnameAndUuid) {
 // Test that attempts to add a master with non-existent kudu executable path.
 TEST_F(DynamicMultiMasterTest, TestAddMasterIncorrectKuduBinary) {
   NO_FATALS(SetUpWithNumMasters(1));
-  NO_FATALS(StartCluster({ "--master_support_change_config" }));
+  NO_FATALS(StartCluster());
 
   // Verify that existing masters are running as VOTERs.
   NO_FATALS(VerifyVoterMasters(orig_num_masters_));
@@ -1341,7 +1338,7 @@ INSTANTIATE_TEST_SUITE_P(, 
ParameterizedRemoveLeaderMasterTest,
                          ::testing::Values(1, 2));
 
 TEST_P(ParameterizedRemoveLeaderMasterTest, TestRemoveLeaderMaster) {
-  NO_FATALS(StartCluster({"--master_support_change_config"}));
+  NO_FATALS(StartCluster());
 
   // Verify that existing masters are running as VOTERs and collect their 
addresses to be used
   // for starting the new master.
diff --git a/src/kudu/master/master_service.cc 
b/src/kudu/master/master_service.cc
index aa83654..8993dc0 100644
--- a/src/kudu/master/master_service.cc
+++ b/src/kudu/master/master_service.cc
@@ -95,9 +95,8 @@ DEFINE_bool(master_support_authz_tokens, true,
             "testing version compatibility in the client.");
 TAG_FLAG(master_support_authz_tokens, hidden);
 
-DEFINE_bool(master_support_change_config, false,
+DEFINE_bool(master_support_change_config, true,
             "Whether the master supports adding/removing master servers 
dynamically.");
-TAG_FLAG(master_support_change_config, hidden);
 TAG_FLAG(master_support_change_config, advanced);
 TAG_FLAG(master_support_change_config, runtime);
 
diff --git a/src/kudu/tools/tool_action_master.cc 
b/src/kudu/tools/tool_action_master.cc
index 3a49e25..9cf73d0 100644
--- a/src/kudu/tools/tool_action_master.cc
+++ b/src/kudu/tools/tool_action_master.cc
@@ -73,11 +73,17 @@ DECLARE_string(fs_data_dirs);
 
 DEFINE_string(master_uuid, "", "Permanent UUID of the master. Only needed to 
disambiguate in case "
                                "of multiple masters with same RPC address");
-DEFINE_int64(wait_secs, 8,
+
+// For catching up system catalog of a new master from WAL, we expect 8-10 
secs.
+// However bringing up a new master can take longer due to 
--ntp_initial_sync_wait_secs (60 secs)
+// hence the wait secs is set higher.
+DEFINE_int64(wait_secs, 64,
              "Timeout in seconds to wait while retrying operations like 
bringing up new master, "
-             "running ksck, waiting for the new master to be promoted as 
VOTER, etc.");
+             "running ksck, waiting for the new master to be promoted as 
VOTER, etc. This flag "
+             "is not passed to the new master.");
 DEFINE_string(kudu_abs_path, "", "Absolute file path of the 'kudu' executable 
used to bring up "
-                                 "new master and other workflow steps.");
+                                 "new master and other workflow steps. This 
flag is not passed "
+                                 "to the new master.");
 
 using kudu::master::AddMasterRequestPB;
 using kudu::master::AddMasterResponsePB;
@@ -170,7 +176,9 @@ Status BringUpNewMaster(const string& kudu_abs_path,
 
   flags.emplace_back("--master_addresses=" + JoinStrings(master_addresses, 
","));
   flags.emplace_back("--master_address_add_new_master=" + hp.ToString());
-  flags.emplace_back("--master_support_change_config");
+  // In case of an error in bringing up the master we want to ensure the last 
log lines
+  // are emitted to help debug the issue. Hence don't use async logging.
+  flags.emplace_back("--log_async=false");
   vector<string> argv = { kudu_abs_path, "master", "run" };
   argv.insert(argv.end(), std::make_move_iterator(flags.begin()),
               std::make_move_iterator(flags.end()));

Reply via email to