This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch branch-1.17.x in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 8af89ca5e2454600888873882ce543cdb0b7b3ab Author: Ádám Bakai <aba...@cloudera.com> AuthorDate: Wed Jan 17 15:43:59 2024 +0100 KUDU-3491 Destruct master before creating a new one ServerBase constructor runs MinidumpExceptionHandler constructor that calls RegisterMinidumpExceptionHandler(). This function increments the static atomic variable current_num_instances_. Then the ServerBase is destructed, a similar process happens and current_num_instances_ gets decremented. If current_num_instances_ is not zero before incrementing or not 1 before decrementing, then it is considered an error. This indicates that only one Server can run at any given time. But in case of multi-master config, the master server is replaced, and without the change it is possible that the second server's constructor precede first server's destructor. This change makes it sure that the destructor is executed before the second one's constructor. Change-Id: I3c1019d092bbf9e58f4fc33753a1218bc79735d3 Reviewed-on: http://gerrit.cloudera.org:8080/20913 Reviewed-by: Attila Bukor <abu...@apache.org> Reviewed-by: Mahesh Reddy <mre...@cloudera.com> Tested-by: Kudu Jenkins (cherry picked from commit 7562277fc6f68b0dcab593d56de03bb344a95b3e) Reviewed-on: http://gerrit.cloudera.org:8080/20917 Tested-by: Alexey Serbin <ale...@apache.org> Reviewed-by: Abhishek Chennaka <achenn...@cloudera.com> --- src/kudu/master/dynamic_multi_master-test.cc | 44 ++++++++++++++++++++++++++-- src/kudu/master/master_runner.cc | 1 + 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/kudu/master/dynamic_multi_master-test.cc b/src/kudu/master/dynamic_multi_master-test.cc index d605fe0d3..28bd92b78 100644 --- a/src/kudu/master/dynamic_multi_master-test.cc +++ b/src/kudu/master/dynamic_multi_master-test.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#include <kudu/gutil/strings/util.h> + #include <algorithm> #include <atomic> #include <cstdint> @@ -28,6 +30,7 @@ #include <string> #include <thread> #include <tuple> +#include <type_traits> #include <unordered_set> #include <utility> #include <vector> @@ -35,7 +38,6 @@ #include <gflags/gflags.h> #include <glog/logging.h> #include <gtest/gtest.h> -#include <kudu/gutil/strings/util.h> #include "kudu/client/client.h" #include "kudu/client/schema.h" @@ -1456,7 +1458,7 @@ struct MultiMasterClusterArgs { class AutoAddMasterTest : public KuduTest { public: - Status SetUpWithTestArgs(const MultiMasterClusterArgs& args) { + virtual Status SetUpWithTestArgs(const MultiMasterClusterArgs& args) { opts_.num_masters = args.orig_num_masters; opts_.enable_kerberos = args.is_secure; args_ = args; @@ -1477,6 +1479,17 @@ class AutoAddMasterTest : public KuduTest { unique_ptr<ExternalMiniCluster> cluster_; }; +class MinidumpTest : public AutoAddMasterTest { + public: + Status SetUpWithTestArgs(const MultiMasterClusterArgs& args) override { + // By default minidump is disabled for all ExternalMiniCluster instances. + // This made a bug undetected during testing, so a positive regression test + // was added where minidump is enabled. + opts_.extra_master_flags.emplace_back("--enable_minidumps=true"); + return AutoAddMasterTest::SetUpWithTestArgs(args); + } +}; + constexpr const int64_t kShortRetryIntervalSecs = 1; // Test that nothing goes wrong when starting up masters but the entire cluster @@ -1777,6 +1790,33 @@ TEST_F(AutoAddMasterTest, TestAddNewMaster) { NO_FATALS(cluster_->AssertNoCrashes()); } +// Regression test for KUDU-3491 +TEST_F(MinidumpTest, TestAddNewMasterMinidumpsEnabled) { + SKIP_IF_SLOW_NOT_ALLOWED(); + + // Let's get the current master addresses and add a new one to them + vector<HostPort> master_addrs = cluster_->master_rpc_addrs(); + unique_ptr<Socket> reserved_socket; + ASSERT_OK(MiniCluster::ReserveDaemonSocket( + MiniCluster::DaemonType::MASTER, master_addrs.size(), opts_.bind_mode, &reserved_socket)); + + Sockaddr addr; + ASSERT_OK(reserved_socket->GetSocketAddress(&addr)); + master_addrs.emplace_back(addr.host(), addr.port()); + + // Let's create the new master and start it to ensure it starts up okay. + scoped_refptr<ExternalMaster> peer; + auto idx = cluster_->master_rpc_addrs().size(); + ASSERT_OK(cluster_->CreateMaster(master_addrs, idx, &peer)); + ASSERT_OK(peer->Start()); + ASSERT_OK(peer->WaitForCatalogManager()); + auto expected_num_masters = ++idx; + ASSERT_EVENTUALLY([&] { + ASSERT_OK(VerifyVotersOnAllMasters(expected_num_masters, cluster_.get())); + }); + NO_FATALS(cluster_->AssertNoCrashes()); +} + class ParameterizedAutoAddMasterTest : public AutoAddMasterTest, public ::testing::WithParamInterface<tuple<int, bool>> { public: diff --git a/src/kudu/master/master_runner.cc b/src/kudu/master/master_runner.cc index 60a7cc68b..4861ffc60 100644 --- a/src/kudu/master/master_runner.cc +++ b/src/kudu/master/master_runner.cc @@ -455,6 +455,7 @@ Status RunMasterServer() { // If we succeeded, wipe the system catalog on this node and initiate a // copy from another node. RETURN_NOT_OK(ClearLocalSystemCatalogAndCopy(leader_hp)); + server.reset(); server.reset(new Master(opts)); RETURN_NOT_OK(server->Init()); RETURN_NOT_OK(server->Start());