This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 8af89ca5e2454600888873882ce543cdb0b7b3ab
Author: Ádám Bakai <aba...@cloudera.com>
AuthorDate: Wed Jan 17 15:43:59 2024 +0100

    KUDU-3491 Destruct master before creating a new one
    
    ServerBase constructor runs MinidumpExceptionHandler constructor that
    calls RegisterMinidumpExceptionHandler(). This function increments the
    static atomic variable current_num_instances_. Then the ServerBase is
    destructed, a similar process happens and current_num_instances_ gets
    decremented. If current_num_instances_ is not zero before incrementing
    or not 1 before decrementing, then it is considered an error. This
    indicates that only one Server can run at any given time. But in case of
    multi-master config, the master server is replaced, and without the
    change it is possible that the second server's constructor precede first
    server's destructor. This change makes it sure that the destructor is
    executed before the second one's constructor.
    
    Change-Id: I3c1019d092bbf9e58f4fc33753a1218bc79735d3
    Reviewed-on: http://gerrit.cloudera.org:8080/20913
    Reviewed-by: Attila Bukor <abu...@apache.org>
    Reviewed-by: Mahesh Reddy <mre...@cloudera.com>
    Tested-by: Kudu Jenkins
    (cherry picked from commit 7562277fc6f68b0dcab593d56de03bb344a95b3e)
    Reviewed-on: http://gerrit.cloudera.org:8080/20917
    Tested-by: Alexey Serbin <ale...@apache.org>
    Reviewed-by: Abhishek Chennaka <achenn...@cloudera.com>
---
 src/kudu/master/dynamic_multi_master-test.cc | 44 ++++++++++++++++++++++++++--
 src/kudu/master/master_runner.cc             |  1 +
 2 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/src/kudu/master/dynamic_multi_master-test.cc 
b/src/kudu/master/dynamic_multi_master-test.cc
index d605fe0d3..28bd92b78 100644
--- a/src/kudu/master/dynamic_multi_master-test.cc
+++ b/src/kudu/master/dynamic_multi_master-test.cc
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <kudu/gutil/strings/util.h>
+
 #include <algorithm>
 #include <atomic>
 #include <cstdint>
@@ -28,6 +30,7 @@
 #include <string>
 #include <thread>
 #include <tuple>
+#include <type_traits>
 #include <unordered_set>
 #include <utility>
 #include <vector>
@@ -35,7 +38,6 @@
 #include <gflags/gflags.h>
 #include <glog/logging.h>
 #include <gtest/gtest.h>
-#include <kudu/gutil/strings/util.h>
 
 #include "kudu/client/client.h"
 #include "kudu/client/schema.h"
@@ -1456,7 +1458,7 @@ struct MultiMasterClusterArgs {
 
 class AutoAddMasterTest : public KuduTest {
  public:
-  Status SetUpWithTestArgs(const MultiMasterClusterArgs& args) {
+  virtual Status SetUpWithTestArgs(const MultiMasterClusterArgs& args) {
     opts_.num_masters = args.orig_num_masters;
     opts_.enable_kerberos = args.is_secure;
     args_ = args;
@@ -1477,6 +1479,17 @@ class AutoAddMasterTest : public KuduTest {
   unique_ptr<ExternalMiniCluster> cluster_;
 };
 
+class MinidumpTest : public AutoAddMasterTest {
+ public:
+  Status SetUpWithTestArgs(const MultiMasterClusterArgs& args) override {
+    // By default minidump is disabled for all ExternalMiniCluster instances.
+    // This made a bug undetected during testing, so a positive regression test
+    // was added where minidump is enabled.
+    opts_.extra_master_flags.emplace_back("--enable_minidumps=true");
+    return AutoAddMasterTest::SetUpWithTestArgs(args);
+  }
+};
+
 constexpr const int64_t kShortRetryIntervalSecs = 1;
 
 // Test that nothing goes wrong when starting up masters but the entire cluster
@@ -1777,6 +1790,33 @@ TEST_F(AutoAddMasterTest, TestAddNewMaster) {
   NO_FATALS(cluster_->AssertNoCrashes());
 }
 
+// Regression test for KUDU-3491
+TEST_F(MinidumpTest, TestAddNewMasterMinidumpsEnabled) {
+  SKIP_IF_SLOW_NOT_ALLOWED();
+
+  // Let's get the current master addresses and add a new one to them
+  vector<HostPort> master_addrs = cluster_->master_rpc_addrs();
+  unique_ptr<Socket> reserved_socket;
+  ASSERT_OK(MiniCluster::ReserveDaemonSocket(
+      MiniCluster::DaemonType::MASTER, master_addrs.size(), opts_.bind_mode, 
&reserved_socket));
+
+  Sockaddr addr;
+  ASSERT_OK(reserved_socket->GetSocketAddress(&addr));
+  master_addrs.emplace_back(addr.host(), addr.port());
+
+  // Let's create the new master and start it to ensure it starts up okay.
+  scoped_refptr<ExternalMaster> peer;
+  auto idx = cluster_->master_rpc_addrs().size();
+  ASSERT_OK(cluster_->CreateMaster(master_addrs, idx, &peer));
+  ASSERT_OK(peer->Start());
+  ASSERT_OK(peer->WaitForCatalogManager());
+  auto expected_num_masters = ++idx;
+  ASSERT_EVENTUALLY([&] {
+    ASSERT_OK(VerifyVotersOnAllMasters(expected_num_masters, cluster_.get()));
+  });
+  NO_FATALS(cluster_->AssertNoCrashes());
+}
+
 class ParameterizedAutoAddMasterTest : public AutoAddMasterTest,
                                        public 
::testing::WithParamInterface<tuple<int, bool>> {
  public:
diff --git a/src/kudu/master/master_runner.cc b/src/kudu/master/master_runner.cc
index 60a7cc68b..4861ffc60 100644
--- a/src/kudu/master/master_runner.cc
+++ b/src/kudu/master/master_runner.cc
@@ -455,6 +455,7 @@ Status RunMasterServer() {
     // If we succeeded, wipe the system catalog on this node and initiate a
     // copy from another node.
     RETURN_NOT_OK(ClearLocalSystemCatalogAndCopy(leader_hp));
+    server.reset();
     server.reset(new Master(opts));
     RETURN_NOT_OK(server->Init());
     RETURN_NOT_OK(server->Start());

Reply via email to