This is an automated email from the ASF dual-hosted git repository.
abukor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 7562277fc KUDU-3491 Destruct master before creating a new one
7562277fc is described below
commit 7562277fc6f68b0dcab593d56de03bb344a95b3e
Author: Ádám Bakai <[email protected]>
AuthorDate: Wed Jan 17 15:43:59 2024 +0100
KUDU-3491 Destruct master before creating a new one
The ServerBase constructor runs the MinidumpExceptionHandler constructor,
which calls RegisterMinidumpExceptionHandler(). This function increments
the static atomic variable current_num_instances_. When the ServerBase is
destructed, a similar process happens and current_num_instances_ gets
decremented. If current_num_instances_ is not zero before incrementing
or not 1 before decrementing, it is considered an error. This means
that only one server instance can exist at any given time. But in the
case of a multi-master config, the master server is replaced, and
without this change the second server's constructor can run before the
first server's destructor. This change makes sure that the first
server's destructor is executed before the second server's constructor.
Change-Id: I3c1019d092bbf9e58f4fc33753a1218bc79735d3
Reviewed-on: http://gerrit.cloudera.org:8080/20913
Reviewed-by: Attila Bukor <[email protected]>
Reviewed-by: Mahesh Reddy <[email protected]>
Tested-by: Kudu Jenkins
---
src/kudu/master/dynamic_multi_master-test.cc | 44 ++++++++++++++++++++++++++--
src/kudu/master/master_runner.cc | 1 +
2 files changed, 43 insertions(+), 2 deletions(-)
diff --git a/src/kudu/master/dynamic_multi_master-test.cc b/src/kudu/master/dynamic_multi_master-test.cc
index d605fe0d3..28bd92b78 100644
--- a/src/kudu/master/dynamic_multi_master-test.cc
+++ b/src/kudu/master/dynamic_multi_master-test.cc
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
+#include <kudu/gutil/strings/util.h>
+
#include <algorithm>
#include <atomic>
#include <cstdint>
@@ -28,6 +30,7 @@
#include <string>
#include <thread>
#include <tuple>
+#include <type_traits>
#include <unordered_set>
#include <utility>
#include <vector>
@@ -35,7 +38,6 @@
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
-#include <kudu/gutil/strings/util.h>
#include "kudu/client/client.h"
#include "kudu/client/schema.h"
@@ -1456,7 +1458,7 @@ struct MultiMasterClusterArgs {
class AutoAddMasterTest : public KuduTest {
public:
- Status SetUpWithTestArgs(const MultiMasterClusterArgs& args) {
+ virtual Status SetUpWithTestArgs(const MultiMasterClusterArgs& args) {
opts_.num_masters = args.orig_num_masters;
opts_.enable_kerberos = args.is_secure;
args_ = args;
@@ -1477,6 +1479,17 @@ class AutoAddMasterTest : public KuduTest {
unique_ptr<ExternalMiniCluster> cluster_;
};
+class MinidumpTest : public AutoAddMasterTest {
+ public:
+ Status SetUpWithTestArgs(const MultiMasterClusterArgs& args) override {
+ // By default minidump is disabled for all ExternalMiniCluster instances.
+ // This made a bug undetected during testing, so a positive regression test
+ // was added where minidump is enabled.
+ opts_.extra_master_flags.emplace_back("--enable_minidumps=true");
+ return AutoAddMasterTest::SetUpWithTestArgs(args);
+ }
+};
+
constexpr const int64_t kShortRetryIntervalSecs = 1;
// Test that nothing goes wrong when starting up masters but the entire cluster
@@ -1777,6 +1790,33 @@ TEST_F(AutoAddMasterTest, TestAddNewMaster) {
NO_FATALS(cluster_->AssertNoCrashes());
}
+// Regression test for KUDU-3491
+TEST_F(MinidumpTest, TestAddNewMasterMinidumpsEnabled) {
+ SKIP_IF_SLOW_NOT_ALLOWED();
+
+ // Let's get the current master addresses and add a new one to them
+ vector<HostPort> master_addrs = cluster_->master_rpc_addrs();
+ unique_ptr<Socket> reserved_socket;
+ ASSERT_OK(MiniCluster::ReserveDaemonSocket(
+ MiniCluster::DaemonType::MASTER, master_addrs.size(), opts_.bind_mode,
&reserved_socket));
+
+ Sockaddr addr;
+ ASSERT_OK(reserved_socket->GetSocketAddress(&addr));
+ master_addrs.emplace_back(addr.host(), addr.port());
+
+ // Let's create the new master and start it to ensure it starts up okay.
+ scoped_refptr<ExternalMaster> peer;
+ auto idx = cluster_->master_rpc_addrs().size();
+ ASSERT_OK(cluster_->CreateMaster(master_addrs, idx, &peer));
+ ASSERT_OK(peer->Start());
+ ASSERT_OK(peer->WaitForCatalogManager());
+ auto expected_num_masters = ++idx;
+ ASSERT_EVENTUALLY([&] {
+ ASSERT_OK(VerifyVotersOnAllMasters(expected_num_masters, cluster_.get()));
+ });
+ NO_FATALS(cluster_->AssertNoCrashes());
+}
+
class ParameterizedAutoAddMasterTest : public AutoAddMasterTest,
public
::testing::WithParamInterface<tuple<int, bool>> {
public:
diff --git a/src/kudu/master/master_runner.cc b/src/kudu/master/master_runner.cc
index 00f23d769..77deb3b87 100644
--- a/src/kudu/master/master_runner.cc
+++ b/src/kudu/master/master_runner.cc
@@ -464,6 +464,7 @@ Status RunMasterServer() {
// If we succeeded, wipe the system catalog on this node and initiate a
// copy from another node.
RETURN_NOT_OK(ClearLocalSystemCatalogAndCopy(leader_hp));
+ server.reset();
server.reset(new Master(opts));
RETURN_NOT_OK(server->Init());
RETURN_NOT_OK(server->Start());