Ack for this patch, with comments marked AndersW>.
regards,
Anders Widell
On 01/23/2018 09:06 AM, Gary Lee wrote:
* consult with consensus service before promoting node to active
* add watch thread and self-fence if it detects active controller
has been changed (if remote fencing is disabled)
---
src/rde/Makefile.am | 3 ++-
src/rde/rded/osaf-rded.in | 4 ++++
src/rde/rded/rde_cb.h | 4 +++-
src/rde/rded/rde_main.cc | 38 +++++++++++++++++++++++++++++++++-----
src/rde/rded/role.cc | 45 ++++++++++++++++++++++++++++++++++++++++++++-
src/rde/rded/role.h | 3 +++
6 files changed, 89 insertions(+), 8 deletions(-)
diff --git a/src/rde/Makefile.am b/src/rde/Makefile.am
index c967f9fc4..182f347ab 100644
--- a/src/rde/Makefile.am
+++ b/src/rde/Makefile.am
@@ -58,7 +58,8 @@ bin_osafrded_SOURCES = \
bin_osafrded_LDADD = \
lib/libSaAmf.la \
- lib/libopensaf_core.la
+ lib/libopensaf_core.la \
+ lib/libosaf_common.la
bin_rdegetrole_CPPFLAGS = \
$(AM_CPPFLAGS)
diff --git a/src/rde/rded/osaf-rded.in b/src/rde/rded/osaf-rded.in
index 1c1786c8d..1697936a7 100644
--- a/src/rde/rded/osaf-rded.in
+++ b/src/rde/rded/osaf-rded.in
@@ -28,6 +28,10 @@ else
. $pkgsysconfdir/rde.conf
fi
+if [ -f "$pkgsysconfdir/fmd.conf" ]; then
+ . "$pkgsysconfdir/fmd.conf"
+fi
+
binary=$pkglibdir/$osafprog
pidfile=$pkgpiddir/$osafprog.pid
tracefile=$pkglogdir/$osafprog.log
diff --git a/src/rde/rded/rde_cb.h b/src/rde/rded/rde_cb.h
index d2a3d46b2..fc100849a 100644
--- a/src/rde/rded/rde_cb.h
+++ b/src/rde/rded/rde_cb.h
@@ -39,13 +39,15 @@ struct RDE_CONTROL_BLOCK {
bool task_terminate;
RDE_RDA_CB rde_rda_cb;
RDE_AMF_CB rde_amf_cb;
+ bool monitor_lock_thread_running;
};
enum RDE_MSG_TYPE {
RDE_MSG_PEER_UP = 1,
RDE_MSG_PEER_DOWN = 2,
RDE_MSG_PEER_INFO_REQ = 3,
- RDE_MSG_PEER_INFO_RESP = 4
+ RDE_MSG_PEER_INFO_RESP = 4,
+ RDE_MSG_NEW_ACTIVE_CALLBACK = 5
};
struct rde_peer_info {
diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc
index 0298bf3ff..082c1c040 100644
--- a/src/rde/rded/rde_main.cc
+++ b/src/rde/rded/rde_main.cc
@@ -28,6 +28,7 @@
#include <cerrno>
#include <cstdlib>
#include <cstring>
+#include "osaf/consensus/service.h"
#include "base/daemon.h"
#include "base/logtrace.h"
#include "base/osaf_poll.h"
@@ -37,6 +38,7 @@
#include <saAmf.h>
#include "rde/rded/rde_cb.h"
#include "rde/rded/role.h"
+#include "base/conf.h"
AndersW> Sort project include files alphabetically.
#define RDA_MAX_CLIENTS 32
@@ -92,10 +94,6 @@ static void handle_mbx_event() {
TRACE_ENTER();
msg = reinterpret_cast<rde_msg *>(ncs_ipc_non_blk_recv(&rde_cb->mbx));
- TRACE("Received %s from node 0x%x with state %s. My state is %s",
- rde_msg_name[msg->type], msg->fr_node_id,
- Role::to_string(msg->info.peer_info.ha_role),
- Role::to_string(role->role()));
switch (msg->type) {
case RDE_MSG_PEER_INFO_REQ:
@@ -118,6 +116,34 @@ static void handle_mbx_event() {
case RDE_MSG_PEER_DOWN:
LOG_NO("Peer down on node 0x%x", msg->fr_node_id);
break;
+ case RDE_MSG_NEW_ACTIVE_CALLBACK:
+ {
+ const std::string my_node = base::Conf::NodeName();
+ rde_cb->monitor_lock_thread_running = false;
+
+ // get current active controller
+ Consensus consensus_service;
AndersW> Shouldn't the Consensus instance be created once, instead of
creating a new instance each time you receive this callback? The
Consensus constructor even logs to syslog (at INFO level).
+ std::string active_controller = consensus_service.CurrentActive();
+
+ LOG_NO("New active controller notification from consensus service");
+
+ if (role->role() == PCS_RDA_ACTIVE) {
+ if (my_node.compare(active_controller) != 0) {
+ // we are meant to be active, but consensus service doesn't think so
+ LOG_WA("Role does not match consensus service. New controller: %s",
+ active_controller.c_str());
+ if (consensus_service.IsRemoteFencingEnabled() == false ) {
+ LOG_ER("Probable split-brain. Rebooting this node");
+ opensaf_reboot(0, nullptr, "Split-brain detected by consensus service");
+ }
+ }
+
+ // register for callback
+ rde_cb->monitor_lock_thread_running = true;
+ consensus_service.MonitorLock(Role::MonitorCallback, rde_cb->mbx);
+ }
+ }
+ break;
default:
LOG_ER("%s: discarding unknown message type %u", __FUNCTION__,
msg->type);
break;
@@ -192,6 +218,7 @@ static int initialize_rde() {
goto init_failed;
}
+ rde_cb->monitor_lock_thread_running = false;
rc = NCSCC_RC_SUCCESS;
init_failed:
@@ -205,11 +232,12 @@ int main(int argc, char *argv[]) {
NCS_SEL_OBJ mbx_sel_obj;
RDE_RDA_CB *rde_rda_cb = &rde_cb->rde_rda_cb;
int term_fd;
-
opensaf_reboot_prepare();
daemonize(argc, argv);
+ base::Conf::InitNodeName();
+
if (initialize_rde() != NCSCC_RC_SUCCESS) goto init_failed;
mbx_sel_obj = ncs_ipc_get_sel_obj(&rde_cb->mbx);
diff --git a/src/rde/rded/role.cc b/src/rde/rded/role.cc
index f7511f0d8..c821aeb33 100644
--- a/src/rde/rded/role.cc
+++ b/src/rde/rded/role.cc
@@ -27,7 +27,9 @@
#include "base/process.h"
#include "base/time.h"
#include "base/ncs_main_papi.h"
+#include "base/ncssysf_def.h"
#include "rde/rded/rde_cb.h"
+#include "osaf/consensus/service.h"
AndersW> Sort project include files alphabetically.
const char* const Role::role_names_[] = {"Undefined", "ACTIVE", "STANDBY",
"QUIESCED", "QUIESCING", "Invalid"};
@@ -42,6 +44,20 @@ const char* Role::to_string(PCS_RDA_ROLE role) {
: role_names_[0];
}
+void Role::MonitorCallback(const std::string& key,
+ const std::string& new_value, SYSF_MBX mbx)
+{
+ TRACE_ENTER();
+
+ rde_msg* msg = static_cast<rde_msg *>(malloc(sizeof(rde_msg)));
+ msg->type = RDE_MSG_NEW_ACTIVE_CALLBACK;
+
+ uint32_t status;
+ status = m_NCS_IPC_SEND(&mbx,
+ msg, NCS_IPC_PRIORITY_NORMAL);
+ osafassert(status == NCSCC_RC_SUCCESS);
+}
+
Role::Role(NODE_ID own_node_id)
: known_nodes_{},
role_{PCS_RDA_QUIESCED},
@@ -61,10 +77,26 @@ timespec* Role::Poll(timespec* ts) {
*ts = election_end_time_ - now;
timeout = ts;
} else {
+ SaAisErrorT rc;
+ Consensus consensus_service;
+ rc = consensus_service.PromoteThisNode();
+ if (rc != SA_AIS_OK) {
+ LOG_ER("Unable to set active controller in consensus service");
+ opensaf_reboot(0, nullptr, "Unable to set active controller in consensus service");
+ }
+
ExecutePreActiveScript();
LOG_NO("Switched to ACTIVE from %s", to_string(role()));
role_ = PCS_RDA_ACTIVE;
rde_rda_send_role(role_);
+
+ // register for callback if active controller is changed
+ // in consensus service
+ RDE_CONTROL_BLOCK* cb = rde_get_control_block();
+ if (cb->monitor_lock_thread_running == false) {
+ cb->monitor_lock_thread_running = true;
+ consensus_service.MonitorLock(MonitorCallback, cb->mbx);
+ }
}
}
return timeout;
@@ -91,7 +123,18 @@ uint32_t Role::SetRole(PCS_RDA_ROLE new_role) {
}
if (new_role != old_role) {
LOG_NO("RDE role set to %s", to_string(new_role));
- if (new_role == PCS_RDA_ACTIVE) ExecutePreActiveScript();
+ if (new_role == PCS_RDA_ACTIVE) {
+ ExecutePreActiveScript();
+
+ // register for callback if active controller is changed
+ // in consensus service
+ Consensus consensus_service;
+ RDE_CONTROL_BLOCK* cb = rde_get_control_block();
+ if (cb->monitor_lock_thread_running == false) {
+ cb->monitor_lock_thread_running = true;
+ consensus_service.MonitorLock(MonitorCallback, cb->mbx);
+ }
+ }
role_ = new_role;
if (new_role == PCS_RDA_UNDEFINED) {
known_nodes_.clear();
diff --git a/src/rde/rded/role.h b/src/rde/rded/role.h
index 20219b535..bee983828 100644
--- a/src/rde/rded/role.h
+++ b/src/rde/rded/role.h
@@ -21,6 +21,7 @@
#include <time.h>
#include <cstdint>
#include <set>
+#include <string>
#include "base/macros.h"
#include "mds/mds_papi.h"
#include "rde/agent/rda_papi.h"
@@ -38,6 +39,8 @@ class Role {
uint32_t SetRole(PCS_RDA_ROLE new_role);
PCS_RDA_ROLE role() const;
static const char* to_string(PCS_RDA_ROLE role);
+ static void MonitorCallback(const std::string& key,
+ const std::string& new_value, SYSF_MBX mbx);
private:
static const uint64_t kDefaultDiscoverPeerTimeout = 2000;
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel