Re: [devel] [PATCH 2/5] rded: add split brain prevention support [#64]

2018-01-24 Thread Anders Widell
See comment below, marked AndersW2>. I also have one more comment on the 
Consensus class: I noticed that it is defined in a file called 
service.h. Could you rename the file to consensus.h, so that the file 
name reflects the name of the class defined in it? Also, the file 
keyvalue.h should be renamed to key_value.h so that we have consistent 
naming of files. Of course, service.cc and keyvalue.cc should be renamed 
as well, and the header guards updated to match.


regards,

Anders Widell


On 01/24/2018 06:31 AM, Gary Lee wrote:

Hi Anders

Will change according to your comments, one comment below:

On 24/01/18 01:53, Anders Widell wrote:

Ack for this patch with comments, marked AndersW>

regards,

Anders Widell



+   case RDE_MSG_NEW_ACTIVE_CALLBACK:
+  {
+    const std::string my_node = base::Conf::NodeName();
+    rde_cb->monitor_lock_thread_running = false;
+
+    // get current active controller
+    Consensus consensus_service;
AndersW> Shouldn't the Consensus instance be created once, instead of 
creating a new instance each time you receive this callback? The 
Consensus constructor even logs to syslog (at INFO level).


[Gary] I will remove the syslog calls in the constructor, but I'd like 
to keep it as a local variable and only instantiate when needed. It's 
fairly lightweight and only constructed when there is a controller 
failover / switchover. Is that OK?


AndersW2> Ok, we can leave it like this for now. We can re-visit config 
file handling later when the generic config reader in ticket 2756 has 
been implemented.


Thanks
Gary





--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


Re: [devel] [PATCH 2/5] rded: add split brain prevention support [#64]

2018-01-23 Thread Gary Lee

Hi Anders

Will change according to your comments, one comment below:

On 24/01/18 01:53, Anders Widell wrote:

Ack for this patch with comments, marked AndersW>

regards,

Anders Widell



+   case RDE_MSG_NEW_ACTIVE_CALLBACK:
+  {
+    const std::string my_node = base::Conf::NodeName();
+    rde_cb->monitor_lock_thread_running = false;
+
+    // get current active controller
+    Consensus consensus_service;
AndersW> Shouldn't the Consensus instance be created once, instead of 
creating a new instance each time you receive this callback? The 
Consensus constructor even logs to syslog (at INFO level).


[Gary] I will remove the syslog calls in the constructor, but I'd like 
to keep it as a local variable and only instantiate when needed. It's 
fairly lightweight and only constructed when there is a controller 
failover / switchover. Is that OK?


Thanks
Gary


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


Re: [devel] [PATCH 2/5] rded: add split brain prevention support [#64]

2018-01-23 Thread Anders Widell

Ack for this patch with comments, marked AndersW>

regards,

Anders Widell


On 01/23/2018 09:06 AM, Gary Lee wrote:

* consult with consensus service before promoting node to active
* add watch thread and self-fence if it detects active controller
   has been changed (if remote fencing is disabled)
---
  src/rde/Makefile.am   |  3 ++-
  src/rde/rded/osaf-rded.in |  4 
  src/rde/rded/rde_cb.h |  4 +++-
  src/rde/rded/rde_main.cc  | 38 +-
  src/rde/rded/role.cc  | 45 -
  src/rde/rded/role.h   |  3 +++
  6 files changed, 89 insertions(+), 8 deletions(-)

diff --git a/src/rde/Makefile.am b/src/rde/Makefile.am
index c967f9fc4..182f347ab 100644
--- a/src/rde/Makefile.am
+++ b/src/rde/Makefile.am
@@ -58,7 +58,8 @@ bin_osafrded_SOURCES = \
  
  bin_osafrded_LDADD = \

lib/libSaAmf.la \
-   lib/libopensaf_core.la
+   lib/libopensaf_core.la \
+   lib/libosaf_common.la
  
  bin_rdegetrole_CPPFLAGS = \

$(AM_CPPFLAGS)
diff --git a/src/rde/rded/osaf-rded.in b/src/rde/rded/osaf-rded.in
index 1c1786c8d..1697936a7 100644
--- a/src/rde/rded/osaf-rded.in
+++ b/src/rde/rded/osaf-rded.in
@@ -28,6 +28,10 @@ else
. $pkgsysconfdir/rde.conf
  fi
  
+if [ -f "$pkgsysconfdir/fmd.conf" ]; then

+  . "$pkgsysconfdir/fmd.conf"
+fi
+
  binary=$pkglibdir/$osafprog
  pidfile=$pkgpiddir/$osafprog.pid
  tracefile=$pkglogdir/$osafprog.log
diff --git a/src/rde/rded/rde_cb.h b/src/rde/rded/rde_cb.h
index d2a3d46b2..fc100849a 100644
--- a/src/rde/rded/rde_cb.h
+++ b/src/rde/rded/rde_cb.h
@@ -39,13 +39,15 @@ struct RDE_CONTROL_BLOCK {
bool task_terminate;
RDE_RDA_CB rde_rda_cb;
RDE_AMF_CB rde_amf_cb;
+  bool monitor_lock_thread_running;
  };
  
  enum RDE_MSG_TYPE {

RDE_MSG_PEER_UP = 1,
RDE_MSG_PEER_DOWN = 2,
RDE_MSG_PEER_INFO_REQ = 3,
-  RDE_MSG_PEER_INFO_RESP = 4
+  RDE_MSG_PEER_INFO_RESP = 4,
+  RDE_MSG_NEW_ACTIVE_CALLBACK = 5
  };
  
  struct rde_peer_info {

diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc
index 0298bf3ff..082c1c040 100644
--- a/src/rde/rded/rde_main.cc
+++ b/src/rde/rded/rde_main.cc
@@ -28,6 +28,7 @@
  #include 
  #include 
  #include 
+#include "osaf/consensus/service.h"
  #include "base/daemon.h"
  #include "base/logtrace.h"
  #include "base/osaf_poll.h"
@@ -37,6 +38,7 @@
  #include 
  #include "rde/rded/rde_cb.h"
  #include "rde/rded/role.h"
+#include "base/conf.h"

AndersW> Sort project include files alphabetically.
  
  #define RDA_MAX_CLIENTS 32
  
@@ -92,10 +94,6 @@ static void handle_mbx_event() {

TRACE_ENTER();
  
msg = reinterpret_cast(ncs_ipc_non_blk_recv(_cb->mbx));

-  TRACE("Received %s from node 0x%x with state %s. My state is %s",
-rde_msg_name[msg->type], msg->fr_node_id,
-Role::to_string(msg->info.peer_info.ha_role),
-Role::to_string(role->role()));
  
switch (msg->type) {

  case RDE_MSG_PEER_INFO_REQ:
@@ -118,6 +116,34 @@ static void handle_mbx_event() {
  case RDE_MSG_PEER_DOWN:
LOG_NO("Peer down on node 0x%x", msg->fr_node_id);
break;
+   case RDE_MSG_NEW_ACTIVE_CALLBACK:
+  {
+const std::string my_node = base::Conf::NodeName();
+rde_cb->monitor_lock_thread_running = false;
+
+// get current active controller
+Consensus consensus_service;
AndersW> Shouldn't the Consensus instance be created once, instead of 
creating a new instance each time you receive this callback? The 
Consensus constructor even logs to syslog (at INFO level).

+std::string active_controller = consensus_service.CurrentActive();
+
+LOG_NO("New active controller notification from consensus service");
+
+if (role->role() == PCS_RDA_ACTIVE) {
+  if (my_node.compare(active_controller) != 0) {
+// we are meant to be active, but consensus service doesn't think so
+LOG_WA("Role does not match consensus service. New controller: %s",
+  active_controller.c_str());
+if (consensus_service.IsRemoteFencingEnabled() == false ) {
+  LOG_ER("Probable split-brain. Rebooting this node");
+  opensaf_reboot(0, nullptr, "Split-brain detected by consensus service");
+}
+  }
+
+  // register for callback
+  rde_cb->monitor_lock_thread_running = true;
+  consensus_service.MonitorLock(Role::MonitorCallback, rde_cb->mbx);
+}
+  }
+  break;
  default:
LOG_ER("%s: discarding unknown message type %u", __FUNCTION__, 
msg->type);
break;
@@ -192,6 +218,7 @@ static int initialize_rde() {
  goto init_failed;
}
  
+  rde_cb->monitor_lock_thread_running = false;

rc = NCSCC_RC_SUCCESS;
  
  init_failed:

@@ -205,11 +232,12 @@ int main(int argc, char *argv[]) {
NCS_SEL_OBJ mbx_sel_obj;
RDE_RDA_CB *rde_rda_cb = _cb->rde_rda_cb;
int term_fd;
-
opensaf_reboot_prepare();
  

[devel] [PATCH 2/5] rded: add split brain prevention support [#64]

2018-01-23 Thread Gary Lee
* consult with consensus service before promoting node to active
* add watch thread and self-fence if it detects active controller
  has been changed (if remote fencing is disabled)
---
 src/rde/Makefile.am   |  3 ++-
 src/rde/rded/osaf-rded.in |  4 
 src/rde/rded/rde_cb.h |  4 +++-
 src/rde/rded/rde_main.cc  | 38 +-
 src/rde/rded/role.cc  | 45 -
 src/rde/rded/role.h   |  3 +++
 6 files changed, 89 insertions(+), 8 deletions(-)

diff --git a/src/rde/Makefile.am b/src/rde/Makefile.am
index c967f9fc4..182f347ab 100644
--- a/src/rde/Makefile.am
+++ b/src/rde/Makefile.am
@@ -58,7 +58,8 @@ bin_osafrded_SOURCES = \
 
 bin_osafrded_LDADD = \
lib/libSaAmf.la \
-   lib/libopensaf_core.la
+   lib/libopensaf_core.la \
+   lib/libosaf_common.la
 
 bin_rdegetrole_CPPFLAGS = \
$(AM_CPPFLAGS)
diff --git a/src/rde/rded/osaf-rded.in b/src/rde/rded/osaf-rded.in
index 1c1786c8d..1697936a7 100644
--- a/src/rde/rded/osaf-rded.in
+++ b/src/rde/rded/osaf-rded.in
@@ -28,6 +28,10 @@ else
. $pkgsysconfdir/rde.conf
 fi 
 
+if [ -f "$pkgsysconfdir/fmd.conf" ]; then
+  . "$pkgsysconfdir/fmd.conf"
+fi
+
 binary=$pkglibdir/$osafprog
 pidfile=$pkgpiddir/$osafprog.pid
 tracefile=$pkglogdir/$osafprog.log
diff --git a/src/rde/rded/rde_cb.h b/src/rde/rded/rde_cb.h
index d2a3d46b2..fc100849a 100644
--- a/src/rde/rded/rde_cb.h
+++ b/src/rde/rded/rde_cb.h
@@ -39,13 +39,15 @@ struct RDE_CONTROL_BLOCK {
   bool task_terminate;
   RDE_RDA_CB rde_rda_cb;
   RDE_AMF_CB rde_amf_cb;
+  bool monitor_lock_thread_running;
 };
 
 enum RDE_MSG_TYPE {
   RDE_MSG_PEER_UP = 1,
   RDE_MSG_PEER_DOWN = 2,
   RDE_MSG_PEER_INFO_REQ = 3,
-  RDE_MSG_PEER_INFO_RESP = 4
+  RDE_MSG_PEER_INFO_RESP = 4,
+  RDE_MSG_NEW_ACTIVE_CALLBACK = 5
 };
 
 struct rde_peer_info {
diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc
index 0298bf3ff..082c1c040 100644
--- a/src/rde/rded/rde_main.cc
+++ b/src/rde/rded/rde_main.cc
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include "osaf/consensus/service.h"
 #include "base/daemon.h"
 #include "base/logtrace.h"
 #include "base/osaf_poll.h"
@@ -37,6 +38,7 @@
 #include 
 #include "rde/rded/rde_cb.h"
 #include "rde/rded/role.h"
+#include "base/conf.h"
 
 #define RDA_MAX_CLIENTS 32
 
@@ -92,10 +94,6 @@ static void handle_mbx_event() {
   TRACE_ENTER();
 
   msg = reinterpret_cast(ncs_ipc_non_blk_recv(_cb->mbx));
-  TRACE("Received %s from node 0x%x with state %s. My state is %s",
-rde_msg_name[msg->type], msg->fr_node_id,
-Role::to_string(msg->info.peer_info.ha_role),
-Role::to_string(role->role()));
 
   switch (msg->type) {
 case RDE_MSG_PEER_INFO_REQ:
@@ -118,6 +116,34 @@ static void handle_mbx_event() {
 case RDE_MSG_PEER_DOWN:
   LOG_NO("Peer down on node 0x%x", msg->fr_node_id);
   break;
+   case RDE_MSG_NEW_ACTIVE_CALLBACK:
+  {
+const std::string my_node = base::Conf::NodeName();
+rde_cb->monitor_lock_thread_running = false;
+
+// get current active controller
+Consensus consensus_service;
+std::string active_controller = consensus_service.CurrentActive();
+
+LOG_NO("New active controller notification from consensus service");
+
+if (role->role() == PCS_RDA_ACTIVE) {
+  if (my_node.compare(active_controller) != 0) {
+// we are meant to be active, but consensus service doesn't think so
+LOG_WA("Role does not match consensus service. New controller: %s",
+  active_controller.c_str());
+if (consensus_service.IsRemoteFencingEnabled() == false ) {
+  LOG_ER("Probable split-brain. Rebooting this node");
+  opensaf_reboot(0, nullptr, "Split-brain detected by consensus service");
+}
+  }
+
+  // register for callback
+  rde_cb->monitor_lock_thread_running = true;
+  consensus_service.MonitorLock(Role::MonitorCallback, rde_cb->mbx);
+}
+  }
+  break;
 default:
   LOG_ER("%s: discarding unknown message type %u", __FUNCTION__, 
msg->type);
   break;
@@ -192,6 +218,7 @@ static int initialize_rde() {
 goto init_failed;
   }
 
+  rde_cb->monitor_lock_thread_running = false;
   rc = NCSCC_RC_SUCCESS;
 
 init_failed:
@@ -205,11 +232,12 @@ int main(int argc, char *argv[]) {
   NCS_SEL_OBJ mbx_sel_obj;
   RDE_RDA_CB *rde_rda_cb = _cb->rde_rda_cb;
   int term_fd;
-
   opensaf_reboot_prepare();
 
   daemonize(argc, argv);
 
+  base::Conf::InitNodeName();
+
   if (initialize_rde() != NCSCC_RC_SUCCESS) goto init_failed;
 
   mbx_sel_obj = ncs_ipc_get_sel_obj(_cb->mbx);
diff --git a/src/rde/rded/role.cc b/src/rde/rded/role.cc
index f7511f0d8..c821aeb33 100644
--- a/src/rde/rded/role.cc
+++ b/src/rde/rded/role.cc
@@ -27,7 +27,9 @@
 #include "base/process.h"
 #include "base/time.h"
 #include "base/ncs_main_papi.h"
+#include "base/ncssysf_def.h"
 

[devel] [PATCH 2/5] rded: add split brain prevention support [#64]

2018-01-19 Thread Gary Lee
* consult with consensus service before promoting node to active
* add watch thread and self-fence if it detects active controller
  has been changed (if remote fencing is disabled)
---
 src/rde/Makefile.am   |  3 ++-
 src/rde/rded/osaf-rded.in |  4 
 src/rde/rded/rde_cb.h |  3 ++-
 src/rde/rded/rde_main.cc  | 35 ++-
 src/rde/rded/role.cc  | 47 ++-
 src/rde/rded/role.h   |  2 ++
 6 files changed, 86 insertions(+), 8 deletions(-)

diff --git a/src/rde/Makefile.am b/src/rde/Makefile.am
index c967f9fc4..182f347ab 100644
--- a/src/rde/Makefile.am
+++ b/src/rde/Makefile.am
@@ -58,7 +58,8 @@ bin_osafrded_SOURCES = \
 
 bin_osafrded_LDADD = \
lib/libSaAmf.la \
-   lib/libopensaf_core.la
+   lib/libopensaf_core.la \
+   lib/libosaf_common.la
 
 bin_rdegetrole_CPPFLAGS = \
$(AM_CPPFLAGS)
diff --git a/src/rde/rded/osaf-rded.in b/src/rde/rded/osaf-rded.in
index 1c1786c8d..1697936a7 100644
--- a/src/rde/rded/osaf-rded.in
+++ b/src/rde/rded/osaf-rded.in
@@ -28,6 +28,10 @@ else
. $pkgsysconfdir/rde.conf
 fi 
 
+if [ -f "$pkgsysconfdir/fmd.conf" ]; then
+  . "$pkgsysconfdir/fmd.conf"
+fi
+
 binary=$pkglibdir/$osafprog
 pidfile=$pkgpiddir/$osafprog.pid
 tracefile=$pkglogdir/$osafprog.log
diff --git a/src/rde/rded/rde_cb.h b/src/rde/rded/rde_cb.h
index d2a3d46b2..83f35c691 100644
--- a/src/rde/rded/rde_cb.h
+++ b/src/rde/rded/rde_cb.h
@@ -45,7 +45,8 @@ enum RDE_MSG_TYPE {
   RDE_MSG_PEER_UP = 1,
   RDE_MSG_PEER_DOWN = 2,
   RDE_MSG_PEER_INFO_REQ = 3,
-  RDE_MSG_PEER_INFO_RESP = 4
+  RDE_MSG_PEER_INFO_RESP = 4,
+  RDE_MSG_NEW_ACTIVE_CALLBACK = 5
 };
 
 struct rde_peer_info {
diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc
index 0298bf3ff..ba472fc6a 100644
--- a/src/rde/rded/rde_main.cc
+++ b/src/rde/rded/rde_main.cc
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include "osaf/consensus/service.h"
 #include "base/daemon.h"
 #include "base/logtrace.h"
 #include "base/osaf_poll.h"
@@ -37,6 +38,7 @@
 #include 
 #include "rde/rded/rde_cb.h"
 #include "rde/rded/role.h"
+#include "base/conf.h"
 
 #define RDA_MAX_CLIENTS 32
 
@@ -92,10 +94,6 @@ static void handle_mbx_event() {
   TRACE_ENTER();
 
   msg = reinterpret_cast(ncs_ipc_non_blk_recv(_cb->mbx));
-  TRACE("Received %s from node 0x%x with state %s. My state is %s",
-rde_msg_name[msg->type], msg->fr_node_id,
-Role::to_string(msg->info.peer_info.ha_role),
-Role::to_string(role->role()));
 
   switch (msg->type) {
 case RDE_MSG_PEER_INFO_REQ:
@@ -118,6 +116,32 @@ static void handle_mbx_event() {
 case RDE_MSG_PEER_DOWN:
   LOG_NO("Peer down on node 0x%x", msg->fr_node_id);
   break;
+   case RDE_MSG_NEW_ACTIVE_CALLBACK:
+  {
+const std::string my_node = base::Conf::NodeName();
+
+// get current active controller
+Consensus consensus_service;
+std::string active_controller = consensus_service.CurrentActive();
+
+LOG_NO("New active controller notification from consensus service");
+
+if (role->role() == PCS_RDA_ACTIVE) {
+  if (my_node.compare(active_controller) != 0) {
+// we are meant to be active, but consensus service doesn't think so
+LOG_ER("Role does not match consensus service. New controller: %s",
+  active_controller.c_str());
+if (consensus_service.IsRemoteFencingEnabled() == false ) {
+  LOG_ER("Probable split brain. Rebooting this node");
+  opensaf_reboot(0, nullptr, "Split-brain detected by consensus service");
+}
+  }
+
+  // register for callback
+  consensus_service.MonitorActive(Role::MonitorCallback, rde_cb->mbx);
+}
+  }
+  break;
 default:
   LOG_ER("%s: discarding unknown message type %u", __FUNCTION__, 
msg->type);
   break;
@@ -205,11 +229,12 @@ int main(int argc, char *argv[]) {
   NCS_SEL_OBJ mbx_sel_obj;
   RDE_RDA_CB *rde_rda_cb = _cb->rde_rda_cb;
   int term_fd;
-
   opensaf_reboot_prepare();
 
   daemonize(argc, argv);
 
+  base::Conf::InitNodeName();
+
   if (initialize_rde() != NCSCC_RC_SUCCESS) goto init_failed;
 
   mbx_sel_obj = ncs_ipc_get_sel_obj(_cb->mbx);
diff --git a/src/rde/rded/role.cc b/src/rde/rded/role.cc
index f7511f0d8..cccbb299f 100644
--- a/src/rde/rded/role.cc
+++ b/src/rde/rded/role.cc
@@ -27,7 +27,9 @@
 #include "base/process.h"
 #include "base/time.h"
 #include "base/ncs_main_papi.h"
+#include "base/ncssysf_def.h"
 #include "rde/rded/rde_cb.h"
+#include "osaf/consensus/service.h"
 
 const char* const Role::role_names_[] = {"Undefined", "ACTIVE","STANDBY",
  "QUIESCED",  "QUIESCING", "Invalid"};
@@ -42,6 +44,23 @@ const char* Role::to_string(PCS_RDA_ROLE role) {
  : role_names_[0];
 }
 
+void Role::MonitorCallback(const std::string& new_value, SYSF_MBX mbx)
+{
+  TRACE_ENTER();
+
+  

[devel] [PATCH 2/5] rded: add split brain prevention support [#64]

2018-01-09 Thread Gary Lee
* consult with consensus service before promoting node to active
* add watch thread and self-fence if it detects active controller
  has been changed
---
 src/rde/Makefile.am   |  3 ++-
 src/rde/rded/osaf-rded.in |  4 
 src/rde/rded/rde_cb.h |  3 ++-
 src/rde/rded/rde_main.cc  | 32 +++-
 src/rde/rded/role.cc  | 45 -
 src/rde/rded/role.h   |  2 ++
 6 files changed, 81 insertions(+), 8 deletions(-)

diff --git a/src/rde/Makefile.am b/src/rde/Makefile.am
index c967f9fc4..182f347ab 100644
--- a/src/rde/Makefile.am
+++ b/src/rde/Makefile.am
@@ -58,7 +58,8 @@ bin_osafrded_SOURCES = \
 
 bin_osafrded_LDADD = \
lib/libSaAmf.la \
-   lib/libopensaf_core.la
+   lib/libopensaf_core.la \
+   lib/libosaf_common.la
 
 bin_rdegetrole_CPPFLAGS = \
$(AM_CPPFLAGS)
diff --git a/src/rde/rded/osaf-rded.in b/src/rde/rded/osaf-rded.in
index 1c1786c8d..1697936a7 100644
--- a/src/rde/rded/osaf-rded.in
+++ b/src/rde/rded/osaf-rded.in
@@ -28,6 +28,10 @@ else
. $pkgsysconfdir/rde.conf
 fi 
 
+if [ -f "$pkgsysconfdir/fmd.conf" ]; then
+  . "$pkgsysconfdir/fmd.conf"
+fi
+
 binary=$pkglibdir/$osafprog
 pidfile=$pkgpiddir/$osafprog.pid
 tracefile=$pkglogdir/$osafprog.log
diff --git a/src/rde/rded/rde_cb.h b/src/rde/rded/rde_cb.h
index d2a3d46b2..83f35c691 100644
--- a/src/rde/rded/rde_cb.h
+++ b/src/rde/rded/rde_cb.h
@@ -45,7 +45,8 @@ enum RDE_MSG_TYPE {
   RDE_MSG_PEER_UP = 1,
   RDE_MSG_PEER_DOWN = 2,
   RDE_MSG_PEER_INFO_REQ = 3,
-  RDE_MSG_PEER_INFO_RESP = 4
+  RDE_MSG_PEER_INFO_RESP = 4,
+  RDE_MSG_NEW_ACTIVE_CALLBACK = 5
 };
 
 struct rde_peer_info {
diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc
index 0298bf3ff..23c03f552 100644
--- a/src/rde/rded/rde_main.cc
+++ b/src/rde/rded/rde_main.cc
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include "osaf/consensus/service.h"
 #include "base/daemon.h"
 #include "base/logtrace.h"
 #include "base/osaf_poll.h"
@@ -37,6 +38,7 @@
 #include 
 #include "rde/rded/rde_cb.h"
 #include "rde/rded/role.h"
+#include "base/conf.h"
 
 #define RDA_MAX_CLIENTS 32
 
@@ -92,10 +94,6 @@ static void handle_mbx_event() {
   TRACE_ENTER();
 
   msg = reinterpret_cast(ncs_ipc_non_blk_recv(_cb->mbx));
-  TRACE("Received %s from node 0x%x with state %s. My state is %s",
-rde_msg_name[msg->type], msg->fr_node_id,
-Role::to_string(msg->info.peer_info.ha_role),
-Role::to_string(role->role()));
 
   switch (msg->type) {
 case RDE_MSG_PEER_INFO_REQ:
@@ -118,6 +116,29 @@ static void handle_mbx_event() {
 case RDE_MSG_PEER_DOWN:
   LOG_NO("Peer down on node 0x%x", msg->fr_node_id);
   break;
+   case RDE_MSG_NEW_ACTIVE_CALLBACK:
+  {
+const std::string my_node = base::Conf::NodeName();
+
+// get current active controller
+Consensus consensus_service;
+std::string active_controller = consensus_service.CurrentActive();
+
+LOG_NO("New active controller notification from consensus service");
+
+if (role->role() == PCS_RDA_ACTIVE) {
+  if (my_node.compare(active_controller) != 0) {
+// we are meant to be active, but consensus service doesn't think so
+LOG_ER("Role does not match consensus service");
+LOG_ER("Probable split brain. Rebooting this node");
+opensaf_reboot(0, nullptr, "Split-brain detected by consensus service");
+  } else {
+// get more callbacks
+consensus_service.MonitorActive(Role::MonitorCallback, 
rde_cb->mbx);
+  }
+}
+  }
+  break;
 default:
   LOG_ER("%s: discarding unknown message type %u", __FUNCTION__, 
msg->type);
   break;
@@ -205,11 +226,12 @@ int main(int argc, char *argv[]) {
   NCS_SEL_OBJ mbx_sel_obj;
   RDE_RDA_CB *rde_rda_cb = _cb->rde_rda_cb;
   int term_fd;
-
   opensaf_reboot_prepare();
 
   daemonize(argc, argv);
 
+  base::Conf::InitNodeName();
+
   if (initialize_rde() != NCSCC_RC_SUCCESS) goto init_failed;
 
   mbx_sel_obj = ncs_ipc_get_sel_obj(_cb->mbx);
diff --git a/src/rde/rded/role.cc b/src/rde/rded/role.cc
index f7511f0d8..28e034c44 100644
--- a/src/rde/rded/role.cc
+++ b/src/rde/rded/role.cc
@@ -21,6 +21,7 @@
 #include 
 
 #include "rde/rded/role.h"
+#include "rde/rded/rde_cb.h"
 #include 
 #include "base/logtrace.h"
 #include "base/getenv.h"
@@ -28,6 +29,7 @@
 #include "base/time.h"
 #include "base/ncs_main_papi.h"
 #include "rde/rded/rde_cb.h"
+#include "osaf/consensus/service.h"
 
 const char* const Role::role_names_[] = {"Undefined", "ACTIVE","STANDBY",
  "QUIESCED",  "QUIESCING", "Invalid"};
@@ -42,6 +44,23 @@ const char* Role::to_string(PCS_RDA_ROLE role) {
  : role_names_[0];
 }
 
+void Role::MonitorCallback(const std::string& new_value, SYSF_MBX mbx)
+{
+  TRACE_ENTER();
+
+  rde_msg* msg = static_cast(malloc(sizeof(rde_msg)));
+  msg->type =