---
 00-README.conf                           |  43 ++++++
 Makefile.am                              |   4 +-
 src/amf/amfd/osaf-amfd.in                |   4 +
 src/amf/amfd/role.cc                     |  35 ++++-
 src/fm/Makefile.am                       |   1 +
 src/fm/fmd/fm_main.cc                    |  12 ++
 src/fm/fmd/fm_rda.cc                     |  26 ++++
 src/fm/fmd/fmd.conf                      |   8 ++
 src/osaf/Makefile.am                     |   8 +-
 src/osaf/consensus/Makefile              |  18 +++
 src/osaf/consensus/keyvalue.cc           | 165 ++++++++++++++++++++++
 src/osaf/consensus/keyvalue.h            |  57 ++++++++
 src/osaf/consensus/plugins/etcd.plugin   | 217 +++++++++++++++++++++++++++++
 src/osaf/consensus/plugins/sample.plugin | 162 ++++++++++++++++++++++
 src/osaf/consensus/service.cc            | 231 +++++++++++++++++++++++++++++++
 src/osaf/consensus/service.h             |  66 +++++++++
 src/rde/Makefile.am                      |   3 +-
 src/rde/rded/osaf-rded.in                |   4 +
 src/rde/rded/rde_cb.h                    |   3 +-
 src/rde/rded/rde_main.cc                 |  32 ++++-
 src/rde/rded/role.cc                     |  45 +++++-
 src/rde/rded/role.h                      |   2 +
 22 files changed, 1130 insertions(+), 16 deletions(-)
 create mode 100644 src/osaf/consensus/Makefile
 create mode 100644 src/osaf/consensus/keyvalue.cc
 create mode 100644 src/osaf/consensus/keyvalue.h
 create mode 100644 src/osaf/consensus/plugins/etcd.plugin
 create mode 100644 src/osaf/consensus/plugins/sample.plugin
 create mode 100644 src/osaf/consensus/service.cc
 create mode 100644 src/osaf/consensus/service.h

diff --git a/00-README.conf b/00-README.conf
index 5de286225..24d583a89 100644
--- a/00-README.conf
+++ b/00-README.conf
@@ -654,3 +654,46 @@ on each node, except on the active node. This file indicates that a cluster
 reboot is in progress and all nodes needs to delay their start, this to give
 the active a lead.
 
+Split-Brain Prevention with Consensus Service
+=============================================
+
+Split-brain prevention is supported in OpenSAF through a pluggable arbitration
+interface, to implement a consensus service. In network partitions containing
+half of the nodes or less, a controller cannot become active thus preventing
+a split-brain scenario. When the network once again merges such that consensus can
+be formed, an active controller will be chosen from the controllers that have write
+access to the consensus service. This is assuming that each node
+of the cluster participates in the consensus service.
+
+To enable split-brain prevention, edit fmd.conf and update accordingly:
+
+export SPLIT_BRAIN_PREVENTION=1
+export KEYVALUE_STORE_PLUGIN_CMD=/usr/local/lib/opensaf/etcd.plugin
+
+The plugin must implement a key-value store interface.
+
+An example plugin is provided for etcd, an implementation of RAFT. The plugin assumes
+etcd is installed and available on system controllers. In clusters where
+there are only two system controllers, it is highly recommended to configure etcd so
+it runs on at least three nodes to facilitate a majority vote with failure tolerance.
+
+Other implementations of a distributed key-value store service
+can be used, provided that they implement the interface documented in sample.plugin.
+
+get <key> - returns <value> of <key> in key-value store
+set <key> <value> - set <key> to <value> in key-value store
+erase <key> - erase <key> from key-value store
+lock <owner> <timeout> - distributed lock held by <owner>, with automatic unlock after <timeout> seconds
+unlock [--force] - distributed unlock; --force unlocks even if the lock is not held by this node
+lock_owner - returns owner of lock
+watch <key> - returns new <value> of <key> when the value is changed
+
+The key-value store does not need to reside on the same nodes as OpenSAF.
+In such a configuration, an appropriate plugin that handles
+the communication with a remotely located key-value store, must be provided.
+
+If remote fencing is enabled, then it will be used to fence a node that the consensus
+service believes should not be active. Otherwise, rded will initiate a 'self-fencing'
+by rebooting the node, if it determines the node is no longer active
+according to the consensus service.
+
diff --git a/Makefile.am b/Makefile.am
index bcfd844cd..57c2585a8 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -159,7 +159,9 @@ dist_osaf_execbin_SCRIPTS += \
        $(top_srcdir)/scripts/opensaf_reboot \
        $(top_srcdir)/scripts/opensaf_sc_active \
        $(top_srcdir)/scripts/opensaf_scale_out \
-       $(top_srcdir)/scripts/plm_scale_out
+       $(top_srcdir)/scripts/plm_scale_out \
+       $(top_srcdir)/src/osaf/consensus/plugins/etcd.plugin
+# TODO remove above line before pushing
 
 include $(top_srcdir)/src/ais/Makefile.am
 include $(top_srcdir)/src/base/Makefile.am
diff --git a/src/amf/amfd/osaf-amfd.in b/src/amf/amfd/osaf-amfd.in
index 45c5ab9e4..26a77ef52 100644
--- a/src/amf/amfd/osaf-amfd.in
+++ b/src/amf/amfd/osaf-amfd.in
@@ -28,6 +28,10 @@ else
        . $pkgsysconfdir/amfd.conf
 fi     
 
+if [ -f "$pkgsysconfdir/fmd.conf" ]; then
+  . "$pkgsysconfdir/fmd.conf"
+fi
+
 binary=$pkglibdir/$osafprog
 pidfile=$pkgpiddir/$osafprog.pid
 lockfile=$lockdir/$initscript
diff --git a/src/amf/amfd/role.cc b/src/amf/amfd/role.cc
index 865d89d94..f98beea01 100644
--- a/src/amf/amfd/role.cc
+++ b/src/amf/amfd/role.cc
@@ -38,6 +38,7 @@
 #include "osaf/immutil/immutil.h"
 #include "base/logtrace.h"
 #include "rde/agent/rda_papi.h"
+#include "osaf/consensus/service.h"
 
 #include "amf/amfd/amfd.h"
 #include "amf/amfd/imm.h"
@@ -1085,6 +1086,12 @@ uint32_t amfd_switch_actv_qsd(AVD_CL_CB *cb) {
     avd_d2n_msg_dequeue(cb);
   }
 
+  Consensus consensus_service;
+  rc = consensus_service.DemoteThisNode();
+  if (rc != SA_AIS_OK) {
+    LOG_ER("Failed to demote this node from consensus service");
+  }
+
   TRACE_LEAVE();
   return NCSCC_RC_SUCCESS;
 }
@@ -1209,13 +1216,21 @@ uint32_t amfd_switch_stdby_actv(AVD_CL_CB *cb) {
   cb->avail_state_avd = SA_AMF_HA_ACTIVE;
   osaf_mutex_unlock_ordie(&imm_reinit_mutex);
 
+  Consensus consensus_service;
+  rc = consensus_service.BeginActivePromotion();
+  if (rc != SA_AIS_OK) {
+    LOG_ER("Unable to set active controller in consensus service");
+    LOG_ER("Split brain is possible");
+  }
+
   /* Declare this standby as Active. Set Vdest role role */
   if (NCSCC_RC_SUCCESS !=
       (status = avd_mds_set_vdest_role(cb, SA_AMF_HA_ACTIVE))) {
     LOG_ER("Switch Standby --> Active FAILED, MDS role set failed");
     cb->swap_switch = false;
     avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE, SA_AMF_HA_ACTIVE);
-    return NCSCC_RC_FAILURE;
+    status = NCSCC_RC_FAILURE;
+    goto done;
   }
 
   /* Time to send fail-over messages to all the AVND's */
@@ -1240,7 +1255,8 @@ uint32_t amfd_switch_stdby_actv(AVD_CL_CB *cb) {
     } else {
       cb->swap_switch = false;
       avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE, SA_AMF_HA_ACTIVE);
-      return NCSCC_RC_FAILURE;
+      status = NCSCC_RC_FAILURE;
+      goto done;
     }
   }
 
@@ -1259,7 +1275,8 @@ uint32_t amfd_switch_stdby_actv(AVD_CL_CB *cb) {
          in avd_imm_reinit_bg_thread.*/
     } else {
       avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE, SA_AMF_HA_ACTIVE);
-      return NCSCC_RC_FAILURE;
+      status = NCSCC_RC_FAILURE;
+      goto done;
     }
   } else
     osaf_mutex_unlock_ordie(&imm_reinit_mutex);
@@ -1274,7 +1291,8 @@ uint32_t amfd_switch_stdby_actv(AVD_CL_CB *cb) {
     LOG_ER("Switch Standby --> Active, clm track start failed");
     Fifo::queue(new ClmTrackStart());
     avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE, SA_AMF_HA_ACTIVE);
-    return NCSCC_RC_FAILURE;
+    status = NCSCC_RC_FAILURE;
+    goto done;
   }
 
   /* Send the message to other avd for role change rsp as success */
@@ -1291,8 +1309,15 @@ uint32_t amfd_switch_stdby_actv(AVD_CL_CB *cb) {
     }
   }
 
+  status = NCSCC_RC_SUCCESS;
+done:
+  rc = consensus_service.EndActivePromotion();
+  if (rc != SA_AIS_OK) {
+    LOG_ER("Unable to remove lock in consensus service");
+  }
+
   TRACE_LEAVE();
-  return NCSCC_RC_SUCCESS;
+  return status;
 }
 
 /****************************************************************************\
diff --git a/src/fm/Makefile.am b/src/fm/Makefile.am
index d48a9146c..0f254b94f 100644
--- a/src/fm/Makefile.am
+++ b/src/fm/Makefile.am
@@ -49,4 +49,5 @@ bin_osaffmd_SOURCES = \
 bin_osaffmd_LDADD = \
        lib/libSaAmf.la \
        lib/libSaClm.la \
+       lib/libosaf_common.la \
        lib/libopensaf_core.la
diff --git a/src/fm/fmd/fm_main.cc b/src/fm/fmd/fm_main.cc
index db8395ee7..1d3cba766 100644
--- a/src/fm/fmd/fm_main.cc
+++ b/src/fm/fmd/fm_main.cc
@@ -24,6 +24,7 @@ This file contains the main() routine for FM.
 ******************************************************************************/
 
 #include "osaf/configmake.h"
+#include "osaf/consensus/service.h"
 #include <stdlib.h>
 #include <stdbool.h>
 #include "base/daemon.h"
@@ -593,6 +594,12 @@ static void fm_mbx_msg_handler(FM_CB *fm_cb, FM_EVT 
*fm_mbx_evt)
                                 * trigerred quicker than the node_down event
                                 * has been received.
                                 */
+                               if (fm_cb->role == PCS_RDA_STANDBY) {
+                                       // update consensus service, before 
fencing old active controller
+                                       Consensus consensus_service;
+                                       consensus_service.DemoteCurrentActive();
+                               }
+
                                if (fm_cb->use_remote_fencing) {
                                        if (fm_cb->peer_node_terminated ==
                                            false) {
@@ -659,6 +666,11 @@ static void fm_mbx_msg_handler(FM_CB *fm_cb, FM_EVT 
*fm_mbx_evt)
                                    0, NULL,
                                    "Failover occurred, but this node is not 
yet ready");
                        }
+
+                       // update consensus service, before fencing old active 
controller
+                       Consensus consensus_service;
+                       consensus_service.DemoteCurrentActive();
+
                        /* Now. Try resetting other blade */
                        fm_cb->role = PCS_RDA_ACTIVE;
 
diff --git a/src/fm/fmd/fm_rda.cc b/src/fm/fmd/fm_rda.cc
index 5c1b33e2f..0cec70a05 100644
--- a/src/fm/fmd/fm_rda.cc
+++ b/src/fm/fmd/fm_rda.cc
@@ -19,7 +19,9 @@
 #include <string.h>
 #include <syslog.h>
 #include "rde/agent/rda_papi.h"
+#include "osaf/consensus/service.h"
 #include "base/logtrace.h"
+
 extern void rda_cb(uint32_t cb_hdl, PCS_RDA_CB_INFO *cb_info,
        PCSRDA_RETURN_CODE error_code);
 /****************************************************************************
@@ -83,8 +85,27 @@ uint32_t fm_rda_set_role(FM_CB *fm_cb, PCS_RDA_ROLE role)
        rda_req.req_type = PCS_RDA_SET_ROLE;
        rda_req.info.io_role = role;
 
+       osafassert(role == PCS_RDA_ACTIVE);
+
+       Consensus consensus_service;
+       rc = consensus_service.DemoteCurrentActive();
+       if (rc != SA_AIS_OK) {
+               LOG_ER("Failed to demote old active node from consensus 
service");
+       }
+
+       rc = consensus_service.BeginActivePromotion();
+       if (rc != SA_AIS_OK) {
+               LOG_ER("Unable to set active controller in consensus service");
+               LOG_ER("Split brain is possible");
+       }
+
        rc = pcs_rda_request(&rda_req);
        if (rc != PCSRDA_RC_SUCCESS) {
+               rc = consensus_service.EndActivePromotion();
+               if (rc != SA_AIS_OK) {
+                       LOG_ER("Unable to remove lock in consensus service");
+               }
+
                syslog(
                    LOG_INFO,
                    "fm_rda_set_role() Failed: CurrentState: %s, AskedState: 
%s",
@@ -92,6 +113,11 @@ uint32_t fm_rda_set_role(FM_CB *fm_cb, PCS_RDA_ROLE role)
                return NCSCC_RC_FAILURE;
        }
 
+       rc = consensus_service.EndActivePromotion();
+       if (rc != SA_AIS_OK) {
+               LOG_ER("Unable to remove lock in consensus service");
+       }
+
        syslog(LOG_INFO,
               "fm_rda_set_role() Success: CurrentState: %s, AskedState: %s",
               role_string[fm_cb->role], role_string[role]);
diff --git a/src/fm/fmd/fmd.conf b/src/fm/fmd/fmd.conf
index 6921017f7..521bfcabc 100644
--- a/src/fm/fmd/fmd.conf
+++ b/src/fm/fmd/fmd.conf
@@ -26,6 +26,14 @@ export FMS_NODE_ISOLATION_TIMEOUT=0
 # To enable remote fencing change to 1
 export FMS_USE_REMOTE_FENCING=0
 
+# TODO: change before pushing
+#export SPLIT_BRAIN_PREVENTION=0
+export SPLIT_BRAIN_PREVENTION=1
+
+# TODO: change before pushing
+#export KEYVALUE_STORE_PLUGIN_CMD=
+export KEYVALUE_STORE_PLUGIN_CMD=/usr/local/lib/opensaf/etcd.plugin
+
 # FM will supervise transitions to the ACTIVE role when this variable is set to
 # a non-zero value. The value is the time in the unit of 10 ms to wait for a
 # role change to ACTIVE to take effect. If AMF has not give FM an active
diff --git a/src/osaf/Makefile.am b/src/osaf/Makefile.am
index 05b78c988..10bbe427b 100644
--- a/src/osaf/Makefile.am
+++ b/src/osaf/Makefile.am
@@ -16,7 +16,9 @@
 
 noinst_HEADERS += \
        src/osaf/immutil/immutil.h \
-       src/osaf/saflog/saflog.h
+       src/osaf/saflog/saflog.h \
+       src/osaf/consensus/keyvalue.h \
+       src/osaf/consensus/service.h
 
 pkglib_LTLIBRARIES += lib/libosaf_common.la
 
@@ -33,7 +35,9 @@ lib_libosaf_common_la_LDFLAGS = \
 
 lib_libosaf_common_la_SOURCES = \
        src/osaf/immutil/immutil.c \
-       src/osaf/saflog/saflog.c
+       src/osaf/saflog/saflog.c \
+       src/osaf/consensus/keyvalue.cc \
+       src/osaf/consensus/service.cc
 
 nodist_EXTRA_lib_libosaf_common_la_SOURCES = dummy.cc
 
diff --git a/src/osaf/consensus/Makefile b/src/osaf/consensus/Makefile
new file mode 100644
index 000000000..a2c8bc9dd
--- /dev/null
+++ b/src/osaf/consensus/Makefile
@@ -0,0 +1,18 @@
+#      -*- OpenSAF  -*-
+#
+# (C) Copyright 2018 The OpenSAF Foundation
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
+# under the GNU Lesser General Public License Version 2.1, February 1999.
+# The complete license can be accessed from the following location:
+# http://opensource.org/licenses/lgpl-license.php
+# See the Copying file included with the OpenSAF distribution for full
+# licensing terms.
+#
+# Author(s): Ericsson AB
+#
+
+all:  # convenience target: build the library that contains the consensus sources
+       $(MAKE) -C ../.. lib/libosaf_common.la
diff --git a/src/osaf/consensus/keyvalue.cc b/src/osaf/consensus/keyvalue.cc
new file mode 100644
index 000000000..e5a796d33
--- /dev/null
+++ b/src/osaf/consensus/keyvalue.cc
@@ -0,0 +1,165 @@
+/*      -*- OpenSAF  -*-
+ *
+ * (C) Copyright 2018 The OpenSAF Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
+ * under the GNU Lesser General Public License Version 2.1, February 1999.
+ * The complete license can be accessed from the following location:
+ * http://opensource.org/licenses/lgpl-license.php
+ * See the Copying file included with the OpenSAF distribution for full
+ * licensing terms.
+ *
+ * Author(s): Ericsson AB
+ *
+ */
+#include "keyvalue.h"
+#include "base/logtrace.h"
+#include "base/getenv.h"
+#include "base/conf.h"
+
+int KeyValue::Execute(const std::string& command, std::string& output) {
+  TRACE_ENTER();  // runs command through popen() and collects its stdout in output
+  constexpr size_t buf_size = 128;
+  std::array<char, buf_size> buffer;
+  FILE* pipe = popen(command.c_str(), "r");
+  if (pipe == nullptr) {
+    return 1;  // command could not be started; output is left untouched
+  }
+  output.clear();
+  // fgets() yields nullptr on EOF and on error; feof() alone could spin.
+  while (fgets(buffer.data(), buf_size, pipe) != nullptr) {
+    output += buffer.data();
+  }
+  const int exit_code = pclose(pipe);  // termination status; 0 on success
+  if (!output.empty() &&
+      isspace(static_cast<unsigned char>(output.back())) != 0) {
+    // drop one trailing whitespace character (normally the final newline)
+    output.pop_back();
+  }
+  TRACE("Executed '%s', returning %d", command.c_str(), exit_code);
+  return exit_code;
+}
+
+SaAisErrorT KeyValue::Get(const std::string& key, std::string& value) {
+  TRACE_ENTER();  // Get: read <key> through the plugin's "get" command
+
+  const std::string kv_store_cmd = base::GetEnv("KEYVALUE_STORE_PLUGIN_CMD", 
"");
+  const std::string command(kv_store_cmd + " get " + key);  // key is appended unquoted
+  int rc = KeyValue::Execute(command, value);  // value receives the command's stdout
+  TRACE("Read '%s'", value.c_str());
+
+  if (rc == 0) {  // Execute() reported status 0
+    return SA_AIS_OK;
+  } else {
+    return SA_AIS_ERR_FAILED_OPERATION;  // any non-zero status
+  }
+}
+
+SaAisErrorT KeyValue::Set(const std::string& key, const std::string& value) {
+  TRACE_ENTER();  // Set: write <key>=<value> through the plugin's "set" command
+
+  const std::string kv_store_cmd = base::GetEnv("KEYVALUE_STORE_PLUGIN_CMD", 
"");
+  const std::string command(kv_store_cmd + " set " + key + " " + value);  // both appended unquoted
+  std::string output;  // plugin stdout, not used further
+  int rc = KeyValue::Execute(command, output);
+
+  if (rc == 0) {  // Execute() reported status 0
+    return SA_AIS_OK;
+  } else {
+    return SA_AIS_ERR_FAILED_OPERATION;  // any non-zero status
+  }
+}
+
+SaAisErrorT KeyValue::Erase(const std::string& key) {
+  TRACE_ENTER();  // Erase: remove <key> through the plugin's "erase" command
+
+  const std::string kv_store_cmd = base::GetEnv("KEYVALUE_STORE_PLUGIN_CMD", 
"");
+  const std::string command(kv_store_cmd + " erase " + key);  // key is appended unquoted
+  std::string output;  // plugin stdout, not used further
+  int rc = KeyValue::Execute(command, output);
+
+  if (rc == 0) {  // Execute() reported status 0
+    return SA_AIS_OK;
+  } else {
+    return SA_AIS_ERR_FAILED_OPERATION;  // any non-zero status
+  }
+}
+
+SaAisErrorT KeyValue::Lock(const std::string& owner,
+                         const unsigned int timeout) {
+  TRACE_ENTER();  // Lock: run the plugin's "lock <owner> <timeout>" command
+
+  const std::string kv_store_cmd = base::GetEnv("KEYVALUE_STORE_PLUGIN_CMD", 
"");
+  const std::string command(kv_store_cmd + " lock " + owner + " " +
+                      std::to_string(timeout));  // timeout is passed as a decimal number
+  std::string output;  // plugin stdout, not used further
+  int rc = KeyValue::Execute(command, output);
+
+  if (rc == 0) {  // Execute() reported status 0
+    return SA_AIS_OK;
+  } else {
+    return SA_AIS_ERR_FAILED_OPERATION;  // any non-zero status
+  }
+}
+
+SaAisErrorT KeyValue::Unlock() {
+  TRACE_ENTER();  // Unlock: run the plugin's "unlock" command (no --force)
+
+  const std::string kv_store_cmd = base::GetEnv("KEYVALUE_STORE_PLUGIN_CMD", 
"");
+  const std::string command(kv_store_cmd + " unlock");
+  std::string output;  // plugin stdout, not used further
+  int rc = Execute(command, output);
+
+  if (rc == 0) {  // Execute() reported status 0
+    return SA_AIS_OK;
+  } else {
+    return SA_AIS_ERR_FAILED_OPERATION;  // any non-zero status
+  }
+}
+
+bool KeyValue::IsLockedByThisNode() {
+  TRACE_ENTER();  // true only if the plugin's "lock_owner" output equals this node's name
+
+  const std::string kv_store_cmd = base::GetEnv("KEYVALUE_STORE_PLUGIN_CMD", 
"");
+  const std::string command(kv_store_cmd + " lock_owner");
+  std::string output;  // receives the owner printed by the plugin
+  int rc = KeyValue::Execute(command, output);
+
+  if (rc == 0) {  // owner could be read
+    TRACE("Lock owner is %s", output.c_str());
+    if (output.compare(base::Conf::NodeName()) == 0) {  // exact string match
+      return true;
+    }
+  }
+
+  return false;  // plugin failed or the owner is a different name
+}
+
+void threadFunction(const std::string& key,
+  const ConsensusCallback& callback,
+  const uint32_t user_defined) {
+  TRACE_ENTER();  // thread body started by KeyValue::Watch()
+
+  const std::string kv_store_cmd = base::GetEnv("KEYVALUE_STORE_PLUGIN_CMD", 
"");
+  const std::string command(kv_store_cmd + " watch " + key);  // key is appended unquoted
+  std::string value;  // receives the plugin's stdout
+  int rc = KeyValue::Execute(command, value);  // returns once the plugin command exits
+  TRACE("Read '%s'", value.c_str());
+
+  if (rc == 0) {
+    callback(value, user_defined);  // called once, on the watcher thread
+  } else {
+    LOG_ER("Failed to watch %s", key.c_str());  // callback is not invoked on failure
+  }
+}
+
+void KeyValue::Watch(const std::string& key,
+  const ConsensusCallback callback,
+  const uint32_t user_defined)
+{
+  std::thread t(threadFunction, key, callback, user_defined);  // arguments are copied into the thread
+  t.detach();  // not joined: the caller returns immediately
+  return;
+}
diff --git a/src/osaf/consensus/keyvalue.h b/src/osaf/consensus/keyvalue.h
new file mode 100644
index 000000000..347c820d0
--- /dev/null
+++ b/src/osaf/consensus/keyvalue.h
@@ -0,0 +1,57 @@
+/*      -*- OpenSAF  -*-
+ *
+ * (C) Copyright 2018 The OpenSAF Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
+ * under the GNU Lesser General Public License Version 2.1, February 1999.
+ * The complete license can be accessed from the following location:
+ * http://opensource.org/licenses/lgpl-license.php
+ * See the Copying file included with the OpenSAF distribution for full
+ * licensing terms.
+ *
+ * Author(s): Ericsson AB
+ *
+ */
+#ifndef CONSENSUS_KEYVALUE_H_
+#define CONSENSUS_KEYVALUE_H_
+
+#include <saAis.h>
+#include <string>
+#include <functional>
+#include <thread>
+
+typedef std::function<void(const std::string& new_value,
+  const uint32_t user_defined)> ConsensusCallback;  // callback type taken by KeyValue::Watch()
+
+class KeyValue {
+ public:
+  // Retrieve value of key into value; SA_AIS_OK on success
+  static SaAisErrorT Get(const std::string& key, std::string& value);
+
+  // Set key to value; SA_AIS_OK on success
+  static SaAisErrorT Set(const std::string& key, const std::string& value);
+
+  // Erase key; SA_AIS_OK on success
+  static SaAisErrorT Erase(const std::string& key);
+
+  // Obtain lock on behalf of owner, default timeout is 20 seconds
+  static SaAisErrorT Lock(const std::string& owner,
+    const unsigned int timeout = 20);
+
+  // Release lock; SA_AIS_OK on success
+  static SaAisErrorT Unlock();
+
+  // Is locked by this node? (false also when the owner cannot be read)
+  static bool IsLockedByThisNode();
+
+  // starts a detached thread to watch key and call callback if the value changes
+  static void Watch(const std::string& key, ConsensusCallback callback,
+    const uint32_t user_defined);
+
+  // internal use: run command, store its stdout in output, return its status
+  static int Execute(const std::string& command, std::string& output);
+};
+
+#endif
diff --git a/src/osaf/consensus/plugins/etcd.plugin b/src/osaf/consensus/plugins/etcd.plugin
new file mode 100644
index 000000000..0b8c77b4e
--- /dev/null
+++ b/src/osaf/consensus/plugins/etcd.plugin
@@ -0,0 +1,217 @@
+#!/usr/bin/env bash
+
+readonly keyname="opensaf_consensus_lock"
+
+# get
+#   retrieve <value> of <key> from key-value store
+# params:
+#   $1 - <key>
+# returns:
+#   0 - success, <value> is echoed to stdout
+#   non-zero - failure
+get() {
+  local -r key="$1"  # -r: read-only ('local readonly key' only declares a variable named "readonly")
+
+  value=`etcdctl get "$key" 2>&1`
+  if [ $? -eq 0 ]; then
+    echo "$value"
+    return 0
+  else
+    return 1
+  fi
+}
+
+# set
+#   set <key> to <value> in key-value store
+# params:
+#   $1 - <key>
+#   $2 - <value>
+# returns:
+#   0 - success
+#   non-zero - failure
+set() {
+  local -r key="$1"  # -r: read-only local
+  local -r value="$2"
+
+  etcdctl set "$key" "$value" 1>& /dev/null
+  if [ $? -eq 0 ]; then
+    return 0
+  else
+    return 1
+  fi
+}
+
+# erase
+#   erase <key> in key-value store
+# params:
+#   $1 - <key>
+# returns:
+#   0 - success
+#   non-zero - failure
+erase() {
+  local -r key="$1"  # -r: read-only local
+
+  etcdctl rm "$key" 1>& /dev/null
+  if [ $? -eq 0 ]; then
+    return 0
+  else
+    return 1
+  fi
+}
+
+# lock
+# params:
+#   $1 - <owner>, owner of the lock is set to this
+#   $2 - <timeout>, will automatically unlock after <timeout> seconds
+# returns:
+#   0 - success
+#   non-zero - failure
+lock() {
+  local -r owner="$1"  # -r: read-only local
+  local -r timeout="$2"
+
+  # 'mk' creates the key only if it does not exist yet; <timeout> becomes its TTL
+  etcdctl mk "$keyname" "$owner" --ttl "$timeout" >& /dev/null
+  if [ $? -ne 0 ]; then
+    current_owner=`etcdctl get "$keyname"`
+    # see if we already hold the lock (the TTL is not refreshed in that case)
+    if [ "$current_owner" == "$owner" ]; then
+      return 0
+    fi
+    return 1
+  else
+    return 0
+  fi
+}
+
+# lock_owner
+#   retrieve <owner> of lock
+# params:
+#   none
+# returns:
+#   0 - success, <owner> is echoed to stdout
+#   non-zero - failure or not locked
+lock_owner() {
+  get $keyname
+  return $?
+}
+
+# unlock
+# params:
+#   $1 - <forced>
+#      - (optional parameter)
+#      - if set 'true', will unlock even if lock is not held by node
+#      - defaults to 'false'
+# returns:
+#   0 - success
+#   non-zero - failure
+# NOTE(review): ownership is checked against $(hostname); confirm locks are taken with that name
+unlock() {
+  local -r forced="${1:-false}"  # -r: read-only local
+  local -r hostname="$(hostname)"
+
+  if [ "$forced" = false ]; then
+    # check we own the lock
+    owner=`etcdctl get "$keyname" 2>&1`
+    if [ "$owner" != "$hostname" ]; then
+      return 1
+    fi
+  fi
+
+  etcdctl rm "$keyname" >& /dev/null
+  if [ $? -eq 0 ]; then
+    return 0
+  else
+    return 1
+  fi
+}
+
+# watch
+#   watch <key> in key-value store
+# params:
+#   $1 - <key>
+# returns:
+#   0 - success, <new_value> is echoed to stdout
+#   non-zero - failure
+watch() {
+  local -r key="$1"  # -r: read-only local
+
+  value=`etcdctl watch "$key" 2>&1`
+  if [ $? -eq 0 ]; then
+    # if the key is removed, then "PrevNode.Value: <value>" is returned
+    echo "$value"
+    return 0
+  else
+    return 1
+  fi
+}
+
+
+# argument parsing: dispatch $1 to the function of the same name and exit with its status
+case "$1" in
+  get)
+    if [ "$#" -ne 2 ]; then
+      echo "Usage: $0 get <key>"
+      exit 1
+    fi
+    get "$2"
+    exit $?
+    ;;
+  set)
+    if [ "$#" -ne 3 ]; then
+      echo "Usage: $0 set <key> <value>"
+      exit 1
+    fi
+    set "$2" "$3"  # calls the set() function defined above, not the shell builtin
+    exit $?
+    ;;
+  erase)
+    if [ "$#" -ne 2 ]; then
+      echo "Usage: $0 erase <key>"
+      exit 1
+    fi
+    erase "$2"
+    exit $?
+    ;;
+  lock)
+    if [ "$#" -ne 3 ]; then
+      echo "Usage: $0 lock <owner> <timeout>"
+      exit 1
+    fi
+    lock "$2" "$3"
+    exit $?
+    ;;
+  lock_owner)
+    if [ "$#" -ne 1 ]; then
+      echo "Usage: $0 lock_owner"
+      exit 1
+    fi
+    lock_owner
+    exit $?
+    ;;
+  unlock)
+    if [ "$#" -eq 1 ]; then
+      unlock
+      exit $?
+    elif [ "$#" -eq 2 ] && [ "$2" == "--force" ]; then
+      unlock true  # <forced> = 'true', as documented on unlock()
+      exit $?
+    else
+      echo "Usage: $0 unlock [--force]"
+      exit 1
+    fi
+    ;;
+  watch)
+    if [ "$#" -ne 2 ]; then
+      echo "Usage: $0 watch <key>"
+      exit 1
+    fi
+    watch "$2"
+    exit $?
+    ;;
+  *)
+    echo $"Usage: $0 {get|set|erase|lock|unlock|lock_owner|watch}"
+    ;;
+esac
+
+exit 1
diff --git a/src/osaf/consensus/plugins/sample.plugin b/src/osaf/consensus/plugins/sample.plugin
new file mode 100644
index 000000000..424982448
--- /dev/null
+++ b/src/osaf/consensus/plugins/sample.plugin
@@ -0,0 +1,162 @@
+#!/usr/bin/env bash
+
+readonly keyname="opensaf_consensus_lock"
+
+# get
+#   retrieve <value> of <key> from key-value store
+# params:
+#   $1 - <key>
+# returns:
+#   0 - success, <value> is echoed to stdout
+#   non-zero - failure
+get() {
+  local -r key="$1"  # -r: read-only ('local readonly key' only declares a variable named "readonly")
+  ...
+}
+
+# set
+#   set <key> to <value> in key-value store
+# params:
+#   $1 - <key>
+#   $2 - <value>
+# returns:
+#   0 - success
+#   non-zero - failure
+set() {
+  local -r key="$1"
+  local -r value="$2"
+  ...
+}
+
+# erase
+#   erase <key> in key-value store
+# params:
+#   $1 - <key>
+# returns:
+#   0 - success
+#   non-zero - failure
+erase() {
+  local -r key="$1"
+  ...
+}
+
+# lock
+# params:
+#   $1 - <owner>, owner of the lock is set to this
+#   $2 - <timeout>, will automatically unlock after <timeout> seconds
+# returns:
+#   0 - success
+#   non-zero - failure
+lock() {
+  local -r owner="$1"
+  local -r timeout="$2"
+  ...
+}
+
+# lock_owner
+#   retrieve <owner> of lock
+# params:
+#   none
+# returns:
+#   0 - success, <owner> is echoed to stdout
+#   non-zero - failure or not locked
+lock_owner() {
+  ...
+}
+
+# unlock
+# params:
+#   $1 - <forced>
+#      - (optional parameter)
+#      - if set 'true', will unlock even if lock is not held by node
+#      - defaults to 'false'
+# returns:
+#   0 - success
+#   non-zero - failure
+#
+unlock() {
+  local -r forced="${1:-false}"
+  local -r hostname="$(hostname)"
+  ...
+}
+
+# watch
+#   watch <key> in key-value store
+# params:
+#   $1 - <key>
+# returns:
+#   0 - success, <new_value> is echoed to stdout
+#   non-zero - failure
+watch() {
+  local -r key="$1"
+  ...
+}
+
+# argument parsing: dispatch $1 to the function of the same name and exit with its status
+case "$1" in
+  get)
+    if [ "$#" -ne 2 ]; then
+      echo "Usage: $0 get <key>"
+      exit 1
+    fi
+    get "$2"
+    exit $?
+    ;;
+  set)
+    if [ "$#" -ne 3 ]; then
+      echo "Usage: $0 set <key> <value>"
+      exit 1
+    fi
+    set "$2" "$3"  # calls the set() function defined above, not the shell builtin
+    exit $?
+    ;;
+  erase)
+    if [ "$#" -ne 2 ]; then
+      echo "Usage: $0 erase <key>"
+      exit 1
+    fi
+    erase "$2"
+    exit $?
+    ;;
+  lock)
+    if [ "$#" -ne 3 ]; then
+      echo "Usage: $0 lock <owner> <timeout>"
+      exit 1
+    fi
+    lock "$2" "$3"
+    exit $?
+    ;;
+  lock_owner)
+    if [ "$#" -ne 1 ]; then
+      echo "Usage: $0 lock_owner"
+      exit 1
+    fi
+    lock_owner
+    exit $?
+    ;;
+  unlock)
+    if [ "$#" -eq 1 ]; then
+      unlock
+      exit $?
+    elif [ "$#" -eq 2 ] && [ "$2" == "--force" ]; then
+      unlock true  # <forced> = 'true', as documented on unlock()
+      exit $?
+    else
+      echo "Usage: $0 unlock [--force]"
+      exit 1
+    fi
+    ;;
+  watch)
+    if [ "$#" -ne 2 ]; then
+      echo "Usage: $0 watch <key>"
+      exit 1
+    fi
+    watch "$2"
+    exit $?
+    ;;
+  *)
+    echo $"Usage: $0 {get|set|erase|lock|unlock|lock_owner|watch}"
+    ;;
+esac
+
+exit 1
diff --git a/src/osaf/consensus/service.cc b/src/osaf/consensus/service.cc
new file mode 100644
index 000000000..fd525b6d3
--- /dev/null
+++ b/src/osaf/consensus/service.cc
@@ -0,0 +1,231 @@
+/*      -*- OpenSAF  -*-
+ *
+ * (C) Copyright 2018 The OpenSAF Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
+ * under the GNU Lesser General Public License Version 2.1, February 1999.
+ * The complete license can be accessed from the following location:
+ * http://opensource.org/licenses/lgpl-license.php
+ * See the Copying file included with the OpenSAF distribution for full
+ * licensing terms.
+ *
+ * Author(s): Ericsson AB
+ *
+ */
+#include "service.h"
+#include "base/logtrace.h"
+#include "base/conf.h"
+#include "base/getenv.h"
+#include "base/ncssysf_def.h"
+#include <unistd.h>
+#include <climits>
+
+// Acquire the lock (retrying until it succeeds), fence whichever other node
+// the key-value store currently lists as active, then write this node's name
+// as the active controller. The lock is not released here.
+// Returns SA_AIS_OK when the consensus service is disabled, otherwise the
+// result of the final KeyValue::Set().
+SaAisErrorT Consensus::BeginActivePromotion() {
+  TRACE_ENTER();
+
+  if (!use_consensus_) {
+    return SA_AIS_OK;
+  }
+
+  // retry, sleeping between attempts, until the lock is ours
+  while (KeyValue::Lock(base::Conf::NodeName(), 30) != SA_AIS_OK) {
+    TRACE("Waiting for lock");
+    usleep(sleep_internal);
+  }
+
+  LOG_IN("Node %s obtained lock", base::Conf::NodeName().c_str());
+
+  // who does the store list as active right now?
+  std::string recorded_active;
+  const SaAisErrorT get_rc = KeyValue::Get(keyname, recorded_active);
+  if (get_rc == SA_AIS_OK) {
+    LOG_NO("Current active controller is %s", recorded_active.c_str());
+    const bool is_other_node = recorded_active != base::Conf::NodeName();
+    if (is_other_node) {
+      FenceNode(recorded_active);
+    }
+  }
+
+  LOG_NO("Setting active controller to %s", base::Conf::NodeName().c_str());
+  return KeyValue::Set(keyname, base::Conf::NodeName());
+}
+
+// Release the lock if this node still holds it, retrying until the unlock
+// succeeds. Always returns SA_AIS_OK; a lock that is no longer held by this
+// node is only logged as an error.
+SaAisErrorT Consensus::EndActivePromotion() {
+  TRACE_ENTER();
+  if (!use_consensus_) {
+    return SA_AIS_OK;
+  }
+
+  if (!KeyValue::IsLockedByThisNode()) {
+    LOG_ER("Lock unexpectedly released");
+    return SA_AIS_OK;
+  }
+
+  // retry, sleeping between attempts, until the unlock goes through
+  while (KeyValue::Unlock() != SA_AIS_OK) {
+    LOG_IN("Trying to unlock");
+    usleep(sleep_internal);
+  }
+  LOG_IN("Released lock");
+
+  return SA_AIS_OK;
+}
+
+// Take the lock, erase the active-controller key and release the lock again.
+//   node - name of the node being demoted; empty (the default) when called
+//          through DemoteCurrentActive()
+// Returns SA_AIS_OK in all cases; a failed read or erase is only logged.
+// Both the lock and the unlock are retried until they succeed.
+SaAisErrorT Consensus::Demote(const std::string node = "")
+{
+  TRACE_ENTER();
+  SaAisErrorT rc;
+  if (use_consensus_ == false) {
+    return SA_AIS_OK;
+  }
+
+  rc = KeyValue::Lock(base::Conf::NodeName(), 30);
+  while (rc != SA_AIS_OK) {
+    LOG_IN("Waiting for lock");
+    usleep(sleep_internal);
+    rc = KeyValue::Lock(base::Conf::NodeName(), 30);
+  }
+
+  // check current active node
+  std::string current;
+  rc = KeyValue::Get(keyname, current);
+  if (rc == SA_AIS_OK) {
+    LOG_NO("Demoting %s as active controller", current.c_str());
+    // NOTE(review): this fences <node> when it differs from the recorded
+    // active controller, and the key is erased afterwards either way --
+    // confirm that this is the intended condition.
+    if (node.empty() == false && node != current) {
+      FenceNode(node);
+    }
+    // if Demote() was called as DemoteCurrentActive() from fmd,
+    // then fmd will fence the node itself
+
+    rc = KeyValue::Erase(keyname);
+    if (rc == SA_AIS_OK) {
+      // only report the demotion once the key has really been cleared
+      LOG_NO("Node %s demoted", current.c_str());
+    } else {
+      LOG_ER("Failed to clear active controller in KeyValue");
+    }
+  }
+
+  rc = KeyValue::Unlock();
+  while (rc != SA_AIS_OK) {
+    LOG_IN("Trying to unlock");
+    usleep(sleep_internal);
+    rc = KeyValue::Unlock();
+  }
+  LOG_IN("Released lock");
+
+  return SA_AIS_OK;
+}
+
+// Demote without naming a node: Demote() is called with its default (empty)
+// node argument and its result is passed straight back.
+SaAisErrorT Consensus::DemoteCurrentActive() {
+  TRACE_ENTER();
+  const SaAisErrorT result = Demote();
+  return result;
+}
+
+// Demote with this node's own name (base::Conf::NodeName()) as the node
+// argument; returns whatever Demote() returns.
+SaAisErrorT Consensus::DemoteThisNode() {
+  TRACE_ENTER();
+  const SaAisErrorT result = Demote(base::Conf::NodeName());
+  return result;
+}
+
+// True when the constructor switched the consensus service on.
+bool Consensus::IsEnabled() const {
+  const bool enabled = use_consensus_;
+  return enabled;
+}
+
+// Read the active-controller key while holding the lock.
+// Returns the stored node name, or an empty string when the consensus
+// service is disabled or the key could not be read. Locking and unlocking
+// are each retried until they succeed.
+std::string Consensus::CurrentActive() const {
+  TRACE_ENTER();
+  if (!use_consensus_) {
+    return "";
+  }
+
+  while (KeyValue::Lock(base::Conf::NodeName(), 30) != SA_AIS_OK) {
+    LOG_IN("Waiting for lock");
+    usleep(sleep_internal);
+  }
+
+  // look up the recorded active node
+  std::string active;
+  const bool found = KeyValue::Get(keyname, active) == SA_AIS_OK;
+  if (found) {
+    LOG_IN("Current active controller is %s", active.c_str());
+  }
+
+  while (KeyValue::Unlock() != SA_AIS_OK) {
+    LOG_IN("Trying to unlock");
+    usleep(sleep_internal);
+  }
+
+  LOG_IN("Released lock");
+
+  if (!found) {
+    return "";
+  }
+  return active;
+}
+
+// Reads the configuration from the environment:
+//   SPLIT_BRAIN_PREVENTION    - must be 1, and
+//   KEYVALUE_STORE_PLUGIN_CMD - must be non-empty, for consensus to be used
+//   FMS_USE_REMOTE_FENCING    - 1 turns remote fencing on
+// and initialises the node name used by the other member functions.
+Consensus::Consensus() {
+  TRACE_ENTER();
+
+  uint32_t sbp_flag = base::GetEnv("SPLIT_BRAIN_PREVENTION", 0);
+  std::string plugin_cmd = base::GetEnv("KEYVALUE_STORE_PLUGIN_CMD", "");
+  uint32_t fencing_flag = base::GetEnv("FMS_USE_REMOTE_FENCING" , 0);
+
+  use_consensus_ = (sbp_flag == 1) && !plugin_cmd.empty();
+
+  if (fencing_flag == 1) {
+    use_remote_fencing_ = true;
+  }
+
+  // needed for base::Conf::NodeName() later
+  base::Conf::InitNodeName();
+
+  if (!use_consensus_) {
+    LOG_NO("Split brain prevention is disabled");
+  } else {
+    LOG_NO("Split brain prevention is enabled");
+  }
+}
+
+// Nothing to release; the compiler-generated destructor body is sufficient.
+Consensus::~Consensus() = default;
+
+// Ask for <node> to be rebooted through opensaf_reboot() when remote fencing
+// is enabled. Returns true if the request was made, false (after logging a
+// warning) when fencing is disabled.
+bool Consensus::FenceNode(const std::string& node)
+{
+  if (!use_remote_fencing_) {
+    LOG_WA("Fencing is not enabled. Node %s will not be fenced", node.c_str());
+    return false;
+  }
+
+  LOG_WA("Fencing remote node %s", node.c_str());
+  // @todo currently passing UINT_MAX as node ID, since
+  // we can't always obtain a valid node ID?
+  opensaf_reboot(UINT_MAX, node.c_str(), "Fencing remote node");
+  return true;
+}
+
+// Register <callback> (with <user_defined> passed along) as a watch on the
+// active-controller key. Does nothing when the consensus service is disabled.
+void Consensus::MonitorActive(ConsensusCallback callback,
+  const uint32_t user_defined)
+{
+  TRACE_ENTER();
+  if (use_consensus_) {
+    KeyValue::Watch(keyname, callback, user_defined);
+  }
+}
diff --git a/src/osaf/consensus/service.h b/src/osaf/consensus/service.h
new file mode 100644
index 000000000..03f7f26f9
--- /dev/null
+++ b/src/osaf/consensus/service.h
@@ -0,0 +1,66 @@
+/*      -*- OpenSAF  -*-
+ *
+ * (C) Copyright 2018 The OpenSAF Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
+ * under the GNU Lesser General Public License Version 2.1, February 1999.
+ * The complete license can be accessed from the following location:
+ * http://opensource.org/licenses/lgpl-license.php
+ * See the Copying file included with the OpenSAF distribution for full
+ * licensing terms.
+ *
+ * Author(s): Ericsson AB
+ *
+ */
+#ifndef CONSENSUS_SERVICE_H_
+#define CONSENSUS_SERVICE_H_
+
+#include "keyvalue.h"
+#include "saAis.h"
+#include <string>
+
+// Wrapper around the KeyValue store that keeps track of, and arbitrates,
+// which node is the active controller.
+class Consensus {
+public:
+  explicit Consensus();
+  virtual ~Consensus();
+
+  // not copyable
+  Consensus(const Consensus&) = delete;
+  Consensus& operator=(const Consensus&) = delete;
+
+  // Whether the consensus service is enabled
+  bool IsEnabled() const;
+
+  // Active controller according to the consensus service
+  std::string CurrentActive() const;
+
+  // Take the lock and record this node as the active controller
+  SaAisErrorT BeginActivePromotion();
+
+  // Give the lock back
+  SaAisErrorT EndActivePromotion();
+
+  // Take the lock, clear the current active controller, give the lock back
+  SaAisErrorT DemoteCurrentActive();
+
+  // Take the lock, clear this node as active controller, give the lock back
+  SaAisErrorT DemoteThisNode();
+
+  // When the active controller known by the consensus service changes,
+  // <callback> is run from a new thread and is handed <user_defined>
+  void MonitorActive(ConsensusCallback callback, const uint32_t user_defined);
+
+private:
+  SaAisErrorT Demote(const std::string node);
+  bool FenceNode(const std::string& node);
+
+  bool use_consensus_ = false;
+  bool use_remote_fencing_ = false;
+  // key under which the active controller's name is stored
+  const std::string keyname = "opensaf_active_controller";
+  // pause between lock/unlock retries
+  static constexpr int sleep_internal = 100000; // in us
+};
+
+#endif
diff --git a/src/rde/Makefile.am b/src/rde/Makefile.am
index c967f9fc4..182f347ab 100644
--- a/src/rde/Makefile.am
+++ b/src/rde/Makefile.am
@@ -58,7 +58,8 @@ bin_osafrded_SOURCES = \
 
 bin_osafrded_LDADD = \
        lib/libSaAmf.la \
-       lib/libopensaf_core.la
+       lib/libopensaf_core.la \
+       lib/libosaf_common.la
 
 bin_rdegetrole_CPPFLAGS = \
        $(AM_CPPFLAGS)
diff --git a/src/rde/rded/osaf-rded.in b/src/rde/rded/osaf-rded.in
index 1c1786c8d..1697936a7 100644
--- a/src/rde/rded/osaf-rded.in
+++ b/src/rde/rded/osaf-rded.in
@@ -28,6 +28,10 @@ else
        . $pkgsysconfdir/rde.conf
 fi     
 
+# Source fmd.conf as well, when it exists.
+# NOTE(review): presumably so the split-brain / fencing settings kept there
+# are also visible to rded -- confirm against fmd.conf.
+if [ -f "$pkgsysconfdir/fmd.conf" ]; then
+  . "$pkgsysconfdir/fmd.conf"
+fi
+
 binary=$pkglibdir/$osafprog
 pidfile=$pkgpiddir/$osafprog.pid
 tracefile=$pkglogdir/$osafprog.log
diff --git a/src/rde/rded/rde_cb.h b/src/rde/rded/rde_cb.h
index d2a3d46b2..83f35c691 100644
--- a/src/rde/rded/rde_cb.h
+++ b/src/rde/rded/rde_cb.h
@@ -45,7 +45,8 @@ enum RDE_MSG_TYPE {
   RDE_MSG_PEER_UP = 1,
   RDE_MSG_PEER_DOWN = 2,
   RDE_MSG_PEER_INFO_REQ = 3,
-  RDE_MSG_PEER_INFO_RESP = 4
+  RDE_MSG_PEER_INFO_RESP = 4,
+  RDE_MSG_NEW_ACTIVE_CALLBACK = 5
 };
 
 struct rde_peer_info {
diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc
index 0298bf3ff..23c03f552 100644
--- a/src/rde/rded/rde_main.cc
+++ b/src/rde/rded/rde_main.cc
@@ -28,6 +28,7 @@
 #include <cerrno>
 #include <cstdlib>
 #include <cstring>
+#include "osaf/consensus/service.h"
 #include "base/daemon.h"
 #include "base/logtrace.h"
 #include "base/osaf_poll.h"
@@ -37,6 +38,7 @@
 #include <saAmf.h>
 #include "rde/rded/rde_cb.h"
 #include "rde/rded/role.h"
+#include "base/conf.h"
 
 #define RDA_MAX_CLIENTS 32
 
@@ -92,10 +94,6 @@ static void handle_mbx_event() {
   TRACE_ENTER();
 
   msg = reinterpret_cast<rde_msg *>(ncs_ipc_non_blk_recv(&rde_cb->mbx));
-  TRACE("Received %s from node 0x%x with state %s. My state is %s",
-        rde_msg_name[msg->type], msg->fr_node_id,
-        Role::to_string(msg->info.peer_info.ha_role),
-        Role::to_string(role->role()));
 
   switch (msg->type) {
     case RDE_MSG_PEER_INFO_REQ:
@@ -118,6 +116,29 @@ static void handle_mbx_event() {
     case RDE_MSG_PEER_DOWN:
       LOG_NO("Peer down on node 0x%x", msg->fr_node_id);
       break;
+   case RDE_MSG_NEW_ACTIVE_CALLBACK:
+      {
+        const std::string my_node = base::Conf::NodeName();
+
+        // get current active controller
+        Consensus consensus_service;
+        std::string active_controller = consensus_service.CurrentActive();
+
+        LOG_NO("New active controller notification from consensus service");
+
+        if (role->role() == PCS_RDA_ACTIVE) {
+          if (my_node.compare(active_controller) != 0) {
+            // we are meant to be active, but consensus service doesn't think 
so
+            LOG_ER("Role does not match consensus service");
+            LOG_ER("Probable split brain. Rebooting this node");
+            opensaf_reboot(0, nullptr, "Split-brain detected by consensus 
service");
+          } else {
+            // get more callbacks
+            consensus_service.MonitorActive(Role::MonitorCallback, 
rde_cb->mbx);
+          }
+        }
+      }
+      break;
     default:
       LOG_ER("%s: discarding unknown message type %u", __FUNCTION__, 
msg->type);
       break;
@@ -205,11 +226,12 @@ int main(int argc, char *argv[]) {
   NCS_SEL_OBJ mbx_sel_obj;
   RDE_RDA_CB *rde_rda_cb = &rde_cb->rde_rda_cb;
   int term_fd;
-
   opensaf_reboot_prepare();
 
   daemonize(argc, argv);
 
+  base::Conf::InitNodeName();
+
   if (initialize_rde() != NCSCC_RC_SUCCESS) goto init_failed;
 
   mbx_sel_obj = ncs_ipc_get_sel_obj(&rde_cb->mbx);
diff --git a/src/rde/rded/role.cc b/src/rde/rded/role.cc
index f7511f0d8..28e034c44 100644
--- a/src/rde/rded/role.cc
+++ b/src/rde/rded/role.cc
@@ -21,6 +21,7 @@
 #include <cinttypes>
 
 #include "rde/rded/role.h"
+#include "rde/rded/rde_cb.h"
 #include <cstdint>
 #include "base/logtrace.h"
 #include "base/getenv.h"
@@ -28,6 +29,7 @@
 #include "base/time.h"
 #include "base/ncs_main_papi.h"
 #include "rde/rded/rde_cb.h"
+#include "osaf/consensus/service.h"
 
 const char* const Role::role_names_[] = {"Undefined", "ACTIVE",    "STANDBY",
                                          "QUIESCED",  "QUIESCING", "Invalid"};
@@ -42,6 +44,23 @@ const char* Role::to_string(PCS_RDA_ROLE role) {
              : role_names_[0];
 }
 
+// Callback passed to Consensus::MonitorActive(). Posts an
+// RDE_MSG_NEW_ACTIVE_CALLBACK message to <mbx> after a short delay.
+//   new_value - not used here
+//   mbx       - mailbox the message is sent to
+// Asserts if the message cannot be allocated or sent.
+void Role::MonitorCallback(const std::string& new_value, SYSF_MBX mbx)
+{
+  TRACE_ENTER();
+
+  // calloc so that every field this callback does not set is zeroed rather
+  // than left as heap garbage; a failed allocation is caught before use
+  rde_msg* msg = static_cast<rde_msg *>(calloc(1, sizeof(rde_msg)));
+  osafassert(msg != nullptr);
+  msg->type = RDE_MSG_NEW_ACTIVE_CALLBACK;
+
+  // wait a few seconds before sending msg to allow role change callbacks
+  // (from AMF) to be processed first
+  sleep(3);
+
+  uint32_t status;
+  status = m_NCS_IPC_SEND(&mbx,
+    msg, NCS_IPC_PRIORITY_NORMAL);
+  osafassert(status == NCSCC_RC_SUCCESS);
+}
+
 Role::Role(NODE_ID own_node_id)
     : known_nodes_{},
       role_{PCS_RDA_QUIESCED},
@@ -61,10 +80,27 @@ timespec* Role::Poll(timespec* ts) {
       *ts = election_end_time_ - now;
       timeout = ts;
     } else {
+      SaAisErrorT rc;
+      Consensus consensus_service;
+      rc = consensus_service.BeginActivePromotion();
+      if (rc != SA_AIS_OK) {
+        LOG_ER("Unable to set active controller in consensus service");
+        LOG_ER("Split brain is possible");
+      }
+
       ExecutePreActiveScript();
       LOG_NO("Switched to ACTIVE from %s", to_string(role()));
       role_ = PCS_RDA_ACTIVE;
       rde_rda_send_role(role_);
+
+      rc = consensus_service.EndActivePromotion();
+      if (rc != SA_AIS_OK) {
+        LOG_ER("Unable to remove lock in consensus service");
+      }
+
+      // get callback if active controller is changed in consensus service
+      RDE_CONTROL_BLOCK* cb = rde_get_control_block();
+      consensus_service.MonitorActive(MonitorCallback, cb->mbx);
     }
   }
   return timeout;
@@ -91,7 +127,14 @@ uint32_t Role::SetRole(PCS_RDA_ROLE new_role) {
   }
   if (new_role != old_role) {
     LOG_NO("RDE role set to %s", to_string(new_role));
-    if (new_role == PCS_RDA_ACTIVE) ExecutePreActiveScript();
+    if (new_role == PCS_RDA_ACTIVE) {
+      ExecutePreActiveScript();
+
+      // get callback if active controller is changed in consensus service
+      Consensus consensus_service;
+      RDE_CONTROL_BLOCK* cb = rde_get_control_block();
+      consensus_service.MonitorActive(MonitorCallback, cb->mbx);
+   }
     role_ = new_role;
     if (new_role == PCS_RDA_UNDEFINED) {
       known_nodes_.clear();
diff --git a/src/rde/rded/role.h b/src/rde/rded/role.h
index 20219b535..6d3c77d2f 100644
--- a/src/rde/rded/role.h
+++ b/src/rde/rded/role.h
@@ -24,6 +24,7 @@
 #include "base/macros.h"
 #include "mds/mds_papi.h"
 #include "rde/agent/rda_papi.h"
+#include <string>
 
 namespace base {
 class Process;
@@ -38,6 +39,7 @@ class Role {
   uint32_t SetRole(PCS_RDA_ROLE new_role);
   PCS_RDA_ROLE role() const;
   static const char* to_string(PCS_RDA_ROLE role);
+  static void MonitorCallback(const std::string& new_value, SYSF_MBX mbx);
 
  private:
   static const uint64_t kDefaultDiscoverPeerTimeout = 2000;
-- 
2.14.1



------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to