Ack with comments, marked AndersW> below.
regards,
Anders Widell
On 01/23/2018 09:06 AM, Gary Lee wrote:
---
src/fm/Makefile.am | 1 +
src/fm/fmd/fm_main.cc | 37 +++++++++++++++++++++++++++++++------
src/fm/fmd/fm_rda.cc | 13 +++++++++++++
src/fm/fmd/fmd.conf | 6 ++++++
4 files changed, 51 insertions(+), 6 deletions(-)
diff --git a/src/fm/Makefile.am b/src/fm/Makefile.am
index d48a9146c..0f254b94f 100644
--- a/src/fm/Makefile.am
+++ b/src/fm/Makefile.am
@@ -49,4 +49,5 @@ bin_osaffmd_SOURCES = \
bin_osaffmd_LDADD = \
lib/libSaAmf.la \
lib/libSaClm.la \
+ lib/libosaf_common.la \
lib/libopensaf_core.la
diff --git a/src/fm/fmd/fm_main.cc b/src/fm/fmd/fm_main.cc
index db8395ee7..74517b3b5 100644
--- a/src/fm/fmd/fm_main.cc
+++ b/src/fm/fmd/fm_main.cc
@@ -28,7 +28,8 @@ This file contains the main() routine for FM.
#include <stdbool.h>
#include "base/daemon.h"
#include "base/logtrace.h"
-
+#include "base/osaf_extended_name.h"
+#include "osaf/consensus/service.h"
#include "nid/agent/nid_api.h"
#include "fm.h"
#include "base/osaf_time.h"
@@ -553,6 +554,8 @@ static void fm_mbx_msg_handler(FM_CB *fm_cb, FM_EVT
*fm_mbx_evt)
TRACE_ENTER();
switch (fm_mbx_evt->evt_code) {
case FM_EVT_NODE_DOWN:
+ {
+ Consensus consensus_service;
LOG_NO("Current role: %s", role_string[fm_cb->role]);
if ((fm_mbx_evt->node_id == fm_cb->peer_node_id)) {
/* Check whether node(AMF) initialization is done */
@@ -593,15 +596,27 @@ static void fm_mbx_msg_handler(FM_CB *fm_cb, FM_EVT
*fm_mbx_evt)
* trigerred quicker than the node_down event
* has been received.
*/
+ if (fm_cb->role == PCS_RDA_STANDBY) {
+ const std::string current_active =
consensus_service.CurrentActive();
+ if (current_active.compare(
+
osaf_extended_name_borrow(&fm_cb->peer_node_name)) == 0) {
+ // update consensus service,
before fencing old active controller
+
consensus_service.DemoteCurrentActive();
+ }
+ }
+
if (fm_cb->use_remote_fencing) {
if (fm_cb->peer_node_terminated ==
false) {
+ // if peer_sc_up is true then
+ // the node has come up already
+ if (fm_cb->peer_sc_up == false &&
fm_cb->immnd_down == true) {
opensaf_reboot(
- fm_cb->peer_node_id,
- (char *)fm_cb
- ->peer_clm_node_name
- .value,
- "Received Node Down for peer
controller");
+ fm_cb->peer_node_id,
+ (char *)fm_cb
+
->peer_clm_node_name.value,
+ "Received Node Down for peer
controller");
+ }
} else {
LOG_NO(
"Peer node %s is terminated,
fencing will not be performed",
@@ -624,6 +639,7 @@ static void fm_mbx_msg_handler(FM_CB *fm_cb, FM_EVT
*fm_mbx_evt)
}
}
}
+ }
AndersW> There are two closing curly brackets in a row with the same
indentation depth. Is the indentation correct here?
break;
case FM_EVT_PEER_UP:
@@ -659,6 +675,15 @@ static void fm_mbx_msg_handler(FM_CB *fm_cb, FM_EVT
*fm_mbx_evt)
0, NULL,
"Failover occurred, but this node is not yet
ready");
}
+
+ Consensus consensus_service;
+ const std::string current_active =
consensus_service.CurrentActive();
+ if (current_active.compare(
+
osaf_extended_name_borrow(&fm_cb->peer_node_name)) == 0) {
+ // update consensus service, before fencing old
active controller
+ consensus_service.DemoteCurrentActive();
+ }
+
/* Now. Try resetting other blade */
fm_cb->role = PCS_RDA_ACTIVE;
diff --git a/src/fm/fmd/fm_rda.cc b/src/fm/fmd/fm_rda.cc
index 5c1b33e2f..79dc73361 100644
--- a/src/fm/fmd/fm_rda.cc
+++ b/src/fm/fmd/fm_rda.cc
@@ -19,7 +19,10 @@
#include <string.h>
#include <syslog.h>
#include "rde/agent/rda_papi.h"
+#include "osaf/consensus/service.h"
#include "base/logtrace.h"
+#include "base/ncssysf_def.h"
+
extern void rda_cb(uint32_t cb_hdl, PCS_RDA_CB_INFO *cb_info,
PCSRDA_RETURN_CODE error_code);
/****************************************************************************
@@ -83,6 +86,16 @@ uint32_t fm_rda_set_role(FM_CB *fm_cb, PCS_RDA_ROLE role)
rda_req.req_type = PCS_RDA_SET_ROLE;
rda_req.info.io_role = role;
+ osafassert(role == PCS_RDA_ACTIVE);
+
+ Consensus consensus_service;
+ rc = consensus_service.PromoteThisNode();
+ if (rc != SA_AIS_OK) {
+ LOG_ER("Unable to set active controller in consensus service");
+ opensaf_reboot(0, nullptr,
+ "Unable to set active controller in consensus service");
+ }
+
rc = pcs_rda_request(&rda_req);
if (rc != PCSRDA_RC_SUCCESS) {
syslog(
diff --git a/src/fm/fmd/fmd.conf b/src/fm/fmd/fmd.conf
index 4924abfd9..d7e9e2a1b 100644
--- a/src/fm/fmd/fmd.conf
+++ b/src/fm/fmd/fmd.conf
@@ -20,6 +20,12 @@ export FMS_NODE_ISOLATION_TIMEOUT=0
# To enable remote fencing change to 1
export FMS_USE_REMOTE_FENCING=0
+# To enable split brain prevention, change to 1
+export FMS_SPLIT_BRAIN_PREVENTION=0
+
+# Full path to key-value store plugin
+export FMS_KEYVALUE_STORE_PLUGIN_CMD=
+
AndersW> Could you comment out FMS_USE_REMOTE_FENCING,
FMS_SPLIT_BRAIN_PREVENTION and FMS_KEYVALUE_STORE_PLUGIN_CMD? This way, we
will use the default settings in the code unless the user has actively
configured something else. If a default setting in the code is changed
in a later version of OpenSAF, we will then use the new default setting.
# FM will supervise transitions to the ACTIVE role when this variable is set
to
# a non-zero value. The value is the time in the unit of 10 ms to wait for a
# role change to ACTIVE to take effect. If AMF has not give FM an active
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel