osaf/services/saf/amf/amfd/include/node.h | 3 +
osaf/services/saf/amf/amfd/include/sg.h | 5 +-
osaf/services/saf/amf/amfd/ndfsm.cc | 3 +-
osaf/services/saf/amf/amfd/nodegroup.cc | 83 +++++++++++++++++++++++++++++++
osaf/services/saf/amf/amfd/sg.cc | 52 +++++++++++++++++-
osaf/services/saf/amf/amfd/sgproc.cc | 2 +-
osaf/services/saf/amf/amfd/siass.cc | 4 +-
7 files changed, 143 insertions(+), 9 deletions(-)
The SG becomes unstable because some variables used in nodegroup operation are
not
restored after headless if this admin operation on nodegroup was interrupted
just
before cluster goes into headless stage.
In order to restore nodegroup operation, AMF needs to know exactly whether
nodegroup
operation was running during headless up on @susi assignment. If susi is in
QUIESCED,
QUIESCING or being removed while its related entities su, si, sg are not in
LOCKED
and SHUTTING_DOWN, that means either node or nodegroup MUST be in LOCKED or
SHUTTING
DOWN. In case of SHUTTING_DOWN saAmfNGAdminState, that's enough to know a
nodegroup
operation was running. However, if saAmfNGAdminState is in LOCKED, this case is
an
ambiguity of locking a node. The reason of differentiation of locking a node or
node
group is because 2N SG uses both AVD_SG_FSM_SG_ADMIN and AVD_SG_FSM_SU_OPER for
node
group operation while AVD_SG_FSM_SU_OPER is only used for node operation. When
2N SG
uses AVD_SG_FSM_SG_ADMIN for nodegroup, the saAmfSGAdminState is borrowed (but
not
updated to IMM) to run the admin operation sequence. Therefore, after headless
if
AVD_SG_FSM_SG_ADMIN was being used for nodegroup then saAmfSGAdminState also
needs to
be set.
diff --git a/osaf/services/saf/amf/amfd/include/node.h
b/osaf/services/saf/amf/amfd/include/node.h
--- a/osaf/services/saf/amf/amfd/include/node.h
+++ b/osaf/services/saf/amf/amfd/include/node.h
@@ -45,6 +45,7 @@
#include <set>
#include <vector>
#include <string>
+#include <susi.h>
class AVD_SU;
struct avd_cluster_tag;
@@ -232,4 +233,6 @@ extern void ng_complete_admin_op(AVD_AMF
extern void avd_ng_admin_state_set(AVD_AMF_NG* ng, SaAmfAdminStateT state);
extern bool are_all_ngs_in_unlocked_state(const AVD_AVND *node);
extern bool any_ng_in_locked_in_state(const AVD_AVND *node);
+void avd_ng_restore_headless_states(AVD_CL_CB *cb, struct avd_su_si_rel_tag*
susi);
+
#endif
diff --git a/osaf/services/saf/amf/amfd/include/sg.h
b/osaf/services/saf/amf/amfd/include/sg.h
--- a/osaf/services/saf/amf/amfd/include/sg.h
+++ b/osaf/services/saf/amf/amfd/include/sg.h
@@ -46,6 +46,7 @@
class AVD_SU;
class AVD_SI;
class AVD_APP;
+class AVD_AMF_NG;
/* The valid SG FSM states. */
typedef enum {
@@ -576,8 +577,8 @@ private:
#define m_AVD_SET_SG_ADMIN_SI(cb,si) (si)->sg_of_si->set_admin_si((si))
#define m_AVD_CLEAR_SG_ADMIN_SI(cb,sg) (sg)->clear_admin_si()
#define m_AVD_CHK_OPLIST(i_su,flag) (flag) =
(i_su)->sg_of_su->in_su_oper_list(i_su)
-
-void avd_sg_read_headless_cached_rta(AVD_CL_CB *cb);
+void avd_sg_read_headless_fsm_state_cached_rta(AVD_CL_CB *cb);
+void avd_sg_read_headless_su_oper_list_cached_rta(AVD_CL_CB *cb);
extern void avd_sg_delete(AVD_SG *sg);
extern void avd_sg_db_add(AVD_SG *sg);
diff --git a/osaf/services/saf/amf/amfd/ndfsm.cc
b/osaf/services/saf/amf/amfd/ndfsm.cc
--- a/osaf/services/saf/amf/amfd/ndfsm.cc
+++ b/osaf/services/saf/amf/amfd/ndfsm.cc
@@ -129,8 +129,9 @@ void avd_process_state_info_queue(AVD_CL
// Reading sg must be after reading susi
if (found_state_info == true) {
avd_compcsi_cleanup_imm_object(cb);
+ avd_sg_read_headless_fsm_state_cached_rta(cb);
avd_susi_read_headless_cached_rta(cb);
- avd_sg_read_headless_cached_rta(cb);
+ avd_sg_read_headless_su_oper_list_cached_rta(cb);
avd_su_read_headless_cached_rta(cb);
}
done:
diff --git a/osaf/services/saf/amf/amfd/nodegroup.cc
b/osaf/services/saf/amf/amfd/nodegroup.cc
--- a/osaf/services/saf/amf/amfd/nodegroup.cc
+++ b/osaf/services/saf/amf/amfd/nodegroup.cc
@@ -1356,3 +1356,86 @@ void avd_ng_constructor(void)
ng_ccb_apply_cb);
}
+/*
+ * @brief: Restore nodegroup related variables which are used in nodegroup
+ * operations before headless. The main variables are @admin_ng,
+ * @ng_using_saAmfSGAdminState, and the saAmfSGAdminState which
+ * is borrowed by nodegroup operation on 2N SG
+ * @param: control block, and susi is being read after headless
+ * @return: void
+ */
+
+void avd_ng_restore_headless_states(AVD_CL_CB *cb, struct avd_su_si_rel_tag*
susi) {
+
+ TRACE_ENTER();
+ osafassert(cb->scs_absence_max_duration > 0);
+ // In order to restore nodegroup operation, AMF needs to know exactly
+ // whether nodegroup operation was running during headless up on @susi
+ // If susi is in QUIESCED/QUIESCING or being removed while its related
+ // entities su, si, sg are not in LOCKED and SHUTTING_DOWN, that means
+ // either node or nodegroup MUST be in LOCKED or SHUTTING_DOWN.
+ // In case of SHUTTING_DOWN saAmfNGAdminState, that's enough to know
+ // a nodegroup operation was running. However, if saAmfNGAdminState is
+ // in LOCKED, this case is an ambiguity of locking a node.
+ // The reason of differentiation of locking a node or nodegroup is
because
+ // 2N SG uses both AVD_SG_FSM_SG_ADMIN and AVD_SG_FSM_SU_OPER for
nodegroup
+ // operation while AVD_SG_FSM_SU_OPER is only used for node operation.
+ // When 2N SG uses AVD_SG_FSM_SG_ADMIN for nodegroup, the
saAmfSGAdminState
+ // is borrowed (but not updated to IMM) to run the admin operation
sequence.
+ // Therefore, after headless if AVD_SG_FSM_SG_ADMIN was being used for
nodegroup
+ // then saAmfSGAdminState also needs to be set.
+
+ // Make sure su, si, sg are not in LOCKED, SHUTTING_DOWN
+ if (susi->su->saAmfSUAdminState != SA_AMF_ADMIN_LOCKED &&
+ susi->su->saAmfSUAdminState !=
SA_AMF_ADMIN_SHUTTING_DOWN &&
+ susi->si->saAmfSIAdminState != SA_AMF_ADMIN_LOCKED &&
+ susi->si->saAmfSIAdminState !=
SA_AMF_ADMIN_SHUTTING_DOWN &&
+ susi->su->sg_of_su->saAmfSGAdminState !=
SA_AMF_ADMIN_LOCKED &&
+ susi->su->sg_of_su->saAmfSGAdminState !=
SA_AMF_ADMIN_SHUTTING_DOWN) {
+
+ // susi are in state transition
+ if (susi->state == SA_AMF_HA_QUIESCED || susi->state ==
SA_AMF_HA_QUIESCING ||
+ susi->fsm == AVD_SU_SI_STATE_UNASGN) {
+
+ for (std::map<std::string, AVD_AMF_NG*>::const_iterator it =
nodegroup_db->begin();
+ it != nodegroup_db->end(); it++) {
+ AVD_AMF_NG *ng = it->second;
+ for (std::set<std::string>::const_iterator iter =
ng->saAmfNGNodeList.begin();
+ iter != ng->saAmfNGNodeList.end();
++iter) {
+ AVD_AVND *node = avd_node_get(*iter);
+
+ if (susi->su->su_on_node == node) {
+ // Nodegroup was shutting down, also
needs to check whether nodegroup
+ // operation was borrowing SG ADMIN FSM
code (for 2N only)
+ if (ng->saAmfNGAdminState ==
SA_AMF_ADMIN_SHUTTING_DOWN) {
+ node->admin_ng = ng;
+ if
(susi->su->sg_of_su->sg_fsm_state == AVD_SG_FSM_SG_ADMIN) {
+
osafassert(susi->su->sg_of_su->sg_redundancy_model ==
SA_AMF_2N_REDUNDANCY_MODEL);
+
susi->su->sg_of_su->ng_using_saAmfSGAdminState = true;
+
susi->su->sg_of_su->saAmfSGAdminState = ng->saAmfNGAdminState;
+
m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(cb, susi->su->sg_of_su,
AVSV_CKPT_SG_ADMIN_STATE);
+ }
+ }
+ // Node group was LOCKED, AMFD treat
nodegroup and node operation in the same view,
+ // except nodegroup was borrowing SG
ADMIN FSM. This is because the other case that
+ // nodegroup operation was only SG
SU_OPER FSM, which is the same as of node operation
+ if (ng->saAmfNGAdminState ==
SA_AMF_ADMIN_LOCKED) {
+
if(susi->su->sg_of_su->sg_fsm_state == AVD_SG_FSM_SG_ADMIN) {
+
osafassert(susi->su->sg_of_su->sg_redundancy_model ==
SA_AMF_2N_REDUNDANCY_MODEL);
+ node->admin_ng
= ng;
+
susi->su->sg_of_su->ng_using_saAmfSGAdminState = true;
+
susi->su->sg_of_su->saAmfSGAdminState = ng->saAmfNGAdminState;
+
m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(cb, susi->su->sg_of_su,
AVSV_CKPT_SG_ADMIN_STATE);
+ }
+ }
+ TRACE("Restore nodegroup
operation, SG(%s), ng_using_saAmfSGAdminState: %u, saAmfSGAdminState:%u",
+
susi->su->sg_of_su->name.c_str(),
susi->su->sg_of_su->ng_using_saAmfSGAdminState,
+
susi->su->sg_of_su->saAmfSGAdminState);
+
+ }
+ }
+ }
+ }
+ }
+ TRACE_LEAVE();
+}
diff --git a/osaf/services/saf/amf/amfd/sg.cc b/osaf/services/saf/amf/amfd/sg.cc
--- a/osaf/services/saf/amf/amfd/sg.cc
+++ b/osaf/services/saf/amf/amfd/sg.cc
@@ -374,7 +374,6 @@ static AVD_SG *sg_create(const std::stri
if (immutil_getAttr(const_cast<SaImmAttrNameT>("saAmfSGAdminState"),
attributes, 0, &sg->saAmfSGAdminState) != SA_AIS_OK) {
sg->saAmfSGAdminState = SA_AMF_ADMIN_UNLOCKED;
}
-
/* TODO use value in type instead? */
sg->sg_redundancy_model = sgt->saAmfSgtRedundancyModel;
@@ -2083,7 +2082,7 @@ uint32_t AVD_SG::curr_non_instantiated_s
(su->saAmfSUPresenceState ==
SA_AMF_PRESENCE_UNINSTANTIATED));}));
}
-void avd_sg_read_headless_cached_rta(AVD_CL_CB *cb)
+void avd_sg_read_headless_fsm_state_cached_rta(AVD_CL_CB *cb)
{
SaAisErrorT rc;
@@ -2092,13 +2091,11 @@ void avd_sg_read_headless_cached_rta(AVD
SaNameT sg_dn;
AVD_SG *sg;
- unsigned int num_of_values = 0;
const SaImmAttrValuesT_2 **attributes;
AVD_SG_FSM_STATE imm_sg_fsm_state;
const char *className = "SaAmfSG";
const SaImmAttrNameT searchAttributes[] = {
const_cast<SaImmAttrNameT>("osafAmfSGFsmState"),
- const_cast<SaImmAttrNameT>("osafAmfSGSuOperationList"),
NULL
};
@@ -2127,6 +2124,53 @@ void avd_sg_read_headless_cached_rta(AVD
attributes, 0, &imm_sg_fsm_state);
osafassert(rc == SA_AIS_OK);
sg->set_fsm_state(imm_sg_fsm_state, false);
+ }
+ }
+
+ (void)immutil_saImmOmSearchFinalize(searchHandle);
+
+done:
+ TRACE_LEAVE();
+
+}
+
+void avd_sg_read_headless_su_oper_list_cached_rta(AVD_CL_CB *cb)
+{
+
+ SaAisErrorT rc;
+ SaImmSearchHandleT searchHandle;
+ SaImmSearchParametersT_2 searchParam;
+
+ SaNameT sg_dn;
+ AVD_SG *sg;
+ unsigned int num_of_values = 0;
+ const SaImmAttrValuesT_2 **attributes;
+ const char *className = "SaAmfSG";
+ const SaImmAttrNameT searchAttributes[] = {
+ const_cast<SaImmAttrNameT>("osafAmfSGSuOperationList"),
+ NULL
+ };
+
+ TRACE_ENTER();
+
+ osafassert(cb->scs_absence_max_duration > 0);
+
+ searchParam.searchOneAttr.attrName =
const_cast<SaImmAttrNameT>("SaImmAttrClassName");
+ searchParam.searchOneAttr.attrValueType = SA_IMM_ATTR_SASTRINGT;
+ searchParam.searchOneAttr.attrValue = &className;
+
+ if ((rc = immutil_saImmOmSearchInitialize_2(cb->immOmHandle, NULL,
SA_IMM_SUBTREE,
+ SA_IMM_SEARCH_ONE_ATTR | SA_IMM_SEARCH_GET_SOME_ATTR,
&searchParam,
+ searchAttributes, &searchHandle)) != SA_AIS_OK) {
+
+ LOG_ER("%s: saImmOmSearchInitialize_2 failed: %u",
__FUNCTION__, rc);
+ goto done;
+ }
+
+ while ((rc = immutil_saImmOmSearchNext_2(searchHandle, &sg_dn,
+ (SaImmAttrValuesT_2 ***)&attributes))
== SA_AIS_OK) {
+ sg = sg_db->find(Amf::to_string(&sg_dn));
+ if (sg && sg->sg_ncs_spec == false) {
// Read sg operation list
if
(immutil_getAttrValuesNumber(const_cast<SaImmAttrNameT>("osafAmfSGSuOperationList"),
attributes, &num_of_values) == SA_AIS_OK) {
unsigned int i;
diff --git a/osaf/services/saf/amf/amfd/sgproc.cc
b/osaf/services/saf/amf/amfd/sgproc.cc
--- a/osaf/services/saf/amf/amfd/sgproc.cc
+++ b/osaf/services/saf/amf/amfd/sgproc.cc
@@ -1098,7 +1098,7 @@ void avd_su_si_assign_evh(AVD_CL_CB *cb,
}
if (su->list_of_susi == AVD_SU_SI_REL_NULL) {
- LOG_ER("%s: no susis", __FUNCTION__);
+ LOG_WA("%s: no susis", __FUNCTION__);
goto done;
}
diff --git a/osaf/services/saf/amf/amfd/siass.cc
b/osaf/services/saf/amf/amfd/siass.cc
--- a/osaf/services/saf/amf/amfd/siass.cc
+++ b/osaf/services/saf/amf/amfd/siass.cc
@@ -261,7 +261,9 @@ void avd_susi_read_headless_cached_rta(A
m_AVD_SET_SG_ADMIN_SI(cb, si);
}
}
-
+ // only restore if not done
+ if (susi->su->su_on_node->admin_ng == nullptr)
+ avd_ng_restore_headless_states(cb, susi);
} else {
// This susi does not exist after headless, but it's
still in IMM
// delete it for now
------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel