Hi, There are some white space changes in there that blurs the picture. Please revert and take in a separate changeset. And the complexity of avd_su_oper_state_evh() which is already too high (40) is increased to 45. How can that be addressed?
See inline. Thanks, Hans On 7 June 2013 08:39, <[email protected]> wrote: > osaf/services/saf/avsv/avd/avd_clm.cc | 1 + > osaf/services/saf/avsv/avd/avd_node.cc | 7 +- > osaf/services/saf/avsv/avd/avd_sg.cc | 9 +- > osaf/services/saf/avsv/avd/avd_sgproc.cc | 231 > +++++++++++++++++++++---- > osaf/services/saf/avsv/avd/avd_su.cc | 1 + > osaf/services/saf/avsv/avd/include/avd_proc.h | 2 + > 6 files changed, 206 insertions(+), 45 deletions(-) > > > This patch separates sufailover and compfailover at amfd. Currently > suFailover will be supported only for 2N and NoRed model. During > compfailover, switchover of assignments will be done through quiesced HA > state. In case of sufailover, amfd will perform failover by giving active to > the standby SU in 2N model. For NoRed model spare SU will be made active. > Repair of faulted SU will be performed if saAmfSgAutoRepair is enabled. > > diff --git a/osaf/services/saf/avsv/avd/avd_clm.cc > b/osaf/services/saf/avsv/avd/avd_clm.cc > --- a/osaf/services/saf/avsv/avd/avd_clm.cc > +++ b/osaf/services/saf/avsv/avd/avd_clm.cc > @@ -50,6 +50,7 @@ static void clm_node_join_complete(AVD_A > (node->node_state == > AVD_AVND_STATE_NO_CONFIG) || > (node->node_state == > AVD_AVND_STATE_NCS_INIT)) { > if ((su->sg_of_su->saAmfSGAdminState != > SA_AMF_ADMIN_LOCKED_INSTANTIATION) && > + (su->saAmfSUOperState == > SA_AMF_OPERATIONAL_ENABLED) && > (su->saAmfSUAdminState != > SA_AMF_ADMIN_LOCKED_INSTANTIATION)) { > /* When the SU will instantiate then > prescence state change message will come > and so store the callback > parameters to send response later on. */ > diff --git a/osaf/services/saf/avsv/avd/avd_node.cc > b/osaf/services/saf/avsv/avd/avd_node.cc > --- a/osaf/services/saf/avsv/avd/avd_node.cc > +++ b/osaf/services/saf/avsv/avd/avd_node.cc > @@ -721,7 +721,8 @@ uint32_t node_admin_unlock_instantiation > while (su != NULL) { > if ((su->saAmfSUAdminState != > SA_AMF_ADMIN_LOCKED_INSTANTIATION) && > (su->sg_of_su->saAmfSGAdminState != > SA_AMF_ADMIN_LOCKED_INSTANTIATION) && > - (su->saAmfSUPresenceState == > SA_AMF_PRESENCE_UNINSTANTIATED)) { > + (su->saAmfSUPresenceState == > SA_AMF_PRESENCE_UNINSTANTIATED) && > + (su->saAmfSUOperState == SA_AMF_OPERATIONAL_ENABLED)) { > > if (su->saAmfSUPreInstantiable == true) { > if (su->sg_of_su->saAmfSGNumPrefInserviceSUs > > @@ -735,9 +736,7 @@ uint32_t node_admin_unlock_instantiation > LOG_WA("Failed Instantiation > '%s'", su->name.value); > } > } > - } else { > - avd_su_oper_state_set(su, > SA_AMF_OPERATIONAL_ENABLED); > - } > + } > } > su = su->avnd_list_su_next; > } > diff --git a/osaf/services/saf/avsv/avd/avd_sg.cc > b/osaf/services/saf/avsv/avd/avd_sg.cc > --- a/osaf/services/saf/avsv/avd/avd_sg.cc > +++ b/osaf/services/saf/avsv/avd/avd_sg.cc > @@ -944,8 +944,9 @@ static void sg_app_sg_admin_unlock_inst( > /* Instantiate the SUs in this SG */ > for (su = sg->list_of_su, su_try_inst = 0; su != NULL; su = > su->sg_list_su_next) { > if ((su->saAmfSUAdminState != > SA_AMF_ADMIN_LOCKED_INSTANTIATION) && > - (su->su_on_node->saAmfNodeAdminState != > SA_AMF_ADMIN_LOCKED_INSTANTIATION) > - && (su->saAmfSUPresenceState == > SA_AMF_PRESENCE_UNINSTANTIATED)) { > + (su->su_on_node->saAmfNodeAdminState != > SA_AMF_ADMIN_LOCKED_INSTANTIATION) && > + (su->saAmfSUOperState == > SA_AMF_OPERATIONAL_ENABLED) && > + (su->saAmfSUPresenceState == > SA_AMF_PRESENCE_UNINSTANTIATED)) { > > if (su->saAmfSUPreInstantiable == true) { > if (su->su_on_node->node_state == > AVD_AVND_STATE_PRESENT) { > @@ -963,9 +964,7 @@ static void sg_app_sg_admin_unlock_inst( > /* set uncondionally of msg snd outcome */ > m_AVD_SET_SU_TERM(cb, su, false); > > - } else { > - avd_su_oper_state_set(su, > SA_AMF_OPERATIONAL_ENABLED); > - } > + } > } > } > > diff --git a/osaf/services/saf/avsv/avd/avd_sgproc.cc > b/osaf/services/saf/avsv/avd/avd_sgproc.cc > --- a/osaf/services/saf/avsv/avd/avd_sgproc.cc > +++ b/osaf/services/saf/avsv/avd/avd_sgproc.cc > @@ -36,6 +36,7 @@ > #include <avd_si_dep.h> > > static SaAisErrorT avd_d2n_reboot_snd(AVD_AVND *node); > +static uint32_t avd_sg_su_failover_func(AVD_CL_CB *cb, AVD_SU *su); > > > /***************************************************************************** > * Function: avd_new_assgn_susi > @@ -370,14 +371,16 @@ void avd_su_oper_state_evh(AVD_CL_CB *cb > while (i_su != NULL) { > > avd_su_readiness_state_set(i_su, SA_AMF_READINESS_OUT_OF_SERVICE); > if (i_su->list_of_susi != > AVD_SU_SI_REL_NULL) { > - node_reboot_req = > false; > - /* Since assignments > exists call the SG FSM. > - */ > - if > (i_su->sg_of_su->su_fault(cb, i_su) == NCSCC_RC_FAILURE) { > - /* Bad > situation. Free the message and return since > - * receive id > was not processed the event will again > - * comeback > which we can then process. > - */ > + /* Delay Node reboot > if: > + a)Faulted SU > has saAmfSUFailover flag true but other healthy SUs are also present on node. "flag true" => set, remove also > + b)Only > faulted SU exists on the node and its saAmfSUFailover flag is false. > +*/ > + if (((i_su == su) ? > (!i_su->saAmfSUFailover) > + : > (i_su->saAmfSUOperState != SA_AMF_OPERATIONAL_DISABLED))) > + > node_reboot_req = false; please rewrite expression with an if else statement so that I can understand ... > + > + /* Since assignments > exists call the SG FSM.*/ > + if > (avd_su_rcvr_from_fault(i_su) == NCSCC_RC_FAILURE) { rename local function avd_su_rcvr_from_fault to su_recover_from_fault > LOG_ER("%s:%d > %s", __FUNCTION__, __LINE__, i_su->name.value); > goto done; > } > @@ -387,16 +390,12 @@ void avd_su_oper_state_evh(AVD_CL_CB *cb > * to be done for the SG on > which this SU exists. > */ > if > (avd_sg_app_su_inst_func(cb, i_su->sg_of_su) == NCSCC_RC_FAILURE) { > - /* Bad situation. > Free the message and return since > - * receive id was not > processed the event will again > - * comeback which we > can then process. > - */ > LOG_ER("%s:%d %s", > __FUNCTION__, __LINE__, i_su->name.value); > goto done; > } > > i_su = > i_su->avnd_list_su_next; > - } /* while(i_su != AVD_SU_NULL) > */ > + } > break; > default : > break; > @@ -410,41 +409,52 @@ void avd_su_oper_state_evh(AVD_CL_CB *cb > avd_d2n_reboot_snd(node); > } else { > saflog(LOG_NOTICE, > amfSvcUsrName, > - "Autorepair > disabled for '%s', NO reboot ordered", > + > "NodeAutorepair disabled for '%s', NO reboot ordered", > > node->name.value); > + > } > } > } else { /* if > (n2d_msg->msg_info.n2d_opr_state.node_oper_state == > SA_AMF_OPERATIONAL_DISABLED) */ > > - if (su->list_of_susi != > AVD_SU_SI_REL_NULL) { > - /* Since assignments exists > call the SG FSM. > - */ > + if (su->list_of_susi != AVD_SU_SI_REL_NULL) { > + /* Since assignments exists call the > SG FSM. */ > + switch > (n2d_msg->msg_info.n2d_opr_state.rec_rcvr.saf_amf) { > + case SA_AMF_COMPONENT_FAILOVER: > if > (su->sg_of_su->su_fault(cb, su) == NCSCC_RC_FAILURE) { > - /* Bad situation. > Free the message and return since > - * receive id was not > processed the event will again > - * comeback which we > can then process. > - */ > LOG_ER("%s:%d %s", > __FUNCTION__, __LINE__, su->name.value); > goto done; > } > + break; > + case AVSV_ERR_RCVR_SU_FAILOVER: > + if > (avd_sg_su_failover_func(cb,su) == NCSCC_RC_FAILURE) { > + LOG_ER("%s:%d %s", > __FUNCTION__, __LINE__, su->name.value); > + goto done; > + } > + break; > + default : > + LOG_ER("Recovery policy not > configured:%s",su->name.value); Should this default to the old behaviour instead? > + break; > } > + } > + /* Try to perfrom repair on the faulted SU */ > + avd_su_repair(su); rename to su_try_repair() and remove comment, add blank line before > > - /* Verify the SG to check if any > instantiations need > - * to be done for the SG on which > this SU exists. > + /* Verify the SG to check if any > instantiations need > + * to be done for the SG on which this SU > exists. > + */ > + if (avd_sg_app_su_inst_func(cb, su->sg_of_su) > == NCSCC_RC_FAILURE) { > + /* Bad situation. Free the message > and return since > + * receive id was not processed the > event will again > + * comeback which we can then process. > */ > - if (avd_sg_app_su_inst_func(cb, > su->sg_of_su) == NCSCC_RC_FAILURE) { > - /* Bad situation. Free the > message and return since > - * receive id was not > processed the event will again > - * comeback which we can then > process. > - */ > - LOG_ER("%s:%d %s", > __FUNCTION__, __LINE__, su->sg_of_su->name.value); > - goto done; > - } > + LOG_ER("%s:%d %s", __FUNCTION__, > __LINE__, su->sg_of_su->name.value); > + goto done; > + } > > - } /* else > (n2d_msg->msg_info.n2d_opr_state.node_oper_state == > SA_AMF_OPERATIONAL_DISABLED) */ > + } /* else > (n2d_msg->msg_info.n2d_opr_state.node_oper_state == > SA_AMF_OPERATIONAL_DISABLED) */ > > - } > - /* else if(cb->init_state == AVD_APP_STATE) */ > + } > + /* else if(cb->init_state == AVD_APP_STATE) */ > } /* if (n2d_msg->msg_info.n2d_opr_state.su_oper_state == > SA_AMF_OPERATIONAL_DISABLED) */ > else if (n2d_msg->msg_info.n2d_opr_state.su_oper_state == > SA_AMF_OPERATIONAL_ENABLED) { > avd_su_oper_state_set(su, SA_AMF_OPERATIONAL_ENABLED); > @@ -472,6 +482,13 @@ void avd_su_oper_state_evh(AVD_CL_CB *cb > m_AVD_GET_SU_NODE_PTR(cb, su, su_node_ptr); > > if (m_AVD_APP_SU_IS_INSVC(su, su_node_ptr)) { > + /* If oper state of Uninstantiated SU got > ENABLED so try to instantiate it > + after evaluating SG. */ > + if (su->saAmfSUPresenceState == > SA_AMF_PRESENCE_UNINSTANTIATED) { > + avd_sg_app_su_inst_func(cb, > su->sg_of_su); > + goto done; > + } m_AVD_APP_SU_IS_INSVC should check presence state so the above does not make sense. Move to above INSVC macro? > + > avd_su_readiness_state_set(su, > SA_AMF_READINESS_IN_SERVICE); > if ((cb->init_state == AVD_APP_STATE) && > (old_state == SA_AMF_READINESS_OUT_OF_SERVICE)) { > /* An application SU has become in > service call SG FSM */ > @@ -1035,7 +1052,7 @@ void avd_su_si_assign_evh(AVD_CL_CB *cb, > avd_d2n_reboot_snd(node); > } else { > saflog(LOG_NOTICE, amfSvcUsrName, > - "Autorepair disabled for '%s', NO > reboot ordered", > + "NodeAutorepair disabled for '%s', NO > reboot ordered", > node->name.value); > } > } > @@ -1089,6 +1106,7 @@ void avd_sg_app_node_su_inst_func(AVD_CL > (i_su->saAmfSUPresenceState == > SA_AMF_PRESENCE_UNINSTANTIATED) && > (i_su->saAmfSUAdminState != > SA_AMF_ADMIN_LOCKED_INSTANTIATION) && > (i_su->sg_of_su->saAmfSGAdminState != > SA_AMF_ADMIN_LOCKED_INSTANTIATION) && > + (i_su->saAmfSUOperState == > SA_AMF_OPERATIONAL_ENABLED) && > (i_su->su_on_node->saAmfNodeAdminState != > SA_AMF_ADMIN_LOCKED_INSTANTIATION)) { > if (i_su->saAmfSUPreInstantiable == true) { > if > (i_su->sg_of_su->saAmfSGNumPrefInserviceSUs > > @@ -1184,8 +1202,8 @@ uint32_t avd_sg_app_su_inst_func(AVD_CL_ > if ((i_su->saAmfSUPreInstantiable == false) && > (i_su->saAmfSUPresenceState == > SA_AMF_PRESENCE_UNINSTANTIATED) && > (su_node_ptr->saAmfNodeOperState == > SA_AMF_OPERATIONAL_ENABLED) && > + (i_su->saAmfSUOperState == > SA_AMF_OPERATIONAL_ENABLED) && > (i_su->term_state == false)) { > - avd_su_oper_state_set(i_su, > SA_AMF_OPERATIONAL_ENABLED); > m_AVD_GET_SU_NODE_PTR(cb, i_su, su_node_ptr); > > if (m_AVD_APP_SU_IS_INSVC(i_su, su_node_ptr)) > { > @@ -1208,6 +1226,7 @@ uint32_t avd_sg_app_su_inst_func(AVD_CL_ > > (i_su->su_on_node->saAmfNodeAdminState != SA_AMF_ADMIN_LOCKED_INSTANTIATION) > && > (su_node_ptr->saAmfNodeOperState == > SA_AMF_OPERATIONAL_ENABLED) && > (su_node_ptr->node_info.member == > true) && > + (i_su->saAmfSUOperState == > SA_AMF_OPERATIONAL_ENABLED) && > (i_su->term_state == false)) { > > /* Try to Instantiate this SU */ > @@ -2042,4 +2061,144 @@ void avd_su_role_failover(AVD_SU *su, AV > TRACE_LEAVE(); > } > > +/** > + * @brief Performs sufailover or component failover > + * At present sufailover is supported only for 2N and NoRed > model. > + * > + * @param[in] su > + * > + * @return NCSCC_RC_FAILURE/NCSCC_RC_SUCCESS > + **/ > +uint32_t avd_su_rcvr_from_fault(AVD_SU *su) static, remove avd prefix, add const > +{ > + uint32_t rc = NCSCC_RC_SUCCESS; > > + if (((su->sg_of_su->sg_redundancy_model == > SA_AMF_NO_REDUNDANCY_MODEL) || > + (su->sg_of_su->sg_redundancy_model == > SA_AMF_2N_REDUNDANCY_MODEL)) && > + (su->saAmfSUFailover) && (su->saAmfSUOperState == > SA_AMF_OPERATIONAL_DISABLED)) { > + rc = avd_sg_su_failover_func(avd_cb, su); > + } else { > + rc = su->sg_of_su->su_fault(avd_cb, su); > + } > + return rc; > +} > + > +/** > + * @brief Sends repair operation message to avnd to perform repiar of > the faulted SU. > + * This function is used to perform repiar after sufailover > recovery. > + * Autorepair will be performed only for 2N and NoRed model. > + * > + * @param[in] su > + * > + **/ > +void avd_su_repair(AVD_SU *su) static, remove avd prefix, add const > +{ > + TRACE_ENTER2(" Repair for SU:'%s'",su->name.value); > + > + if (((su->sg_of_su->sg_redundancy_model == > SA_AMF_NO_REDUNDANCY_MODEL) || > + (su->sg_of_su->sg_redundancy_model == > SA_AMF_2N_REDUNDANCY_MODEL)) && > + (su->sg_of_su->saAmfSGAutoRepair) && > (su->saAmfSUFailover) && > + (su->saAmfSUOperState == SA_AMF_OPERATIONAL_DISABLED) > && > + (su->saAmfSUPresenceState != > SA_AMF_PRESENCE_INSTANTIATION_FAILED) && > + (su->saAmfSUPresenceState != > SA_AMF_PRESENCE_TERMINATION_FAILED)) { > + > + saflog(LOG_NOTICE, amfSvcUsrName, "Ordering Auto repair of > '%s' as sufailover repair action", > + su->sg_of_su->name.value); > + avd_admin_op_msg_snd(&su->name, AVSV_SA_AMF_SU, > + > static_cast<SaAmfAdminOperationIdT>(SA_AMF_ADMIN_REPAIRED), su->su_on_node); > + } else { > + saflog(LOG_NOTICE, amfSvcUsrName, "Autorepair not done for > '%s'", su->sg_of_su->name.value); > + } > + > + TRACE_LEAVE(); > +} > + > +/** > + * @brief This function is called when AMFD gets oper_state_evh for > disabled SU and > + * with recovery as saAmfSUFailover. SU failover will be > performed honoring SI dependency > + * if configured. > + * @param cb > + * @param su > + * > + * @return SaAisErrorT > + */ > +static uint32_t avd_sg_su_failover_func(AVD_CL_CB *cb, AVD_SU *su) remove avd prefix, remove cb param, add const to su > +{ > + uint32_t rc = NCSCC_RC_FAILURE; > + AVD_COMP *i_comp; > + > + TRACE_ENTER2("'%s', %u", su->name.value, su->sg_of_su->sg_fsm_state); > + > + if (su->list_of_susi == AVD_SU_SI_REL_NULL) { > + LOG_NO("'%s' has no assignments", su->name.value); > + rc = NCSCC_RC_SUCCESS; > + goto done; > + } > + > + /* In case of other than 2N and NoRed model perform component > failover. */ > + if ((su->sg_of_su->sg_redundancy_model != SA_AMF_NO_REDUNDANCY_MODEL) > && > + (su->sg_of_su->sg_redundancy_model != > SA_AMF_2N_REDUNDANCY_MODEL)) { > + rc = su->sg_of_su->su_fault(avd_cb, su); > + goto done; > + } > + > + avd_su_oper_state_set(su, SA_AMF_OPERATIONAL_DISABLED); > + avd_su_readiness_state_set(su, SA_AMF_READINESS_OUT_OF_SERVICE); > + > + /* Check if there was any admin operations going on this SU. */ > + if (su->pend_cbk.invocation != 0) { > + avd_saImmOiAdminOperationResult(avd_cb->immOiHandle, > su->pend_cbk.invocation, > + SA_AIS_ERR_TIMEOUT); > + su->pend_cbk.invocation = 0; > + su->pend_cbk.admin_oper = > static_cast<SaAmfAdminOperationIdT>(0); > + } > + > + i_comp = su->list_of_comp; > + while (i_comp != NULL) { > + i_comp->curr_num_csi_actv = 0; > + i_comp->curr_num_csi_stdby = 0; > + avd_comp_oper_state_set(i_comp, SA_AMF_OPERATIONAL_DISABLED); > + i_comp->saAmfCompRestartCount = 0; > + if (i_comp->admin_pend_cbk.invocation != 0) { > + avd_saImmOiAdminOperationResult(avd_cb->immOiHandle, > i_comp->admin_pend_cbk.invocation, SA_AIS_ERR_TIMEOUT); > + i_comp->admin_pend_cbk.invocation = 0; > + i_comp->admin_pend_cbk.admin_oper = > static_cast<SaAmfAdminOperationIdT>(0); > + } > + m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, i_comp, > AVSV_CKPT_AVD_COMP_CONFIG); > + i_comp = i_comp->su_comp_next; > + } There is big overlap added here with other functions like avd_node_down_appl_susi_failover(). Can you create some more general utility function that can be used from other places? > + > + > + > + /*If the AvD is in AVD_APP_STATE then reassign all the SUSI > assignments for this SU */ > + if (avd_cb->init_state == AVD_APP_STATE) { > + /* Now analyze the service group for the new HA state > + assignments and send the SU SI assign messages > + accordingly. > + */ > + su->sg_of_su->node_fail(avd_cb, su); > + > + /* Free all the SU SI assignments for all the SIs on the > + the SU if there are any. > + */ > + > + while (su->list_of_susi != AVD_SU_SI_REL_NULL) { > + > + /* free all the CSI assignments */ > + avd_compcsi_delete(avd_cb, su->list_of_susi, false); > + /* Unassign the SUSI */ > + m_AVD_SU_SI_TRG_DEL(avd_cb, su->list_of_susi); > + } > + > + su->saAmfSUNumCurrActiveSIs = 0; > + su->saAmfSUNumCurrStandbySIs = 0; > + m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, su, > AVSV_CKPT_AVD_SU_CONFIG); > + > + } > + rc = NCSCC_RC_SUCCESS; > + > +done: > + TRACE_LEAVE(); > + return rc; > +} > + > diff --git a/osaf/services/saf/avsv/avd/avd_su.cc > b/osaf/services/saf/avsv/avd/avd_su.cc > --- a/osaf/services/saf/avsv/avd/avd_su.cc > +++ b/osaf/services/saf/avsv/avd/avd_su.cc > @@ -1117,6 +1117,7 @@ static void su_admin_op_cb(SaImmOiHandle > if ((node->node_state == AVD_AVND_STATE_PRESENT) && > ((node->saAmfNodeAdminState != > SA_AMF_ADMIN_LOCKED_INSTANTIATION) && > (su->sg_of_su->saAmfSGAdminState != > SA_AMF_ADMIN_LOCKED_INSTANTIATION)) && > + (su->saAmfSUOperState == > SA_AMF_OPERATIONAL_ENABLED) && > (su->sg_of_su->saAmfSGNumPrefInserviceSUs > > sg_instantiated_su_count(su->sg_of_su))) { > /* When the SU will instantiate then prescence state > change message will come > and so store the callback parameters to send > response later on. */ > diff --git a/osaf/services/saf/avsv/avd/include/avd_proc.h > b/osaf/services/saf/avsv/avd/include/avd_proc.h > --- a/osaf/services/saf/avsv/avd/include/avd_proc.h > +++ b/osaf/services/saf/avsv/avd/include/avd_proc.h > @@ -160,5 +160,7 @@ extern void avd_node_mark_absent(AVD_AVN > extern void avd_tmr_snd_hb_evh(AVD_CL_CB *cb, AVD_EVT *evt); > extern void avd_node_failover(AVD_AVND *node); > extern AVD_SU *get_other_su_from_oper_list(AVD_SU *su); > +extern uint32_t avd_su_rcvr_from_fault(AVD_SU *su); > +extern void avd_su_repair(AVD_SU *su); > > #endif > > ------------------------------------------------------------------------------ > How ServiceNow helps IT people transform IT departments: > 1. A cloud service to automate IT design, transition and operations > 2. Dashboards that offer high-level views of enterprise services > 3. A single system of record for all IT processes > http://p.sf.net/sfu/servicenow-d2d-j > _______________________________________________ > Opensaf-devel mailing list > [email protected] > https://lists.sourceforge.net/lists/listinfo/opensaf-devel ------------------------------------------------------------------------------ This SF.net email is sponsored by Windows: Build for Windows Store. http://p.sf.net/sfu/windows-dev2dev _______________________________________________ Opensaf-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/opensaf-devel
