Hi, I think it should be safe to do SU failover for all redundancy models. It is a much easier operation than component failover. Doing so would simplify the patches, especially the AMF node director parts in 5/6 patches.
Please explain (with consequences) why it is needed to know the SG redundancy model in the AMF node director. Thanks, Hans On 7 June 2013 08:39, <[email protected]> wrote: > osaf/services/saf/avsv/avnd/avnd_err.c | 150 > ++++++++++++++++++++++---------- > 1 files changed, 103 insertions(+), 47 deletions(-) > > > This patch handles compfailover and sufailover in comformance with the > AMF-B.04.01 spec at amfnd. Currently only 2N model and NoRed models are > supported. For other models, saAmfSUFailover will be ignored and > compFailover will be performed. During suFailover SU will be disabled and all > comps will be abruptly terminated. Also handles the case when saAmfSUFailover > is true and Nodswitchover gets escalated. > > diff --git a/osaf/services/saf/avsv/avnd/avnd_err.c > b/osaf/services/saf/avsv/avnd/avnd_err.c > --- a/osaf/services/saf/avsv/avnd/avnd_err.c > +++ b/osaf/services/saf/avsv/avnd/avnd_err.c > @@ -401,8 +401,15 @@ uint32_t avnd_err_escalate(AVND_CB *cb, > *io_esc_rcvr = comp->err_info.def_rec; > > /* disallow comp-restart if it's disabled */ > - if ((SA_AMF_COMPONENT_RESTART == *io_esc_rcvr) && > m_AVND_COMP_IS_RESTART_DIS(comp)) > + if ((SA_AMF_COMPONENT_RESTART == *io_esc_rcvr) && > m_AVND_COMP_IS_RESTART_DIS(comp) && (!su->is_ncs)) { > + LOG_NO("saAmfCompDisableRestart is true for > '%s'",comp->name.value); > + *io_esc_rcvr = SA_AMF_COMPONENT_FAILOVER; > + } > + > + if ((SA_AMF_COMPONENT_FAILOVER== *io_esc_rcvr) && (su->sufailover) && > (!su->is_ncs)) { > + LOG_NO("saAmfSUFailover is true for > '%s'",comp->su->name.value); > *io_esc_rcvr = AVSV_ERR_RCVR_SU_FAILOVER; > + } > > switch (*io_esc_rcvr) { > case SA_AMF_COMPONENT_FAILOVER: /* treat it as su failover */ > @@ -519,7 +526,6 @@ uint32_t avnd_err_recover(AVND_CB *cb, A > break; > > case SA_AMF_COMPONENT_FAILOVER: > - /* not supported */ > rc = avnd_err_rcvr_comp_failover(cb, comp); > break; > > @@ -671,45 +677,21 @@ uint32_t avnd_err_rcvr_su_restart(AVND_C > return rc; > } > > 
-/**************************************************************************** > - Name : avnd_err_rcvr_comp_failover > - > - Description : This routine executes component failover recovery. > - > - Arguments : cb - ptr to the AvND control block > - comp - ptr to the comp > - > - Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE. > - > - Notes : None. > -******************************************************************************/ > -uint32_t avnd_err_rcvr_comp_failover(AVND_CB *cb, AVND_COMP *comp) > +/** > + * This function performs component failover recovery action. > + * > + * @param cb: ptr to AvND contol block. > + * @param comp: ptr to failed component. > + * > + * @return NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE. > + */ > +uint32_t avnd_err_rcvr_comp_failover(AVND_CB *cb, AVND_COMP *failed_comp) > { > uint32_t rc = NCSCC_RC_SUCCESS; > - LOG_NO("%s, Unsupported",__FUNCTION__); > + AVND_SU *su; > > - return rc; > -} > - > -/**************************************************************************** > - Name : avnd_err_rcvr_su_failover > - > - Description : This routine executes SU failover recovery. > - > - Arguments : cb - ptr to the AvND control block > - su - ptr to the SU to which the comp belongs > - failed_comp - ptr to the failed comp that triggered this > - recovery > - > - Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE. > - > - Notes : None. 
> -******************************************************************************/ > -uint32_t avnd_err_rcvr_su_failover(AVND_CB *cb, AVND_SU *su, AVND_COMP > *failed_comp) > -{ > - uint32_t rc = NCSCC_RC_SUCCESS; > - TRACE_ENTER(); > - > + TRACE_ENTER2("'%s'", failed_comp->name.value); > + su = failed_comp->su; > /* mark the comp failed */ > m_AVND_COMP_FAILED_SET(failed_comp); > m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, failed_comp, > AVND_CKPT_COMP_FLAG_CHANGE); > @@ -732,7 +714,7 @@ uint32_t avnd_err_rcvr_su_failover(AVND_ > m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su, AVND_CKPT_SU_OPER_STATE); > > /* inform AvD */ > - rc = avnd_di_oper_send(cb, su, AVSV_ERR_RCVR_SU_FAILOVER); > + rc = avnd_di_oper_send(cb, su, SA_AMF_COMPONENT_FAILOVER); > > /* > * su-sis may be in assigning/removing state. signal csi > @@ -763,6 +745,52 @@ uint32_t avnd_err_rcvr_su_failover(AVND_ > return rc; > } > > +/** > + * This function performs SU failover recovery action. > + * > + * @param cb: ptr to AvND contol block. > + * @param su: ptr to the SU which contains the failed component. > + * @param comp: ptr to failed component. > + * > + * @return NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE. 
> + */ > +uint32_t avnd_err_rcvr_su_failover(AVND_CB *cb, AVND_SU *su, AVND_COMP > *failed_comp) > +{ > + AVND_COMP *comp; > + uint32_t rc = NCSCC_RC_SUCCESS; > + > + > + TRACE_ENTER2("'%s' '%s'", su->name.value, failed_comp->name.value); > + if ((su->sg_redundancy_model != SA_AMF_2N_REDUNDANCY_MODEL) && > + (su->sg_redundancy_model != > SA_AMF_NO_REDUNDANCY_MODEL)) { > + rc = avnd_err_rcvr_comp_failover(cb, failed_comp); > + goto done; > + } > + m_AVND_COMP_FAILED_SET(failed_comp); > + m_AVND_COMP_OPER_STATE_SET(failed_comp, SA_AMF_OPERATIONAL_DISABLED); > + m_AVND_SU_FAILED_SET(su); > + m_AVND_SU_OPER_STATE_SET(su, SA_AMF_OPERATIONAL_DISABLED); > + > + LOG_NO("Terminating components of '%s'(abruptly & > unordered)",su->name.value); > + /* Unordered cleanup of components of failed SU */ > + for (comp = > m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_FIRST(&su->comp_list)); > + comp; > + comp = > m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_NEXT(&comp->su_dll_node))) > { > + if (comp->su->su_is_external) > + continue; > + > + rc = avnd_comp_clc_fsm_run(cb, comp, > AVND_COMP_CLC_PRES_FSM_EV_CLEANUP); > + if (NCSCC_RC_SUCCESS != rc) { > + LOG_ER("'%s' termination failed", comp->name.value); > + goto done; > + } > + } > +done: > + > + TRACE_LEAVE2("%u", rc); > + return rc; > +} > + > /**************************************************************************** > Name : avnd_err_rcvr_node_switchover > > @@ -781,7 +809,7 @@ uint32_t avnd_err_rcvr_node_switchover(A > { > uint32_t rc = NCSCC_RC_SUCCESS; > TRACE_ENTER(); > - > + AVND_COMP *comp; > /* increase log level to info */ > setlogmask(LOG_UPTO(LOG_INFO)); > > @@ -836,11 +864,33 @@ uint32_t avnd_err_rcvr_node_switchover(A > if (NCSCC_RC_SUCCESS != rc) > goto done; > > - /* terminate the failed comp */ > - if (m_AVND_SU_IS_PREINSTANTIABLE(failed_su)) { > - rc = avnd_comp_clc_fsm_run(cb, failed_comp, > AVND_COMP_CLC_PRES_FSM_EV_CLEANUP); > - if (NCSCC_RC_SUCCESS != rc) > - goto done; > + if 
(m_AVND_SU_IS_FAILED(failed_comp->su) && > (failed_comp->su->sufailover) && > + ((failed_comp->su->sg_redundancy_model == > SA_AMF_NO_REDUNDANCY_MODEL) || > + (failed_comp->su->sg_redundancy_model == > SA_AMF_2N_REDUNDANCY_MODEL))) > + { > + LOG_NO("Terminating components of '%s'(abruptly & > unordered)",failed_su->name.value); > + /* Unordered cleanup of components of failed SU */ > + for (comp = > m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_FIRST(&failed_su->comp_list)); > + comp; > + comp = > m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_NEXT(&comp->su_dll_node))) > { > + if (comp->su->su_is_external) > + continue; > + > + rc = avnd_comp_clc_fsm_run(cb, comp, > AVND_COMP_CLC_PRES_FSM_EV_CLEANUP); > + if (NCSCC_RC_SUCCESS != rc) { > + LOG_ER("'%s' termination failed", > comp->name.value); > + goto done; > + } > + } > + avnd_su_si_del(cb, &failed_comp->su->name); > + } > + else { > + /* terminate the failed comp */ > + if (m_AVND_SU_IS_PREINSTANTIABLE(failed_su)) { > + rc = avnd_comp_clc_fsm_run(cb, failed_comp, > AVND_COMP_CLC_PRES_FSM_EV_CLEANUP); > + if (NCSCC_RC_SUCCESS != rc) > + goto done; > + } > } > > done: > @@ -1216,7 +1266,10 @@ uint32_t avnd_err_restart_esc_level_2(AV > TRACE_ENTER(); > > /* first time in this level */ > - *esc_rcvr = AVSV_ERR_RCVR_SU_FAILOVER; > + if (su->sufailover) > + *esc_rcvr = AVSV_ERR_RCVR_SU_FAILOVER; > + else > + *esc_rcvr = SA_AMF_COMPONENT_FAILOVER; > > /* External components are not supposed to escalate SU Failover of > cluster components. 
For Ext component, SU Failover will be limited > to > @@ -1278,7 +1331,10 @@ AVSV_ERR_RCVR avnd_err_esc_su_failover(A > TRACE_ENTER(); > > /* initalize */ > - *esc_rcvr = AVSV_ERR_RCVR_SU_FAILOVER; > + if (su->sufailover) > + *esc_rcvr = AVSV_ERR_RCVR_SU_FAILOVER; > + else > + *esc_rcvr = SA_AMF_COMPONENT_FAILOVER; > > if (true == su->su_is_external) { > /* External component should not contribute to NODE FAILOVER > of cluster > > ------------------------------------------------------------------------------ > How ServiceNow helps IT people transform IT departments: > 1. A cloud service to automate IT design, transition and operations > 2. Dashboards that offer high-level views of enterprise services > 3. A single system of record for all IT processes > http://p.sf.net/sfu/servicenow-d2d-j > _______________________________________________ > Opensaf-devel mailing list > [email protected] > https://lists.sourceforge.net/lists/listinfo/opensaf-devel ------------------------------------------------------------------------------ This SF.net email is sponsored by Windows: Build for Windows Store. http://p.sf.net/sfu/windows-dev2dev _______________________________________________ Opensaf-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/opensaf-devel
