Hi,

I think it should be safe to do SU failover for all redundancy models.
It is a much easier operation than comp failover. It would simplify
the patches, especially the AMF node director parts in 5/6 patches.

Please explain (with consequences) why the AMF node director needs to
know the SG redundancy model.

Thanks,
Hans


On 7 June 2013 08:39,  <[email protected]> wrote:
>  osaf/services/saf/avsv/avnd/avnd_err.c |  150 
> ++++++++++++++++++++++----------
>  1 files changed, 103 insertions(+), 47 deletions(-)
>
>
>  This patch handles compFailover and suFailover in conformance with the 
> AMF-B.04.01 spec at amfnd. Currently only the 2N and NoRed models are 
> supported.  For other models, saAmfSUFailover will be ignored and 
> compFailover will be performed. During suFailover the SU will be disabled and 
> all comps will be abruptly terminated. Also handles the case when 
> saAmfSUFailover is true and NodeSwitchover gets escalated.
>
> diff --git a/osaf/services/saf/avsv/avnd/avnd_err.c 
> b/osaf/services/saf/avsv/avnd/avnd_err.c
> --- a/osaf/services/saf/avsv/avnd/avnd_err.c
> +++ b/osaf/services/saf/avsv/avnd/avnd_err.c
> @@ -401,8 +401,15 @@ uint32_t avnd_err_escalate(AVND_CB *cb,
>                 *io_esc_rcvr = comp->err_info.def_rec;
>
>         /* disallow comp-restart if it's disabled */
> -       if ((SA_AMF_COMPONENT_RESTART == *io_esc_rcvr) && 
> m_AVND_COMP_IS_RESTART_DIS(comp))
> +       if ((SA_AMF_COMPONENT_RESTART == *io_esc_rcvr) && 
> m_AVND_COMP_IS_RESTART_DIS(comp) && (!su->is_ncs)) {
> +               LOG_NO("saAmfCompDisableRestart is true for 
> '%s'",comp->name.value);
> +               *io_esc_rcvr = SA_AMF_COMPONENT_FAILOVER;
> +       }
> +
> +       if ((SA_AMF_COMPONENT_FAILOVER== *io_esc_rcvr) && (su->sufailover) && 
> (!su->is_ncs)) {
> +               LOG_NO("saAmfSUFailover is true for 
> '%s'",comp->su->name.value);
>                 *io_esc_rcvr = AVSV_ERR_RCVR_SU_FAILOVER;
> +       }
>
>         switch (*io_esc_rcvr) {
>         case SA_AMF_COMPONENT_FAILOVER: /* treat it as su failover */
> @@ -519,7 +526,6 @@ uint32_t avnd_err_recover(AVND_CB *cb, A
>                 break;
>
>         case SA_AMF_COMPONENT_FAILOVER:
> -               /* not supported */
>                 rc = avnd_err_rcvr_comp_failover(cb, comp);
>                 break;
>
> @@ -671,45 +677,21 @@ uint32_t avnd_err_rcvr_su_restart(AVND_C
>         return rc;
>  }
>
> -/****************************************************************************
> -  Name          : avnd_err_rcvr_comp_failover
> -
> -  Description   : This routine executes component failover recovery.
> -
> -  Arguments     : cb   - ptr to the AvND control block
> -                  comp - ptr to the comp
> -
> -  Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.
> -
> -  Notes         : None.
> -******************************************************************************/
> -uint32_t avnd_err_rcvr_comp_failover(AVND_CB *cb, AVND_COMP *comp)
> +/**
> + * This function performs component failover recovery action.
> + *
> + * @param cb: ptr to AvND contol block.
> + * @param comp: ptr to failed component.
> + *
> + * @return NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.
> + */
> +uint32_t avnd_err_rcvr_comp_failover(AVND_CB *cb, AVND_COMP *failed_comp)
>  {
>         uint32_t rc = NCSCC_RC_SUCCESS;
> -       LOG_NO("%s, Unsupported",__FUNCTION__);
> +       AVND_SU *su;
>
> -       return rc;
> -}
> -
> -/****************************************************************************
> -  Name          : avnd_err_rcvr_su_failover
> -
> -  Description   : This routine executes SU failover recovery.
> -
> -  Arguments     : cb          - ptr to the AvND control block
> -                  su          - ptr to the SU to which the comp belongs
> -                  failed_comp - ptr to the failed comp that triggered this
> -                                recovery
> -
> -  Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.
> -
> -  Notes         : None.
> -******************************************************************************/
> -uint32_t avnd_err_rcvr_su_failover(AVND_CB *cb, AVND_SU *su, AVND_COMP 
> *failed_comp)
> -{
> -       uint32_t rc = NCSCC_RC_SUCCESS;
> -       TRACE_ENTER();
> -
> +       TRACE_ENTER2("'%s'", failed_comp->name.value);
> +       su = failed_comp->su;
>         /* mark the comp failed */
>         m_AVND_COMP_FAILED_SET(failed_comp);
>         m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, failed_comp, 
> AVND_CKPT_COMP_FLAG_CHANGE);
> @@ -732,7 +714,7 @@ uint32_t avnd_err_rcvr_su_failover(AVND_
>         m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su, AVND_CKPT_SU_OPER_STATE);
>
>         /* inform AvD */
> -       rc = avnd_di_oper_send(cb, su, AVSV_ERR_RCVR_SU_FAILOVER);
> +       rc = avnd_di_oper_send(cb, su, SA_AMF_COMPONENT_FAILOVER);
>
>         /*
>          *  su-sis may be in assigning/removing state. signal csi
> @@ -763,6 +745,52 @@ uint32_t avnd_err_rcvr_su_failover(AVND_
>         return rc;
>  }
>
> +/**
> + * This function performs SU failover recovery action.
> + *
> + * @param cb: ptr to AvND contol block.
> + * @param su: ptr to the SU which contains the failed component.
> + * @param comp: ptr to failed component.
> + *
> + * @return NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.
> + */
> +uint32_t avnd_err_rcvr_su_failover(AVND_CB *cb, AVND_SU *su, AVND_COMP 
> *failed_comp)
> +{
> +       AVND_COMP *comp;
> +       uint32_t rc = NCSCC_RC_SUCCESS;
> +
> +
> +       TRACE_ENTER2("'%s' '%s'", su->name.value, failed_comp->name.value);
> +       if ((su->sg_redundancy_model != SA_AMF_2N_REDUNDANCY_MODEL) &&
> +                       (su->sg_redundancy_model != 
> SA_AMF_NO_REDUNDANCY_MODEL)) {
> +               rc = avnd_err_rcvr_comp_failover(cb, failed_comp);
> +               goto done;
> +       }
> +       m_AVND_COMP_FAILED_SET(failed_comp);
> +       m_AVND_COMP_OPER_STATE_SET(failed_comp, SA_AMF_OPERATIONAL_DISABLED);
> +       m_AVND_SU_FAILED_SET(su);
> +       m_AVND_SU_OPER_STATE_SET(su, SA_AMF_OPERATIONAL_DISABLED);
> +
> +       LOG_NO("Terminating components of '%s'(abruptly & 
> unordered)",su->name.value);
> +       /* Unordered cleanup of components of failed SU */
> +       for (comp = 
> m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_FIRST(&su->comp_list));
> +                       comp;
> +                       comp = 
> m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_NEXT(&comp->su_dll_node))) 
> {
> +               if (comp->su->su_is_external)
> +                       continue;
> +
> +               rc = avnd_comp_clc_fsm_run(cb, comp, 
> AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
> +               if (NCSCC_RC_SUCCESS != rc) {
> +                       LOG_ER("'%s' termination failed", comp->name.value);
> +                       goto done;
> +               }
> +       }
> +done:
> +
> +       TRACE_LEAVE2("%u", rc);
> +       return rc;
> +}
> +
>  /****************************************************************************
>    Name          : avnd_err_rcvr_node_switchover
>
> @@ -781,7 +809,7 @@ uint32_t avnd_err_rcvr_node_switchover(A
>  {
>         uint32_t rc = NCSCC_RC_SUCCESS;
>         TRACE_ENTER();
> -
> +       AVND_COMP *comp;
>         /* increase log level to info */
>         setlogmask(LOG_UPTO(LOG_INFO));
>
> @@ -836,11 +864,33 @@ uint32_t avnd_err_rcvr_node_switchover(A
>         if (NCSCC_RC_SUCCESS != rc)
>                 goto done;
>
> -       /* terminate the failed comp */
> -       if (m_AVND_SU_IS_PREINSTANTIABLE(failed_su)) {
> -               rc = avnd_comp_clc_fsm_run(cb, failed_comp, 
> AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
> -               if (NCSCC_RC_SUCCESS != rc)
> -                       goto done;
> +       if (m_AVND_SU_IS_FAILED(failed_comp->su) && 
> (failed_comp->su->sufailover) &&
> +                       ((failed_comp->su->sg_redundancy_model == 
> SA_AMF_NO_REDUNDANCY_MODEL) ||
> +                        (failed_comp->su->sg_redundancy_model == 
> SA_AMF_2N_REDUNDANCY_MODEL)))
> +       {
> +               LOG_NO("Terminating components of '%s'(abruptly & 
> unordered)",failed_su->name.value);
> +               /* Unordered cleanup of components of failed SU */
> +               for (comp = 
> m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_FIRST(&failed_su->comp_list));
> +                               comp;
> +                               comp = 
> m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_NEXT(&comp->su_dll_node))) 
> {
> +                       if (comp->su->su_is_external)
> +                               continue;
> +
> +                       rc = avnd_comp_clc_fsm_run(cb, comp, 
> AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
> +                       if (NCSCC_RC_SUCCESS != rc) {
> +                               LOG_ER("'%s' termination failed", 
> comp->name.value);
> +                               goto done;
> +                       }
> +               }
> +               avnd_su_si_del(cb, &failed_comp->su->name);
> +       }
> +       else {
> +               /* terminate the failed comp */
> +               if (m_AVND_SU_IS_PREINSTANTIABLE(failed_su)) {
> +                       rc = avnd_comp_clc_fsm_run(cb, failed_comp, 
> AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
> +                       if (NCSCC_RC_SUCCESS != rc)
> +                               goto done;
> +               }
>         }
>
>   done:
> @@ -1216,7 +1266,10 @@ uint32_t avnd_err_restart_esc_level_2(AV
>         TRACE_ENTER();
>
>         /* first time in this level */
> -       *esc_rcvr = AVSV_ERR_RCVR_SU_FAILOVER;
> +       if (su->sufailover)
> +               *esc_rcvr = AVSV_ERR_RCVR_SU_FAILOVER;
> +       else
> +               *esc_rcvr = SA_AMF_COMPONENT_FAILOVER;
>
>         /* External components are not supposed to escalate SU Failover of
>            cluster components. For Ext component, SU Failover will be limited 
> to
> @@ -1278,7 +1331,10 @@ AVSV_ERR_RCVR avnd_err_esc_su_failover(A
>         TRACE_ENTER();
>
>         /* initalize */
> -       *esc_rcvr = AVSV_ERR_RCVR_SU_FAILOVER;
> +       if (su->sufailover)
> +               *esc_rcvr = AVSV_ERR_RCVR_SU_FAILOVER;
> +       else
> +               *esc_rcvr = SA_AMF_COMPONENT_FAILOVER;
>
>         if (true == su->su_is_external) {
>                 /* External component should not contribute to NODE FAILOVER 
> of cluster
>
> ------------------------------------------------------------------------------
> How ServiceNow helps IT people transform IT departments:
> 1. A cloud service to automate IT design, transition and operations
> 2. Dashboards that offer high-level views of enterprise services
> 3. A single system of record for all IT processes
> http://p.sf.net/sfu/servicenow-d2d-j
> _______________________________________________
> Opensaf-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/opensaf-devel

------------------------------------------------------------------------------
This SF.net email is sponsored by Windows:

Build for Windows Store.

http://p.sf.net/sfu/windows-dev2dev
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to