ack, code review only/Thanks HansN

On 06/20/2016 08:35 AM, [email protected] wrote:
>   osaf/services/saf/amf/amfnd/err.cc  |  19 +++++++++++++++++++
>   osaf/services/saf/amf/amfnd/su.cc   |  16 ++++++++++++++++
>   osaf/services/saf/amf/amfnd/susm.cc |   2 +-
>   3 files changed, 36 insertions(+), 1 deletions(-)
>
>
> AMFND crashes during sufailover recovery, which happens because comp does not 
> respond
> with SA_AIS_OK for CSI remove callback in su lock operation.
>
> Applicable to sufailover, node-failover and node-switchover along with 
> su-failover.
> AMFND calls oper_done logic multiple times. This logic must be called when 
> AMFND
> has to respond to AMFD for pending assignments. In these recovery policies, 
> AMFD will
> perform recovery of assignments on escalation request. So calling oper_done 
> logic for
> these recoveries must be avoided in AMFND.
> There is also one more case when the same crash is observed: if component faults 
> with
> su-failover recovery after responding successfully for quiesced state during 
> SU lock.
> Since AMFND responds successfully for the quiesced state, it gets removal of 
> assignment.
> AMFND crashes during removal of assignment.
>
> Patch solves the problem by resetting the pending assignment flag in SU for 
> these recovery
> policies. AMFD will perform recovery when it gets recovery request from AMFND.
> For second crash, when AMFND gets removal of assignment during su-failover, 
> it will discard
> removal of assignment.
> In both the cases, AMFD takes care of replying for admin op as part of 
> su-failover recovery.
>
> diff --git a/osaf/services/saf/amf/amfnd/err.cc 
> b/osaf/services/saf/amf/amfnd/err.cc
> --- a/osaf/services/saf/amf/amfnd/err.cc
> +++ b/osaf/services/saf/amf/amfnd/err.cc
> @@ -841,6 +841,13 @@ uint32_t avnd_err_rcvr_su_failover(AVND_
>               reset_suRestart_flag(su);
>               su->admin_op_Id = static_cast<SaAmfAdminOperationIdT>(0);
>       }
> +     /* If SU faulted during assignments, reset its pending assignment flag. 
> AMFD will perform
> +        fail-over as a part of recovery request.*/
> +     if (m_AVND_SU_IS_ALL_SI(su)) {
> +             TRACE_1("Reset pending assignment flag in su.");
> +             m_AVND_SU_ALL_SI_RESET(su);
> +     }
> +
>       //Remember component-failover/su-failover context.
>       m_AVND_SU_FAILOVER_SET(failed_comp->su);
>   
> @@ -922,6 +929,12 @@ uint32_t avnd_err_rcvr_node_switchover(A
>       if (m_AVND_SU_IS_RESTART(failed_su))
>               failed_su->admin_op_Id = static_cast<SaAmfAdminOperationIdT>(0);
>   
> +     /* If SU faulted during assignments, reset its pending assignment flag. 
> AMFD will perform
> +        fail-over as a part of recovery request.*/
> +     if ((m_AVND_SU_IS_ALL_SI(failed_su)) && (failed_comp->su->sufailover == 
> true)) {
> +             TRACE_1("Reset pending assignment flag in su.");
> +             m_AVND_SU_ALL_SI_RESET(failed_su);
> +     }
>       /* In nodeswitchover context:
>          a)If saAmfSUFailover is set for the faulted SU then this SU will be 
> failed-over
>               as a single entity.
> @@ -1016,6 +1029,12 @@ uint32_t avnd_err_rcvr_node_failover(AVN
>               reset_suRestart_flag(failed_su);
>               failed_su->admin_op_Id = static_cast<SaAmfAdminOperationIdT>(0);
>       }
> +     /* If SU faulted during assignments, reset its pending assignment flag. 
> AMFD will perform
> +        fail-over as a part of recovery request or node down.*/
> +     if (m_AVND_SU_IS_ALL_SI(failed_su)) {
> +             TRACE_1("Reset pending assignment flag in su.");
> +             m_AVND_SU_ALL_SI_RESET(failed_su);
> +     }
>       /* Unordered cleanup of all local application components */
>       for (comp = (AVND_COMP *)ncs_patricia_tree_getnext(&cb->compdb, 
> (uint8_t *)nullptr);
>                 comp != nullptr;
> diff --git a/osaf/services/saf/amf/amfnd/su.cc 
> b/osaf/services/saf/amf/amfnd/su.cc
> --- a/osaf/services/saf/amf/amfnd/su.cc
> +++ b/osaf/services/saf/amf/amfnd/su.cc
> @@ -430,6 +430,22 @@ uint32_t avnd_evt_avd_info_su_si_assign_
>                               goto done;
>                       }
>               }
> +             /*
> +                SU failover and Node-switchover (with sufailover true) is in 
> progress
> +                and AMFND gets deletion of assignment for failed SU. Since 
> AMFND launches
> +                cleanup of all the components failed SU, it must discard 
> deletion of assignment
> +                in it. After successful cleanup of all the components, AMFND 
> sends recovery
> +                request to AMFD and it will take care of failover of this 
> failed SU. Also AMFD
> +                will be able to respond to any pending admin op while 
> processing recovery request.
> +                For Node-failover, assignment can be discarded for any SU as 
> AMFND launches clean up
> +                of all the components.
> +              */
> +             if ((sufailover_in_progress(su) || 
> sufailover_during_nodeswitchover(su) ||
> +                     (cb->term_state == 
> AVND_TERM_STATE_NODE_FAILOVER_TERMINATING)) &&
> +                             (info->msg_act == AVSV_SUSI_ACT_DEL)) {
> +                     TRACE_2("Discarding assignment deletion for '%s'", 
> su->name.value);
> +                     goto done;
> +             }
>       }
>   
>       if (cb->term_state == AVND_TERM_STATE_NODE_FAILOVER_TERMINATED) {
> diff --git a/osaf/services/saf/amf/amfnd/susm.cc 
> b/osaf/services/saf/amf/amfnd/susm.cc
> --- a/osaf/services/saf/amf/amfnd/susm.cc
> +++ b/osaf/services/saf/amf/amfnd/susm.cc
> @@ -1128,7 +1128,7 @@ uint32_t avnd_su_si_oper_done(AVND_CB *c
>   
>       /* finally delete the si(s) if they are removed */
>       curr_si = (si) ? si : (AVND_SU_SI_REC 
> *)m_NCS_DBLIST_FIND_FIRST(&su->si_list);
> -     if (m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_REMOVED(curr_si)) {
> +     if ((curr_si != nullptr) && 
> m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_REMOVED(curr_si)) {
>               rc = (si) ? avnd_su_si_rec_del(cb, &su->name, &si->name) : 
> avnd_su_si_del(cb, &su->name);
>               if (NCSCC_RC_SUCCESS != rc)
>                       goto done;


------------------------------------------------------------------------------
What NetFlow Analyzer can do for you? Monitors network bandwidth and traffic
patterns at an interface-level. Reveals which users, apps, and protocols are 
consuming the most bandwidth. Provides multi-vendor support for NetFlow, 
J-Flow, sFlow and other flows. Make informed decisions using capacity 
planning reports. http://sdm.link/zohodev2dev
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to