You mean with the addition you propose?

I guess cleanup failure is a special case anyway, one that we need to look at
to complete #538.

Thanks,
Hans

On 04/07/2014 11:57 AM, praveen malviya wrote:
> Any update on this? I have tested it and it works.
>
> Thanks
> Praveen
>
> On 02-Apr-14 3:25 PM, praveen malviya wrote:
>> The removed code in err.cc
>>
>> -    /*
>> -     *  su-sis may be in assigning/removing state. signal csi
>> -     * assign/remove done so that su-si assignment/removal algo can proceed.
>> -     */
>> -    avnd_comp_cmplete_all_assignment(cb, failed_comp);
>> -
>>
>> and
>>
>> /* delete curr info of the failed comp */
>> -    rc = avnd_comp_curr_info_del(cb, failed_comp);
>> -    if (NCSCC_RC_SUCCESS != rc)
>> -        goto done;
>> -
>>
>> has been added to avnd_comp_clc_terming_cleansucc_hdler() and
>> avnd_comp_clc_terming_termsucc_hdler().
>> Don't we also require this code in avnd_comp_clc_terming_cleanfail_hdler(),
>> for the case where cleanup of the component fails?
>>
>> Thanks
>> Praveen
>>
>> On 31-Mar-14 4:37 PM, Hans Feldt wrote:
>>>    osaf/services/saf/amf/amfnd/clc.cc |  26 +++++++++++++++++++-------
>>>    osaf/services/saf/amf/amfnd/err.cc |  30 ------------------------------
>>>    2 files changed, 19 insertions(+), 37 deletions(-)
>>>
>>>
>>> During component fail-over, a standby component can be activated before
>>> cleanup of the faulty component has finished, effectively introducing
>>> split brain on component level.
>>>
>>> This happens because cleanup is not awaited before the SUSI response
>>> message is sent to the director.
>>>
>>> Fix this by sending the response after the cleanup has finished.
>>>
>>> diff --git a/osaf/services/saf/amf/amfnd/clc.cc 
>>> b/osaf/services/saf/amf/amfnd/clc.cc
>>> --- a/osaf/services/saf/amf/amfnd/clc.cc
>>> +++ b/osaf/services/saf/amf/amfnd/clc.cc
>>> @@ -846,13 +846,6 @@ uint32_t avnd_comp_clc_fsm_run(AVND_CB *
>>>        /* get the final presence state */
>>>        final_st = comp->pres;
>>> -    if (ev == AVND_COMP_CLC_PRES_FSM_EV_CLEANUP || ev == 
>>> AVND_COMP_CLC_PRES_FSM_EV_TERM_SUCC) {
>>> -        /* we need to delete all curr_info, pxied will have cbk for 
>>> cleanup */
>>> -        if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) {
>>> -            avnd_comp_curr_info_del(cb, comp);
>>> -        }
>>> -    }
>>> -
>>>        TRACE_1("Exited CLC FSM");
>>>        TRACE_1("'%s':FSM Enter presence state: '%s':FSM Exit presence 
>>> state:%s",
>>>                        
>>> comp->name.value,pres_state[prv_st],pres_state[final_st]);
>>> @@ -1629,6 +1622,15 @@ uint32_t avnd_comp_clc_xxxing_cleansucc_
>>>            goto done;
>>>        }
>>> +    /*
>>> +     *  su-sis may be in assigning/removing state. signal csi
>>> +     * assign/remove done so that su-si assignment/removal algo can 
>>> proceed.
>>> +     */
>>> +    avnd_comp_cmplete_all_assignment(cb, comp);
>>> +
>>> +    /* delete curr info of the failed comp */
>>> +    avnd_comp_curr_info_del(cb, comp);
>>> +
>>>        if ((clc_info->inst_retry_cnt < clc_info->inst_retry_max) &&
>>>            (AVND_COMP_INST_EXIT_CODE_NO_RETRY != clc_info->inst_code_rcvd)) 
>>> {
>>>            /* => keep retrying */
>>> @@ -1971,6 +1973,7 @@ uint32_t avnd_comp_clc_terming_termsucc_
>>>        if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) {
>>>            m_AVND_COMP_REG_PARAM_RESET(cb, comp);
>>>            m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, comp, 
>>> AVND_CKPT_COMP_CONFIG);
>>> +        avnd_comp_curr_info_del(cb, comp);
>>>        }
>>>        TRACE_LEAVE();
>>> @@ -2078,6 +2081,15 @@ uint32_t avnd_comp_clc_terming_cleansucc
>>>            }
>>>        }
>>> +    /*
>>> +     *  su-sis may be in assigning/removing state. signal csi
>>> +     * assign/remove done so that su-si assignment/removal algo can 
>>> proceed.
>>> +     */
>>> +    avnd_comp_cmplete_all_assignment(cb, comp);
>>> +
>>> +    /* delete curr info of the failed comp */
>>> +    avnd_comp_curr_info_del(cb, comp);
>>> +
>>>        /* reset the comp-reg & instantiate params */
>>>        if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) {
>>>            m_AVND_COMP_REG_PARAM_RESET(cb, comp);
>>> diff --git a/osaf/services/saf/amf/amfnd/err.cc 
>>> b/osaf/services/saf/amf/amfnd/err.cc
>>> --- a/osaf/services/saf/amf/amfnd/err.cc
>>> +++ b/osaf/services/saf/amf/amfnd/err.cc
>>> @@ -521,13 +521,6 @@ uint32_t avnd_err_recover(AVND_CB *cb, A
>>>                return rc;
>>>            m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, comp, 
>>> AVND_CKPT_COMP_OPER_STATE);
>>> -        /*
>>> -         * SU may be in the middle of SU_SI in assigning/removing state.
>>> -         * signal csi assign/remove done so that su-si assignment/removal
>>> -         * algo can proceed.
>>> -         */
>>> -        avnd_comp_cmplete_all_assignment(cb, comp);
>>> -
>>>            /* clean up the comp */
>>>            rc = avnd_comp_clc_fsm_run(cb, comp, 
>>> AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
>>> @@ -702,23 +695,12 @@ uint32_t avnd_err_rcvr_comp_failover(AVN
>>>        m_AVND_SU_OPER_STATE_SET(su, SA_AMF_OPERATIONAL_DISABLED);
>>>        m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su, AVND_CKPT_SU_OPER_STATE);
>>> -    /*
>>> -     *  su-sis may be in assigning/removing state. signal csi
>>> -     * assign/remove done so that su-si assignment/removal algo can 
>>> proceed.
>>> -     */
>>> -    avnd_comp_cmplete_all_assignment(cb, failed_comp);
>>> -
>>>        /* We are now in the context of failover, forget the restart */
>>>        if (su->pres == SA_AMF_PRESENCE_RESTARTING || 
>>> m_AVND_SU_IS_RESTART(su)) {
>>>            m_AVND_SU_RESTART_RESET(su);
>>>            m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su, 
>>> AVND_CKPT_SU_FLAG_CHANGE);
>>>        }
>>> -    /* delete curr info of the failed comp */
>>> -    rc = avnd_comp_curr_info_del(cb, failed_comp);
>>> -    if (NCSCC_RC_SUCCESS != rc)
>>> -        goto done;
>>> -
>>>        // TODO: there should be no difference between PI/NPI comps
>>>        if (m_AVND_SU_IS_PREINSTANTIABLE(su)) {
>>>            /* clean the failed comp */
>>> @@ -832,24 +814,12 @@ uint32_t avnd_err_rcvr_node_switchover(A
>>>                goto done;
>>>        }
>>> -
>>> -    /*
>>> -     *  su-sis may be in assigning/removing state. signal csi
>>> -     * assign/remove done so that su-si assignment/removal algo can 
>>> proceed.
>>> -     */
>>> -    avnd_comp_cmplete_all_assignment(cb, failed_comp);
>>> -
>>>        /* We are now in the context of failover, forget the restart */
>>>        if (failed_su->pres == SA_AMF_PRESENCE_RESTARTING || 
>>> m_AVND_SU_IS_RESTART(failed_su)) {
>>>            m_AVND_SU_RESTART_RESET(failed_su);
>>>            m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, failed_su, 
>>> AVND_CKPT_SU_FLAG_CHANGE);
>>>        }
>>> -    /* delete curr info of the failed comp */
>>> -    rc = avnd_comp_curr_info_del(cb, failed_comp);
>>> -    if (NCSCC_RC_SUCCESS != rc)
>>> -        goto done;
>>> -
>>>        /* In nodeswitchover context:
>>>           a)If saAmfSUFailover is set for the faulted SU then this SU will 
>>> be failed-over
>>>               as a single entity.
>>
>> ------------------------------------------------------------------------------
>> _______________________________________________
>> Opensaf-devel mailing list
>> Opensaf-devel@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/opensaf-devel
>
>
>

------------------------------------------------------------------------------
Put Bad Developers to Shame
Dominate Development with Jenkins Continuous Integration
Continuously Automate Build, Test & Deployment 
Start a new project now. Try Jenkins in the cloud.
http://p.sf.net/sfu/13600_Cloudbees_APR
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to