Please go ahead and push it. I had not included this change and tested
the floated patch only.
Thanks,
Praveen
On 07-Apr-14 5:27 PM, Hans Feldt wrote:
> You mean with the addition you propose?
>
> I guess cleanup failure is a special case anyway that we need to look at
> to complete #538.
>
> Thanks,
> Hans
>
> On 04/07/2014 11:57 AM, praveen malviya wrote:
>> Any update on this? I have tested and it works.
>>
>> Thanks
>> Praveen
>>
>> On 02-Apr-14 3:25 PM, praveen malviya wrote:
>>> The removed code in err.cc
>>>
>>> - /*
>>> - * su-sis may be in assigning/removing state. signal csi
>>> - * assign/remove done so that su-si assignment/removal algo can
>>> proceed.
>>> - */
>>> - avnd_comp_cmplete_all_assignment(cb, failed_comp);
>>> -
>>>
>>> and
>>>
>>> /* delete curr info of the failed comp */
>>> - rc = avnd_comp_curr_info_del(cb, failed_comp);
>>> - if (NCSCC_RC_SUCCESS != rc)
>>> - goto done;
>>> -
>>>
>>> is added in avnd_comp_clc_terming_cleansucc_hdler() and
>>> avnd_comp_clc_terming_termsucc_hdler().
>>> Don't we also require this code in avnd_comp_clc_terming_cleanfail_hdler()
>>> for the case when cleanup of the component fails?
>>>
>>> Thanks
>>> Praveen
>>>
>>> On 31-Mar-14 4:37 PM, Hans Feldt wrote:
>>>> osaf/services/saf/amf/amfnd/clc.cc | 26 +++++++++++++++++++-------
>>>> osaf/services/saf/amf/amfnd/err.cc | 30
>>>> ------------------------------
>>>> 2 files changed, 19 insertions(+), 37 deletions(-)
>>>>
>>>>
>>>> During component fail-over a standby component can be activated
>>>> before cleanup
>>>> of the faulty component has finished effectively introducing split
>>>> brain on
>>>> component level.
>>>>
>>>> This happens because cleanup is not awaited before the SUSI
>>>> response message is
>>>> sent to the director.
>>>>
>>>> Fix this by sending the response after the cleanup has finished.
>>>>
>>>> diff --git a/osaf/services/saf/amf/amfnd/clc.cc
>>>> b/osaf/services/saf/amf/amfnd/clc.cc
>>>> --- a/osaf/services/saf/amf/amfnd/clc.cc
>>>> +++ b/osaf/services/saf/amf/amfnd/clc.cc
>>>> @@ -846,13 +846,6 @@ uint32_t avnd_comp_clc_fsm_run(AVND_CB *
>>>> /* get the final presence state */
>>>> final_st = comp->pres;
>>>> - if (ev == AVND_COMP_CLC_PRES_FSM_EV_CLEANUP || ev ==
>>>> AVND_COMP_CLC_PRES_FSM_EV_TERM_SUCC) {
>>>> - /* we need to delete all curr_info, pxied will have cbk
>>>> for cleanup */
>>>> - if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) {
>>>> - avnd_comp_curr_info_del(cb, comp);
>>>> - }
>>>> - }
>>>> -
>>>> TRACE_1("Exited CLC FSM");
>>>> TRACE_1("'%s':FSM Enter presence state: '%s':FSM Exit
>>>> presence state:%s",
>>>> comp->name.value,pres_state[prv_st],pres_state[final_st]);
>>>> @@ -1629,6 +1622,15 @@ uint32_t avnd_comp_clc_xxxing_cleansucc_
>>>> goto done;
>>>> }
>>>> + /*
>>>> + * su-sis may be in assigning/removing state. signal csi
>>>> + * assign/remove done so that su-si assignment/removal algo
>>>> can proceed.
>>>> + */
>>>> + avnd_comp_cmplete_all_assignment(cb, comp);
>>>> +
>>>> + /* delete curr info of the failed comp */
>>>> + avnd_comp_curr_info_del(cb, comp);
>>>> +
>>>> if ((clc_info->inst_retry_cnt < clc_info->inst_retry_max) &&
>>>> (AVND_COMP_INST_EXIT_CODE_NO_RETRY !=
>>>> clc_info->inst_code_rcvd)) {
>>>> /* => keep retrying */
>>>> @@ -1971,6 +1973,7 @@ uint32_t avnd_comp_clc_terming_termsucc_
>>>> if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) {
>>>> m_AVND_COMP_REG_PARAM_RESET(cb, comp);
>>>> m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, comp,
>>>> AVND_CKPT_COMP_CONFIG);
>>>> + avnd_comp_curr_info_del(cb, comp);
>>>> }
>>>> TRACE_LEAVE();
>>>> @@ -2078,6 +2081,15 @@ uint32_t avnd_comp_clc_terming_cleansucc
>>>> }
>>>> }
>>>> + /*
>>>> + * su-sis may be in assigning/removing state. signal csi
>>>> + * assign/remove done so that su-si assignment/removal algo
>>>> can proceed.
>>>> + */
>>>> + avnd_comp_cmplete_all_assignment(cb, comp);
>>>> +
>>>> + /* delete curr info of the failed comp */
>>>> + avnd_comp_curr_info_del(cb, comp);
>>>> +
>>>> /* reset the comp-reg & instantiate params */
>>>> if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) {
>>>> m_AVND_COMP_REG_PARAM_RESET(cb, comp);
>>>> diff --git a/osaf/services/saf/amf/amfnd/err.cc
>>>> b/osaf/services/saf/amf/amfnd/err.cc
>>>> --- a/osaf/services/saf/amf/amfnd/err.cc
>>>> +++ b/osaf/services/saf/amf/amfnd/err.cc
>>>> @@ -521,13 +521,6 @@ uint32_t avnd_err_recover(AVND_CB *cb, A
>>>> return rc;
>>>> m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, comp,
>>>> AVND_CKPT_COMP_OPER_STATE);
>>>> - /*
>>>> - * SU may be in the middle of SU_SI in assigning/removing
>>>> state.
>>>> - * signal csi assign/remove done so that su-si
>>>> assignment/removal
>>>> - * algo can proceed.
>>>> - */
>>>> - avnd_comp_cmplete_all_assignment(cb, comp);
>>>> -
>>>> /* clean up the comp */
>>>> rc = avnd_comp_clc_fsm_run(cb, comp,
>>>> AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
>>>> @@ -702,23 +695,12 @@ uint32_t avnd_err_rcvr_comp_failover(AVN
>>>> m_AVND_SU_OPER_STATE_SET(su, SA_AMF_OPERATIONAL_DISABLED);
>>>> m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su,
>>>> AVND_CKPT_SU_OPER_STATE);
>>>> - /*
>>>> - * su-sis may be in assigning/removing state. signal csi
>>>> - * assign/remove done so that su-si assignment/removal algo
>>>> can proceed.
>>>> - */
>>>> - avnd_comp_cmplete_all_assignment(cb, failed_comp);
>>>> -
>>>> /* We are now in the context of failover, forget the restart */
>>>> if (su->pres == SA_AMF_PRESENCE_RESTARTING ||
>>>> m_AVND_SU_IS_RESTART(su)) {
>>>> m_AVND_SU_RESTART_RESET(su);
>>>> m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su,
>>>> AVND_CKPT_SU_FLAG_CHANGE);
>>>> }
>>>> - /* delete curr info of the failed comp */
>>>> - rc = avnd_comp_curr_info_del(cb, failed_comp);
>>>> - if (NCSCC_RC_SUCCESS != rc)
>>>> - goto done;
>>>> -
>>>> // TODO: there should be no difference between PI/NPI comps
>>>> if (m_AVND_SU_IS_PREINSTANTIABLE(su)) {
>>>> /* clean the failed comp */
>>>> @@ -832,24 +814,12 @@ uint32_t avnd_err_rcvr_node_switchover(A
>>>> goto done;
>>>> }
>>>> -
>>>> - /*
>>>> - * su-sis may be in assigning/removing state. signal csi
>>>> - * assign/remove done so that su-si assignment/removal algo
>>>> can proceed.
>>>> - */
>>>> - avnd_comp_cmplete_all_assignment(cb, failed_comp);
>>>> -
>>>> /* We are now in the context of failover, forget the restart */
>>>> if (failed_su->pres == SA_AMF_PRESENCE_RESTARTING ||
>>>> m_AVND_SU_IS_RESTART(failed_su)) {
>>>> m_AVND_SU_RESTART_RESET(failed_su);
>>>> m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, failed_su,
>>>> AVND_CKPT_SU_FLAG_CHANGE);
>>>> }
>>>> - /* delete curr info of the failed comp */
>>>> - rc = avnd_comp_curr_info_del(cb, failed_comp);
>>>> - if (NCSCC_RC_SUCCESS != rc)
>>>> - goto done;
>>>> -
>>>> /* In nodeswitchover context:
>>>> a)If saAmfSUFailover is set for the faulted SU then this
>>>> SU will be failed-over
>>>> as a single entity.
>>>
>>> ------------------------------------------------------------------------------
>>>
>>>
>>> _______________________________________________
>>> Opensaf-devel mailing list
>>> [email protected]
>>> https://lists.sourceforge.net/lists/listinfo/opensaf-devel
>>
>>
>>
------------------------------------------------------------------------------
Put Bad Developers to Shame
Dominate Development with Jenkins Continuous Integration
Continuously Automate Build, Test & Deployment
Start a new project now. Try Jenkins in the cloud.
http://p.sf.net/sfu/13600_Cloudbees_APR
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel