Do you mean with the addition you propose? I guess cleanup failure is a special case anyway, one that we need to look at in order to complete #538.
Thanks, Hans On 04/07/2014 11:57 AM, praveen malviya wrote: > Any update on this. I have tested and it works. > > Thanks > Praveen > > On 02-Apr-14 3:25 PM, praveen malviya wrote: >> The removed code in err.c >> >> - /* >> - * su-sis may be in assigning/removing state. signal csi >> - * assign/remove done so that su-si assignment/removal algo can proceed. >> - */ >> - avnd_comp_cmplete_all_assignment(cb, failed_comp); >> - >> >> and >> >> /* delete curr info of the failed comp */ >> - rc = avnd_comp_curr_info_del(cb, failed_comp); >> - if (NCSCC_RC_SUCCESS != rc) >> - goto done; >> - >> >> is added in avnd_comp_clc_terming_cleansucc_hdler() and >> avnd_comp_clc_terming_termsucc_hdler(). >> Don't we require the code in avnd_comp_clc_terming_cleanfail_hdler() >> when cleanup of component fails? >> >> Thanks >> Praveen >> >> On 31-Mar-14 4:37 PM, Hans Feldt wrote: >>> osaf/services/saf/amf/amfnd/clc.cc | 26 +++++++++++++++++++------- >>> osaf/services/saf/amf/amfnd/err.cc | 30 ------------------------------ >>> 2 files changed, 19 insertions(+), 37 deletions(-) >>> >>> >>> During component fail-over a standby component can be activated before >>> cleanup >>> of the faulty component has finished effectively introducing split brain on >>> component level. >>> >>> This happens because cleanup is not awaited before the SUSI response >>> message is >>> sent to the director. >>> >>> Fix this by sending the response after the cleanup has finished. 
>>> >>> diff --git a/osaf/services/saf/amf/amfnd/clc.cc >>> b/osaf/services/saf/amf/amfnd/clc.cc >>> --- a/osaf/services/saf/amf/amfnd/clc.cc >>> +++ b/osaf/services/saf/amf/amfnd/clc.cc >>> @@ -846,13 +846,6 @@ uint32_t avnd_comp_clc_fsm_run(AVND_CB * >>> /* get the final presence state */ >>> final_st = comp->pres; >>> - if (ev == AVND_COMP_CLC_PRES_FSM_EV_CLEANUP || ev == >>> AVND_COMP_CLC_PRES_FSM_EV_TERM_SUCC) { >>> - /* we need to delete all curr_info, pxied will have cbk for >>> cleanup */ >>> - if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) { >>> - avnd_comp_curr_info_del(cb, comp); >>> - } >>> - } >>> - >>> TRACE_1("Exited CLC FSM"); >>> TRACE_1("'%s':FSM Enter presence state: '%s':FSM Exit presence >>> state:%s", >>> >>> comp->name.value,pres_state[prv_st],pres_state[final_st]); >>> @@ -1629,6 +1622,15 @@ uint32_t avnd_comp_clc_xxxing_cleansucc_ >>> goto done; >>> } >>> + /* >>> + * su-sis may be in assigning/removing state. signal csi >>> + * assign/remove done so that su-si assignment/removal algo can >>> proceed. >>> + */ >>> + avnd_comp_cmplete_all_assignment(cb, comp); >>> + >>> + /* delete curr info of the failed comp */ >>> + avnd_comp_curr_info_del(cb, comp); >>> + >>> if ((clc_info->inst_retry_cnt < clc_info->inst_retry_max) && >>> (AVND_COMP_INST_EXIT_CODE_NO_RETRY != clc_info->inst_code_rcvd)) >>> { >>> /* => keep retrying */ >>> @@ -1971,6 +1973,7 @@ uint32_t avnd_comp_clc_terming_termsucc_ >>> if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) { >>> m_AVND_COMP_REG_PARAM_RESET(cb, comp); >>> m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, comp, >>> AVND_CKPT_COMP_CONFIG); >>> + avnd_comp_curr_info_del(cb, comp); >>> } >>> TRACE_LEAVE(); >>> @@ -2078,6 +2081,15 @@ uint32_t avnd_comp_clc_terming_cleansucc >>> } >>> } >>> + /* >>> + * su-sis may be in assigning/removing state. signal csi >>> + * assign/remove done so that su-si assignment/removal algo can >>> proceed. 
>>> + */ >>> + avnd_comp_cmplete_all_assignment(cb, comp); >>> + >>> + /* delete curr info of the failed comp */ >>> + avnd_comp_curr_info_del(cb, comp); >>> + >>> /* reset the comp-reg & instantiate params */ >>> if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) { >>> m_AVND_COMP_REG_PARAM_RESET(cb, comp); >>> diff --git a/osaf/services/saf/amf/amfnd/err.cc >>> b/osaf/services/saf/amf/amfnd/err.cc >>> --- a/osaf/services/saf/amf/amfnd/err.cc >>> +++ b/osaf/services/saf/amf/amfnd/err.cc >>> @@ -521,13 +521,6 @@ uint32_t avnd_err_recover(AVND_CB *cb, A >>> return rc; >>> m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, comp, >>> AVND_CKPT_COMP_OPER_STATE); >>> - /* >>> - * SU may be in the middle of SU_SI in assigning/removing state. >>> - * signal csi assign/remove done so that su-si assignment/removal >>> - * algo can proceed. >>> - */ >>> - avnd_comp_cmplete_all_assignment(cb, comp); >>> - >>> /* clean up the comp */ >>> rc = avnd_comp_clc_fsm_run(cb, comp, >>> AVND_COMP_CLC_PRES_FSM_EV_CLEANUP); >>> @@ -702,23 +695,12 @@ uint32_t avnd_err_rcvr_comp_failover(AVN >>> m_AVND_SU_OPER_STATE_SET(su, SA_AMF_OPERATIONAL_DISABLED); >>> m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su, AVND_CKPT_SU_OPER_STATE); >>> - /* >>> - * su-sis may be in assigning/removing state. signal csi >>> - * assign/remove done so that su-si assignment/removal algo can >>> proceed. 
>>> - */ >>> - avnd_comp_cmplete_all_assignment(cb, failed_comp); >>> - >>> /* We are now in the context of failover, forget the restart */ >>> if (su->pres == SA_AMF_PRESENCE_RESTARTING || >>> m_AVND_SU_IS_RESTART(su)) { >>> m_AVND_SU_RESTART_RESET(su); >>> m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su, >>> AVND_CKPT_SU_FLAG_CHANGE); >>> } >>> - /* delete curr info of the failed comp */ >>> - rc = avnd_comp_curr_info_del(cb, failed_comp); >>> - if (NCSCC_RC_SUCCESS != rc) >>> - goto done; >>> - >>> // TODO: there should be no difference between PI/NPI comps >>> if (m_AVND_SU_IS_PREINSTANTIABLE(su)) { >>> /* clean the failed comp */ >>> @@ -832,24 +814,12 @@ uint32_t avnd_err_rcvr_node_switchover(A >>> goto done; >>> } >>> - >>> - /* >>> - * su-sis may be in assigning/removing state. signal csi >>> - * assign/remove done so that su-si assignment/removal algo can >>> proceed. >>> - */ >>> - avnd_comp_cmplete_all_assignment(cb, failed_comp); >>> - >>> /* We are now in the context of failover, forget the restart */ >>> if (failed_su->pres == SA_AMF_PRESENCE_RESTARTING || >>> m_AVND_SU_IS_RESTART(failed_su)) { >>> m_AVND_SU_RESTART_RESET(failed_su); >>> m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, failed_su, >>> AVND_CKPT_SU_FLAG_CHANGE); >>> } >>> - /* delete curr info of the failed comp */ >>> - rc = avnd_comp_curr_info_del(cb, failed_comp); >>> - if (NCSCC_RC_SUCCESS != rc) >>> - goto done; >>> - >>> /* In nodeswitchover context: >>> a)If saAmfSUFailover is set for the faulted SU then this SU will >>> be failed-over >>> as a single entity. 
>> >> ------------------------------------------------------------------------------ >> _______________________________________________ >> Opensaf-devel mailing list >> Opensaf-devel@lists.sourceforge.net >> https://lists.sourceforge.net/lists/listinfo/opensaf-devel > > > ------------------------------------------------------------------------------ Put Bad Developers to Shame Dominate Development with Jenkins Continuous Integration Continuously Automate Build, Test & Deployment Start a new project now. Try Jenkins in the cloud. http://p.sf.net/sfu/13600_Cloudbees_APR _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel