Any update on this? I have tested it and it works.
Thanks
Praveen
On 02-Apr-14 3:25 PM, praveen malviya wrote:
> The removed code in err.c
>
> - /*
> - * su-sis may be in assigning/removing state. signal csi
> - * assign/remove done so that su-si assignment/removal algo can proceed.
> - */
> - avnd_comp_cmplete_all_assignment(cb, failed_comp);
> -
>
> and
>
> /* delete curr info of the failed comp */
> - rc = avnd_comp_curr_info_del(cb, failed_comp);
> - if (NCSCC_RC_SUCCESS != rc)
> - goto done;
> -
>
> has been added to avnd_comp_clc_terming_cleansucc_hdler() and
> avnd_comp_clc_terming_termsucc_hdler().
> Don't we also require this code in avnd_comp_clc_terming_cleanfail_hdler(),
> for the case when cleanup of the component fails?
>
> Thanks
> Praveen
>
> On 31-Mar-14 4:37 PM, Hans Feldt wrote:
>> osaf/services/saf/amf/amfnd/clc.cc | 26 +++++++++++++++++++-------
>> osaf/services/saf/amf/amfnd/err.cc | 30 ------------------------------
>> 2 files changed, 19 insertions(+), 37 deletions(-)
>>
>>
>> During component fail-over, a standby component can be activated before
>> cleanup of the faulty component has finished, effectively introducing
>> split brain at the component level.
>>
>> This happens because cleanup is not awaited before the SUSI response
>> message is sent to the director.
>>
>> Fix this by sending the response after the cleanup has finished.
>>
>> diff --git a/osaf/services/saf/amf/amfnd/clc.cc
>> b/osaf/services/saf/amf/amfnd/clc.cc
>> --- a/osaf/services/saf/amf/amfnd/clc.cc
>> +++ b/osaf/services/saf/amf/amfnd/clc.cc
>> @@ -846,13 +846,6 @@ uint32_t avnd_comp_clc_fsm_run(AVND_CB *
>> /* get the final presence state */
>> final_st = comp->pres;
>>
>> - if (ev == AVND_COMP_CLC_PRES_FSM_EV_CLEANUP || ev ==
>> AVND_COMP_CLC_PRES_FSM_EV_TERM_SUCC) {
>> - /* we need to delete all curr_info, pxied will have cbk for
>> cleanup */
>> - if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) {
>> - avnd_comp_curr_info_del(cb, comp);
>> - }
>> - }
>> -
>> TRACE_1("Exited CLC FSM");
>> TRACE_1("'%s':FSM Enter presence state: '%s':FSM Exit presence
>> state:%s",
>>
>> comp->name.value,pres_state[prv_st],pres_state[final_st]);
>> @@ -1629,6 +1622,15 @@ uint32_t avnd_comp_clc_xxxing_cleansucc_
>> goto done;
>> }
>>
>> + /*
>> + * su-sis may be in assigning/removing state. signal csi
>> + * assign/remove done so that su-si assignment/removal algo can proceed.
>> + */
>> + avnd_comp_cmplete_all_assignment(cb, comp);
>> +
>> + /* delete curr info of the failed comp */
>> + avnd_comp_curr_info_del(cb, comp);
>> +
>> if ((clc_info->inst_retry_cnt < clc_info->inst_retry_max) &&
>> (AVND_COMP_INST_EXIT_CODE_NO_RETRY != clc_info->inst_code_rcvd)) {
>> /* => keep retrying */
>> @@ -1971,6 +1973,7 @@ uint32_t avnd_comp_clc_terming_termsucc_
>> if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) {
>> m_AVND_COMP_REG_PARAM_RESET(cb, comp);
>> m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, comp,
>> AVND_CKPT_COMP_CONFIG);
>> + avnd_comp_curr_info_del(cb, comp);
>> }
>>
>> TRACE_LEAVE();
>> @@ -2078,6 +2081,15 @@ uint32_t avnd_comp_clc_terming_cleansucc
>> }
>> }
>>
>> + /*
>> + * su-sis may be in assigning/removing state. signal csi
>> + * assign/remove done so that su-si assignment/removal algo can proceed.
>> + */
>> + avnd_comp_cmplete_all_assignment(cb, comp);
>> +
>> + /* delete curr info of the failed comp */
>> + avnd_comp_curr_info_del(cb, comp);
>> +
>> /* reset the comp-reg & instantiate params */
>> if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) {
>> m_AVND_COMP_REG_PARAM_RESET(cb, comp);
>> diff --git a/osaf/services/saf/amf/amfnd/err.cc
>> b/osaf/services/saf/amf/amfnd/err.cc
>> --- a/osaf/services/saf/amf/amfnd/err.cc
>> +++ b/osaf/services/saf/amf/amfnd/err.cc
>> @@ -521,13 +521,6 @@ uint32_t avnd_err_recover(AVND_CB *cb, A
>> return rc;
>> m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, comp,
>> AVND_CKPT_COMP_OPER_STATE);
>>
>> - /*
>> - * SU may be in the middle of SU_SI in assigning/removing state.
>> - * signal csi assign/remove done so that su-si
>> assignment/removal
>> - * algo can proceed.
>> - */
>> - avnd_comp_cmplete_all_assignment(cb, comp);
>> -
>> /* clean up the comp */
>> rc = avnd_comp_clc_fsm_run(cb, comp,
>> AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
>>
>> @@ -702,23 +695,12 @@ uint32_t avnd_err_rcvr_comp_failover(AVN
>> m_AVND_SU_OPER_STATE_SET(su, SA_AMF_OPERATIONAL_DISABLED);
>> m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su, AVND_CKPT_SU_OPER_STATE);
>>
>> - /*
>> - * su-sis may be in assigning/removing state. signal csi
>> - * assign/remove done so that su-si assignment/removal algo can proceed.
>> - */
>> - avnd_comp_cmplete_all_assignment(cb, failed_comp);
>> -
>> /* We are now in the context of failover, forget the restart */
>> if (su->pres == SA_AMF_PRESENCE_RESTARTING || m_AVND_SU_IS_RESTART(su))
>> {
>> m_AVND_SU_RESTART_RESET(su);
>> m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su,
>> AVND_CKPT_SU_FLAG_CHANGE);
>> }
>>
>> - /* delete curr info of the failed comp */
>> - rc = avnd_comp_curr_info_del(cb, failed_comp);
>> - if (NCSCC_RC_SUCCESS != rc)
>> - goto done;
>> -
>> // TODO: there should be no difference between PI/NPI comps
>> if (m_AVND_SU_IS_PREINSTANTIABLE(su)) {
>> /* clean the failed comp */
>> @@ -832,24 +814,12 @@ uint32_t avnd_err_rcvr_node_switchover(A
>> goto done;
>> }
>>
>> -
>> - /*
>> - * su-sis may be in assigning/removing state. signal csi
>> - * assign/remove done so that su-si assignment/removal algo can proceed.
>> - */
>> - avnd_comp_cmplete_all_assignment(cb, failed_comp);
>> -
>> /* We are now in the context of failover, forget the restart */
>> if (failed_su->pres == SA_AMF_PRESENCE_RESTARTING ||
>> m_AVND_SU_IS_RESTART(failed_su)) {
>> m_AVND_SU_RESTART_RESET(failed_su);
>> m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, failed_su,
>> AVND_CKPT_SU_FLAG_CHANGE);
>> }
>>
>> - /* delete curr info of the failed comp */
>> - rc = avnd_comp_curr_info_del(cb, failed_comp);
>> - if (NCSCC_RC_SUCCESS != rc)
>> - goto done;
>> -
>> /* In nodeswitchover context:
>> a)If saAmfSUFailover is set for the faulted SU then this SU will be
>> failed-over
>> as a single entity.
>
> ------------------------------------------------------------------------------
> _______________________________________________
> Opensaf-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/opensaf-devel
------------------------------------------------------------------------------
Put Bad Developers to Shame
Dominate Development with Jenkins Continuous Integration
Continuously Automate Build, Test & Deployment
Start a new project now. Try Jenkins in the cloud.
http://p.sf.net/sfu/13600_Cloudbees_APR
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel