osaf/services/saf/amf/amfnd/clc.cc | 100 +++-- osaf/services/saf/amf/amfnd/clm.cc | 11 +- osaf/services/saf/amf/amfnd/comp.cc | 42 ++- osaf/services/saf/amf/amfnd/compdb.cc | 45 ++- osaf/services/saf/amf/amfnd/di.cc | 419 +++++++++++++++++++++++- osaf/services/saf/amf/amfnd/err.cc | 112 +++++- osaf/services/saf/amf/amfnd/evt.cc | 2 + osaf/services/saf/amf/amfnd/hcdb.cc | 8 +- osaf/services/saf/amf/amfnd/include/avnd_cb.h | 13 +- osaf/services/saf/amf/amfnd/include/avnd_comp.h | 17 +- osaf/services/saf/amf/amfnd/include/avnd_di.h | 4 + osaf/services/saf/amf/amfnd/include/avnd_evt.h | 2 + osaf/services/saf/amf/amfnd/include/avnd_mds.h | 4 +- osaf/services/saf/amf/amfnd/include/avnd_proc.h | 1 + osaf/services/saf/amf/amfnd/include/avnd_su.h | 4 +- osaf/services/saf/amf/amfnd/include/avnd_tmr.h | 1 + osaf/services/saf/amf/amfnd/include/avnd_util.h | 4 + osaf/services/saf/amf/amfnd/main.cc | 103 +++++- osaf/services/saf/amf/amfnd/mds.cc | 19 +- osaf/services/saf/amf/amfnd/sidb.cc | 9 +- osaf/services/saf/amf/amfnd/su.cc | 39 +- osaf/services/saf/amf/amfnd/susm.cc | 103 +++-- osaf/services/saf/amf/amfnd/tmr.cc | 1 + osaf/services/saf/amf/amfnd/util.cc | 153 ++++++++- 24 files changed, 1059 insertions(+), 157 deletions(-)
Outline of changes: . amfnd does not reboot if amfd is down . componentRestart and suRestart are supported; the node reboots on any escalation to component/su failover . SC absence timer is introduced; the node reboots if it expires . amfnd sends sync information when amfd comes back up after a headless period diff --git a/osaf/services/saf/amf/amfnd/clc.cc b/osaf/services/saf/amf/amfnd/clc.cc --- a/osaf/services/saf/amf/amfnd/clc.cc +++ b/osaf/services/saf/amf/amfnd/clc.cc @@ -454,7 +454,7 @@ uint32_t avnd_evt_comp_pres_fsm_evh(AVND if ((is_uninst == true) && (comp->pres == SA_AMF_PRESENCE_INSTANTIATING)) - avnd_su_pres_state_set(comp->su, SA_AMF_PRESENCE_INSTANTIATING); + avnd_su_pres_state_set(cb, comp->su, SA_AMF_PRESENCE_INSTANTIATING); done: TRACE_LEAVE2("%u", rc); @@ -767,7 +767,7 @@ uint32_t avnd_comp_clc_fsm_run(AVND_CB * TRACE("Term state is NODE_FAILOVER, event '%s'", pres_state_evt[ev]); switch (ev) { case AVND_COMP_CLC_PRES_FSM_EV_CLEANUP_SUCC: - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_UNINSTANTIATED); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_UNINSTANTIATED); if (all_app_comps_terminated()) { AVND_SU *tmp_su; cb->term_state = AVND_TERM_STATE_NODE_FAILOVER_TERMINATED; @@ -924,8 +924,10 @@ uint32_t avnd_comp_clc_st_chng_prc(AVND_ TRACE_1("Component restart not through admin operation"); /* inform avd of the change in restart count */ - avnd_di_uns32_upd_send(AVSV_SA_AMF_COMP, saAmfCompRestartCount_ID, + if (cb->is_avd_down == false) { + avnd_di_uns32_upd_send(AVSV_SA_AMF_COMP, saAmfCompRestartCount_ID, &comp->name, comp->err_info.restart_cnt); + } } /* reset the admin-oper flag to false */ if ((comp->admin_oper == true) && (final_st == SA_AMF_PRESENCE_INSTANTIATED)) { @@ -981,7 +983,9 @@ uint32_t avnd_comp_clc_st_chng_prc(AVND_ (final_st == SA_AMF_PRESENCE_TERMINATION_FAILED))) { /* instantiation failed.. 
log it */ m_AVND_COMP_OPER_STATE_SET(comp, SA_AMF_OPERATIONAL_DISABLED); - m_AVND_COMP_OPER_STATE_AVD_SYNC(cb, comp, rc); + if (cb->is_avd_down == false) { + m_AVND_COMP_OPER_STATE_AVD_SYNC(cb, comp, rc); + } m_AVND_COMP_FAILED_SET(comp); } @@ -996,7 +1000,9 @@ uint32_t avnd_comp_clc_st_chng_prc(AVND_ if (m_AVND_COMP_IS_FAILED(comp)) { m_AVND_COMP_FAILED_RESET(comp); m_AVND_COMP_OPER_STATE_SET(comp, SA_AMF_OPERATIONAL_ENABLED); - m_AVND_COMP_OPER_STATE_AVD_SYNC(cb, comp, rc); + if (cb->is_avd_down == false) { + m_AVND_COMP_OPER_STATE_AVD_SYNC(cb, comp, rc); + } if (NCSCC_RC_SUCCESS != rc) goto done; clear_error_report_alarm(comp); @@ -1082,7 +1088,9 @@ uint32_t avnd_comp_clc_st_chng_prc(AVND_ if ((SA_AMF_PRESENCE_TERMINATING == prv_st) && (SA_AMF_PRESENCE_UNINSTANTIATED == final_st)) { /* npi comps are enabled in uninstantiated state */ m_AVND_COMP_OPER_STATE_SET(comp, SA_AMF_OPERATIONAL_ENABLED); - m_AVND_COMP_OPER_STATE_AVD_SYNC(cb, comp, rc); + if (cb->is_avd_down == false) { + m_AVND_COMP_OPER_STATE_AVD_SYNC(cb, comp, rc); + } if (NCSCC_RC_SUCCESS != rc) goto done; @@ -1133,7 +1141,9 @@ uint32_t avnd_comp_clc_st_chng_prc(AVND_ if (m_AVND_COMP_IS_FAILED(comp)) { m_AVND_COMP_FAILED_RESET(comp); m_AVND_COMP_OPER_STATE_SET(comp, SA_AMF_OPERATIONAL_ENABLED); - m_AVND_COMP_OPER_STATE_AVD_SYNC(cb, comp, rc); + if (cb->is_avd_down == false) { + m_AVND_COMP_OPER_STATE_AVD_SYNC(cb, comp, rc); + } if (NCSCC_RC_SUCCESS != rc) goto done; clear_error_report_alarm(comp); @@ -1156,7 +1166,9 @@ uint32_t avnd_comp_clc_st_chng_prc(AVND_ /* update comp oper state */ m_AVND_COMP_OPER_STATE_SET(comp, SA_AMF_OPERATIONAL_DISABLED); - m_AVND_COMP_OPER_STATE_AVD_SYNC(cb, comp, rc); + if (cb->is_avd_down == false) { + m_AVND_COMP_OPER_STATE_AVD_SYNC(cb, comp, rc); + } m_AVND_SU_FAILED_SET(comp->su); /* csi-set Failed.. 
Respond failure for Su-Si */ @@ -1175,7 +1187,9 @@ uint32_t avnd_comp_clc_st_chng_prc(AVND_ if ((SA_AMF_PRESENCE_RESTARTING == prv_st) && (SA_AMF_PRESENCE_INSTANTIATED == final_st)) { m_AVND_COMP_FAILED_RESET(comp); m_AVND_COMP_OPER_STATE_SET(comp, SA_AMF_OPERATIONAL_ENABLED); - m_AVND_COMP_OPER_STATE_AVD_SYNC(cb, comp, rc); + if (cb->is_avd_down == false) { + m_AVND_COMP_OPER_STATE_AVD_SYNC(cb, comp, rc); + } if (NCSCC_RC_SUCCESS != rc) goto done; clear_error_report_alarm(comp); @@ -1189,7 +1203,9 @@ uint32_t avnd_comp_clc_st_chng_prc(AVND_ else { /* npi comps are enabled in uninstantiated state */ m_AVND_COMP_OPER_STATE_SET(comp, SA_AMF_OPERATIONAL_ENABLED); - m_AVND_COMP_OPER_STATE_AVD_SYNC(cb, comp, rc); + if (cb->is_avd_down == false) { + m_AVND_COMP_OPER_STATE_AVD_SYNC(cb, comp, rc); + } if (NCSCC_RC_SUCCESS != rc) goto done; } @@ -1201,7 +1217,9 @@ uint32_t avnd_comp_clc_st_chng_prc(AVND_ (final_st == SA_AMF_PRESENCE_TERMINATION_FAILED))) { m_AVND_COMP_FAILED_SET(comp); m_AVND_COMP_OPER_STATE_SET(comp, SA_AMF_OPERATIONAL_DISABLED); - m_AVND_COMP_OPER_STATE_AVD_SYNC(cb, comp, rc); + if (cb->is_avd_down == false) { + m_AVND_COMP_OPER_STATE_AVD_SYNC(cb, comp, rc); + } if (NCSCC_RC_SUCCESS != rc) goto done; } @@ -1407,7 +1425,7 @@ uint32_t avnd_comp_clc_uninst_inst_hdler } /* transition to 'instantiating' state */ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_INSTANTIATING); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_INSTANTIATING); goto done; } @@ -1423,7 +1441,7 @@ uint32_t avnd_comp_clc_uninst_inst_hdler comp->clc_info.inst_retry_cnt++; /* transition to 'instantiating' state */ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_INSTANTIATING); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_INSTANTIATING); } done: @@ -1495,7 +1513,7 @@ uint32_t avnd_comp_clc_insting_instsucc_ comp->clc_info.inst_retry_cnt = 0; /* transition to 'instantiated' state */ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_INSTANTIATED); + avnd_comp_pres_state_set(cb, 
comp, SA_AMF_PRESENCE_INSTANTIATED); TRACE_LEAVE(); return rc; @@ -1581,7 +1599,7 @@ uint32_t avnd_comp_clc_insting_term_hdle avnd_comp_pm_rec_del_all(cb, comp); /*if at all anythnig is left behind */ /* transition to 'terminating' state */ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_TERMINATING); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_TERMINATING); } TRACE_LEAVE(); @@ -1616,7 +1634,7 @@ uint32_t avnd_comp_clc_insting_clean_hdl m_AVND_COMP_TERM_FAIL_RESET(comp); /* transition to 'terminating' state */ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_TERMINATING); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_TERMINATING); } TRACE_LEAVE(); @@ -1650,7 +1668,7 @@ static bool is_failed_comp_eligible_for_ component was cleaned up in the context of comp-restart recovery. Since further escalation has reached to surestart, same cleanup can be used and thus comp can be marked uninstantiated.*/ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_UNINSTANTIATED); + avnd_comp_pres_state_set(avnd_cb, comp, SA_AMF_PRESENCE_UNINSTANTIATED); return false; } } else { //Case of RESTART admin op or assignment phase of surestart recovery. @@ -1750,7 +1768,7 @@ uint32_t avnd_comp_clc_xxxing_cleansucc_ m_AVND_TMR_PXIED_COMP_INST_STOP(cb, *comp); } /* => retries over... transition to inst-failed state */ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_INSTANTIATION_FAILED); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_INSTANTIATION_FAILED); } done: TRACE_LEAVE(); @@ -1776,7 +1794,7 @@ uint32_t avnd_comp_clc_insting_cleanfail TRACE_ENTER2("'%s': Cleanup Fail event in the instantiating state", comp->name.value); /* nothing can be done now.. 
just transition to term-failed state */ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_TERMINATION_FAILED); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_TERMINATION_FAILED); TRACE_LEAVE(); return rc; @@ -1808,7 +1826,7 @@ uint32_t avnd_comp_clc_insting_restart_h m_AVND_COMP_CLC_INST_PARAM_RESET(comp); /* transition to 'restarting' state */ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_RESTARTING); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_RESTARTING); } TRACE_LEAVE(); @@ -1864,7 +1882,7 @@ uint32_t avnd_comp_clc_inst_term_hdler(A m_AVND_COMP_CLC_INST_PARAM_RESET(comp); /* transition to 'terminating' state */ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_TERMINATING); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_TERMINATING); } TRACE_LEAVE(); @@ -1925,7 +1943,7 @@ uint32_t avnd_comp_clc_inst_clean_hdler( m_AVND_COMP_TERM_FAIL_RESET(comp); /* transition to 'terminating' state */ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_TERMINATING); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_TERMINATING); } done: @@ -2008,9 +2026,9 @@ uint32_t avnd_comp_clc_inst_restart_hdle /* If DisableRestart=0 then transition to 'restarting' state and DisableRestart=1 then transition to 'terminating' state */ if (!m_AVND_COMP_IS_RESTART_DIS(comp)) - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_RESTARTING); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_RESTARTING); else - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_TERMINATING); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_TERMINATING); } done: TRACE_LEAVE(); @@ -2041,7 +2059,7 @@ uint32_t avnd_comp_clc_inst_orph_hdler(A m_AVND_TMR_PXIED_COMP_REG_START(cb, *comp, rc); if (NCSCC_RC_SUCCESS == rc) { - avnd_comp_pres_state_set(comp, static_cast<SaAmfPresenceStateT>(SA_AMF_PRESENCE_ORPHANED)); + avnd_comp_pres_state_set(cb, comp, static_cast<SaAmfPresenceStateT>(SA_AMF_PRESENCE_ORPHANED)); } TRACE_LEAVE(); @@ -2067,7 +2085,7 @@ uint32_t avnd_comp_clc_terming_termsucc_ 
TRACE_ENTER2("'%s': Terminate success event in the terminating state", comp->name.value); /* just transition to 'uninstantiated' state */ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_UNINSTANTIATED); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_UNINSTANTIATED); /* reset the comp-reg & instantiate params */ if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) { @@ -2137,7 +2155,7 @@ uint32_t avnd_comp_clc_terming_cleansucc TRACE_ENTER2("'%s': Cleanup success event in the terminating state", comp->name.value); /* just transition to 'uninstantiated' state */ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_UNINSTANTIATED); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_UNINSTANTIATED); if (AVND_TERM_STATE_OPENSAF_SHUTDOWN_STARTED == cb->term_state) { /* @@ -2238,7 +2256,7 @@ uint32_t avnd_comp_clc_terming_cleanfail TRACE_ENTER2("'%s': Cleanup fail event in the terminating state", comp->name.value); /* just transition to 'term-failed' state */ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_TERMINATION_FAILED); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_TERMINATION_FAILED); avnd_comp_curr_info_del(cb, comp); if ((cb->term_state == AVND_TERM_STATE_OPENSAF_SHUTDOWN_STARTED) && all_comps_terminated()) { @@ -2328,7 +2346,7 @@ uint32_t avnd_comp_clc_restart_instsucc_ } /* just transition back to 'instantiated' state */ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_INSTANTIATED); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_INSTANTIATED); TRACE_LEAVE(); return rc; @@ -2373,7 +2391,7 @@ uint32_t avnd_comp_clc_restart_term_hdle m_AVND_COMP_CLC_INST_PARAM_RESET(comp); /* transition to 'terminating' state */ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_TERMINATING); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_TERMINATING); } TRACE_LEAVE(); @@ -2468,7 +2486,7 @@ uint32_t avnd_comp_clc_restart_termfail_ /* transition to 'term-failed' state */ if (NCSCC_RC_SUCCESS == rc) { - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_TERMINATION_FAILED); 
+ avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_TERMINATION_FAILED); } TRACE_LEAVE(); @@ -2514,7 +2532,7 @@ uint32_t avnd_comp_clc_restart_clean_hdl /* transition to 'terminating' state */ if (!m_AVND_COMP_IS_TERM_FAIL(comp)) - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_TERMINATING); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_TERMINATING); else m_AVND_COMP_TERM_FAIL_RESET(comp); } @@ -2542,7 +2560,7 @@ uint32_t avnd_comp_clc_restart_cleanfail TRACE_ENTER2("'%s': Cleanup fail event in the restarting state", comp->name.value); /* transition to 'term-failed' state */ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_TERMINATION_FAILED); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_TERMINATION_FAILED); TRACE_LEAVE(); return rc; @@ -2572,7 +2590,7 @@ uint32_t avnd_comp_clc_orph_instsucc_hdl } /* just transition to 'instantiated' state */ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_INSTANTIATED); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_INSTANTIATED); TRACE_LEAVE(); return rc; @@ -2636,7 +2654,7 @@ uint32_t avnd_comp_clc_orph_clean_hdler( m_AVND_COMP_CLC_INST_PARAM_RESET(comp); /* transition to 'terminating' state */ - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_TERMINATING); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_TERMINATING); } TRACE_LEAVE(); @@ -3012,12 +3030,22 @@ uint32_t avnd_instfail_su_failover(AVND_ /* inform AvD */ rc = avnd_di_oper_send(cb, su, SA_AMF_COMPONENT_FAILOVER); + + if (cb->is_avd_down == true) { + // remove assignment if instantiation fails and leads + // to comp failover in headless mode for PI SU + if (m_AVND_SU_IS_PREINSTANTIABLE(su)) { + LOG_WA("Director is down. 
Remove all SIs from '%s'", su->name.value); + avnd_su_si_del(avnd_cb, &su->name); + } + } } done: - if (rc == NCSCC_RC_SUCCESS) + if (rc == NCSCC_RC_SUCCESS) { LOG_NO("Component Failover trigerred for '%s': Failed component: '%s'", su->name.value, failed_comp->name.value); + } TRACE_LEAVE2("%u", rc); return rc; } diff --git a/osaf/services/saf/amf/amfnd/clm.cc b/osaf/services/saf/amf/amfnd/clm.cc --- a/osaf/services/saf/amf/amfnd/clm.cc +++ b/osaf/services/saf/amf/amfnd/clm.cc @@ -37,6 +37,7 @@ #include "avnd.h" #include "mds_pvt.h" #include "nid_api.h" +#include "amf_si_assign.h" static void clm_node_left(SaClmNodeIdT node_id) { @@ -118,7 +119,12 @@ static void clm_to_amf_node(void) searchParam.searchOneAttr.attrValueType = SA_IMM_ATTR_SASTRINGT; searchParam.searchOneAttr.attrValue = &className; - immutil_saImmOmInitialize(&immOmHandle, nullptr, &immVersion); + error = saImmOmInitialize_cond(&immOmHandle, nullptr, &immVersion); + if (SA_AIS_OK != error) { + LOG_CR("saImmOmInitialize failed. Use previous value of nodeName."); + osafassert(avnd_cb->amf_nodeName.length != 0); + goto done1; + } error = immutil_saImmOmSearchInitialize_2(immOmHandle, nullptr, SA_IMM_SUBTREE, SA_IMM_SEARCH_ONE_ATTR | SA_IMM_SEARCH_GET_ALL_ATTR, @@ -140,6 +146,7 @@ static void clm_to_amf_node(void) done: immutil_saImmOmSearchFinalize(searchHandle); immutil_saImmOmFinalize(immOmHandle); +done1: TRACE_LEAVE2("%u", error); } @@ -170,6 +177,8 @@ uint32_t avnd_evt_avd_node_up_evh(AVND_C cb->su_failover_max = info->su_failover_max; cb->su_failover_prob = info->su_failover_prob; + cb->amfd_sync_required = false; + TRACE_LEAVE(); return rc; } diff --git a/osaf/services/saf/amf/amfnd/comp.cc b/osaf/services/saf/amf/amfnd/comp.cc --- a/osaf/services/saf/amf/amfnd/comp.cc +++ b/osaf/services/saf/amf/amfnd/comp.cc @@ -72,6 +72,11 @@ uint32_t avnd_evt_ava_finalize_evh(AVND_ TRACE_ENTER(); + if (cb->is_avd_down == true) { + LOG_ER("AVD is down. 
Component finalization not available."); + goto done; + } + /* * See appendix B. Non registered processes can use parts of the API. * For such processes finalize is OK, AMF has no allocated resources. @@ -244,6 +249,12 @@ uint32_t avnd_evt_ava_comp_unreg_evh(AVN TRACE_ENTER(); + /* return error if amfd is down */ + if (cb->is_avd_down == true) { + LOG_ER("AVD is down. Component unregistration not available."); + goto done; + } + if (AVND_EVT_AVND_AVND_MSG == evt->type) { /* This means that the message has come from proxy AvND to this AvND. */ msg_from_avnd = true; @@ -2749,7 +2760,7 @@ static SaAisErrorT avnd_validate_comp_an * @param comp * @param newstate */ -void avnd_comp_pres_state_set(AVND_COMP *comp, SaAmfPresenceStateT newstate) +void avnd_comp_pres_state_set(const AVND_CB *cb, AVND_COMP *comp, SaAmfPresenceStateT newstate) { SaAmfPresenceStateT prv_st = comp->pres; @@ -2769,7 +2780,9 @@ void avnd_comp_pres_state_set(AVND_COMP if ((SA_AMF_PRESENCE_ORPHANED != newstate) && (!((SA_AMF_PRESENCE_INSTANTIATED == newstate) && (SA_AMF_PRESENCE_ORPHANED == prv_st)))) { - avnd_di_uns32_upd_send(AVSV_SA_AMF_COMP, saAmfCompPresenceState_ID, &comp->name, comp->pres); + if (cb->is_avd_down == false) { + avnd_di_uns32_upd_send(AVSV_SA_AMF_COMP, saAmfCompPresenceState_ID, &comp->name, comp->pres); + } } /* create failed state file meaning system restart/cleanup needed */ @@ -2807,12 +2820,14 @@ bool comp_has_quiesced_assignment(const * @brief Resets component restart count. 
* @param comp */ -void comp_reset_restart_count(AVND_COMP *comp) +void comp_reset_restart_count(const AVND_CB *cb, AVND_COMP *comp) { if (comp->err_info.restart_cnt != 0) { comp->err_info.restart_cnt = 0; - avnd_di_uns32_upd_send(AVSV_SA_AMF_COMP, saAmfCompRestartCount_ID, + if (cb->is_avd_down == false) { + avnd_di_uns32_upd_send(AVSV_SA_AMF_COMP, saAmfCompRestartCount_ID, &comp->name, comp->err_info.restart_cnt); + } } } /** @@ -2832,6 +2847,25 @@ void clear_error_report_alarm(AVND_COMP } } +void m_AVND_COMP_OPER_STATE_AVD_SYNC(struct avnd_cb_tag *cb, const AVND_COMP *comp, uint32_t& o_rc) +{ + AVSV_PARAM_INFO param; + if (cb->is_avd_down == true) { + // pretend it's successful + o_rc = NCSCC_RC_SUCCESS; + return; + } + memset(&param, 0, sizeof(AVSV_PARAM_INFO)); + param.class_id = AVSV_SA_AMF_COMP; + param.attr_id = saAmfCompOperState_ID; + param.name = (comp)->name; + param.act = AVSV_OBJ_OPR_MOD; + *((uint32_t *)param.value) = m_NCS_OS_HTONL((comp)->oper); + param.value_len = sizeof(uint32_t); + (o_rc) = avnd_di_object_upd_send((cb), &param); +} + + /** * @brief Checks if comp is nonrestartable (DisableRestart=1). 
* @param comp diff --git a/osaf/services/saf/amf/amfnd/compdb.cc b/osaf/services/saf/amf/amfnd/compdb.cc --- a/osaf/services/saf/amf/amfnd/compdb.cc +++ b/osaf/services/saf/amf/amfnd/compdb.cc @@ -186,10 +186,16 @@ uint32_t avnd_compdb_init(AVND_CB *cb) uint32_t rc; SaImmHandleT immOmHandle; SaVersionT immVersion = { 'A', 2, 1 }; + SaAisErrorT error; TRACE_ENTER(); - immutil_saImmOmInitialize(&immOmHandle, nullptr, &immVersion); + error = saImmOmInitialize_cond(&immOmHandle, nullptr, &immVersion); + if (error != SA_AIS_OK) { + LOG_CR("saImmOmInitialize failed: %u", error); + rc = NCSCC_RC_FAILURE; + goto done1; + } if (avnd_compglobalattrs_config_get(immOmHandle) != SA_AIS_OK) { rc = NCSCC_RC_FAILURE; @@ -205,6 +211,7 @@ uint32_t avnd_compdb_init(AVND_CB *cb) done: immutil_saImmOmFinalize(immOmHandle); +done1: TRACE_LEAVE(); return rc; } @@ -1518,10 +1525,15 @@ static int comp_init(AVND_COMP *comp, co SaStringT env; SaImmHandleT immOmHandle; SaVersionT immVersion = { 'A', 2, 1 }; + SaAisErrorT error; TRACE_ENTER2("%s", comp->name.value); - immutil_saImmOmInitialize(&immOmHandle, nullptr, &immVersion); + error = saImmOmInitialize_cond(&immOmHandle, nullptr, &immVersion); + if (error != SA_AIS_OK) { + LOG_CR("saImmOmInitialize failed: %u", error); + goto done1; + } if ((comptype = avnd_comptype_create(immOmHandle, &comp->saAmfCompType)) == nullptr) { LOG_ER("%s: avnd_comptype_create FAILED for '%s'", __FUNCTION__, @@ -1658,6 +1670,7 @@ done: delete [] path_prefix; avnd_comptype_delete(comptype); immutil_saImmOmFinalize(immOmHandle); +done1: TRACE_LEAVE(); return res; } @@ -1698,7 +1711,7 @@ void avnd_comp_delete(AVND_COMP *comp) */ static AVND_COMP *avnd_comp_create(const SaNameT *comp_name, const SaImmAttrValuesT_2 **attributes, AVND_SU *su) { - int rc = -1; + uint32_t rc = NCSCC_RC_SUCCESS; AVND_COMP *comp; SaAisErrorT error; @@ -1812,7 +1825,11 @@ unsigned int avnd_comp_config_get_su(AVN TRACE_ENTER2("SU'%s'", su->name.value); - 
immutil_saImmOmInitialize(&immOmHandle, nullptr, &immVersion); + error = saImmOmInitialize_cond(&immOmHandle, nullptr, &immVersion); + if (error != SA_AIS_OK) { + LOG_CR("saImmOmInitialize failed: %u", error); + goto done; + } searchParam.searchOneAttr.attrName = const_cast<SaImmAttrNameT>("SaImmAttrClassName"); searchParam.searchOneAttr.attrValueType = SA_IMM_ATTR_SASTRINGT; searchParam.searchOneAttr.attrValue = &className; @@ -1843,6 +1860,7 @@ unsigned int avnd_comp_config_get_su(AVN (void)immutil_saImmOmSearchFinalize(searchHandle); done1: immutil_saImmOmFinalize(immOmHandle); + done: TRACE_LEAVE(); return rc; } @@ -1861,6 +1879,7 @@ int avnd_comp_config_reinit(AVND_COMP *c const SaImmAttrValuesT_2 **attributes; SaImmHandleT immOmHandle; SaVersionT immVersion = { 'A', 2, 1 }; + SaAisErrorT error; TRACE_ENTER2("'%s'", comp->name.value); @@ -1877,14 +1896,21 @@ int avnd_comp_config_reinit(AVND_COMP *c TRACE_1("%s", comp->name.value); - immutil_saImmOmInitialize(&immOmHandle, nullptr, &immVersion); - immutil_saImmOmAccessorInitialize(immOmHandle, &accessorHandle); - + error = saImmOmInitialize_cond(&immOmHandle, nullptr, &immVersion); + if (error != SA_AIS_OK) { + LOG_CR("saImmOmInitialize FAILED for '%s'", comp->name.value); + goto done1; + } + error = immutil_saImmOmAccessorInitialize(immOmHandle, &accessorHandle); + if (error != SA_AIS_OK) { + LOG_CR("immutil_saImmOmAccessorInitialize FAILED for '%s'", comp->name.value); + goto done2; + } if (immutil_saImmOmAccessorGet_2(accessorHandle, &comp->name, nullptr, (SaImmAttrValuesT_2 ***)&attributes) != SA_AIS_OK) { LOG_ER("saImmOmAccessorGet_2 FAILED for '%s'", comp->name.value); - goto done2; + goto done3; } res = comp_init(comp, attributes); @@ -1894,8 +1920,9 @@ int avnd_comp_config_reinit(AVND_COMP *c /* need to get HC type configuration also if that has been recently created */ avnd_hctype_config_get(immOmHandle, &comp->saAmfCompType); +done3: + immutil_saImmOmAccessorFinalize(accessorHandle); done2: - 
immutil_saImmOmAccessorFinalize(accessorHandle); immutil_saImmOmFinalize(immOmHandle); done1: TRACE_LEAVE2("%u", res); diff --git a/osaf/services/saf/amf/amfnd/di.cc b/osaf/services/saf/amf/amfnd/di.cc --- a/osaf/services/saf/amf/amfnd/di.cc +++ b/osaf/services/saf/amf/amfnd/di.cc @@ -266,6 +266,110 @@ done: } /**************************************************************************** + Name : add_sisu_state_info + + Description : This routine adds susi assignment to sisu state info message + + Arguments : msg - ptr to message + si_assign - ptr to sisu assignment + + Return Values : None + + Notes : None. +******************************************************************************/ +void add_sisu_state_info(AVND_MSG *msg, SaAmfSIAssignment *si_assign) +{ + AVSV_SISU_STATE_MSG *sisu_state = new AVSV_SISU_STATE_MSG(); + + sisu_state->safSU = si_assign->su; + sisu_state->safSI = si_assign->si; + sisu_state->saAmfSISUHAState = si_assign->saAmfSISUHAState; + + sisu_state->next = msg->info.avd->msg_info.n2d_nd_sisu_state_info.sisu_list; + msg->info.avd->msg_info.n2d_nd_sisu_state_info.sisu_list = sisu_state; + msg->info.avd->msg_info.n2d_nd_sisu_state_info.num_sisu++; +} + +/**************************************************************************** + Name : add_su_state_info + + Description : This routine adds su info to sisu state info message + + Arguments : msg - ptr to message + su - ptr to su + + Return Values : None + + Notes : None. 
+******************************************************************************/ +void add_su_state_info(AVND_MSG *msg, const AVND_SU* su) +{ + AVSV_SU_STATE_MSG *su_state = new AVSV_SU_STATE_MSG(); + + su_state->safSU = su->name; + su_state->su_restart_cnt = su->su_restart_cnt; + su_state->su_oper_state = su->oper; + su_state->su_pres_state = su->pres; + + su_state->next = msg->info.avd->msg_info.n2d_nd_sisu_state_info.su_list; + msg->info.avd->msg_info.n2d_nd_sisu_state_info.su_list = su_state; + msg->info.avd->msg_info.n2d_nd_sisu_state_info.num_su++; +} + +/**************************************************************************** + Name : add_csicomp_state_info + + Description : This routine adds csi assignment info to csi comp state info + message + + Arguments : msg - ptr to message + csi_assign - ptr to csi assignment + + Return Values : None + + Notes : None. +******************************************************************************/ +void add_csicomp_state_info(AVND_MSG *msg, SaAmfCSIAssignment *csi_assign) +{ + AVSV_CSICOMP_STATE_MSG *csicomp_state = new AVSV_CSICOMP_STATE_MSG(); + + csicomp_state->safCSI = csi_assign->csi; + csicomp_state->safComp = csi_assign->comp; + csicomp_state->saAmfCSICompHAState = csi_assign->saAmfCSICompHAState; + + csicomp_state->next = msg->info.avd->msg_info.n2d_nd_csicomp_state_info.csicomp_list; + msg->info.avd->msg_info.n2d_nd_csicomp_state_info.csicomp_list = csicomp_state; + msg->info.avd->msg_info.n2d_nd_csicomp_state_info.num_csicomp++; +} + +/**************************************************************************** + Name : add_comp_state_info + + Description : This routine adds csi assignment info to comp state info + message + + Arguments : msg - ptr to message + comp - ptr to comp + + Return Values : None + + Notes : None. 
+******************************************************************************/ +void add_comp_state_info(AVND_MSG *msg, const AVND_COMP *comp) +{ + AVSV_COMP_STATE_MSG *comp_state = new AVSV_COMP_STATE_MSG(); + + comp_state->safComp = comp->name; + comp_state->comp_restart_cnt = comp->err_info.restart_cnt; + comp_state->comp_oper_state = comp->oper; + comp_state->comp_pres_state = comp->pres; + + comp_state->next = msg->info.avd->msg_info.n2d_nd_csicomp_state_info.comp_list; + msg->info.avd->msg_info.n2d_nd_csicomp_state_info.comp_list = comp_state; + msg->info.avd->msg_info.n2d_nd_csicomp_state_info.num_comp++; +} + +/**************************************************************************** Name : avnd_evt_avd_ack_message Description : This routine processes Ack message @@ -335,6 +439,7 @@ void avnd_send_node_up_msg(void) { AVND_CB *cb = avnd_cb; AVND_MSG msg = {}; + AVND_DND_MSG_LIST *pending_rec = 0; uint32_t rc; TRACE_ENTER(); @@ -349,10 +454,20 @@ void avnd_send_node_up_msg(void) goto done; } + // We don't send node_up if it has already been sent and waiting for ACK + for (pending_rec = cb->dnd_list.head; pending_rec != nullptr; pending_rec = pending_rec->next) { + if (pending_rec->msg.info.avd->msg_type == AVSV_N2D_NODE_UP_MSG) { + TRACE("Don't send another node_up since it has been sent and waiting for ack"); + goto done; + } + } + msg.info.avd = new AVSV_DND_MSG(); msg.type = AVND_MSG_AVD; msg.info.avd->msg_type = AVSV_N2D_NODE_UP_MSG; msg.info.avd->msg_info.n2d_node_up.msg_id = ++(cb->snd_msg_id); + msg.info.avd->msg_info.n2d_node_up.leds_set = cb->led_state == AVND_LED_STATE_GREEN ? 
true : false; + msg.info.avd->msg_info.n2d_node_up.node_name = cb->amf_nodeName; msg.info.avd->msg_info.n2d_node_up.node_id = cb->node_info.nodeId; msg.info.avd->msg_info.n2d_node_up.adest_address = cb->avnd_dest; @@ -419,9 +534,39 @@ uint32_t avnd_evt_mds_avd_up_evh(AVND_CB /* store the AVD MDS address */ cb->avd_dest = evt->info.mds.mds_dest; - avnd_send_node_up_msg(); + /* amfnd receives NCSMDS_UP in either cluster start up; or recovery from headless + * after a long gap greater than no-active timer in MDS. We send node_up in both cases + * but only sync info is sent for recovery + */ + if (evt->info.mds.i_change == NCSMDS_UP) { + if (cb->amfd_sync_required && cb->led_state == AVND_LED_STATE_GREEN) { + avnd_sync_sisu(cb); + avnd_sync_csicomp(cb); + } + + LOG_NO("Sending node up due to NCSMDS_UP"); + avnd_send_node_up_msg(); + } + /* amfnd receives NCSMDS_NEW_ACTIVE in either Failover; or recovery from headless + * provided that the no-active timer in MDS has not expired. We only want to send + * node_up/sync info in case of recovery. 
+ */ + if (evt->info.mds.i_change == NCSMDS_NEW_ACTIVE && cb->amfd_sync_required) { + if (cb->led_state == AVND_LED_STATE_GREEN) { + LOG_NO("Sending node up due to NCSMDS_NEW_ACTIVE"); + + // node_up, sync sisu, compcsi info to AVND for recovery + avnd_sync_sisu(cb); + avnd_sync_csicomp(cb); + avnd_send_node_up_msg(); + } + } } + cb->is_avd_down = false; + if (m_AVND_TMR_IS_ACTIVE(cb->sc_absence_tmr)) + avnd_stop_tmr(cb, &cb->sc_absence_tmr); + done: TRACE_LEAVE(); return NCSCC_RC_SUCCESS; @@ -454,15 +599,94 @@ uint32_t avnd_evt_mds_avd_dn_evh(AVND_CB } } - LOG_ER("AMF director unexpectedly crashed"); + m_AVND_CB_AVD_UP_RESET(cb); + cb->active_avd_adest = 0; - /* Don't issue reboot if it has been already issued.*/ - if (false == cb->reboot_in_progress) { - cb->reboot_in_progress = true; - opensaf_reboot(avnd_cb->node_info.nodeId, (char *)avnd_cb->node_info.executionEnvironment.value, + LOG_WA("AMF director unexpectedly crashed"); + + if (cb->scs_absence_max_duration == 0) { + /* Don't issue reboot if it has been already issued.*/ + if (false == cb->reboot_in_progress) { + cb->reboot_in_progress = true; + opensaf_reboot(avnd_cb->node_info.nodeId, (char *) avnd_cb->node_info.executionEnvironment.value, "local AVD down(Adest) or both AVD down(Vdest) received"); + } + + TRACE_LEAVE(); + return rc; } + /* + * No contact with any controller + * Reboot this node if: + * 1) director is of an older version that does not support restart + * 2) we have a pending message TO the director + * 3) we have a pending message FROM the director + */ + + if (cb->scs_absence_max_duration == 0) { + // check for pending messages TO director + if ((cb->dnd_list.head != nullptr)) { + uint32_t no_pending_msg = 0; + AVND_DND_MSG_LIST *rec = 0; + for (rec = cb->dnd_list.head; rec != nullptr; rec = rec->next, no_pending_msg++) { + osafassert(rec->msg.type == AVND_MSG_AVD); + } + + /* Don't issue reboot if it has been already issued.*/ + if (false == cb->reboot_in_progress) { + LOG_ER("%d 
pending messages to director. Rebooting to re-sync.", no_pending_msg); + + cb->reboot_in_progress = true; + opensaf_reboot(avnd_cb->node_info.nodeId, + (char *)avnd_cb->node_info.executionEnvironment.value, + "local AVD down(Adest) or both AVD down(Vdest) received"); + } + } + } else { + TRACE("Delete all pending messages to be sent to AMFD"); + avnd_diq_del(cb); + } + + // check for pending messages FROM director + // scan all SUs "siq" message list, if anyone is not empty reboot + const AVND_SU *su = (AVND_SU *)ncs_patricia_tree_getnext(&cb->sudb, (uint8_t *)0); + while (su != 0) { + LOG_NO("Checking '%s' for pending messages", su->name.value); + + const AVND_SU_SIQ_REC *siq = + (AVND_SU_SIQ_REC *)m_NCS_DBLIST_FIND_LAST(&su->siq); + + if (siq != nullptr) { + /* Don't issue reboot if it has been already issued.*/ + if (false == cb->reboot_in_progress) { + LOG_ER("Pending messages from director. Rebooting to re-sync."); + + cb->reboot_in_progress = true; + opensaf_reboot(avnd_cb->node_info.nodeId, + (char *)avnd_cb->node_info.executionEnvironment.value, + "local AVD down(Adest) or both AVD down(Vdest) received"); + } + } + + su = (AVND_SU *)ncs_patricia_tree_getnext( + &cb->sudb, (uint8_t *)&su->name); + } + // record we are now 'headless' + cb->is_avd_down = true; + cb->amfd_sync_required = true; + // start the sc absence timer if it hasn't started. + // During headless, MDS reports avd_down 2 times, + // the 2nd time is 3 mins later then the 1st time. + // The absence timer should only start at the 1st time. 
+ if (!m_AVND_TMR_IS_ACTIVE(cb->sc_absence_tmr)) { + avnd_start_tmr(cb, &cb->sc_absence_tmr, AVND_TMR_SC_ABSENCE, + cb->scs_absence_max_duration, 0); + } + + // reset msg_id counter + cb->rcv_msg_id = 0; + cb->snd_msg_id = 0; TRACE_LEAVE(); return rc; } @@ -486,6 +710,13 @@ uint32_t avnd_di_oper_send(AVND_CB *cb, AVND_MSG msg; uint32_t rc = NCSCC_RC_SUCCESS; + if (cb->is_avd_down == true) { + LOG_NO("avnd_di_oper_send() deferred as AMF director is offline"); + + // reconcile operational states later + return rc; + } + memset(&msg, 0, sizeof(AVND_MSG)); TRACE_ENTER2("SU '%p', recv '%u'", su, rcvr); @@ -570,6 +801,11 @@ uint32_t avnd_di_susi_resp_send(AVND_CB if (cb->term_state == AVND_TERM_STATE_OPENSAF_SHUTDOWN_STARTED) return rc; + if (cb->is_avd_down == true) { + m_AVND_SU_ALL_SI_RESET(su); + return rc; + } + // should be in assignment pending state to be here osafassert(m_AVND_SU_IS_ASSIGN_PEND(su)); @@ -665,6 +901,11 @@ uint32_t avnd_di_object_upd_send(AVND_CB uint32_t rc = NCSCC_RC_SUCCESS; TRACE_ENTER2("Comp '%s'", param->name.value); + if (cb->is_avd_down == true) { + TRACE_LEAVE2("AVD is down. 
%u", rc); + return rc; + } + memset(&msg, 0, sizeof(AVND_MSG)); /* populate the msg */ @@ -785,6 +1026,10 @@ uint32_t avnd_di_msg_send(AVND_CB *cb, A TRACE_1("%s, Active AVD Adest: %" PRIu64, __FUNCTION__, cb->active_avd_adest); rc = avnd_mds_red_send(cb, msg, &cb->avd_dest, &cb->active_avd_adest); goto done; + } else if ((msg->info.avd->msg_type == AVSV_N2D_ND_SISU_STATE_INFO_MSG) || + (msg->info.avd->msg_type == AVSV_N2D_ND_CSICOMP_STATE_INFO_MSG)) { + rc = avnd_mds_send(cb, msg, &cb->avd_dest, 0); + goto done; } /* add the record to the AvD msg list */ @@ -1005,8 +1250,17 @@ void avnd_diq_rec_del(AVND_CB *cb, AVND_ ncshm_destroy_hdl(NCS_SERVICE_ID_AVND, rec->opq_hdl); /* stop the AvD msg response timer */ - if (m_AVND_TMR_IS_ACTIVE(rec->resp_tmr)) + if (m_AVND_TMR_IS_ACTIVE(rec->resp_tmr)) { m_AVND_TMR_MSG_RESP_STOP(cb, *rec); + // Resend msgs from queue because amfd dropped during sync + if ((cb->dnd_list.head != nullptr)) { + TRACE("retransmit message to amfd"); + AVND_DND_MSG_LIST *pending_rec = 0; + for (pending_rec = cb->dnd_list.head; pending_rec != nullptr; pending_rec = pending_rec->next) { + avnd_diq_rec_send(cb, pending_rec); + } + } + } /* free the avnd message contents */ avnd_msg_content_free(cb, &rec->msg); @@ -1204,3 +1458,154 @@ uint32_t avnd_evt_avd_role_change_evh(AV TRACE_LEAVE(); return rc; } + +/** + * The SC absence timer expired. Reboot this node. 
+ * @param cb + * @param evt + * + * @return uns32 + */ +uint32_t avnd_evt_tmr_sc_absence_evh(AVND_CB *cb, AVND_EVT *evt) +{ + TRACE_ENTER(); + + LOG_ER("AMF director absence timeout"); + + opensaf_reboot(avnd_cb->node_info.nodeId, + (char *)avnd_cb->node_info.executionEnvironment.value, + "AMF director absence timeout"); + + return NCSCC_RC_SUCCESS; +} + +/** + * Send csi comp state info to amfd when cluster comes back from headless + * @param cb + * + * @return void + */ +void avnd_sync_csicomp(AVND_CB *cb) +{ + AVND_MSG msg; + uint32_t rc = NCSCC_RC_SUCCESS; + const AVND_COMP* comp; + const AVND_COMP_CSI_REC* csi; + SaAmfCSIAssignment csi_assignment; + + TRACE_ENTER(); + + /* Send the state info to avd. */ + memset(&msg, 0, sizeof(AVND_MSG)); + msg.info.avd = new AVSV_DND_MSG(); + + msg.type = AVND_MSG_AVD; + msg.info.avd->msg_type = AVSV_N2D_ND_CSICOMP_STATE_INFO_MSG; + msg.info.avd->msg_info.n2d_nd_csicomp_state_info.msg_id = cb->snd_msg_id; + msg.info.avd->msg_info.n2d_nd_csicomp_state_info.node_id = cb->node_info.nodeId; + msg.info.avd->msg_info.n2d_nd_csicomp_state_info.num_csicomp = 0; + msg.info.avd->msg_info.n2d_nd_csicomp_state_info.csicomp_list = nullptr; + + // add CSICOMP objects + comp = (AVND_COMP *)ncs_patricia_tree_getnext(&cb->compdb, (uint8_t *)0); + while (comp != nullptr) { + TRACE("syncing comp: %s", comp->name.value); + for (csi = m_AVND_CSI_REC_FROM_COMP_DLL_NODE_GET(m_NCS_DBLIST_FIND_FIRST(&comp->csi_list)); + csi != nullptr; + csi = m_AVND_CSI_REC_FROM_COMP_DLL_NODE_GET(m_NCS_DBLIST_FIND_NEXT(&csi->comp_dll_node))) { + osafassert(csi != nullptr); + + csi_assignment.comp = comp->name; + csi_assignment.csi = csi->name; + + if (csi->si != nullptr) { + csi_assignment.saAmfCSICompHAState = csi->si->curr_state; + TRACE("si found. 
HA state is %d", csi_assignment.saAmfCSICompHAState); + } else { + TRACE("csi->si is nullptr"); + csi_assignment.saAmfCSICompHAState = SA_AMF_HA_QUIESCED; + } + + add_csicomp_state_info(&msg, &csi_assignment); + } + + add_comp_state_info(&msg, comp); + comp = (AVND_COMP *)ncs_patricia_tree_getnext(&cb->compdb, (uint8_t *)&comp->name); + } + + LOG_NO("%d CSICOMP states synced", msg.info.avd->msg_info.n2d_nd_csicomp_state_info.num_csicomp); + LOG_NO("%d SU states sent", msg.info.avd->msg_info.n2d_nd_csicomp_state_info.num_comp); + + rc = avnd_di_msg_send(cb, &msg); + if (rc == NCSCC_RC_SUCCESS) + msg.info.avd = 0; + else + LOG_ER("avnd_di_msg_send FAILED"); + + avnd_msg_content_free(cb, &msg); + + TRACE_LEAVE(); +} + +/** + * Send susi state info to amfd when cluster comes back from headless + * @param cb + * + * @return void + */ +void avnd_sync_sisu(AVND_CB *cb) +{ + AVND_MSG msg; + uint32_t rc = NCSCC_RC_SUCCESS; + SaAmfSIAssignment si_assignment; + const AVND_SU* su; + const AVND_SU_SI_REC* si; + + TRACE_ENTER(); + + /* Send the state info to avd. 
*/ + memset(&msg, 0, sizeof(AVND_MSG)); + msg.info.avd = new AVSV_DND_MSG(); + + msg.type = AVND_MSG_AVD; + msg.info.avd->msg_type = AVSV_N2D_ND_SISU_STATE_INFO_MSG; //AVSV_N2D_ND_ASSIGN_STATES_MSG + msg.info.avd->msg_info.n2d_nd_sisu_state_info.msg_id = cb->snd_msg_id; + msg.info.avd->msg_info.n2d_nd_sisu_state_info.node_id = cb->node_info.nodeId; + msg.info.avd->msg_info.n2d_nd_sisu_state_info.num_sisu = 0; + msg.info.avd->msg_info.n2d_nd_sisu_state_info.sisu_list = nullptr; + + // gather SISU states + su = (AVND_SU *)ncs_patricia_tree_getnext(&cb->sudb, (uint8_t *)0); + while (su != nullptr) { + TRACE("syncing su: %s", su->name.value); + + // attach SISUs + for (si = (AVND_SU_SI_REC *)m_NCS_DBLIST_FIND_FIRST(&su->si_list); + si != nullptr; + si = (AVND_SU_SI_REC *)m_NCS_DBLIST_FIND_NEXT(&si->su_dll_node)) { + + si_assignment.su = su->name; + si_assignment.si = si->name; + si_assignment.saAmfSISUHAState = si->curr_state; + + add_sisu_state_info(&msg, &si_assignment); + } + + add_su_state_info(&msg, su); + + su = (AVND_SU *)ncs_patricia_tree_getnext(&cb->sudb, (uint8_t *)&su->name); + } + + LOG_NO("%d SISU states sent", msg.info.avd->msg_info.n2d_nd_sisu_state_info.num_sisu); + LOG_NO("%d SU states sent", msg.info.avd->msg_info.n2d_nd_sisu_state_info.num_su); + + rc = avnd_di_msg_send(cb, &msg); + if (rc == NCSCC_RC_SUCCESS) + msg.info.avd = 0; + else + LOG_ER("avnd_di_msg_send FAILED"); + + avnd_msg_content_free(cb, &msg); + + TRACE_LEAVE(); +} diff --git a/osaf/services/saf/amf/amfnd/err.cc b/osaf/services/saf/amf/amfnd/err.cc --- a/osaf/services/saf/amf/amfnd/err.cc +++ b/osaf/services/saf/amf/amfnd/err.cc @@ -77,6 +77,8 @@ static uint32_t avnd_err_restart_esc_lev static uint32_t avnd_err_restart_esc_level_1(AVND_CB *, AVND_SU *, AVND_ERR_ESC_LEVEL *, AVSV_ERR_RCVR *); static uint32_t avnd_err_restart_esc_level_2(AVND_CB *, AVND_SU *, AVND_ERR_ESC_LEVEL *, AVSV_ERR_RCVR *); +static void cleanup_all_comps_and_reboot(AVND_CB *cb); + /* LSB Changes. 
Strings to represent source of component Error */ static const char *g_comp_err[] = { @@ -414,6 +416,7 @@ uint32_t avnd_err_process(AVND_CB *cb, A goto done; done: + TRACE_LEAVE2("Return value:'%u'", rc); return rc; } @@ -474,8 +477,11 @@ uint32_t avnd_err_escalate(AVND_CB *cb, case SA_AMF_NODE_SWITCHOVER: case SA_AMF_NODE_FAILOVER: + case SA_AMF_CLUSTER_RESET: + break; + case SA_AMF_NODE_FAILFAST: - case SA_AMF_CLUSTER_RESET: + // this is still supported in headless mode break; case AVSV_ERR_RCVR_SU_RESTART: @@ -765,11 +771,41 @@ uint32_t avnd_err_rcvr_comp_failover(AVN if (m_AVND_SU_IS_PREINSTANTIABLE(su)) { /* clean the failed comp */ rc = avnd_comp_clc_fsm_run(cb, failed_comp, AVND_COMP_CLC_PRES_FSM_EV_CLEANUP); - if (NCSCC_RC_SUCCESS != rc) + if (NCSCC_RC_SUCCESS != rc) { + LOG_ER("cleanup of '%s' failed", failed_comp->name.value); goto done; + } + + // if headless, remove all assignments from this SU + if (cb->is_avd_down == true) { + AVND_SU_SI_REC *si = 0; + AVND_SU_SI_REC *next_si = 0; + uint32_t rc = NCSCC_RC_SUCCESS; + TRACE("Removing assignments from '%s'", su->name.value); + + m_AVND_SU_ASSIGN_PEND_SET(su); + + /* scan the su-si list & remove the sis */ + for (si = (AVND_SU_SI_REC *)m_NCS_DBLIST_FIND_FIRST(&su->si_list); si;) { + next_si = (AVND_SU_SI_REC *)m_NCS_DBLIST_FIND_NEXT(&si->su_dll_node); + rc = avnd_su_si_remove(cb, su, si); + if (NCSCC_RC_SUCCESS != rc) { + LOG_ER("failed to remove SI assignment from '%s'", + su->name.value); + break; + } + si = next_si; + } + } } else { /* request director to orchestrate component failover */ rc = avnd_di_oper_send(cb, failed_comp->su, AVSV_ERR_RCVR_SU_FAILOVER); + + // if headless, we have to perform the 'failover' without amfd + if (cb->is_avd_down == true) { + // SU failover results in a node failfast if headless (not nice) + cleanup_all_comps_and_reboot(cb); + } } done: @@ -816,9 +852,13 @@ uint32_t avnd_err_rcvr_su_failover(AVND_ LOG_ER("'%s' termination failed", comp->name.value); goto done; } - 
avnd_su_pres_state_set(comp->su, SA_AMF_PRESENCE_TERMINATING); + avnd_su_pres_state_set(cb, comp->su, SA_AMF_PRESENCE_TERMINATING); } + done: + if (cb->is_avd_down == true) { + cleanup_all_comps_and_reboot(cb); + } TRACE_LEAVE2("%u", rc); return rc; @@ -906,7 +946,7 @@ uint32_t avnd_err_rcvr_node_switchover(A LOG_ER("'%s' termination failed", comp->name.value); goto done; } - avnd_su_pres_state_set(failed_comp->su, SA_AMF_PRESENCE_TERMINATING); + avnd_su_pres_state_set(cb, failed_comp->su, SA_AMF_PRESENCE_TERMINATING); } } else { @@ -918,7 +958,13 @@ uint32_t avnd_err_rcvr_node_switchover(A } } - done: +done: + // TODO - try to see if we can avoid a reboot & terminate components more gracefully + // if headless, reboot as we can't perform a switchover without amfd + if (cb->is_avd_down == true) { + cleanup_all_comps_and_reboot(cb); + } + TRACE_LEAVE2("%u", rc); return rc; } @@ -980,7 +1026,17 @@ uint32_t avnd_err_rcvr_node_failover(AVN LOG_ER("Exiting (due to comp term failed) to aid fast node reboot"); exit(1); } - avnd_su_pres_state_set(comp->su, SA_AMF_PRESENCE_TERMINATING); + avnd_su_pres_state_set(cb, comp->su, SA_AMF_PRESENCE_TERMINATING); + } + + // TODO - try to see if we can avoid a reboot + // if headless, reboot as we can't perform a failover without amfd + if (cb->is_avd_down == true) { + opensaf_reboot(avnd_cb->node_info.nodeId, + (char *)avnd_cb->node_info.executionEnvironment.value, + "Can't perform node failover while controllers are down. Recovery is node failfast."); + LOG_ER("Exiting to aid fast node reboot"); + exit(1); } TRACE_LEAVE2("%u", rc); @@ -1059,7 +1115,7 @@ uint32_t avnd_err_su_repair(AVND_CB *cb, is no such event handler and event in SU FSM. 
*/ if ((is_uninst == true) && (is_comp_insting == true)) - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATING); done: TRACE_LEAVE2("retval=%u", rc); return rc; @@ -1162,7 +1218,7 @@ uint32_t avnd_err_restart_esc_level_0(AV /*stop the comp-err-esc-timer */ tmr_comp_err_esc_stop(cb, su); su->comp_restart_cnt = 0; - su_reset_restart_count_in_comps(su); + su_reset_restart_count_in_comps(cb, su); /* go to the next possible level, is su restart capable? */ if (su->su_restart_max != 0 && !m_AVND_SU_IS_SU_RESTART_DIS(su)) { @@ -1250,7 +1306,7 @@ uint32_t avnd_err_restart_esc_level_1(AV /* stop timer */ tmr_su_err_esc_stop(cb, su); su->su_restart_cnt = 0; - su_reset_restart_count_in_comps(su); + su_reset_restart_count_in_comps(cb, su); /* go to the next possible level, get escalated recovery and modify count */ if ((cb->su_failover_max != 0) || (true == su->su_is_external)) { @@ -1267,7 +1323,10 @@ uint32_t avnd_err_restart_esc_level_1(AV } done: - avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSURestartCount_ID, &su->name, su->su_restart_cnt); + if (cb->is_avd_down == false) { + avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSURestartCount_ID, &su->name, su->su_restart_cnt); + } + TRACE_LEAVE2("retval=%u", rc); return rc; } @@ -1442,7 +1501,7 @@ uint32_t avnd_evt_tmr_node_err_esc_evh(A if (su->su_err_esc_level == AVND_ERR_ESC_LEVEL_2) { su->comp_restart_cnt = 0; su->su_restart_cnt = 0; - su_reset_restart_count_in_comps(su); + su_reset_restart_count_in_comps(cb, su); avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSURestartCount_ID, &su->name, su->su_restart_cnt); su->su_err_esc_level = AVND_ERR_ESC_LEVEL_0; @@ -1539,3 +1598,34 @@ bool is_no_assignment_due_to_escalations TRACE_LEAVE2("false"); return false; } + +void cleanup_all_comps_and_reboot(AVND_CB *cb) +{ + AVND_COMP *comp; + uint32_t rc = NCSCC_RC_SUCCESS; + + /* Unordered cleanup of all local application components */ + for (comp = (AVND_COMP 
*)ncs_patricia_tree_getnext(&cb->compdb, (uint8_t *)nullptr); + comp != nullptr; + comp = (AVND_COMP *) ncs_patricia_tree_getnext(&cb->compdb, (uint8_t *)&comp->name)) { + + if (comp->su->is_ncs || comp->su->su_is_external) + continue; + + rc = avnd_comp_clc_fsm_run(cb, comp, AVND_COMP_CLC_PRES_FSM_EV_CLEANUP); + if (rc != NCSCC_RC_SUCCESS) { + LOG_ER("'%s' termination failed", comp->name.value); + opensaf_reboot(avnd_cb->node_info.nodeId, + (char *)avnd_cb->node_info.executionEnvironment.value, + "Component termination failed at node switchover"); + LOG_ER("Exiting (due to comp term failed) to aid fast node reboot"); + exit(1); + } + } + + opensaf_reboot(avnd_cb->node_info.nodeId, + (char *)avnd_cb->node_info.executionEnvironment.value, + "Can't perform recovery while controllers are down. Recovery is node failfast."); + LOG_ER("Exiting to aid fast node reboot"); + exit(1); +} diff --git a/osaf/services/saf/amf/amfnd/evt.cc b/osaf/services/saf/amf/amfnd/evt.cc --- a/osaf/services/saf/amf/amfnd/evt.cc +++ b/osaf/services/saf/amf/amfnd/evt.cc @@ -124,6 +124,7 @@ AVND_EVT *avnd_evt_create(AVND_CB *cb, case AVND_EVT_TMR_CLC_PXIED_COMP_INST: case AVND_EVT_TMR_CLC_PXIED_COMP_REG: case AVND_EVT_TMR_HB_DURATION: + case AVND_EVT_TMR_SC_ABSENCE: case AVND_EVT_TMR_QSCING_CMPL: evt->priority = NCS_IPC_PRIORITY_HIGH; /* bump up the priority */ evt->info.tmr.opq_hdl = *(uint32_t *)info; @@ -263,6 +264,7 @@ void avnd_evt_destroy(AVND_EVT *evt) case AVND_EVT_TMR_CLC_PXIED_COMP_INST: case AVND_EVT_TMR_CLC_PXIED_COMP_REG: case AVND_EVT_TMR_HB_DURATION: + case AVND_EVT_TMR_SC_ABSENCE: case AVND_EVT_TMR_QSCING_CMPL: break; diff --git a/osaf/services/saf/amf/amfnd/hcdb.cc b/osaf/services/saf/amf/amfnd/hcdb.cc --- a/osaf/services/saf/amf/amfnd/hcdb.cc +++ b/osaf/services/saf/amf/amfnd/hcdb.cc @@ -232,7 +232,11 @@ SaAisErrorT avnd_hc_config_get(AVND_COMP SaImmHandleT immOmHandle; SaVersionT immVersion = { 'A', 2, 1 }; - immutil_saImmOmInitialize(&immOmHandle, nullptr, &immVersion); + 
error = saImmOmInitialize_cond(&immOmHandle, nullptr, &immVersion); + if (error != SA_AIS_OK) { + LOG_CR("saImmOmInitialize failed: %u", error); + goto done; + } avnd_hctype_config_get(immOmHandle, &comp->saAmfCompType); @@ -263,7 +267,7 @@ SaAisErrorT avnd_hc_config_get(AVND_COMP (void)immutil_saImmOmSearchFinalize(searchHandle); done1: immutil_saImmOmFinalize(immOmHandle); - + done: return error; } diff --git a/osaf/services/saf/amf/amfnd/include/avnd_cb.h b/osaf/services/saf/amf/amfnd/include/avnd_cb.h --- a/osaf/services/saf/amf/amfnd/include/avnd_cb.h +++ b/osaf/services/saf/amf/amfnd/include/avnd_cb.h @@ -42,6 +42,7 @@ typedef struct avnd_cb_tag { MDS_DEST avnd_dest; /* AvND mds addr */ MDS_DEST avd_dest; /* AvD mds addr */ bool is_avd_down; /* Temp: Indicates if AvD went down */ + bool amfd_sync_required; /* cb related params */ NCS_LOCK mon_lock; /* PID monitor lock */ @@ -90,7 +91,12 @@ typedef struct avnd_cb_tag { uint32_t rcv_msg_id; /* Message ID of the last message received */ /* AvD messaging params (retransmit list etc.) */ uint32_t snd_msg_id; /* send msg id */ - AVND_DND_LIST dnd_list; /* list of messages sent to AvD */ + + /** List of messages sent to director but not yet acked. + * Messages are removed when acked with the ACK message. 
+ * At director failover the list is scanned handling the + * VERIFY message from the director and possibly resent again */ + AVND_DND_LIST dnd_list; AVND_TERM_STATE term_state; AVND_LED_STATE led_state; @@ -109,6 +115,11 @@ typedef struct avnd_cb_tag { bool reboot_in_progress; AVND_SU *failed_su; bool cont_reboot_in_progress; + + /* the duration that amfnd should tolerate absence of any SC */ + SaTimeT scs_absence_max_duration; + /* the timer for supervision of the absence of SC */ + AVND_TMR sc_absence_tmr; } AVND_CB; #define AVND_CB_NULL ((AVND_CB *)0) diff --git a/osaf/services/saf/amf/amfnd/include/avnd_comp.h b/osaf/services/saf/amf/amfnd/include/avnd_comp.h --- a/osaf/services/saf/amf/amfnd/include/avnd_comp.h +++ b/osaf/services/saf/amf/amfnd/include/avnd_comp.h @@ -650,18 +650,7 @@ typedef struct avnd_comp_tag { (o_rec) = (o_rec)->next); \ } -#define m_AVND_COMP_OPER_STATE_AVD_SYNC(cb, comp, o_rc) \ -{ \ - AVSV_PARAM_INFO param; \ - memset(¶m, 0, sizeof(AVSV_PARAM_INFO)); \ - param.class_id = AVSV_SA_AMF_COMP; \ - param.attr_id = saAmfCompOperState_ID; \ - param.name = (comp)->name; \ - param.act = AVSV_OBJ_OPR_MOD; \ - *((uint32_t *)param.value) = m_NCS_OS_HTONL((comp)->oper); \ - param.value_len = sizeof(uint32_t); \ - (o_rc) = avnd_di_object_upd_send((cb), ¶m); \ -}; +void m_AVND_COMP_OPER_STATE_AVD_SYNC(struct avnd_cb_tag *cb, const AVND_COMP *comp, uint32_t& o_rc); /* macro to parse the clc cmd string */ #define m_AVND_COMP_CLC_STR_PARSE(st, sc, ac, av, tav) \ @@ -918,7 +907,7 @@ extern uint32_t avnd_comptype_oper_req(s extern unsigned int avnd_comp_config_get_su(struct avnd_su_tag *su); extern int avnd_comp_config_reinit(AVND_COMP *comp); extern void avnd_comp_delete(AVND_COMP *comp); -extern void avnd_comp_pres_state_set(AVND_COMP *comp, SaAmfPresenceStateT newstate); +extern void avnd_comp_pres_state_set(const struct avnd_cb_tag *cb, AVND_COMP *comp, SaAmfPresenceStateT newstate); bool comp_has_quiesced_assignment(const AVND_COMP *comp); bool 
IsCompQualifiedAssignment(const AVND_COMP *comp); /** @@ -929,7 +918,7 @@ bool IsCompQualifiedAssignment(const AVN * * Faulty NPI/PI components: launch the cleanup CLC CLI script */ extern uint32_t comp_restart_initiate(AVND_COMP *comp); -extern void comp_reset_restart_count(AVND_COMP *comp); +extern void comp_reset_restart_count(const struct avnd_cb_tag *cb, AVND_COMP *comp); extern void clear_error_report_alarm(AVND_COMP *comp); bool nonrestartable(const AVND_COMP *comp); uint32_t csi_count(const AVND_COMP *comp); diff --git a/osaf/services/saf/amf/amfnd/include/avnd_di.h b/osaf/services/saf/amf/amfnd/include/avnd_di.h --- a/osaf/services/saf/amf/amfnd/include/avnd_di.h +++ b/osaf/services/saf/amf/amfnd/include/avnd_di.h @@ -30,6 +30,8 @@ #ifndef AVND_DI_H #define AVND_DI_H +#include "amf_si_assign.h" + /* macro to find the matching record (based on the msg-id) */ /* * Caution!!! It is assumed that the msg-id is the 1st element in the message @@ -81,5 +83,7 @@ uint32_t avnd_diq_rec_send(struct avnd_c uint32_t avnd_di_reg_su_rsp_snd(struct avnd_cb_tag *cb, SaNameT *su_name, uint32_t ret_code); uint32_t avnd_di_ack_nack_msg_send(struct avnd_cb_tag *cb, uint32_t rcv_id, uint32_t view_num); extern void avnd_di_uns32_upd_send(int class_id, int attr_id, const SaNameT *dn, uint32_t value); +void avnd_sync_sisu(struct avnd_cb_tag *cb); +void avnd_sync_csicomp(struct avnd_cb_tag *cb); #endif /* !AVND_OPER_H */ diff --git a/osaf/services/saf/amf/amfnd/include/avnd_evt.h b/osaf/services/saf/amf/amfnd/include/avnd_evt.h --- a/osaf/services/saf/amf/amfnd/include/avnd_evt.h +++ b/osaf/services/saf/amf/amfnd/include/avnd_evt.h @@ -82,6 +82,7 @@ typedef enum avnd_evt_type { AVND_EVT_TMR_CLC_PXIED_COMP_INST, AVND_EVT_TMR_CLC_PXIED_COMP_REG, AVND_EVT_TMR_HB_DURATION, + AVND_EVT_TMR_SC_ABSENCE, AVND_EVT_TMR_MAX, /* mds event types */ @@ -128,6 +129,7 @@ typedef struct avnd_tmr_evt { typedef struct avnd_mds_evt { MDS_DEST mds_dest; /* mds address */ NODE_ID node_id; + NCSMDS_CHG 
i_change; } AVND_MDS_EVT; /* HA STATE change event definition */ diff --git a/osaf/services/saf/amf/amfnd/include/avnd_mds.h b/osaf/services/saf/amf/amfnd/include/avnd_mds.h --- a/osaf/services/saf/amf/amfnd/include/avnd_mds.h +++ b/osaf/services/saf/amf/amfnd/include/avnd_mds.h @@ -31,10 +31,10 @@ #define AVND_MDS_H /* In Service upgrade support */ -#define AVND_MDS_SUB_PART_VERSION 5 +#define AVND_MDS_SUB_PART_VERSION 6 #define AVND_AVD_SUBPART_VER_MIN 1 -#define AVND_AVD_SUBPART_VER_MAX 5 +#define AVND_AVD_SUBPART_VER_MAX 6 #define AVND_AVND_SUBPART_VER_MIN 1 #define AVND_AVND_SUBPART_VER_MAX 1 diff --git a/osaf/services/saf/amf/amfnd/include/avnd_proc.h b/osaf/services/saf/amf/amfnd/include/avnd_proc.h --- a/osaf/services/saf/amf/amfnd/include/avnd_proc.h +++ b/osaf/services/saf/amf/amfnd/include/avnd_proc.h @@ -115,6 +115,7 @@ uint32_t avnd_evt_comp_admin_op_req (str uint32_t avnd_evt_avd_admin_op_req_evh(struct avnd_cb_tag *cb, struct avnd_evt_tag *evt); uint32_t avnd_evt_avd_hb_evh(struct avnd_cb_tag *, struct avnd_evt_tag *); uint32_t avnd_evt_tmr_avd_hb_duration_evh(struct avnd_cb_tag *, struct avnd_evt_tag *); +uint32_t avnd_evt_tmr_sc_absence_evh(struct avnd_cb_tag *, struct avnd_evt_tag *); uint32_t avnd_evt_avd_reboot_evh(struct avnd_cb_tag *, struct avnd_evt_tag *); #endif diff --git a/osaf/services/saf/amf/amfnd/include/avnd_su.h b/osaf/services/saf/amf/amfnd/include/avnd_su.h --- a/osaf/services/saf/amf/amfnd/include/avnd_su.h +++ b/osaf/services/saf/amf/amfnd/include/avnd_su.h @@ -396,7 +396,7 @@ uint32_t avnd_su_oper_req(struct avnd_cb extern uint32_t avnd_evt_su_admin_op_req(struct avnd_cb_tag *cb, struct avnd_evt_tag *evt); extern struct avnd_comp_csi_rec *avnd_su_si_csi_rec_add(struct avnd_cb_tag *, AVND_SU *, struct avnd_su_si_rec *, struct avsv_susi_asgn *, uint32_t *); -extern void avnd_su_pres_state_set(AVND_SU *su, SaAmfPresenceStateT newstate); +extern void avnd_su_pres_state_set(const struct avnd_cb_tag *cb, AVND_SU *su, 
SaAmfPresenceStateT newstate); extern void avnd_silist_init(struct avnd_cb_tag *cb); extern struct avnd_su_si_rec *avnd_silist_getfirst(void); @@ -406,7 +406,7 @@ extern struct avnd_su_si_rec *avnd_silis extern bool sufailover_in_progress(const AVND_SU *su); extern bool sufailover_during_nodeswitchover(const AVND_SU *su); extern bool all_csis_in_removed_state(const AVND_SU *su); -extern void su_reset_restart_count_in_comps(const AVND_SU *su); +extern void su_reset_restart_count_in_comps(const struct avnd_cb_tag *cb, const AVND_SU *su); extern bool all_comps_terminated_in_su(const AVND_SU *su); void su_increment_su_restart_count(AVND_SU& su); diff --git a/osaf/services/saf/amf/amfnd/include/avnd_tmr.h b/osaf/services/saf/amf/amfnd/include/avnd_tmr.h --- a/osaf/services/saf/amf/amfnd/include/avnd_tmr.h +++ b/osaf/services/saf/amf/amfnd/include/avnd_tmr.h @@ -46,6 +46,7 @@ typedef enum avnd_tmr_type { AVND_TMR_CLC_PXIED_COMP_INST, /* proxied inst timer */ AVND_TMR_CLC_PXIED_COMP_REG, /* proxied orphan timer */ AVND_TMR_HB_DURATION, + AVND_TMR_SC_ABSENCE, /* SC absence timer */ AVND_TMR_QSCING_CMPL_RESP, /* Qscing complete timer */ AVND_TMR_MAX } AVND_TMR_TYPE; diff --git a/osaf/services/saf/amf/amfnd/include/avnd_util.h b/osaf/services/saf/amf/amfnd/include/avnd_util.h --- a/osaf/services/saf/amf/amfnd/include/avnd_util.h +++ b/osaf/services/saf/amf/amfnd/include/avnd_util.h @@ -57,4 +57,8 @@ uint32_t amf_cbk_copy(AVSV_AMF_CBK_INFO void amf_cbk_free(AVSV_AMF_CBK_INFO* cbk_info); void nd2nd_avnd_msg_free(AVSV_ND2ND_AVND_MSG *msg); +void free_n2d_nd_csicomp_state_info(AVSV_DND_MSG *msg); +void free_n2d_nd_sisu_state_info(AVSV_DND_MSG *msg); +SaAisErrorT saImmOmInitialize_cond(SaImmHandleT *immHandle, + const SaImmCallbacksT *immCallbacks, SaVersionT *version); #endif /* !AVND_UTIL_H */ diff --git a/osaf/services/saf/amf/amfnd/main.cc b/osaf/services/saf/amf/amfnd/main.cc --- a/osaf/services/saf/amf/amfnd/main.cc +++ b/osaf/services/saf/amf/amfnd/main.cc @@ -95,6 +95,7 
@@ extern const AVND_EVT_HDLR g_avnd_func_l avnd_evt_tmr_clc_pxied_comp_inst_evh, /* AVND_EVT_TMR_CLC_PXIED_COMP_INST */ avnd_evt_tmr_clc_pxied_comp_reg_evh, /* AVND_EVT_TMR_CLC_PXIED_COMP_REG */ avnd_evt_tmr_avd_hb_duration_evh, + avnd_evt_tmr_sc_absence_evh, /* AVND_EVT_TMR_SC_ABSENCE */ /* mds event types */ avnd_evt_mds_avd_up_evh, /* AVND_EVT_MDS_AVD_UP */ @@ -119,6 +120,8 @@ extern const AVND_EVT_HDLR g_avnd_func_l avnd_evt_tmr_qscing_cmpl_evh /* AVND_EVT_TMR_QSCING_CMPL */ }; +extern struct ImmutilWrapperProfile immutilWrapperProfile; + /* global task handle */ NCSCONTEXT gl_avnd_task_hdl = 0; @@ -134,6 +137,8 @@ static uint32_t avnd_mbx_create(AVND_CB static uint32_t avnd_ext_intf_create(AVND_CB *); +static void hydra_config_get(AVND_CB *); + static int __init_avnd(void) { @@ -173,6 +178,10 @@ int main(int argc, char *argv[]) goto done; } + immutilWrapperProfile.retryInterval = 400; + immutilWrapperProfile.nTries = 25; + immutilWrapperProfile.errorsAreFatal = 0; + /* should never return */ avnd_main_process(); @@ -328,6 +337,16 @@ AVND_CB *avnd_cb_create() /* iniialize the error escaltion paramaets */ cb->node_err_esc_level = AVND_ERR_ESC_LEVEL_0; + cb->is_avd_down = true; + cb->amfd_sync_required = false; + + // retrieve hydra configuration from IMM + hydra_config_get(cb); + cb->sc_absence_tmr.is_active = false; + cb->sc_absence_tmr.type = AVND_TMR_SC_ABSENCE; + + memset(&cb->amf_nodeName, 0, sizeof(cb->amf_nodeName)); + /*** initialize avnd dbs ***/ avnd_silist_init(cb); @@ -536,6 +555,8 @@ void avnd_main_process(void) struct pollfd fds[4]; nfds_t nfds = 3; AVND_EVT *evt; + SaAisErrorT result = SA_AIS_OK; + SaAisErrorT rc = SA_AIS_OK; TRACE_ENTER(); @@ -578,7 +599,18 @@ void avnd_main_process(void) if (fds[FD_CLM].revents & POLLIN) { TRACE("CLM event recieved"); - saClmDispatch(avnd_cb->clmHandle, SA_DISPATCH_ALL); + result = saClmDispatch(avnd_cb->clmHandle, SA_DISPATCH_ALL); + switch (result) { + case SA_AIS_OK: + break; + case SA_AIS_ERR_BAD_HANDLE: + 
usleep(100000); + rc = avnd_clm_init(); + osafassert(rc == SA_AIS_OK); + break; + default: + goto done; + } } if (fds[FD_MBX].revents & POLLIN) { @@ -621,9 +653,11 @@ void avnd_evt_process(AVND_EVT *evt) } /* Temp: AvD Down Handling */ - if (true == cb->is_avd_down){ - LOG_IN("%s: AvD is down, dropping event %u",__FUNCTION__,evt->type); - goto done; + if (cb->scs_absence_max_duration == 0) { + if (true == cb->is_avd_down){ + LOG_IN("%s: AvD is down, dropping event %u",__FUNCTION__,evt->type); + goto done; + } } /* log the event reception */ @@ -660,3 +694,64 @@ static uint32_t avnd_evt_invalid_evh(AVN LOG_NO("avnd_evt_invalid_func: %u", evt->type); return NCSCC_RC_SUCCESS; } + +/***************************************************************************** + * Function: hydra_config_get + * + * Purpose: This function checks if Hydra configuration is enabled in IMM + * then set the corresponding value to scs_absence_max_duration + * variable in avnd_cb. + * + * Input: None. + * + * Returns: None. 
+ * + * NOTES: If IMM attribute fetching fails that means Hydra configuration + * is disabled thus scs_absence_max_duration is set to 0 + * + **************************************************************************/ +static void hydra_config_get(AVND_CB *cb) +{ + SaAisErrorT rc = SA_AIS_OK; + SaImmHandleT immOmHandle; + SaVersionT immVersion = { 'A', 2, 1 }; + const SaImmAttrValuesT_2 **attributes; + SaImmAccessorHandleT accessorHandle; + SaNameT dn = {0, "opensafImm=opensafImm,safApp=safImmService"}; + SaImmAttrNameT attrName = const_cast<SaImmAttrNameT>("scAbsenceAllowed"); + SaImmAttrNameT attributeNames[] = {attrName, nullptr}; + const SaUint32T *value = nullptr; + + TRACE_ENTER(); + + /* Set to default value */ + cb->scs_absence_max_duration = 0; + + dn.length = strlen((char *)dn.value); + + immutil_saImmOmInitialize(&immOmHandle, nullptr, &immVersion); + immutil_saImmOmAccessorInitialize(immOmHandle, &accessorHandle); + rc = immutil_saImmOmAccessorGet_2(accessorHandle, &dn, attributeNames, + (SaImmAttrValuesT_2 ***)&attributes); + + if (rc != SA_AIS_OK) { + LOG_WA("saImmOmAccessorGet_2 FAILED %u for %s", rc, dn.value); + goto done; + } + + value = immutil_getUint32Attr(attributes, attrName, 0); + if (value == nullptr) { + LOG_WA("immutil_getUint32Attr FAILED for %s", dn.value); + goto done; + } + + avnd_cb->scs_absence_max_duration = *value * SA_TIME_ONE_SECOND; + +done: + immutil_saImmOmAccessorFinalize(accessorHandle); + immutil_saImmOmFinalize(immOmHandle); + LOG_IN("scs_absence_max_duration: %llu", avnd_cb->scs_absence_max_duration); + + TRACE_LEAVE(); + return; +} diff --git a/osaf/services/saf/amf/amfnd/mds.cc b/osaf/services/saf/amf/amfnd/mds.cc --- a/osaf/services/saf/amf/amfnd/mds.cc +++ b/osaf/services/saf/amf/amfnd/mds.cc @@ -41,14 +41,14 @@ const MDS_CLIENT_MSG_FORMAT_VER avnd_avd_msg_fmt_map_table[] = { AVSV_AVD_AVND_MSG_FMT_VER_1, AVSV_AVD_AVND_MSG_FMT_VER_2, AVSV_AVD_AVND_MSG_FMT_VER_3, AVSV_AVD_AVND_MSG_FMT_VER_4, - 
AVSV_AVD_AVND_MSG_FMT_VER_4 + AVSV_AVD_AVND_MSG_FMT_VER_4, AVSV_AVD_AVND_MSG_FMT_VER_6 }; /* messages from director */ const MDS_CLIENT_MSG_FORMAT_VER avd_avnd_msg_fmt_map_table[] = { AVSV_AVD_AVND_MSG_FMT_VER_1, AVSV_AVD_AVND_MSG_FMT_VER_2, AVSV_AVD_AVND_MSG_FMT_VER_3, AVSV_AVD_AVND_MSG_FMT_VER_4, - AVSV_AVD_AVND_MSG_FMT_VER_5 + AVSV_AVD_AVND_MSG_FMT_VER_5, AVSV_AVD_AVND_MSG_FMT_VER_6 }; const MDS_CLIENT_MSG_FORMAT_VER avnd_avnd_msg_fmt_map_table[] = { @@ -513,11 +513,26 @@ uint32_t avnd_mds_svc_evt(AVND_CB *cb, M /* assign mds-dest for AVD, AVND & AVA as per the MDS event */ switch (evt_info->i_change) { + case NCSMDS_NEW_ACTIVE: + if (evt_info->i_svc_id == NCSMDS_SVC_ID_AVD) { + LOG_NO("AVD NEW_ACTIVE, adest:%" PRIu64, evt_info->i_dest); + + // sometimes NEW_ACTIVE director is received before + // DOWN is received for the old director .. + if (m_AVND_CB_IS_AVD_UP(cb)) { + m_AVND_CB_AVD_UP_RESET(cb); + } + + evt = avnd_evt_create(cb, AVND_EVT_MDS_AVD_UP, 0, &evt_info->i_dest, 0, 0, 0); + evt->info.mds.i_change = evt_info->i_change; + } + break; case NCSMDS_UP: switch (evt_info->i_svc_id) { case NCSMDS_SVC_ID_AVD: /* create the mds event */ evt = avnd_evt_create(cb, AVND_EVT_MDS_AVD_UP, 0, &evt_info->i_dest, 0, 0, 0); + evt->info.mds.i_change = evt_info->i_change; break; case NCSMDS_SVC_ID_AVA: diff --git a/osaf/services/saf/amf/amfnd/sidb.cc b/osaf/services/saf/amf/amfnd/sidb.cc --- a/osaf/services/saf/amf/amfnd/sidb.cc +++ b/osaf/services/saf/amf/amfnd/sidb.cc @@ -281,7 +281,12 @@ static SaAmfCompCapabilityModelT get_com TRACE_ENTER2("comptype = '%s' : csi = '%s'", comp_type->value, csi_name->value); - immutil_saImmOmInitialize(&immOmHandle, nullptr, &immVersion); + error = saImmOmInitialize_cond(&immOmHandle, nullptr, &immVersion); + if (error != SA_AIS_OK ) { + // TODO - what should comp_cap be? 
+ LOG_CR("saImmOmInitialize failed: %u", error); + goto done1; + } immutil_saImmOmAccessorInitialize(immOmHandle, &accessorHandle); get_cstype(immOmHandle, accessorHandle, csi_name, &cs_type); @@ -300,7 +305,7 @@ static SaAmfCompCapabilityModelT get_com done: immutil_saImmOmAccessorFinalize(accessorHandle); immutil_saImmOmFinalize(immOmHandle); - +done1: TRACE_LEAVE2("%u", comp_cap); return comp_cap; } diff --git a/osaf/services/saf/amf/amfnd/su.cc b/osaf/services/saf/amf/amfnd/su.cc --- a/osaf/services/saf/amf/amfnd/su.cc +++ b/osaf/services/saf/amf/amfnd/su.cc @@ -56,13 +56,18 @@ static bool get_su_failover(const SaName const_cast<SaImmAttrNameT>("saAmfSUType"), nullptr }; + SaAisErrorT error; TRACE_ENTER2("'%s'", name->value); // TODO remove, just for test LOG_NO("get_su_failover '%s'", name->value); - immutil_saImmOmInitialize(&immOmHandle, nullptr, &immVersion); + error = saImmOmInitialize_cond(&immOmHandle, nullptr, &immVersion); + if (error != SA_AIS_OK ) { + LOG_CR("saImmOmInitialize failed: %u", error); + goto done1; + } immutil_saImmOmAccessorInitialize(immOmHandle, &accessorHandle); /* Use an attribute name list to avoid reading runtime attributes which @@ -92,6 +97,7 @@ static bool get_su_failover(const SaName done: immutil_saImmOmAccessorFinalize(accessorHandle); immutil_saImmOmFinalize(immOmHandle); +done1: TRACE_LEAVE2(); return (sufailover == SA_TRUE) ? 
true : false; } @@ -171,7 +177,7 @@ uint32_t avnd_evt_avd_reg_su_evh(AVND_CB if ((su->pres == SA_AMF_PRESENCE_INSTANTIATED) && (su_is_instantiated == false)) { - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_UNINSTANTIATED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_UNINSTANTIATED); rc = avnd_su_pres_fsm_run(cb, su, 0, AVND_SU_PRES_FSM_EV_INST); } } @@ -321,7 +327,11 @@ static uint32_t get_sirank(const SaNameT // TODO remove, just for test LOG_NO("get_sirank %s", dn->value); - immutil_saImmOmInitialize(&immOmHandle, nullptr, &immVersion); + error = saImmOmInitialize_cond(&immOmHandle, nullptr, &immVersion); + if (error != SA_AIS_OK ) { + LOG_CR("saImmOmInitialize failed: %u", error); + goto done; + } immutil_saImmOmAccessorInitialize(immOmHandle, &accessorHandle); osafassert((error = immutil_saImmOmAccessorGet_2(accessorHandle, dn, @@ -338,6 +348,7 @@ static uint32_t get_sirank(const SaNameT immutil_saImmOmAccessorFinalize(accessorHandle); immutil_saImmOmFinalize(immOmHandle); +done: return rank; } @@ -466,7 +477,7 @@ uint32_t avnd_evt_tmr_su_err_esc_evh(AVN TRACE("'%s'", su->name.value); - LOG_NO("'%s' SU restart probation timer expired", su->name.value); + LOG_NO("'%s' Component or SU restart probation timer expired", su->name.value); if (NCSCC_RC_SUCCESS == m_AVND_CHECK_FOR_STDBY_FOR_EXT_COMP(cb, su->su_is_external)) goto done; @@ -475,13 +486,13 @@ uint32_t avnd_evt_tmr_su_err_esc_evh(AVN case AVND_ERR_ESC_LEVEL_0: su->comp_restart_cnt = 0; su->su_err_esc_level = AVND_ERR_ESC_LEVEL_0; - su_reset_restart_count_in_comps(su); + su_reset_restart_count_in_comps(cb, su); break; case AVND_ERR_ESC_LEVEL_1: su->su_restart_cnt = 0; su->su_err_esc_level = AVND_ERR_ESC_LEVEL_0; cb->node_err_esc_level = AVND_ERR_ESC_LEVEL_0; - su_reset_restart_count_in_comps(su); + su_reset_restart_count_in_comps(cb, su); avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSURestartCount_ID, &su->name, su->su_restart_cnt); break; case AVND_ERR_ESC_LEVEL_2: @@ -557,7 +568,7 @@ uint32_t 
avnd_su_curr_info_del(AVND_CB * if (!m_AVND_SU_IS_FAILED(su)) { su->su_err_esc_level = AVND_ERR_ESC_LEVEL_0; su->comp_restart_cnt = 0; - su_reset_restart_count_in_comps(su); + su_reset_restart_count_in_comps(cb, su); su->su_restart_cnt = 0; avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSURestartCount_ID, &su->name, su->su_restart_cnt); /* stop su_err_esc_tmr TBD Later */ @@ -630,7 +641,7 @@ uint32_t avnd_evt_su_admin_op_req(AVND_C comp->admin_oper = false; m_AVND_COMP_STATE_RESET(comp); - avnd_comp_pres_state_set(comp, SA_AMF_PRESENCE_UNINSTANTIATED); + avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_UNINSTANTIATED); m_AVND_COMP_OPER_STATE_SET(comp, SA_AMF_OPERATIONAL_ENABLED); avnd_di_uns32_upd_send(AVSV_SA_AMF_COMP, saAmfCompOperState_ID, &comp->name, comp->oper); @@ -645,7 +656,7 @@ uint32_t avnd_evt_su_admin_op_req(AVND_C m_AVND_SU_STATE_RESET(su); m_AVND_SU_OPER_STATE_SET(su, SA_AMF_OPERATIONAL_ENABLED); avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSUOperState_ID, &su->name, su->oper); - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_UNINSTANTIATED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_UNINSTANTIATED); rc = avnd_di_oper_send(cb, su, 0); break; @@ -684,26 +695,28 @@ done: * @param su * @param newstate */ -void avnd_su_pres_state_set(AVND_SU *su, SaAmfPresenceStateT newstate) +void avnd_su_pres_state_set(const AVND_CB *cb, AVND_SU *su, SaAmfPresenceStateT newstate) { osafassert(newstate <= SA_AMF_PRESENCE_TERMINATION_FAILED); LOG_NO("'%s' Presence State %s => %s", su->name.value, presence_state[su->pres], presence_state[newstate]); su->pres = newstate; - avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSUPresenceState_ID, &su->name, su->pres); + if (cb->is_avd_down == false) { + avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSUPresenceState_ID, &su->name, su->pres); + } } /** * @brief Resets component restart count for each component of SU. 
* @param su */ -void su_reset_restart_count_in_comps(const AVND_SU *su) +void su_reset_restart_count_in_comps(const AVND_CB *cb, const AVND_SU *su) { AVND_COMP *comp; for (comp = m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_FIRST(&su->comp_list)); comp; comp = m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_NEXT(&comp->su_dll_node))) { - comp_reset_restart_count(comp); + comp_reset_restart_count(cb, comp); } } diff --git a/osaf/services/saf/amf/amfnd/susm.cc b/osaf/services/saf/amf/amfnd/susm.cc --- a/osaf/services/saf/amf/amfnd/susm.cc +++ b/osaf/services/saf/amf/amfnd/susm.cc @@ -447,7 +447,7 @@ static bool csi_of_same_si_in_assigning_ * * @return uns32 */ -static uint32_t assign_si_to_su(AVND_SU_SI_REC *si, AVND_SU *su, int single_csi) +static uint32_t assign_si_to_su(const AVND_CB *cb, AVND_SU_SI_REC *si, AVND_SU *su, int single_csi) { uint32_t rc = NCSCC_RC_SUCCESS; AVND_COMP_CSI_REC *curr_csi; @@ -553,7 +553,7 @@ static uint32_t assign_si_to_su(AVND_SU_ osafassert(curr_csi); if (si->curr_state == SA_AMF_HA_ACTIVE) { - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATING); rc = avnd_comp_csi_assign(avnd_cb, curr_csi->comp, curr_csi); } else { curr_csi->single_csi_add_rem_in_si = AVSV_SUSI_ACT_BASE; @@ -657,7 +657,7 @@ uint32_t avnd_su_si_assign(AVND_CB *cb, /* mark the si(s) assigning and assign to su */ if (si) { m_AVND_SU_SI_CURR_ASSIGN_STATE_SET(si, AVND_SU_SI_ASSIGN_STATE_ASSIGNING); - rc = assign_si_to_su(si, su, true); + rc = assign_si_to_su(cb, si, su, true); } else { for (curr_si = (AVND_SU_SI_REC *)m_NCS_DBLIST_FIND_FIRST(&su->si_list); curr_si != nullptr; @@ -685,7 +685,7 @@ uint32_t avnd_su_si_assign(AVND_CB *cb, curr_si != nullptr; curr_si = (AVND_SU_SI_REC *)m_NCS_DBLIST_FIND_NEXT(&curr_si->su_dll_node)) { - rc = assign_si_to_su(curr_si, su, false); + rc = assign_si_to_su(cb, curr_si, su, false); if (NCSCC_RC_SUCCESS != rc) goto done; } @@ -808,7 +808,7 @@ uint32_t 
avnd_su_si_remove(AVND_CB *cb, osafassert(curr_csi != nullptr); rc = avnd_comp_csi_remove(cb, curr_csi->comp, curr_csi); if (rc == NCSCC_RC_SUCCESS) - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_TERMINATING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_TERMINATING); } else { /* nothing to be done, termination already done in quiescing/quiesced state */ @@ -1357,11 +1357,17 @@ uint32_t avnd_evt_avd_su_pres_evh(AVND_C of openSAF SUs, so don't refresh config info if it is openSAF SU. */ if ((false == su->is_ncs) && (avnd_comp_config_get_su(su) != NCSCC_RC_SUCCESS)) { - m_AVND_SU_REG_FAILED_SET(su); - /* Will transition to instantiation-failed when instantiated */ - LOG_ER("'%s':FAILED", __FUNCTION__); - rc = NCSCC_RC_FAILURE; + if (cb->scs_absence_max_duration == 0) { + m_AVND_SU_REG_FAILED_SET(su); + /* Will transition to instantiation-failed when instantiated */ + LOG_ER("'%s':FAILED", __FUNCTION__); + rc = NCSCC_RC_FAILURE; goto done; + } else { + // @TODO(garylee) this is a temporary workaround: IMM is not accepting OM connections + // and a component needs to be restarted. + LOG_CR("'%s': failed to refresh components in SU. 
Attempt to reuse old config", __FUNCTION__); + } } /* trigger su instantiation for pi su */ if (m_AVND_SU_IS_PREINSTANTIABLE(su)) { @@ -1372,7 +1378,7 @@ uint32_t avnd_evt_avd_su_pres_evh(AVND_C if (m_AVND_SU_IS_REG_FAILED(su)) { /* The SU configuration is bad, we cannot do much other transition to failed state */ TRACE_2("SU Configuration is bad"); - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATION_FAILED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATION_FAILED); m_AVND_SU_ALL_TERM_RESET(su); } else osafassert(0); @@ -1502,7 +1508,7 @@ uint32_t avnd_su_pres_fsm_run(AVND_CB *c osafassert(NCSCC_RC_SUCCESS == rc); avnd_su_si_del(avnd_cb, &su->name); if (!m_AVND_SU_IS_PREINSTANTIABLE(su)) - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_UNINSTANTIATED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_UNINSTANTIATED); goto done; } else if ((cb->term_state == AVND_TERM_STATE_NODE_SWITCHOVER_STARTED) && (cb->oper_state == SA_AMF_OPERATIONAL_DISABLED) && @@ -1788,6 +1794,11 @@ uint32_t avnd_su_pres_st_chng_prc(AVND_C reset_suRestart_flag(su); //Ask AMFD to remove assignments. rc = avnd_di_oper_send(cb, su, SA_AMF_COMPONENT_FAILOVER); + if (cb->is_avd_down == true) { + LOG_WA("Director is down. 
Remove all SIs from '%s'", su->name.value); + avnd_su_si_del(avnd_cb, &su->name); + } + } if ((SA_AMF_PRESENCE_RESTARTING == prv_st) && (SA_AMF_PRESENCE_INSTANTIATION_FAILED == final_st)) { TRACE("Restarting -> Instantiation Failed"); @@ -1960,11 +1971,11 @@ uint32_t avnd_su_pres_uninst_suinst_hdle } /* transition to instantiating state */ - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATING); done: if (rc == NCSCC_RC_FAILURE) - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATION_FAILED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATION_FAILED); TRACE_LEAVE2("%u", rc); return rc; } @@ -2015,7 +2026,7 @@ uint32_t avnd_su_pres_insting_suterm_hdl } /* for */ /* transition to terminating state */ - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_TERMINATING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_TERMINATING); done: TRACE_LEAVE2("%u", rc); @@ -2095,7 +2106,7 @@ uint32_t avnd_su_pres_insting_compinst_h /* determine su presence state */ m_AVND_SU_IS_INSTANTIATED(su, is); if (true == is) { - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATED); } } @@ -2114,7 +2125,7 @@ uint32_t avnd_su_pres_insting_compinst_h if (curr_csi->single_csi_add_rem_in_si == AVSV_SUSI_ACT_ASGN) { // we are adding a single CSI, the comp is instantiated so now we're done curr_csi->single_csi_add_rem_in_si = AVSV_SUSI_ACT_BASE; - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATED); goto done; } @@ -2130,7 +2141,7 @@ uint32_t avnd_su_pres_insting_compinst_h } else { /* => si assignment done */ TRACE("SI Assignment done"); - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATED); } } @@ -2172,7 +2183,7 @@ uint32_t avnd_su_pres_insting_compinstfa su->name.value, compname); /* transition to inst-failed state 
*/ - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATION_FAILED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATION_FAILED); m_AVND_SU_ALL_TERM_RESET(su); /* @@ -2328,11 +2339,11 @@ uint32_t avnd_su_pres_inst_suterm_hdler( if ((csi->comp->pres == SA_AMF_PRESENCE_UNINSTANTIATED) && (cb->term_state == AVND_TERM_STATE_OPENSAF_SHUTDOWN_STARTED)) { m_AVND_COMP_CSI_CURR_ASSIGN_STATE_SET(csi, AVND_COMP_CSI_ASSIGN_STATE_REMOVED); - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_TERMINATING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_TERMINATING); AVND_COMP_CSI_REC *assigned_csi = get_next_assigned_csi_from_end(si); if (assigned_csi == nullptr) { //Components of all the CSIs in SI are cleaned up. - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_UNINSTANTIATED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_UNINSTANTIATED); goto done; } else { //One CSI is still assigned. @@ -2348,7 +2359,7 @@ uint32_t avnd_su_pres_inst_suterm_hdler( /* transition to terminating state */ if (su->pres != SA_AMF_PRESENCE_TERMINATING) - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_TERMINATING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_TERMINATING); done: TRACE_LEAVE2("%u", rc); @@ -2457,14 +2468,14 @@ uint32_t avnd_su_pres_inst_surestart_hdl else rc = avnd_comp_clc_fsm_run(cb, curr_comp, AVND_COMP_CLC_PRES_FSM_EV_RESTART); if (curr_comp->pres == SA_AMF_PRESENCE_TERMINATING) - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_TERMINATING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_TERMINATING); break; } } } /* for */ if ((su_evaluate_restarting_state(su) == true) && (!m_AVND_SU_IS_FAILED(su))) { TRACE("Mark su restarting"); - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_RESTARTING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_RESTARTING); } } @@ -2498,7 +2509,7 @@ uint32_t avnd_su_pres_inst_surestart_hdl } if ((all_csis_in_restarting_state(su) == true) && (!m_AVND_SU_IS_FAILED(su))) { TRACE("All CSIs are in restarting state, so marking SU restarting"); - 
avnd_su_pres_state_set(su, SA_AMF_PRESENCE_RESTARTING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_RESTARTING); } } @@ -2546,7 +2557,7 @@ uint32_t avnd_su_pres_inst_comprestart_h } } if (su_evaluate_restarting_state(su) == true) - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_RESTARTING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_RESTARTING); } if (!m_AVND_SU_IS_PREINSTANTIABLE(su)) { @@ -2569,7 +2580,7 @@ uint32_t avnd_su_pres_inst_comprestart_h } if (all_csis_in_restarting_state(su) == true) { TRACE_2("All CSIs are in restarting state, so marking SU restarting"); - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_RESTARTING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_RESTARTING); } } done: @@ -2600,7 +2611,7 @@ uint32_t avnd_su_pres_inst_compterming_h //A SU enters in TERMINATING state when any component is terminating. if (((comp != nullptr) && (comp->admin_oper == true)) || m_AVND_SU_IS_FAILED(su) || (su->admin_op_Id == SA_AMF_ADMIN_RESTART)) { - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_TERMINATING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_TERMINATING); } TRACE_LEAVE2("%u", rc); @@ -2637,7 +2648,7 @@ uint32_t avnd_su_pres_terming_compinst_h /* determine if su can be transitioned to instantiated state */ m_AVND_SU_IS_INSTANTIATED(su, is); if (true == is) { - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATED); } if (m_AVND_SU_IS_RESTART(su)) { if (su->admin_op_Id == SA_AMF_ADMIN_RESTART) @@ -2740,7 +2751,7 @@ uint32_t avnd_su_pres_terming_comptermfa } /* transition to term-failed state */ - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_TERMINATION_FAILED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_TERMINATION_FAILED); if (true == su->is_ncs) { char reason[SA_MAX_NAME_LENGTH + 64]; @@ -2814,7 +2825,7 @@ uint32_t avnd_su_pres_terming_compuninst if (m_AVND_SU_IS_FAILED(su)) { TRACE("SU is in Failed state"); if (pi_su_all_comps_uninstantiated(*su) == true) - 
avnd_su_pres_state_set(su, SA_AMF_PRESENCE_UNINSTANTIATED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_UNINSTANTIATED); if (m_AVND_SU_IS_RESTART(su)) { for (curr_comp = m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_LAST(&su->comp_list)); @@ -2851,7 +2862,7 @@ uint32_t avnd_su_pres_terming_compuninst else rc = avnd_comp_clc_fsm_run(cb, curr_comp, AVND_COMP_CLC_PRES_FSM_EV_RESTART); if (curr_comp->pres == SA_AMF_PRESENCE_TERMINATING) - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_TERMINATING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_TERMINATING); break; } } @@ -2862,9 +2873,9 @@ uint32_t avnd_su_pres_terming_compuninst (m_AVND_COMP_IS_RESTART_DIS(comp))) { TRACE("Admin operation on component"); if (pi_su_all_comps_uninstantiated(*su) == true) - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_UNINSTANTIATED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_UNINSTANTIATED); avnd_comp_clc_fsm_run(cb, comp, AVND_COMP_CLC_PRES_FSM_EV_INST); - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATING); } else { TRACE("Admin operation on SU"); for (curr_comp = m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_PREV(&comp->su_dll_node)); @@ -2904,7 +2915,7 @@ uint32_t avnd_su_pres_terming_compuninst } } if (pi_su_all_comps_uninstantiated(*su) == true) - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_UNINSTANTIATED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_UNINSTANTIATED); else if ((curr_comp == nullptr) && (su->admin_op_Id == SA_AMF_ADMIN_RESTART)) { /* It means it is a SU comprising of assigned non restartable comps and @@ -2938,7 +2949,7 @@ uint32_t avnd_su_pres_terming_compuninst /* get here when a CSI is removed from a component in an NPI SU */ assert(curr_csi->si->single_csi_add_rem_in_si == AVSV_SUSI_ACT_DEL); rc = avnd_su_si_oper_done(cb, su, curr_csi->si); - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATED); goto 
done; } @@ -2959,7 +2970,7 @@ uint32_t avnd_su_pres_terming_compuninst if (all_csis_in_assigned_state(su) || all_csis_in_removed_state(su)) { TRACE("SI Assignment done"); - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_UNINSTANTIATED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_UNINSTANTIATED); goto done; } @@ -3035,7 +3046,7 @@ uint32_t avnd_su_pres_restart_suterm_hdl } /* for */ /* transition to terminating state */ - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_TERMINATING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_TERMINATING); done: TRACE_LEAVE2("%u", rc); @@ -3155,7 +3166,7 @@ uint32_t avnd_su_pres_restart_compinst_h curr_comp = m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_NEXT(&curr_comp->su_dll_node))) { if ((curr_comp->pres == SA_AMF_PRESENCE_INSTANTIATED) && (m_AVND_COMP_TYPE_IS_PREINSTANTIABLE(curr_comp))) - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATED); } for (curr_comp = m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_NEXT(&comp->su_dll_node)); curr_comp; @@ -3190,7 +3201,7 @@ uint32_t avnd_su_pres_restart_compinst_h /* mark the csi state assigned */ m_AVND_COMP_CSI_CURR_ASSIGN_STATE_SET(curr_csi, AVND_COMP_CSI_ASSIGN_STATE_ASSIGNED); if (su->pres != SA_AMF_PRESENCE_INSTANTIATED) - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATED); /* get the next csi */ curr_csi = (AVND_COMP_CSI_REC *)m_NCS_DBLIST_FIND_NEXT(&curr_csi->si_dll_node); @@ -3285,7 +3296,7 @@ uint32_t avnd_su_pres_restart_comptermin } /* for */ /* transition to terminating state */ - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_TERMINATING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_TERMINATING); done: TRACE_LEAVE2("%u", rc); @@ -3322,7 +3333,7 @@ uint32_t avnd_su_pres_inst_compinstfail_ su->name.value, compname); /* transition to inst-failed state */ - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATION_FAILED); + 
avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATION_FAILED); m_AVND_SU_ALL_TERM_RESET(su); /* @@ -3563,7 +3574,7 @@ uint32_t avnd_su_pres_terming_surestart_ } if (all_csis_in_restarting_state(su) == true) { TRACE("All CSIs are in restarting state, so marking SU restarting"); - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_RESTARTING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_RESTARTING); } } done: @@ -3658,7 +3669,7 @@ uint32_t avnd_su_pres_terming_suinst_hdl } /* for */ if ((curr_comp) && (curr_comp->pres == SA_AMF_PRESENCE_INSTANTIATING) && (su->pres == SA_AMF_PRESENCE_TERMINATING)) - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATING); } /*TODO_SURESTART:Will relook for NPI SU as there seems a rare possbility for su instantiate @@ -3685,12 +3696,12 @@ uint32_t avnd_su_pres_terming_suinst_hdl } if ((csi->comp) && (csi->comp->pres == SA_AMF_PRESENCE_INSTANTIATING) && (su->pres == SA_AMF_PRESENCE_TERMINATING)) - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATING); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATING); } done: if (rc == NCSCC_RC_FAILURE) - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATION_FAILED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATION_FAILED); TRACE_LEAVE2("%u", rc); return rc; } @@ -3745,7 +3756,7 @@ uint32_t avnd_su_pres_inst_compinst_hdle m_AVND_COMP_CSI_CURR_ASSIGN_STATE_SET(curr_csi, AVND_COMP_CSI_ASSIGN_STATE_ASSIGNED); if (su->pres != SA_AMF_PRESENCE_INSTANTIATED) - avnd_su_pres_state_set(su, SA_AMF_PRESENCE_INSTANTIATED); + avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_INSTANTIATED); /* get the next csi */ curr_csi = (AVND_COMP_CSI_REC *)m_NCS_DBLIST_FIND_NEXT(&curr_csi->si_dll_node); diff --git a/osaf/services/saf/amf/amfnd/tmr.cc b/osaf/services/saf/amf/amfnd/tmr.cc --- a/osaf/services/saf/amf/amfnd/tmr.cc +++ b/osaf/services/saf/amf/amfnd/tmr.cc @@ -38,6 +38,7 @@ static const char *tmr_type[] = "proxied 
inst timer", "proxied orphan timer", "HB tmr", + "SC absence timer", "Qscing Complete", "AVND_TMR_MAX" }; diff --git a/osaf/services/saf/amf/amfnd/util.cc b/osaf/services/saf/amf/amfnd/util.cc --- a/osaf/services/saf/amf/amfnd/util.cc +++ b/osaf/services/saf/amf/amfnd/util.cc @@ -30,7 +30,7 @@ ****************************************************************************** */ - +#include <immutil.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> @@ -365,6 +365,12 @@ void dnd_msg_free(AVSV_DND_MSG *msg) case AVSV_D2N_PG_TRACK_ACT_RSP_MSG: free_d2n_pg_msg_info(msg); break; + case AVSV_N2D_ND_SISU_STATE_INFO_MSG: + free_n2d_nd_sisu_state_info(msg); + break; + case AVSV_N2D_ND_CSICOMP_STATE_INFO_MSG: + free_n2d_nd_csicomp_state_info(msg); + break; default: break; } @@ -638,6 +644,7 @@ void amf_cbk_free(AVSV_AMF_CBK_INFO *cbk /* free the cbk-info ptr */ delete cbk_info; + cbk_info = nullptr; return; } @@ -667,3 +674,147 @@ void nd2nd_avnd_msg_free(AVSV_ND2ND_AVND return; } + +/**************************************************************************** + Name : free_n2d_nd_csicomp_state_info + + Description : This routine frees csicomp_state message. + + Arguments : msg - ptr to the msg + + Return Values : None + + Notes : None. 
+******************************************************************************/ +void free_n2d_nd_csicomp_state_info(AVSV_DND_MSG *msg) +{ + TRACE_ENTER(); + + AVSV_N2D_ND_CSICOMP_STATE_MSG_INFO *info = nullptr; + AVSV_CSICOMP_STATE_MSG *ptr = nullptr; + AVSV_CSICOMP_STATE_MSG *next_ptr = nullptr; + + AVSV_COMP_STATE_MSG *comp_ptr = nullptr; + AVSV_COMP_STATE_MSG *comp_next_ptr = nullptr; + + if (msg == nullptr) + goto done; + + osafassert(msg->msg_type == AVSV_N2D_ND_CSICOMP_STATE_INFO_MSG); + + info = &msg->msg_info.n2d_nd_csicomp_state_info; + osafassert(info); + + ptr = info->csicomp_list; + + TRACE("%u csicomp records to free", info->num_csicomp); + + while (ptr != nullptr) { + TRACE("freeing %s:%s", (char*)ptr->safCSI.value, (char*)ptr->safComp.value); + next_ptr = ptr->next; + delete ptr; + ptr = next_ptr; + } + + comp_ptr = info->comp_list; + + TRACE("%u comp records to free", info->num_comp); + + while (comp_ptr != nullptr) { + comp_next_ptr = comp_ptr->next; + delete comp_ptr; + comp_ptr = comp_next_ptr; + } + + info->num_csicomp = 0; + info->csicomp_list = nullptr; + info->num_comp = 0; + info->comp_list = nullptr; + +done: + TRACE_LEAVE(); +} + +/**************************************************************************** + Name : free_n2d_nd_sisu_state_info + + Description : This routine frees sisu_state message. + + Arguments : msg - ptr to the msg + + Return Values : None + + Notes : None. 
+******************************************************************************/ +void free_n2d_nd_sisu_state_info(AVSV_DND_MSG *msg) +{ + TRACE_ENTER(); + + AVSV_N2D_ND_SISU_STATE_MSG_INFO *info = &msg->msg_info.n2d_nd_sisu_state_info; + AVSV_SISU_STATE_MSG *ptr = info->sisu_list; + AVSV_SISU_STATE_MSG *next_ptr = nullptr; + AVSV_SU_STATE_MSG *su_ptr = info->su_list; + AVSV_SU_STATE_MSG *su_next_ptr = nullptr; + + if (msg == nullptr) + goto done; + + osafassert(msg->msg_type == AVSV_N2D_ND_SISU_STATE_INFO_MSG); + + info = &msg->msg_info.n2d_nd_sisu_state_info; + osafassert(info); + + ptr = info->sisu_list; + + TRACE("%u sisu records to free", info->num_sisu); + + while (ptr != nullptr) { + TRACE("freeing %s:%s", (char*)ptr->safSI.value, (char*)ptr->safSU.value); + next_ptr = ptr->next; + delete ptr; + ptr = next_ptr; + } + + su_ptr = info->su_list; + + TRACE("%u su records to free", info->num_su); + + while (su_ptr != nullptr) { + su_next_ptr = su_ptr->next; + delete su_ptr; + su_ptr = su_next_ptr; + } + + + info->num_sisu = 0; + info->sisu_list = nullptr; + info->num_su = 0; + info->su_list = nullptr; + +done: + TRACE_LEAVE(); +} + +/**************************************************************************** + Name : saImmOmInitialize_cond + + Description : A wrapper around saImmOmInitialize for headless mode. + + Arguments : immHandle, immCallbacks, version - passed unchanged to saImmOmInitialize + + Return Values : SA_AIS_OK or other SA_AIS_ERR_xxx code + + Notes : None. +******************************************************************************/ +SaAisErrorT saImmOmInitialize_cond(SaImmHandleT *immHandle, + const SaImmCallbacksT *immCallbacks, SaVersionT *version) +{ + if (avnd_cb->scs_absence_max_duration == 0) { + return immutil_saImmOmInitialize(immHandle, immCallbacks, version); + } + + // if headless mode is enabled, don't retry as IMMA already has a 30s + // initial connection timeout towards IMMND. If we retry, we may + // cause the watchdog to kill AMFND.
+ return saImmOmInitialize(immHandle, immCallbacks, version); +} ------------------------------------------------------------------------------ Site24x7 APM Insight: Get Deep Visibility into Application Performance APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month Monitor end-to-end web transactions and take corrective actions now Troubleshoot faster and improve end-user experience. Signup Now! http://pubads.g.doubleclick.net/gampad/clk?id=272487151&iu=/4140 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel