Ack, only tested comprestart on this patch.
Please update the AMF PR doc.
The node-switchover discussion that is going on in another related mail
thread can be considered after the FC tag.
I will consider this enhancement under "#2065, cluster reset
recovery" after the FC tag and will publish a small patch.
Thanks,
Praveen
On 24-Feb-17 2:03 AM, Alex Jones wrote:
> src/amf/amfd/comp.cc | 3 +-
> src/amf/amfd/ntf.cc | 50
> ++++++++++++++++++++++++++++++++++++----------
> src/amf/amfd/ntf.h | 36 +-------------------------------
> src/amf/amfd/sgproc.cc | 42 ++++++++++++++++++++++++++++++++++++--
> src/amf/amfd/su.cc | 22 ++++++++++++++++---
> src/amf/amfd/su.h | 3 +-
> src/amf/amfnd/avnd_su.h | 2 +
> src/amf/amfnd/clc.cc | 26 ++++++++++++++++-------
> src/amf/amfnd/comp.cc | 15 ++++++++++++++
> src/amf/amfnd/err.cc | 35 +++++++++++++++++++++++++++++---
> src/amf/amfnd/sudb.cc | 3 ++
> src/amf/amfnd/susm.cc | 10 +++++++-
> src/amf/common/amf_defs.h | 1 +
> 13 files changed, 180 insertions(+), 68 deletions(-)
>
>
> This patch adds support for Section 3.11.1.4.2 of AMF B.04.01 spec:
> Restrictions
> to Auto-Repair.
>
> diff --git a/src/amf/amfd/comp.cc b/src/amf/amfd/comp.cc
> --- a/src/amf/amfd/comp.cc
> +++ b/src/amf/amfd/comp.cc
> @@ -155,7 +155,8 @@ void AVD_COMP::avd_comp_pres_state_set(S
> (((node->saAmfNodeFailfastOnTerminationFailure == true) &&
> (saAmfCompPresenceState ==
> SA_AMF_PRESENCE_TERMINATION_FAILED)) ||
> ((node->saAmfNodeFailfastOnInstantiationFailure == true) &&
> - (saAmfCompPresenceState ==
> SA_AMF_PRESENCE_INSTANTIATION_FAILED)))) {
> + (saAmfCompPresenceState ==
> SA_AMF_PRESENCE_INSTANTIATION_FAILED))) &&
> + (su->saAmfSUMaintenanceCampaign.empty())) {
>
> saflog(LOG_NOTICE, amfSvcUsrName, "%s PresenceState %s => %s",
> osaf_extended_name_borrow(&comp_info.name),
> avd_pres_state_name[old_state],
> diff --git a/src/amf/amfd/ntf.cc b/src/amf/amfd/ntf.cc
> --- a/src/amf/amfd/ntf.cc
> +++ b/src/amf/amfd/ntf.cc
> @@ -29,6 +29,28 @@
> #include "base/osaf_time.h"
> #include <queue>
>
> +static uint32_t sendAlarmNotificationAvd(AVD_CL_CB *avd_cb,
> + const std::string& ntf_object,
> + SaUint8T *add_text,
> + SaUint16T majorId,
> + SaUint16T minorId,
> + uint32_t probableCause,
> + uint32_t perceivedSeverity,
> + NCSCONTEXT add_info,
> + bool add_info_is_present);
> +
> +static uint32_t sendStateChangeNotificationAvd(AVD_CL_CB *avd_cb,
> + const std::string& ntf_object,
> + SaUint8T *add_text,
> + SaUint16T majorId,
> + SaUint16T minorId,
> + uint32_t sourceIndicator,
> + SaUint16T stateId,
> + SaUint16T oldstate,
> + SaUint16T newState,
> + NCSCONTEXT add_info,
> + bool additional_info_is_present);
> +
>
> /*****************************************************************************
> Name : avd_send_comp_inst_failed_alarm
>
> @@ -231,9 +253,11 @@ void avd_send_admin_state_chg_ntf(const
> Notes :
>
> *****************************************************************************/
> void avd_send_oper_chg_ntf(const std::string& name,
> SaAmfNotificationMinorIdT minor_id,
> - SaAmfOperationalStateT old_state, SaAmfOperationalStateT
> new_state)
> + SaAmfOperationalStateT old_state, SaAmfOperationalStateT
> new_state,
> + const std::string *maintenanceCampaign)
> {
> const std::string add_text("Oper state " + name + " changed");
> + const SaNameTWrapper mc(maintenanceCampaign ? *maintenanceCampaign :
> "");
>
> TRACE_ENTER();
>
> @@ -246,8 +270,8 @@ void avd_send_oper_chg_ntf(const std::st
> SA_AMF_OP_STATE,
> old_state,
> new_state,
> - nullptr,
> - false);
> + (NCSCONTEXT)static_cast<const
> SaNameT*>(mc),
> + maintenanceCampaign ?
> !maintenanceCampaign->empty() : false);
> TRACE_LEAVE();
> }
>
> @@ -464,7 +488,7 @@ void avd_alarm_clear(const std::string&
> TRACE_LEAVE();
> }
>
> -SaAisErrorT fill_ntf_header_part_avd(SaNtfNotificationHeaderT
> *notificationHeader,
> +static SaAisErrorT fill_ntf_header_part_avd(SaNtfNotificationHeaderT
> *notificationHeader,
> SaNtfEventTypeT eventType,
> const std::string &comp_name,
> SaUint8T *add_text,
> @@ -472,7 +496,7 @@ SaAisErrorT fill_ntf_header_part_avd(SaN
> SaUint16T minorId,
> SaInt8T *avd_name,
> NCSCONTEXT add_info,
> - int additional_info_is_present,
> + bool additional_info_is_present,
> SaNtfNotificationHandleT notificationHandle)
> {
>
> @@ -512,7 +536,11 @@ SaAisErrorT fill_ntf_header_part_avd(SaN
> notificationHeader->additionalInfo[0].infoId =
> SA_AMF_SI_NAME;
> notificationHeader->additionalInfo[0].infoType
> = SA_NTF_VALUE_LDAP_NAME;
>
> - }
> + } else if (minorId == SA_AMF_NTFID_SU_OP_STATE) {
> + /* maintenance campaign */
> + notificationHeader->additionalInfo[0].infoId
> = SA_AMF_MAINTENANCE_CAMPAIGN_DN;
> + notificationHeader->additionalInfo[0].infoType
> = SA_NTF_VALUE_LDAP_NAME;
> + }
>
> ret = saNtfPtrValAllocate(notificationHandle,
> sizeof (SaNameT) + 1,
> @@ -590,7 +618,7 @@ uint32_t sendAlarmNotificationAvd(AVD_CL
> uint32_t probableCause,
> uint32_t perceivedSeverity,
> NCSCONTEXT add_info,
> - int type)
> + bool add_info_is_present)
> {
> uint32_t status = NCSCC_RC_FAILURE;
> SaUint16T add_info_items = 0;
> @@ -608,7 +636,7 @@ uint32_t sendAlarmNotificationAvd(AVD_CL
> }
> NtfSend *job = new NtfSend{};
>
> - if (type != 0) {
> + if (add_info_is_present) {
> add_info_items = 1;
> allocation_size = SA_NTF_ALLOC_SYSTEM_LIMIT;
> }
> @@ -643,7 +671,7 @@ uint32_t sendAlarmNotificationAvd(AVD_CL
> minorId,
> const_cast<SaInt8T*>(AMF_NTF_SENDER),
> add_info,
> - type,
> + add_info_is_present,
>
> job->myntf.notification.alarmNotification.notificationHandle);
>
> if (status != SA_AIS_OK) {
> @@ -665,7 +693,7 @@ uint32_t sendAlarmNotificationAvd(AVD_CL
>
> }
>
> -uint32_t sendStateChangeNotificationAvd(AVD_CL_CB *avd_cb,
> +static uint32_t sendStateChangeNotificationAvd(AVD_CL_CB *avd_cb,
> const std::string& ntf_object,
> SaUint8T *add_text,
> SaUint16T majorId,
> @@ -675,7 +703,7 @@ uint32_t sendStateChangeNotificationAvd(
> SaUint16T oldstate,
> SaUint16T newState,
> NCSCONTEXT add_info,
> - int additional_info_is_present)
> + bool additional_info_is_present)
> {
> uint32_t status = NCSCC_RC_FAILURE;
> SaUint16T add_info_items = 0;
> diff --git a/src/amf/amfd/ntf.h b/src/amf/amfd/ntf.h
> --- a/src/amf/amfd/ntf.h
> +++ b/src/amf/amfd/ntf.h
> @@ -55,7 +55,8 @@ void avd_send_comp_proxy_status_unproxie
> void avd_send_admin_state_chg_ntf(const std::string& name,
> SaAmfNotificationMinorIdT minor_id,
> SaAmfAdminStateT old_state,
> SaAmfAdminStateT new_state);
> void avd_send_oper_chg_ntf(const std::string& name,
> SaAmfNotificationMinorIdT minor_id,
> - SaAmfOperationalStateT old_state,
> SaAmfOperationalStateT new_state);
> + SaAmfOperationalStateT old_state,
> SaAmfOperationalStateT new_state,
> + const std::string *maintenanceCampaign
> = 0);
> void avd_send_su_pres_state_chg_ntf(const std::string& su_name,
> SaAmfPresenceStateT old_state,
> SaAmfPresenceStateT new_state);
> void avd_send_su_ha_state_chg_ntf(const std::string& su_name, const
> std::string& si_name,
> @@ -67,39 +68,6 @@ void avd_send_si_assigned_ntf(const std:
> void avd_send_comp_proxy_status_proxied_ntf(const std::string& comp_name,
> SaAmfProxyStatusT old_state,
> SaAmfProxyStatusT new_state);
>
> -/* general functions */
> -SaAisErrorT fill_ntf_header_part(SaNtfNotificationHeaderT
> *notificationHeader,
> - SaNtfEventTypeT eventType,
> - const std::string& comp_name,
> - SaUint8T *add_text,
> - SaUint16T majorId,
> - SaUint16T minorId,
> - SaInt8T *avnd_name,
> - NCSCONTEXT add_info,
> - int type); /* add_info 0 --> no, 1-->
> node_name, 2--> si_name*/
> -
> -uint32_t sendAlarmNotificationAvd(AVD_CL_CB *avd_cb,
> - const std::string& comp_name,
> - SaUint8T *add_text,
> - SaUint16T majorId,
> - SaUint16T minorId,
> - uint32_t probableCause,
> - uint32_t perceivedSeverity,
> - NCSCONTEXT add_info,
> - int type); /* add_info 0 --> no, 1-->
> node_name, 2--> si_name*/
> -
> -uint32_t sendStateChangeNotificationAvd(AVD_CL_CB *avd_cb,
> - const std::string& comp_name,
> - SaUint8T *add_text,
> - SaUint16T majorId,
> - SaUint16T minorId,
> - uint32_t sourceIndicator,
> - SaUint16T stateId,
> - SaUint16T oldState,
> - SaUint16T newState,
> - NCSCONTEXT add_info,
> - int type); /* add_info 0 --> no,
> 1--> node_name, 2--> si_name*/
> -
> /* Clearing of alarms */
> void avd_alarm_clear(const std::string& name, SaUint16T minorId, uint32_t
> probableCause);
>
> diff --git a/src/amf/amfd/sgproc.cc b/src/amf/amfd/sgproc.cc
> --- a/src/amf/amfd/sgproc.cc
> +++ b/src/amf/amfd/sgproc.cc
> @@ -286,7 +286,8 @@ void su_try_repair(const AVD_SU *su)
> if ((su->sg_of_su->saAmfSGAutoRepair) && (su->saAmfSUFailover) &&
> (su->saAmfSUOperState == SA_AMF_OPERATIONAL_DISABLED) &&
> (su->saAmfSUPresenceState !=
> SA_AMF_PRESENCE_INSTANTIATION_FAILED) &&
> - (su->saAmfSUPresenceState !=
> SA_AMF_PRESENCE_TERMINATION_FAILED)) {
> + (su->saAmfSUPresenceState !=
> SA_AMF_PRESENCE_TERMINATION_FAILED) &&
> + (su->saAmfSUMaintenanceCampaign.empty())) {
>
> saflog(LOG_NOTICE, amfSvcUsrName, "Ordering Auto repair of '%s'
> as sufailover repair action",
> su->name.c_str());
> @@ -615,6 +616,11 @@ static void perform_nodeswitchover_recov
> if (su->list_of_susi == nullptr)
> continue;
>
> + if (!su->saAmfSUMaintenanceCampaign.empty()) {
> + node_reboot = false;
> + continue;
> + }
> +
> if (su_recover_from_fault(su) == NCSCC_RC_FAILURE) {
> LOG_ER("%s:%d %s", __FUNCTION__, __LINE__,
> su->name.c_str());
> goto done;
> @@ -712,6 +718,15 @@ void avd_su_oper_state_evh(AVD_CL_CB *cb
> TRACE("Component in %s requested FAILFAST",
> su->name.c_str());
> }
>
> + if (!su->saAmfSUMaintenanceCampaign.empty()) {
> + saflog(LOG_NOTICE,
> + amfSvcUsrName,
> + "Node Fail-Fast disabled because maintenance
> campaign %s is set for su: %s",
> + su->saAmfSUMaintenanceCampaign.c_str(),
> + su->name.c_str());
> + goto done;
> + }
> +
> avd_nd_ncs_su_failed(cb, node);
> goto done;
> }
> @@ -751,6 +766,16 @@ void avd_su_oper_state_evh(AVD_CL_CB *cb
> */
> if (su->sg_of_su->sg_ncs_spec == true) {
> su->set_oper_state(SA_AMF_OPERATIONAL_DISABLED);
> +
> + if (!su->saAmfSUMaintenanceCampaign.empty()) {
> + saflog(LOG_NOTICE,
> + amfSvcUsrName,
> + "Node Fail-Fast disabled because maintenance
> campaign %s is set for su: %s",
> + su->saAmfSUMaintenanceCampaign.c_str(),
> + su->name.c_str());
> + goto done;
> + }
> +
> avd_nd_ncs_su_failed(cb, node);
> goto done;
> }
> @@ -789,10 +814,16 @@ void avd_su_oper_state_evh(AVD_CL_CB *cb
> */
> avd_node_oper_state_set(node,
> SA_AMF_OPERATIONAL_DISABLED);
> node->recvr_fail_sw = true;
> +
> + // if maintenance campaign is ongoing, disable
> node reboot
> + if (!su->saAmfSUMaintenanceCampaign.empty())
> + node_reboot_req = false;
> +
> switch
> (n2d_msg->msg_info.n2d_opr_state.rec_rcvr.raw) {
> case SA_AMF_NODE_FAILOVER:
> if ((node->node_info.nodeId ==
> cb->node_id_avd) &&
> -
> (node->saAmfNodeAutoRepair)) {
> +
> (node->saAmfNodeAutoRepair) &&
> +
> (su->saAmfSUMaintenanceCampaign.empty())) {
> /* This is a case when Act ctlr
> is rebooting. Don't do appl failover
> as of now because during
> appl failover if Act controller reboots,
> then there may be packet
> losses. Anyway, this controller is
> @@ -1566,12 +1597,17 @@ void avd_su_si_assign_evh(AVD_CL_CB *cb,
>
> /* We are checking only application components as on payload
> all ncs comp are in no_red model.
> We are doing the same thing for controller also. */
> + bool maintenanceCampaignSet(false);
> +
> + if (su && !su->saAmfSUMaintenanceCampaign.empty())
> + maintenanceCampaignSet = true;
> +
> for (const auto& temp_su : node->list_of_su) {
> if (nullptr != temp_su->list_of_susi) {
> all_su_unassigned = false;
> }
> }
> - if (true == all_su_unassigned) {
> + if (true == all_su_unassigned && !maintenanceCampaignSet) {
> /* All app su got unassigned, Safe to reboot the blade
> now. */
> node_try_repair(node);
> }
> diff --git a/src/amf/amfd/su.cc b/src/amf/amfd/su.cc
> --- a/src/amf/amfd/su.cc
> +++ b/src/amf/amfd/su.cc
> @@ -737,7 +737,8 @@ void AVD_SU::set_pres_state(SaAmfPresenc
> else if ((pres_state == SA_AMF_PRESENCE_TERMINATION_FAILED) &&
> (su_on_node->saAmfNodeFailfastOnTerminationFailure ==
> true) &&
> (sg_of_su->saAmfSGAutoRepair == true) &&
> - (su_on_node->saAmfNodeAutoRepair == true))
> + (su_on_node->saAmfNodeAutoRepair == true) &&
> + (saAmfSUMaintenanceCampaign.empty()))
> /* According to AMF B.04.01 Section 4.8 Page 214 if user
> configures
> saAmfNodeFailfastOnTerminationFailure = true, AMF has to
> perform
> node failfast recovery action. So mark SU to
> SA_AMF_PRESENCE_TERMINATION_FAILED
> @@ -747,7 +748,8 @@ void AVD_SU::set_pres_state(SaAmfPresenc
> else if ((pres_state == SA_AMF_PRESENCE_INSTANTIATION_FAILED) &&
> (su_on_node->saAmfNodeFailfastOnInstantiationFailure ==
> true) &&
> (sg_of_su->saAmfSGAutoRepair == true) &&
> - (su_on_node->saAmfNodeAutoRepair == true))
> + (su_on_node->saAmfNodeAutoRepair == true) &&
> + (saAmfSUMaintenanceCampaign.empty()))
> /* According to AMF B.04.01 Section 4.6 Page 212 if user
> configures
> saAmfNodeFailfastOnInstantiationFailure = true, AMF has to
> perform
> node failfast recovery action. So mark SU to
> SA_AMF_PRESENCE_INSTANTIATION_FAILED
> @@ -811,7 +813,7 @@ void AVD_SU::set_oper_state(SaAmfOperati
> saAmfSUOperState = oper_state;
>
> avd_send_oper_chg_ntf(name, SA_AMF_NTFID_SU_OP_STATE, old_state,
> - saAmfSUOperState);
> + saAmfSUOperState, &saAmfSUMaintenanceCampaign);
>
> avd_saImmOiRtObjectUpdate(name, "saAmfSUOperState",
> SA_IMM_ATTR_SAUINT32T, &saAmfSUOperState);
> @@ -1884,6 +1886,7 @@ static void su_ccb_apply_modify_hdlr(str
> TRACE("saAmfSUMaintenanceCampaign set to '%s'
> for '%s'",
>
> su->saAmfSUMaintenanceCampaign.c_str(), su->name.c_str());
> }
> + su->set_su_maintenance_campaign();
> } else if (!strcmp(attr_mod->modAttr.attrName, "saAmfSUType")) {
> AVD_SUTYPE *sut;
> SaNameT sutype_name = *(SaNameT*)
> attr_mod->modAttr.attrValues[0];
> @@ -2118,6 +2121,13 @@ void AVD_SU::send_attribute_update(AVSV_
> memcpy(&param.value[0], &sufailover, param.value_len);
> break;
> }
> + case saAmfSUMaintenanceCampaign_ID:
> + {
> + param.attr_id = saAmfSUMaintenanceCampaign_ID;
> + param.value_len = saAmfSUMaintenanceCampaign.length();
> + memcpy(&param.value[0],
> saAmfSUMaintenanceCampaign.data(), param.value_len);
> + break;
> + }
> default:
> osafassert(0);
> }
> @@ -2137,6 +2147,10 @@ void AVD_SU::set_su_failover(bool value)
> send_attribute_update(saAmfSUFailOver_ID);
> }
>
> +void AVD_SU::set_su_maintenance_campaign(void) {
> + send_attribute_update(saAmfSUMaintenanceCampaign_ID);
> +}
> +
> /**
> * Delete all SUSIs assigned to the SU.
> *
> @@ -2209,7 +2223,7 @@ void AVD_SU::set_su_switch(SaToggleState
> m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, this, AVSV_CKPT_SU_SWITCH);
> }
>
> -AVD_AVND *AVD_SU::get_node_ptr(void) {
> +AVD_AVND *AVD_SU::get_node_ptr(void) const {
> if (su_is_external == true)
> return avd_cb->ext_comp_info.local_avnd_node;
> else
> diff --git a/src/amf/amfd/su.h b/src/amf/amfd/su.h
> --- a/src/amf/amfd/su.h
> +++ b/src/amf/amfd/su.h
> @@ -96,6 +96,7 @@ class AVD_SU {
> AVD_SU *su_list_su_type_next;
>
> void set_su_failover(bool value);
> + void set_su_maintenance_campaign(void);
> void dec_curr_stdby_si();
> void inc_curr_stdby_si();
> void inc_curr_act_si();
> @@ -116,7 +117,7 @@ class AVD_SU {
> void set_term_state(bool state);
> void remove_from_model();
> void set_su_switch(SaToggleState state, bool wrt_to_imm = true);
> - AVD_AVND *get_node_ptr(void);
> + AVD_AVND *get_node_ptr(void) const;
> bool is_in_service(void);
> bool is_instantiable(void);
> void reset_all_comps_assign_flag();
> diff --git a/src/amf/amfnd/avnd_su.h b/src/amf/amfnd/avnd_su.h
> --- a/src/amf/amfnd/avnd_su.h
> +++ b/src/amf/amfnd/avnd_su.h
> @@ -143,6 +143,8 @@ typedef struct avnd_su_tag {
> uint32_t si_active_cnt; /* no of active SIs assigned to this su */
> uint32_t si_standby_cnt; /* no of standby SIs assigned to this
> su */
>
> + std::string suMaintenanceCampaign;
> +
> /*
> * Ordered comp list (based on inst level). Note that as the
> * lexicographic key (comp-name) & the keys used for
> diff --git a/src/amf/amfnd/clc.cc b/src/amf/amfnd/clc.cc
> --- a/src/amf/amfnd/clc.cc
> +++ b/src/amf/amfnd/clc.cc
> @@ -971,19 +971,29 @@ uint32_t avnd_comp_clc_st_chng_prc(AVND_
> if (comp->su->is_ncs == true) {
> if(SA_AMF_PRESENCE_INSTANTIATION_FAILED == final_st) {
> LOG_ER("'%s'got Inst failed", comp->name.c_str());
> - opensaf_reboot(avnd_cb->node_info.nodeId,
> -
> osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
> - "NCS component
> Instantiation failed");
> - LOG_ER("Amfnd is exiting (due to ncs comp inst failed)
> to aid fast reboot");
> - exit(0);
> + if (comp->su->suMaintenanceCampaign.empty()) {
> + opensaf_reboot(avnd_cb->node_info.nodeId,
> +
> osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
> + "NCS component Instantiation failed");
> + LOG_ER("Amfnd is exiting (due to ncs comp inst
> failed) to aid fast reboot");
> + exit(0);
> + } else {
> + LOG_NO("not rebooting as maintenance campaign
> is ongoing");
> + goto done;
> + }
> }
> if(SA_AMF_PRESENCE_TERMINATION_FAILED == final_st) {
> LOG_ER("'%s'got Term failed", comp->name.c_str());
> - opensaf_reboot(avnd_cb->node_info.nodeId,
> + if (comp->su->suMaintenanceCampaign.empty()) {
> + opensaf_reboot(avnd_cb->node_info.nodeId,
>
> osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
> "NCS component
> Termination failed");
> - LOG_ER("Amfnd is exiting (due to ncs comp term failed)
> to aid fast reboot");
> - exit(0);
> + LOG_ER("Amfnd is exiting (due to ncs comp term
> failed) to aid fast reboot");
> + exit(0);
> + } else {
> + LOG_NO("not rebooting as maintenance campaign
> is ongoing");
> + goto done;
> + }
> }
> }
>
> diff --git a/src/amf/amfnd/comp.cc b/src/amf/amfnd/comp.cc
> --- a/src/amf/amfnd/comp.cc
> +++ b/src/amf/amfnd/comp.cc
> @@ -2675,6 +2675,21 @@ uint32_t comp_restart_initiate(AVND_COMP
> if (NCSCC_RC_SUCCESS != rc)
> goto done;
>
> + if (!comp->su->suMaintenanceCampaign.empty()) {
> + LOG_NO("not restarting comp because maintenance campaign is set: %s",
> + comp->su->suMaintenanceCampaign.c_str());
> +
> + m_AVND_COMP_OPER_STATE_SET(comp, SA_AMF_OPERATIONAL_DISABLED);
> + rc = avnd_comp_oper_state_avd_sync(cb, comp);
> +
> + /* update su oper state */
> + if (m_AVND_SU_OPER_STATE_IS_ENABLED(comp->su)) {
> + m_AVND_SU_OPER_STATE_SET(comp->su, SA_AMF_OPERATIONAL_DISABLED);
> + avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSUOperState_ID,
> comp->su->name, comp->su->oper);
> + }
> + goto done;
> + }
> +
> rc = avnd_comp_clc_fsm_run(cb, comp, AVND_COMP_CLC_PRES_FSM_EV_RESTART);
> if (NCSCC_RC_SUCCESS != rc)
> goto done;
> diff --git a/src/amf/amfnd/err.cc b/src/amf/amfnd/err.cc
> --- a/src/amf/amfnd/err.cc
> +++ b/src/amf/amfnd/err.cc
> @@ -413,8 +413,14 @@ uint32_t avnd_err_process(AVND_CB *cb, A
> LOG_ER("%s Faulted due to:%s Recovery is:%s",
> comp->name.c_str(), g_comp_err[comp->err_info.src],
> g_comp_rcvr[esc_rcvr - 1]);
> /* do the local node reboot for node_failfast or ncs component
> failure*/
> - opensaf_reboot(avnd_cb->node_info.nodeId,
> osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
> + if (comp->su->suMaintenanceCampaign.empty()) {
> + opensaf_reboot(avnd_cb->node_info.nodeId,
> osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
> "Component faulted: recovery is node failfast");
> + } else {
> + LOG_NO("not rebooting because maintenance campaign is
> set: %s",
> + comp->su->suMaintenanceCampaign.c_str());
> + goto done;
> + }
> }
>
> /* execute the recovery */
> @@ -671,6 +677,8 @@ uint32_t avnd_err_rcvr_su_restart(AVND_C
> if (NCSCC_RC_SUCCESS != rc)
> goto done;
>
> + avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSUOperState_ID, su->name,
> su->oper);
> +
> set_suRestart_flag(su);
>
> if (su_all_comps_restartable(*su) == true) {
> @@ -694,6 +702,13 @@ uint32_t avnd_err_rcvr_su_restart(AVND_C
> rc = avnd_su_si_unmark(cb, su);
> if (NCSCC_RC_SUCCESS != rc)
> goto done;
> +
> + if (!su->suMaintenanceCampaign.empty()) {
> + LOG_NO("not restarting su because maintenance campaign
> is set: %s",
> + su->suMaintenanceCampaign.c_str());
> + goto done;
> + }
> +
> rc = avnd_su_pres_fsm_run(cb, su, 0,
> AVND_SU_PRES_FSM_EV_RESTART);
> if (NCSCC_RC_SUCCESS != rc)
> goto done;
> @@ -717,6 +732,12 @@ uint32_t avnd_err_rcvr_su_restart(AVND_C
> TODO:In future when AMF supports comp-failover in spec
> compliance then this
> case should be alligned with that.
> */
> + if (!su->suMaintenanceCampaign.empty()) {
> + LOG_NO("not restarting su because maintenance campaign
> is set: %s",
> + su->suMaintenanceCampaign.c_str());
> + goto done;
> + }
> +
> if (m_AVND_SU_IS_PREINSTANTIABLE(su)) {
> if (m_AVND_COMP_TYPE_IS_PREINSTANTIABLE(failed_comp))
> rc = avnd_comp_clc_fsm_run(cb, failed_comp,
> AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
> @@ -1026,11 +1047,17 @@ uint32_t avnd_err_rcvr_node_failover(AVN
> rc = avnd_comp_clc_fsm_run(cb, comp,
> AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
> if (rc != NCSCC_RC_SUCCESS) {
> LOG_ER("'%s' termination failed", comp->name.c_str());
> - opensaf_reboot(avnd_cb->node_info.nodeId,
> + if (comp->su->suMaintenanceCampaign.empty()) {
> + opensaf_reboot(avnd_cb->node_info.nodeId,
>
> osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
> "Component termination
> failed at node failover");
> - LOG_ER("Exiting (due to comp term failed) to aid fast
> node reboot");
> - exit(1);
> + LOG_ER("Exiting (due to comp term failed) to
> aid fast node reboot");
> + exit(1);
> + } else {
> + LOG_NO("not rebooting because maintenance
> campaign is set: %s",
> +
> comp->su->suMaintenanceCampaign.c_str());
> + continue;
> + }
> }
> avnd_su_pres_state_set(cb, comp->su,
> SA_AMF_PRESENCE_TERMINATING);
> }
> diff --git a/src/amf/amfnd/sudb.cc b/src/amf/amfnd/sudb.cc
> --- a/src/amf/amfnd/sudb.cc
> +++ b/src/amf/amfnd/sudb.cc
> @@ -226,6 +226,9 @@ uint32_t avnd_su_oper_req(AVND_CB *cb, A
> osafassert(sizeof(uint32_t) == param->value_len);
> su->sufailover = m_NCS_OS_NTOHL(*(uint32_t
> *)(param->value));
> break;
> + case saAmfSUMaintenanceCampaign_ID:
> + su->suMaintenanceCampaign = std::string(param->value,
> param->value_len);
> + break;
> default:
> LOG_NO("%s: Unsupported attribute %u", __FUNCTION__,
> param->attr_id);
> goto done;
> diff --git a/src/amf/amfnd/susm.cc b/src/amf/amfnd/susm.cc
> --- a/src/amf/amfnd/susm.cc
> +++ b/src/amf/amfnd/susm.cc
> @@ -2832,8 +2832,14 @@ uint32_t avnd_su_pres_terming_comptermfa
>
> if (true == su->is_ncs) {
> std::string reason = "SU '" + su->name + "' Termination-failed";
> - opensaf_reboot(avnd_cb->node_info.nodeId,
> osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
> - reason.c_str());
> + if (su->suMaintenanceCampaign.empty()) {
> + opensaf_reboot(avnd_cb->node_info.nodeId,
> osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
> + reason.c_str());
> + } else {
> + LOG_ER("%s", reason.c_str());
> + LOG_NO("not rebooting because su maintenance campaign
> is set: %s",
> + su->suMaintenanceCampaign.c_str());
> + }
> }
>
> done:
> diff --git a/src/amf/common/amf_defs.h b/src/amf/common/amf_defs.h
> --- a/src/amf/common/amf_defs.h
> +++ b/src/amf/common/amf_defs.h
> @@ -204,6 +204,7 @@ typedef enum
> saAmfSUParentSGName_ID = 12,
> saAmfSUIsExternal_ID = 13,
> saAmfSURestartCount_ID = 14,
> + saAmfSUMaintenanceCampaign_ID = 15,
> } AVSV_AMF_SU_ATTR_ID;
>
> /* Attribute ID enum for the saAmfComp class */
>
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel