Ack; I have only tested comprestart with this patch.
Please update the AMF PR doc.

The node-switchover discussion that is going on in another related mail
thread can be considered after the FC tag.

I will consider this enhancement in "#2065, cluster reset
recovery" after the FC tag and will publish a small patch.

Thanks,
Praveen



On 24-Feb-17 2:03 AM, Alex Jones wrote:
>  src/amf/amfd/comp.cc      |   3 +-
>  src/amf/amfd/ntf.cc       |  50 
> ++++++++++++++++++++++++++++++++++++----------
>  src/amf/amfd/ntf.h        |  36 +-------------------------------
>  src/amf/amfd/sgproc.cc    |  42 ++++++++++++++++++++++++++++++++++++--
>  src/amf/amfd/su.cc        |  22 ++++++++++++++++---
>  src/amf/amfd/su.h         |   3 +-
>  src/amf/amfnd/avnd_su.h   |   2 +
>  src/amf/amfnd/clc.cc      |  26 ++++++++++++++++-------
>  src/amf/amfnd/comp.cc     |  15 ++++++++++++++
>  src/amf/amfnd/err.cc      |  35 +++++++++++++++++++++++++++++---
>  src/amf/amfnd/sudb.cc     |   3 ++
>  src/amf/amfnd/susm.cc     |  10 +++++++-
>  src/amf/common/amf_defs.h |   1 +
>  13 files changed, 180 insertions(+), 68 deletions(-)
>
>
> This patch adds support for Section 3.11.1.4.2 of AMF B.04.01 spec: 
> Restrictions
> to Auto-Repair.
>
> diff --git a/src/amf/amfd/comp.cc b/src/amf/amfd/comp.cc
> --- a/src/amf/amfd/comp.cc
> +++ b/src/amf/amfd/comp.cc
> @@ -155,7 +155,8 @@ void AVD_COMP::avd_comp_pres_state_set(S
>               (((node->saAmfNodeFailfastOnTerminationFailure == true) &&
>                (saAmfCompPresenceState == 
> SA_AMF_PRESENCE_TERMINATION_FAILED)) ||
>                ((node->saAmfNodeFailfastOnInstantiationFailure == true) &&
> -               (saAmfCompPresenceState == 
> SA_AMF_PRESENCE_INSTANTIATION_FAILED)))) {
> +               (saAmfCompPresenceState == 
> SA_AMF_PRESENCE_INSTANTIATION_FAILED))) &&
> +             (su->saAmfSUMaintenanceCampaign.empty())) {
>
>               saflog(LOG_NOTICE, amfSvcUsrName, "%s PresenceState %s => %s",
>                               osaf_extended_name_borrow(&comp_info.name), 
> avd_pres_state_name[old_state],
> diff --git a/src/amf/amfd/ntf.cc b/src/amf/amfd/ntf.cc
> --- a/src/amf/amfd/ntf.cc
> +++ b/src/amf/amfd/ntf.cc
> @@ -29,6 +29,28 @@
>  #include "base/osaf_time.h"
>  #include <queue>
>
> +static uint32_t sendAlarmNotificationAvd(AVD_CL_CB *avd_cb,
> +                            const std::string& ntf_object,
> +                            SaUint8T *add_text,
> +                            SaUint16T majorId,
> +                            SaUint16T minorId,
> +                            uint32_t probableCause,
> +                            uint32_t perceivedSeverity,
> +                            NCSCONTEXT add_info,
> +                            bool add_info_is_present);
> +
> +static uint32_t sendStateChangeNotificationAvd(AVD_CL_CB *avd_cb,
> +                                  const std::string& ntf_object,
> +                                  SaUint8T *add_text,
> +                                  SaUint16T majorId,
> +                                  SaUint16T minorId,
> +                                  uint32_t sourceIndicator,
> +                                  SaUint16T stateId,
> +                                  SaUint16T oldstate,
> +                                  SaUint16T newState,
> +                                  NCSCONTEXT add_info,
> +                                  bool additional_info_is_present);
> +
>  
> /*****************************************************************************
>    Name          :  avd_send_comp_inst_failed_alarm
>
> @@ -231,9 +253,11 @@ void avd_send_admin_state_chg_ntf(const
>    Notes         :
>  
> *****************************************************************************/
>  void avd_send_oper_chg_ntf(const std::string& name, 
> SaAmfNotificationMinorIdT minor_id,
> -             SaAmfOperationalStateT old_state, SaAmfOperationalStateT 
> new_state)
> +             SaAmfOperationalStateT old_state, SaAmfOperationalStateT 
> new_state,
> +             const std::string *maintenanceCampaign)
>  {
>       const std::string add_text("Oper state " + name + " changed");
> +     const SaNameTWrapper mc(maintenanceCampaign ? *maintenanceCampaign : 
> "");
>
>       TRACE_ENTER();
>
> @@ -246,8 +270,8 @@ void avd_send_oper_chg_ntf(const std::st
>                                       SA_AMF_OP_STATE,
>                                       old_state,
>                                       new_state,
> -                                     nullptr,
> -                                     false);
> +                                     (NCSCONTEXT)static_cast<const 
> SaNameT*>(mc),
> +                                     maintenanceCampaign ? 
> !maintenanceCampaign->empty() : false);
>       TRACE_LEAVE();
>  }
>
> @@ -464,7 +488,7 @@ void avd_alarm_clear(const std::string&
>         TRACE_LEAVE();
>  }
>
> -SaAisErrorT fill_ntf_header_part_avd(SaNtfNotificationHeaderT 
> *notificationHeader,
> +static SaAisErrorT fill_ntf_header_part_avd(SaNtfNotificationHeaderT 
> *notificationHeader,
>                             SaNtfEventTypeT eventType,
>                             const std::string &comp_name,
>                             SaUint8T *add_text,
> @@ -472,7 +496,7 @@ SaAisErrorT fill_ntf_header_part_avd(SaN
>                             SaUint16T minorId,
>                             SaInt8T *avd_name,
>                             NCSCONTEXT add_info,
> -                           int additional_info_is_present,
> +                           bool additional_info_is_present,
>                             SaNtfNotificationHandleT notificationHandle)
>  {
>
> @@ -512,7 +536,11 @@ SaAisErrorT fill_ntf_header_part_avd(SaN
>                               notificationHeader->additionalInfo[0].infoId = 
> SA_AMF_SI_NAME;
>                               notificationHeader->additionalInfo[0].infoType 
> = SA_NTF_VALUE_LDAP_NAME;
>
> -                     }
> +                     } else if (minorId == SA_AMF_NTFID_SU_OP_STATE) {
> +                             /* maintenance campaign */
> +                             notificationHeader->additionalInfo[0].infoId   
> = SA_AMF_MAINTENANCE_CAMPAIGN_DN;
> +                             notificationHeader->additionalInfo[0].infoType 
> = SA_NTF_VALUE_LDAP_NAME;
> +      }
>
>                       ret = saNtfPtrValAllocate(notificationHandle,
>                                       sizeof (SaNameT) + 1,
> @@ -590,7 +618,7 @@ uint32_t sendAlarmNotificationAvd(AVD_CL
>                              uint32_t probableCause,
>                              uint32_t perceivedSeverity,
>                              NCSCONTEXT add_info,
> -                            int type)
> +                            bool add_info_is_present)
>  {
>       uint32_t status = NCSCC_RC_FAILURE;
>       SaUint16T add_info_items = 0;
> @@ -608,7 +636,7 @@ uint32_t sendAlarmNotificationAvd(AVD_CL
>       }
>       NtfSend *job = new NtfSend{};
>
> -     if (type != 0) {
> +     if (add_info_is_present) {
>               add_info_items = 1;
>               allocation_size = SA_NTF_ALLOC_SYSTEM_LIMIT;
>       }
> @@ -643,7 +671,7 @@ uint32_t sendAlarmNotificationAvd(AVD_CL
>                                minorId,
>                                const_cast<SaInt8T*>(AMF_NTF_SENDER),
>                                add_info,
> -                              type,
> +                              add_info_is_present,
>                                
> job->myntf.notification.alarmNotification.notificationHandle);
>       
>       if (status != SA_AIS_OK) {
> @@ -665,7 +693,7 @@ uint32_t sendAlarmNotificationAvd(AVD_CL
>
>  }
>
> -uint32_t sendStateChangeNotificationAvd(AVD_CL_CB *avd_cb,
> +static uint32_t sendStateChangeNotificationAvd(AVD_CL_CB *avd_cb,
>                                    const std::string& ntf_object,
>                                    SaUint8T *add_text,
>                                    SaUint16T majorId,
> @@ -675,7 +703,7 @@ uint32_t sendStateChangeNotificationAvd(
>                                    SaUint16T oldstate,
>                                    SaUint16T newState,
>                                    NCSCONTEXT add_info,
> -                                  int additional_info_is_present)
> +                                  bool additional_info_is_present)
>  {
>       uint32_t status = NCSCC_RC_FAILURE;
>       SaUint16T add_info_items = 0;
> diff --git a/src/amf/amfd/ntf.h b/src/amf/amfd/ntf.h
> --- a/src/amf/amfd/ntf.h
> +++ b/src/amf/amfd/ntf.h
> @@ -55,7 +55,8 @@ void avd_send_comp_proxy_status_unproxie
>  void avd_send_admin_state_chg_ntf(const std::string& name, 
> SaAmfNotificationMinorIdT minor_id,
>                                       SaAmfAdminStateT old_state, 
> SaAmfAdminStateT new_state);
>  void avd_send_oper_chg_ntf(const std::string& name, 
> SaAmfNotificationMinorIdT minor_id,
> -                                     SaAmfOperationalStateT old_state, 
> SaAmfOperationalStateT new_state);
> +                                     SaAmfOperationalStateT old_state, 
> SaAmfOperationalStateT new_state,
> +                                     const std::string *maintenanceCampaign 
> = 0);
>  void avd_send_su_pres_state_chg_ntf(const std::string& su_name, 
> SaAmfPresenceStateT old_state,
>                                       SaAmfPresenceStateT new_state);
>  void avd_send_su_ha_state_chg_ntf(const std::string& su_name, const 
> std::string& si_name,
> @@ -67,39 +68,6 @@ void avd_send_si_assigned_ntf(const std:
>  void avd_send_comp_proxy_status_proxied_ntf(const std::string& comp_name,
>                                       SaAmfProxyStatusT old_state, 
> SaAmfProxyStatusT new_state);
>
> -/* general functions */
> -SaAisErrorT fill_ntf_header_part(SaNtfNotificationHeaderT 
> *notificationHeader,
> -                                SaNtfEventTypeT eventType,
> -                                const std::string& comp_name,
> -                                SaUint8T *add_text,
> -                                SaUint16T majorId,
> -                                SaUint16T minorId,
> -                                SaInt8T *avnd_name,
> -                                NCSCONTEXT add_info,
> -                                int type); /* add_info 0 --> no,  1--> 
> node_name, 2--> si_name*/
> -
> -uint32_t sendAlarmNotificationAvd(AVD_CL_CB *avd_cb,
> -                                     const std::string& comp_name,
> -                                     SaUint8T *add_text,
> -                                     SaUint16T majorId,
> -                                     SaUint16T minorId,
> -                                     uint32_t probableCause,
> -                                     uint32_t perceivedSeverity,
> -                                     NCSCONTEXT add_info,
> -                                     int type); /* add_info 0 --> no,  1--> 
> node_name, 2--> si_name*/
> -
> -uint32_t sendStateChangeNotificationAvd(AVD_CL_CB *avd_cb,
> -                                           const std::string& comp_name,
> -                                           SaUint8T *add_text,
> -                                           SaUint16T majorId,
> -                                           SaUint16T minorId,
> -                                           uint32_t sourceIndicator,
> -                                           SaUint16T stateId,
> -                                           SaUint16T oldState,
> -                                           SaUint16T newState,
> -                                           NCSCONTEXT add_info,
> -                                           int type); /* add_info 0 --> no,  
> 1--> node_name, 2--> si_name*/
> -
>  /* Clearing of alarms */
>  void avd_alarm_clear(const std::string& name, SaUint16T minorId, uint32_t 
> probableCause);
>
> diff --git a/src/amf/amfd/sgproc.cc b/src/amf/amfd/sgproc.cc
> --- a/src/amf/amfd/sgproc.cc
> +++ b/src/amf/amfd/sgproc.cc
> @@ -286,7 +286,8 @@ void su_try_repair(const AVD_SU *su)
>       if ((su->sg_of_su->saAmfSGAutoRepair) && (su->saAmfSUFailover) &&
>                       (su->saAmfSUOperState == SA_AMF_OPERATIONAL_DISABLED) &&
>                       (su->saAmfSUPresenceState != 
> SA_AMF_PRESENCE_INSTANTIATION_FAILED) &&
> -                     (su->saAmfSUPresenceState != 
> SA_AMF_PRESENCE_TERMINATION_FAILED)) {
> +                     (su->saAmfSUPresenceState != 
> SA_AMF_PRESENCE_TERMINATION_FAILED) &&
> +                     (su->saAmfSUMaintenanceCampaign.empty())) {
>
>               saflog(LOG_NOTICE, amfSvcUsrName, "Ordering Auto repair of '%s' 
> as sufailover repair action",
>                               su->name.c_str());
> @@ -615,6 +616,11 @@ static void perform_nodeswitchover_recov
>               if (su->list_of_susi == nullptr)
>                       continue;
>
> +             if (!su->saAmfSUMaintenanceCampaign.empty()) {
> +                     node_reboot = false;
> +                     continue;
> +             }
> +
>               if (su_recover_from_fault(su) == NCSCC_RC_FAILURE) {
>                       LOG_ER("%s:%d %s", __FUNCTION__, __LINE__, 
> su->name.c_str());
>                       goto done;
> @@ -712,6 +718,15 @@ void avd_su_oper_state_evh(AVD_CL_CB *cb
>                       TRACE("Component in %s requested FAILFAST", 
> su->name.c_str());
>               }
>
> +             if (!su->saAmfSUMaintenanceCampaign.empty()) {
> +                     saflog(LOG_NOTICE,
> +                             amfSvcUsrName,
> +                             "Node Fail-Fast disabled because maintenance 
> campaign %s is set for su: %s",
> +                             su->saAmfSUMaintenanceCampaign.c_str(),
> +                             su->name.c_str());
> +                     goto done;
> +             }
> +
>               avd_nd_ncs_su_failed(cb, node);
>               goto done;
>       }
> @@ -751,6 +766,16 @@ void avd_su_oper_state_evh(AVD_CL_CB *cb
>                */
>               if (su->sg_of_su->sg_ncs_spec == true) {
>                       su->set_oper_state(SA_AMF_OPERATIONAL_DISABLED);
> +
> +             if (!su->saAmfSUMaintenanceCampaign.empty()) {
> +                     saflog(LOG_NOTICE,
> +                             amfSvcUsrName,
> +                             "Node Fail-Fast disabled because maintenance 
> campaign %s is set for su: %s",
> +                             su->saAmfSUMaintenanceCampaign.c_str(),
> +                             su->name.c_str());
> +                     goto done;
> +             }
> +
>                       avd_nd_ncs_su_failed(cb, node);
>                       goto done;
>               }
> @@ -789,10 +814,16 @@ void avd_su_oper_state_evh(AVD_CL_CB *cb
>                                */
>                               avd_node_oper_state_set(node, 
> SA_AMF_OPERATIONAL_DISABLED);
>                               node->recvr_fail_sw = true;
> +
> +                             // if maintenance campaign is ongoing, disable 
> node reboot
> +                             if (!su->saAmfSUMaintenanceCampaign.empty())
> +                                     node_reboot_req = false;
> +
>                               switch 
> (n2d_msg->msg_info.n2d_opr_state.rec_rcvr.raw) {
>                               case SA_AMF_NODE_FAILOVER:
>                                       if ((node->node_info.nodeId == 
> cb->node_id_avd) &&
> -                                                     
> (node->saAmfNodeAutoRepair)) {
> +                                                     
> (node->saAmfNodeAutoRepair) &&
> +                                                     
> (su->saAmfSUMaintenanceCampaign.empty())) {
>                                               /* This is a case when Act ctlr 
> is rebooting. Don't do appl failover
>                                                  as of now because during 
> appl failover if Act controller reboots,
>                                                  then there may be packet 
> losses. Anyway, this controller is
> @@ -1566,12 +1597,17 @@ void avd_su_si_assign_evh(AVD_CL_CB *cb,
>
>               /* We are checking only application components as on payload 
> all ncs comp are in no_red model.
>                  We are doing the same thing for controller also. */
> +             bool maintenanceCampaignSet(false);
> +
> +             if (su && !su->saAmfSUMaintenanceCampaign.empty())
> +                     maintenanceCampaignSet = true;
> +
>               for (const auto& temp_su : node->list_of_su) {
>                       if (nullptr != temp_su->list_of_susi) {
>                               all_su_unassigned = false;
>                       }
>               }
> -             if (true == all_su_unassigned) {
> +             if (true == all_su_unassigned && !maintenanceCampaignSet) {
>                       /* All app su got unassigned, Safe to reboot the blade 
> now. */
>                       node_try_repair(node);
>               }
> diff --git a/src/amf/amfd/su.cc b/src/amf/amfd/su.cc
> --- a/src/amf/amfd/su.cc
> +++ b/src/amf/amfd/su.cc
> @@ -737,7 +737,8 @@ void AVD_SU::set_pres_state(SaAmfPresenc
>       else if ((pres_state == SA_AMF_PRESENCE_TERMINATION_FAILED) &&
>                       (su_on_node->saAmfNodeFailfastOnTerminationFailure == 
> true) &&
>                       (sg_of_su->saAmfSGAutoRepair == true) &&
> -                     (su_on_node->saAmfNodeAutoRepair == true))
> +                     (su_on_node->saAmfNodeAutoRepair == true) &&
> +                     (saAmfSUMaintenanceCampaign.empty()))
>               /* According to AMF B.04.01 Section 4.8 Page 214 if user 
> configures
>                  saAmfNodeFailfastOnTerminationFailure = true, AMF has to 
> perform
>                  node failfast recovery action. So mark SU to 
> SA_AMF_PRESENCE_TERMINATION_FAILED
> @@ -747,7 +748,8 @@ void AVD_SU::set_pres_state(SaAmfPresenc
>       else if ((pres_state == SA_AMF_PRESENCE_INSTANTIATION_FAILED) &&
>                       (su_on_node->saAmfNodeFailfastOnInstantiationFailure == 
> true) &&
>                       (sg_of_su->saAmfSGAutoRepair == true) &&
> -                     (su_on_node->saAmfNodeAutoRepair == true))
> +                     (su_on_node->saAmfNodeAutoRepair == true) &&
> +                     (saAmfSUMaintenanceCampaign.empty()))
>               /* According to AMF B.04.01 Section 4.6 Page 212 if user 
> configures
>                  saAmfNodeFailfastOnInstantiationFailure = true, AMF has to 
> perform
>                  node failfast recovery action. So mark SU to 
> SA_AMF_PRESENCE_INSTANTIATION_FAILED
> @@ -811,7 +813,7 @@ void AVD_SU::set_oper_state(SaAmfOperati
>       saAmfSUOperState = oper_state;
>
>       avd_send_oper_chg_ntf(name, SA_AMF_NTFID_SU_OP_STATE, old_state,
> -             saAmfSUOperState);
> +             saAmfSUOperState, &saAmfSUMaintenanceCampaign);
>
>       avd_saImmOiRtObjectUpdate(name, "saAmfSUOperState",
>               SA_IMM_ATTR_SAUINT32T, &saAmfSUOperState);
> @@ -1884,6 +1886,7 @@ static void su_ccb_apply_modify_hdlr(str
>                               TRACE("saAmfSUMaintenanceCampaign set to '%s' 
> for '%s'",
>                                         
> su->saAmfSUMaintenanceCampaign.c_str(), su->name.c_str());
>                       }
> +                     su->set_su_maintenance_campaign();
>               } else if (!strcmp(attr_mod->modAttr.attrName, "saAmfSUType")) {
>                       AVD_SUTYPE *sut;
>                       SaNameT sutype_name = *(SaNameT*) 
> attr_mod->modAttr.attrValues[0];
> @@ -2118,6 +2121,13 @@ void AVD_SU::send_attribute_update(AVSV_
>                       memcpy(&param.value[0], &sufailover, param.value_len);
>                       break;
>               }
> +             case saAmfSUMaintenanceCampaign_ID:
> +             {
> +                     param.attr_id = saAmfSUMaintenanceCampaign_ID;
> +                     param.value_len = saAmfSUMaintenanceCampaign.length();
> +                     memcpy(&param.value[0], 
> saAmfSUMaintenanceCampaign.data(), param.value_len);
> +                     break;
> +             }
>               default:
>                       osafassert(0);
>               }
> @@ -2137,6 +2147,10 @@ void AVD_SU::set_su_failover(bool value)
>       send_attribute_update(saAmfSUFailOver_ID);
>  }
>
> +void AVD_SU::set_su_maintenance_campaign(void) {
> +     send_attribute_update(saAmfSUMaintenanceCampaign_ID);
> +}
> +
>  /**
>   * Delete all SUSIs assigned to the SU.
>   *
> @@ -2209,7 +2223,7 @@ void AVD_SU::set_su_switch(SaToggleState
>       m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, this, AVSV_CKPT_SU_SWITCH);
>  }
>
> -AVD_AVND *AVD_SU::get_node_ptr(void) {
> +AVD_AVND *AVD_SU::get_node_ptr(void) const {
>        if (su_is_external == true)
>                return avd_cb->ext_comp_info.local_avnd_node;
>        else
> diff --git a/src/amf/amfd/su.h b/src/amf/amfd/su.h
> --- a/src/amf/amfd/su.h
> +++ b/src/amf/amfd/su.h
> @@ -96,6 +96,7 @@ class AVD_SU {
>       AVD_SU *su_list_su_type_next;
>
>       void set_su_failover(bool value);
> +     void set_su_maintenance_campaign(void);
>       void dec_curr_stdby_si();
>       void inc_curr_stdby_si();
>       void inc_curr_act_si();
> @@ -116,7 +117,7 @@ class AVD_SU {
>       void set_term_state(bool state);
>       void remove_from_model();
>       void set_su_switch(SaToggleState state, bool wrt_to_imm = true);
> -     AVD_AVND *get_node_ptr(void);
> +     AVD_AVND *get_node_ptr(void) const;
>       bool is_in_service(void);
>       bool is_instantiable(void);
>       void reset_all_comps_assign_flag();
> diff --git a/src/amf/amfnd/avnd_su.h b/src/amf/amfnd/avnd_su.h
> --- a/src/amf/amfnd/avnd_su.h
> +++ b/src/amf/amfnd/avnd_su.h
> @@ -143,6 +143,8 @@ typedef struct avnd_su_tag {
>       uint32_t si_active_cnt; /* no of active SIs assigned to this su */
>       uint32_t si_standby_cnt;        /* no of standby SIs assigned to this 
> su */
>
> +     std::string suMaintenanceCampaign;
> +
>       /*
>        * Ordered comp list (based on inst level). Note that as the
>        * lexicographic key (comp-name) & the keys used for
> diff --git a/src/amf/amfnd/clc.cc b/src/amf/amfnd/clc.cc
> --- a/src/amf/amfnd/clc.cc
> +++ b/src/amf/amfnd/clc.cc
> @@ -971,19 +971,29 @@ uint32_t avnd_comp_clc_st_chng_prc(AVND_
>       if (comp->su->is_ncs == true) {
>               if(SA_AMF_PRESENCE_INSTANTIATION_FAILED == final_st) {
>                       LOG_ER("'%s'got Inst failed", comp->name.c_str());
> -                     opensaf_reboot(avnd_cb->node_info.nodeId,
> -                                                     
> osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
> -                                                     "NCS component 
> Instantiation failed");
> -                     LOG_ER("Amfnd is exiting (due to ncs comp inst failed) 
> to aid fast reboot");
> -                     exit(0);
> +                     if (comp->su->suMaintenanceCampaign.empty()) {
> +                       opensaf_reboot(avnd_cb->node_info.nodeId,
> +                                       
> osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
> +                                       "NCS component Instantiation failed");
> +                       LOG_ER("Amfnd is exiting (due to ncs comp inst 
> failed) to aid fast reboot");
> +                       exit(0);
> +                     } else {
> +                             LOG_NO("not rebooting as maintenance campaign 
> is ongoing");
> +                             goto done;
> +                     }
>               }
>               if(SA_AMF_PRESENCE_TERMINATION_FAILED == final_st) {
>                       LOG_ER("'%s'got Term failed", comp->name.c_str());
> -                     opensaf_reboot(avnd_cb->node_info.nodeId,
> +                     if (comp->su->suMaintenanceCampaign.empty()) {
> +                             opensaf_reboot(avnd_cb->node_info.nodeId,
>                                                       
> osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
>                                                       "NCS component 
> Termination failed");
> -                     LOG_ER("Amfnd is exiting (due to ncs comp term failed) 
> to aid fast reboot");
> -                     exit(0);
> +                             LOG_ER("Amfnd is exiting (due to ncs comp term 
> failed) to aid fast reboot");
> +                             exit(0);
> +                     } else {
> +                             LOG_NO("not rebooting as maintenance campaign 
> is ongoing");
> +                             goto done;
> +                     }
>               }
>       }
>
> diff --git a/src/amf/amfnd/comp.cc b/src/amf/amfnd/comp.cc
> --- a/src/amf/amfnd/comp.cc
> +++ b/src/amf/amfnd/comp.cc
> @@ -2675,6 +2675,21 @@ uint32_t comp_restart_initiate(AVND_COMP
>       if (NCSCC_RC_SUCCESS != rc)
>               goto done;
>
> +  if (!comp->su->suMaintenanceCampaign.empty()) {
> +    LOG_NO("not restarting comp because maintenance campaign is set: %s",
> +    comp->su->suMaintenanceCampaign.c_str());
> +
> +    m_AVND_COMP_OPER_STATE_SET(comp, SA_AMF_OPERATIONAL_DISABLED);
> +    rc = avnd_comp_oper_state_avd_sync(cb, comp);
> +
> +    /* update su oper state */
> +    if (m_AVND_SU_OPER_STATE_IS_ENABLED(comp->su)) {
> +      m_AVND_SU_OPER_STATE_SET(comp->su, SA_AMF_OPERATIONAL_DISABLED);
> +      avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSUOperState_ID, 
> comp->su->name, comp->su->oper);
> +    }
> +    goto done;
> +  }
> +
>       rc = avnd_comp_clc_fsm_run(cb, comp, AVND_COMP_CLC_PRES_FSM_EV_RESTART);
>       if (NCSCC_RC_SUCCESS != rc)
>               goto done;
> diff --git a/src/amf/amfnd/err.cc b/src/amf/amfnd/err.cc
> --- a/src/amf/amfnd/err.cc
> +++ b/src/amf/amfnd/err.cc
> @@ -413,8 +413,14 @@ uint32_t avnd_err_process(AVND_CB *cb, A
>               LOG_ER("%s Faulted due to:%s Recovery is:%s",
>                      comp->name.c_str(), g_comp_err[comp->err_info.src], 
> g_comp_rcvr[esc_rcvr - 1]);
>               /* do the local node reboot for node_failfast or ncs component 
> failure*/
> -             opensaf_reboot(avnd_cb->node_info.nodeId, 
> osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
> +             if (comp->su->suMaintenanceCampaign.empty()) {
> +                     opensaf_reboot(avnd_cb->node_info.nodeId, 
> osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
>                               "Component faulted: recovery is node failfast");
> +             } else {
> +                     LOG_NO("not rebooting because maintenance campaign is 
> set: %s",
> +                     comp->su->suMaintenanceCampaign.c_str());
> +                     goto done;
> +             }
>       }
>
>       /* execute the recovery */
> @@ -671,6 +677,8 @@ uint32_t avnd_err_rcvr_su_restart(AVND_C
>       if (NCSCC_RC_SUCCESS != rc)
>               goto done;
>
> +     avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSUOperState_ID, su->name, 
> su->oper);
> +
>       set_suRestart_flag(su);
>
>       if (su_all_comps_restartable(*su) == true) {
> @@ -694,6 +702,13 @@ uint32_t avnd_err_rcvr_su_restart(AVND_C
>               rc = avnd_su_si_unmark(cb, su);
>               if (NCSCC_RC_SUCCESS != rc)
>                       goto done;
> +
> +             if (!su->suMaintenanceCampaign.empty()) {
> +                     LOG_NO("not restarting su because maintenance campaign 
> is set: %s",
> +                             su->suMaintenanceCampaign.c_str());
> +                     goto done;
> +             }
> +
>               rc = avnd_su_pres_fsm_run(cb, su, 0, 
> AVND_SU_PRES_FSM_EV_RESTART);
>               if (NCSCC_RC_SUCCESS != rc)
>                       goto done;
> @@ -717,6 +732,12 @@ uint32_t avnd_err_rcvr_su_restart(AVND_C
>               TODO:In future when AMF supports comp-failover in spec 
> compliance then this
>                       case should be alligned with that.
>               */
> +             if (!su->suMaintenanceCampaign.empty()) {
> +                     LOG_NO("not restarting su because maintenance campaign 
> is set: %s",
> +                             su->suMaintenanceCampaign.c_str());
> +                     goto done;
> +             }
> +
>               if (m_AVND_SU_IS_PREINSTANTIABLE(su)) {
>                       if (m_AVND_COMP_TYPE_IS_PREINSTANTIABLE(failed_comp))
>                               rc = avnd_comp_clc_fsm_run(cb, failed_comp, 
> AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
> @@ -1026,11 +1047,17 @@ uint32_t avnd_err_rcvr_node_failover(AVN
>               rc = avnd_comp_clc_fsm_run(cb, comp, 
> AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
>               if (rc != NCSCC_RC_SUCCESS) {
>                       LOG_ER("'%s' termination failed", comp->name.c_str());
> -                     opensaf_reboot(avnd_cb->node_info.nodeId,
> +                     if (comp->su->suMaintenanceCampaign.empty()) {
> +                             opensaf_reboot(avnd_cb->node_info.nodeId,
>                                                  
> osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
>                                                  "Component termination 
> failed at node failover");
> -                     LOG_ER("Exiting (due to comp term failed) to aid fast 
> node reboot");
> -                     exit(1);
> +                             LOG_ER("Exiting (due to comp term failed) to 
> aid fast node reboot");
> +                             exit(1);
> +                     } else {
> +                             LOG_NO("not rebooting because maintenance 
> campaign is set: %s",
> +                                     
> comp->su->suMaintenanceCampaign.c_str());
> +                             continue;
> +                     }
>               }
>               avnd_su_pres_state_set(cb, comp->su, 
> SA_AMF_PRESENCE_TERMINATING);
>       }
> diff --git a/src/amf/amfnd/sudb.cc b/src/amf/amfnd/sudb.cc
> --- a/src/amf/amfnd/sudb.cc
> +++ b/src/amf/amfnd/sudb.cc
> @@ -226,6 +226,9 @@ uint32_t avnd_su_oper_req(AVND_CB *cb, A
>                       osafassert(sizeof(uint32_t) == param->value_len);
>                       su->sufailover = m_NCS_OS_NTOHL(*(uint32_t 
> *)(param->value));
>                       break;
> +             case saAmfSUMaintenanceCampaign_ID:
> +                     su->suMaintenanceCampaign = std::string(param->value, 
> param->value_len);
> +                     break;
>               default:
>                       LOG_NO("%s: Unsupported attribute %u", __FUNCTION__, 
> param->attr_id);
>                       goto done;
> diff --git a/src/amf/amfnd/susm.cc b/src/amf/amfnd/susm.cc
> --- a/src/amf/amfnd/susm.cc
> +++ b/src/amf/amfnd/susm.cc
> @@ -2832,8 +2832,14 @@ uint32_t avnd_su_pres_terming_comptermfa
>
>       if (true == su->is_ncs) {
>               std::string reason = "SU '" + su->name + "' Termination-failed";
> -             opensaf_reboot(avnd_cb->node_info.nodeId, 
> osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
> -                             reason.c_str());
> +             if (su->suMaintenanceCampaign.empty()) {
> +                     opensaf_reboot(avnd_cb->node_info.nodeId, 
> osaf_extended_name_borrow(&avnd_cb->node_info.executionEnvironment),
> +                               reason.c_str());
> +             } else {
> +                     LOG_ER("%s", reason.c_str());
> +                     LOG_NO("not rebooting because su maintenance campaign 
> is set: %s",
> +                             su->suMaintenanceCampaign.c_str());
> +             }
>       }
>
>   done:
> diff --git a/src/amf/common/amf_defs.h b/src/amf/common/amf_defs.h
> --- a/src/amf/common/amf_defs.h
> +++ b/src/amf/common/amf_defs.h
> @@ -204,6 +204,7 @@ typedef enum
>     saAmfSUParentSGName_ID = 12,
>     saAmfSUIsExternal_ID = 13,
>     saAmfSURestartCount_ID = 14,
> +   saAmfSUMaintenanceCampaign_ID = 15,
>  } AVSV_AMF_SU_ATTR_ID;
>
>  /* Attribute ID enum for the saAmfComp class */
>

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to