Hi Praveen, Nagu, I have been testing the admin restart operation on an SU which contains at least one non-restartable component. I can see that AMF switches the SI over to another SU.
However, on p. 383 the spec says: /"If all components within the service unit have a configured recovery policy of restart,//that is, the saAmfCompDisableRestart configuration attribute of all components is//set to SA_FALSE (see the SaAmfComp object class in //Section 8.13.2//), it is not necessary to reassign the assigned service instances; //*however, if at-least one component*//*within the service unit has the saAmfCompDisableRestart configuration attribute*//*set to SA_TRUE, a reassignment of the service instances assigned to a service unit*//*during its restart (before termination) must be attempted by the Availability Management Framework in course of this administrative action to prevent potential service*//*disruption.*//In this case, the Availability Management Framework does not set the//presence state of the component to restarting and transitions through the individual//terminating, terminated, instantiating, instantiated presence states instead."// / If I understand correctly, AMF should try to avoid assignment to another SU. Therefore: - For a component with DisableRestart=TRUE (which means the component does not have the restart recovery policy), the CSI assignment will be removed and assigned back after instantiation. - For a component with DisableRestart=FALSE, the CSI assignment is not removed (no csi_remove callback) and gets reassigned (csi_set callback) after instantiation. 
(The above behavior is what I can see with a single INSERVICE SU; of course it can't fail over in that case :) ) However, on p. 452 the spec says: /*"3.11.1.3.2 Fail-Over Recovery Action*//Either because the restart recovery action has been disabled in the configuration of a//particular component (its saAmfCompDisableRestart configuration attribute is set//to SA_TRUE, see the SaAmfComp object class in //Section 8.13.2//), or because previous attempts to restart the component failed, or because the error report specified//another recommended recovery action, the Availability Management Framework may//decide to recover by reassigning service instances to service units other than the one//to which they are currently assigned.//"/ So it sounds like AMF will likely initiate a failover if any comp has saAmfCompDisableRestart=TRUE. Do you think the spec means that AMF should treat saAmfCompDisableRestart=TRUE/FALSE differently depending on the situation, i.e. on whether it is handling the SU-RESTART admin operation or the Escalation & Recovery flow? Thanks, Minh On 9/11/2015 12:04 AM, [email protected] wrote: > Message: 3 > Date: Thu, 10 Sep 2015 19:33:31 +0530 > From:[email protected] > Subject: [devel] [PATCH 3 of 3] amf: support RESTART admin op on su > [#1455] > To:[email protected],[email protected], > [email protected] > Cc:[email protected] > Message-ID: <a302365f232bee974ccc.1441893811@CON-PC> > Content-Type: text/plain; charset="us-ascii" > > osaf/services/saf/amf/amfd/include/su.h | 6 +- > osaf/services/saf/amf/amfd/ndproc.cc | 77 ++++++++++++++++++++++- > osaf/services/saf/amf/amfd/sgproc.cc | 27 ++++++++ > osaf/services/saf/amf/amfd/su.cc | 106 > +++++++++++++++++++++++++++++++- > osaf/services/saf/amf/amfnd/su.cc | 21 ++++++- > 5 files changed, 230 insertions(+), 7 deletions(-) > > > Admin operation is supported for both restartable and non-restartable > su. > Patch for #315 contains changes in amfnd and amfd which are common > for all the three tickets viz. #315, #334 and #1455. 
> > TODO: To take care of controller role change when operation in > progress. > > diff --git a/osaf/services/saf/amf/amfd/include/su.h > b/osaf/services/saf/amf/amfd/include/su.h > --- a/osaf/services/saf/amf/amfd/include/su.h > +++ b/osaf/services/saf/amf/amfd/include/su.h > @@ -132,6 +132,7 @@ class AVD_SU { > void unlock_instantiation(SaImmOiHandleT immoi_handle, > SaInvocationT invocation); > void repaired(SaImmOiHandleT immoi_handle, SaInvocationT invocation); > + void restart(SaImmOiHandleT immoi_handle, SaInvocationT invocation); > void shutdown(SaImmOiHandleT immoi_handle, SaInvocationT invocation); > void lock(SaImmOiHandleT immoi_handle, SaInvocationT invocation, > SaAmfAdminStateT adm_state); > @@ -140,7 +141,10 @@ class AVD_SU { > SaAisErrorT check_su_stability(); > bool su_any_comp_undergoing_restart_admin_op(); > AVD_COMP *su_get_comp_undergoing_restart_admin_op(); > - > + bool su_all_comps_restartable(); > + bool is_any_non_restartable_comp_assigned(); > + bool all_pi_comps_restartable(); > + bool all_pi_comps_nonrestartable(); > private: > void initialize(); > void send_attribute_update(AVSV_AMF_SU_ATTR_ID attrib_id); > diff --git a/osaf/services/saf/amf/amfd/ndproc.cc > b/osaf/services/saf/amf/amfd/ndproc.cc > --- a/osaf/services/saf/amf/amfd/ndproc.cc > +++ b/osaf/services/saf/amf/amfd/ndproc.cc > @@ -352,6 +352,72 @@ done: > TRACE_LEAVE2("(%llu)", comp->admin_pend_cbk.invocation); > } > > +static void surestart_admin_op_report_to_imm(AVD_SU *su, SaAmfPresenceStateT > pres) > +{ > + TRACE_ENTER2("%s", avd_pres_state_name[pres]); > + SaAisErrorT rc = SA_AIS_OK; > + > + if ((su->su_all_comps_restartable() == true) || > + ((su->saAmfSUPreInstantiable == true) && > + (su->all_pi_comps_restartable() == true))) { > + if ((su->saAmfSUPresenceState == SA_AMF_PRESENCE_INSTANTIATED) > && > + (pres != SA_AMF_PRESENCE_RESTARTING)) > + rc = SA_AIS_ERR_BAD_OPERATION; > + else if ((su->saAmfSUPresenceState == > SA_AMF_PRESENCE_RESTARTING) && > + (pres != 
SA_AMF_PRESENCE_INSTANTIATING)) > + rc = SA_AIS_ERR_BAD_OPERATION; > + else if (su->saAmfSUPresenceState == > SA_AMF_PRESENCE_INSTANTIATING) { > + if (pres == SA_AMF_PRESENCE_INSTANTIATED) > + rc = SA_AIS_OK; > + else > + rc = SA_AIS_ERR_REPAIR_PENDING; > + } else if ((pres == SA_AMF_PRESENCE_RESTARTING) || > + (pres == SA_AMF_PRESENCE_INSTANTIATING)) { > + TRACE("Valid state transition, wait for final > transition."); > + goto done; > + } > + > + } else { > + if ((su->saAmfSUPresenceState == SA_AMF_PRESENCE_INSTANTIATED) > && > + (pres != SA_AMF_PRESENCE_TERMINATING)) > + rc = SA_AIS_ERR_BAD_OPERATION; > + else if ((su->saAmfSUPresenceState == > SA_AMF_PRESENCE_TERMINATING) && > + (pres != SA_AMF_PRESENCE_INSTANTIATING)) { > + if (((su->saAmfSUPreInstantiable == false) || > + > (su->all_pi_comps_nonrestartable() == true)) && > + (pres == > SA_AMF_PRESENCE_UNINSTANTIATED)) { > + TRACE("Valid state transition, wait for final > transition."); > + goto done; > + } > + rc = SA_AIS_ERR_BAD_OPERATION; > + } else if ((su->all_pi_comps_nonrestartable() == true) && > + (su->saAmfSUPresenceState == > SA_AMF_PRESENCE_UNINSTANTIATED) && > + (pres != SA_AMF_PRESENCE_INSTANTIATING)) > + rc = SA_AIS_ERR_BAD_OPERATION; > + else if (su->saAmfSUPresenceState == > SA_AMF_PRESENCE_INSTANTIATING) { > + if (pres == SA_AMF_PRESENCE_INSTANTIATED) > + rc = SA_AIS_OK; > + else > + rc = SA_AIS_ERR_REPAIR_PENDING; > + } else if ((pres == SA_AMF_PRESENCE_TERMINATING) || > + (pres == SA_AMF_PRESENCE_INSTANTIATING)) { > + TRACE("Valid state transition, wait for final > transition."); > + goto done; > + } > + } > + if (rc == SA_AIS_OK) { > + > avd_saImmOiAdminOperationResult(avd_cb->immOiHandle,su->pend_cbk.invocation, > rc); > + } else { > + report_admin_op_error(avd_cb->immOiHandle, > su->pend_cbk.invocation, > + rc, &su->pend_cbk, "Couldn't restart su '%s'", > + su->name.value); > + } > + su->pend_cbk.admin_oper = static_cast<SaAmfAdminOperationIdT>(0); > + su->pend_cbk.invocation = 0; > 
+done: > + TRACE_LEAVE2("(%llu)", su->pend_cbk.invocation); > +} > + > /** > * handler to report error response to imm for any pending admin operation > on su > * > @@ -412,6 +478,9 @@ static void su_admin_op_report_to_imm(AV > su->name.value); > } > break; > + case SA_AMF_ADMIN_RESTART: > + surestart_admin_op_report_to_imm(su, pres); > + break; > default: > break; > } > @@ -809,16 +878,18 @@ void avd_data_update_req_evh(AVD_CL_CB * > TRACE("su pres state"); > if > (n2d_msg->msg_info.n2d_data_req.param_info.value_len == sizeof(uint32_t)) { > l_val = ntohl(*((uint32_t > *)&n2d_msg->msg_info.n2d_data_req.param_info.value[0])); > - > su->set_pres_state(static_cast<SaAmfPresenceStateT>(l_val)); > > /* Send response to any admin callbacks > delivered by IMM if not sent already. */ > if > (su->su_on_node->admin_node_pend_cbk.invocation != 0) { > node_admin_op_report_to_imm(su, > static_cast<SaAmfPresenceStateT>(l_val)); > } else if (su->pend_cbk.invocation != > 0) { > su_admin_op_report_to_imm(su, > static_cast<SaAmfPresenceStateT>(l_val)); > - } else if (su->su_on_node->admin_ng != > NULL) { > + } > + > + > su->set_pres_state(static_cast<SaAmfPresenceStateT>(l_val)); > + > + if (su->su_on_node->admin_ng != NULL) > > process_su_si_response_for_ng(su, SA_AIS_OK); > - } > > if (l_val == > SA_AMF_PRESENCE_TERMINATION_FAILED) { > for (AVD_SI* si = > su->sg_of_su->list_of_si; si != NULL; > diff --git a/osaf/services/saf/amf/amfd/sgproc.cc > b/osaf/services/saf/amf/amfd/sgproc.cc > --- a/osaf/services/saf/amf/amfd/sgproc.cc > +++ b/osaf/services/saf/amf/amfd/sgproc.cc > @@ -931,6 +931,31 @@ void process_su_si_response_for_comp(AVD > } > TRACE_LEAVE(); > } > + > +void process_su_si_response_for_surestart_admin_op(AVD_SU *su) > +{ > + TRACE_ENTER(); > + if (su->list_of_susi != NULL) { > + if ((su->saAmfSUPreInstantiable == false) && > (su->su_all_comps_restartable() == false) && > + (su->list_of_susi->state == > SA_AMF_HA_QUIESCED)) { > + TRACE("For NPI '%s' RESTART admin op > 
ends.",su->name.value); > + su->complete_admin_op(SA_AIS_OK); > + } > + TRACE_LEAVE(); > + return; > + } > + > + uint32_t rc = avd_admin_op_msg_snd(&su->name, AVSV_SA_AMF_SU, > SA_AMF_ADMIN_RESTART, > + su->su_on_node); > + if (rc != NCSCC_RC_SUCCESS) { > + report_admin_op_error(avd_cb->immOiHandle, > su->pend_cbk.invocation, > + SA_AIS_ERR_TIMEOUT, NULL, > + "Admin op request send failed '%s'", > su->name.value); > + su->pend_cbk.admin_oper = > static_cast<SaAmfAdminOperationIdT>(0); > + su->pend_cbk.invocation = 0; > + } > + TRACE_LEAVE(); > +} > > /***************************************************************************** > * Function: avd_su_si_assign_func > * > @@ -1370,6 +1395,8 @@ void avd_su_si_assign_evh(AVD_CL_CB *cb, > } > else > > su->complete_admin_op(SA_AIS_ERR_TIMEOUT); > + } else if (su->pend_cbk.admin_oper == > SA_AMF_ADMIN_RESTART) { > + > process_su_si_response_for_surestart_admin_op(su); > } > } else if (su->su_on_node->admin_node_pend_cbk.invocation != 0) > { > /* decrement the SU count on the node undergoing admin > operation > diff --git a/osaf/services/saf/amf/amfd/su.cc > b/osaf/services/saf/amf/amfd/su.cc > --- a/osaf/services/saf/amf/amfd/su.cc > +++ b/osaf/services/saf/amf/amfd/su.cc > @@ -1133,7 +1133,35 @@ void AVD_SU::repaired(SaImmOiHandleT imm > done: > TRACE_LEAVE(); > } > +void AVD_SU::restart(SaImmOiHandleT immoi_handle, > + SaInvocationT invocation) { > + TRACE_ENTER2("'%s'", name.value); > > + pend_cbk.admin_oper = SA_AMF_ADMIN_RESTART; > + pend_cbk.invocation = invocation; > + if ((su_all_comps_restartable() == true) || > + (is_any_non_restartable_comp_assigned() == false)) { > + if (avd_admin_op_msg_snd(&name, AVSV_SA_AMF_SU, > SA_AMF_ADMIN_RESTART, > + su_on_node) != NCSCC_RC_SUCCESS) { > + report_admin_op_error(immoi_handle, invocation, > SA_AIS_ERR_TIMEOUT, NULL, > + "Admin op request send failed '%s'", > name.value); > + pend_cbk.invocation = 0; > + pend_cbk.admin_oper = > static_cast<SaAmfAdminOperationIdT>(0); > + } 
> + } else { > + /* Atleast one non-restartable (saAmfCompDisableRestart or > + saAmfCtDefDisableRestart is true) comp is assigned. > + First gracefully switch-over SU's assignments to other > + At present assignment of whole SU will be gracefully > + reassigned. > + Thus for PI applications modeled on NWay and Nway Active > model > + this is spec deviation. > + */ > + set_readiness_state(SA_AMF_READINESS_OUT_OF_SERVICE); > + sg_of_su->su_fault(avd_cb, this); > + } > + TRACE_LEAVE(); > +} > /** > * Handle admin operations on SaAmfSU objects. > * > @@ -1153,7 +1181,7 @@ static void su_admin_op_cb(SaImmOiHandle > > TRACE_ENTER2("%llu, '%s', %llu", invocation, su_name->value, op_id); > > - if ( op_id > SA_AMF_ADMIN_SHUTDOWN && op_id != SA_AMF_ADMIN_REPAIRED) { > + if ( op_id > SA_AMF_ADMIN_RESTART && op_id != SA_AMF_ADMIN_REPAIRED) { > report_admin_op_error(immoi_handle, invocation, > SA_AIS_ERR_NOT_SUPPORTED, NULL, > "Unsupported admin op for SU: %llu", op_id); > goto done; > @@ -1228,10 +1256,12 @@ static void su_admin_op_cb(SaImmOiHandle > if (((su->saAmfSUAdminState == SA_AMF_ADMIN_UNLOCKED) && > (op_id != SA_AMF_ADMIN_LOCK) && > (op_id != SA_AMF_ADMIN_SHUTDOWN) && > + (op_id != SA_AMF_ADMIN_RESTART) && > (op_id != SA_AMF_ADMIN_REPAIRED)) || > ((su->saAmfSUAdminState == SA_AMF_ADMIN_LOCKED) && > (op_id != SA_AMF_ADMIN_UNLOCK) && > (op_id != SA_AMF_ADMIN_REPAIRED) && > + (op_id != SA_AMF_ADMIN_RESTART) && > (op_id != SA_AMF_ADMIN_LOCK_INSTANTIATION)) || > ((su->saAmfSUAdminState == SA_AMF_ADMIN_LOCKED_INSTANTIATION) && > (op_id != SA_AMF_ADMIN_UNLOCK_INSTANTIATION) && > @@ -1243,7 +1273,36 @@ static void su_admin_op_cb(SaImmOiHandle > "State transition invalid, state %u, op %llu", > su->saAmfSUAdminState, op_id); > goto done; > } > - > + if (op_id == SA_AMF_ADMIN_RESTART) { > + if (su->sg_of_su->sg_ncs_spec == true) { > + report_admin_op_error(immoi_handle, invocation, > SA_AIS_ERR_BAD_OPERATION, NULL, > + "Not allowed on middleware SU: %s, > op_id: %llu", > + 
su->name.value, op_id); > + goto done; > + } > + if (su->saAmfSUPresenceState == SA_AMF_PRESENCE_UNINSTANTIATED) > { > + report_admin_op_error(immoi_handle, invocation, > SA_AIS_ERR_BAD_OPERATION, NULL, > + "Prescence state of SU is > uninstantiated, it is: %u, op_id: %llu", > + su->saAmfSUPresenceState, op_id); > + goto done; > + } > + if (su->saAmfSUOperState == SA_AMF_OPERATIONAL_DISABLED) { > + report_admin_op_error(immoi_handle, invocation, > SA_AIS_ERR_BAD_OPERATION, NULL, > + "SU is disabled (%u), repair it or > check node status, op_id: %llu", > + su->saAmfSUOperState, op_id); > + goto done; > + } > + SaAisErrorT rc = SA_AIS_OK; > + rc = su->check_su_stability(); > + if (rc != SA_AIS_OK) { > + report_admin_op_error(immoi_handle, invocation, > + SA_AIS_ERR_TRY_AGAIN, NULL, > + "Some entity is unstable, > Operation cannot " > + "be performed on '%s'" > + "Check syslog for entity > details", su->name.value); > + goto done; > + } > + } > node = su->get_node_ptr(); > if (node->admin_node_pend_cbk.admin_oper != 0) { > report_admin_op_error(immoi_handle, invocation, > SA_AIS_ERR_TRY_AGAIN, NULL, > @@ -1272,6 +1331,9 @@ static void su_admin_op_cb(SaImmOiHandle > case SA_AMF_ADMIN_REPAIRED: > su->repaired(immoi_handle, invocation); > break; > + case SA_AMF_ADMIN_RESTART: > + su->restart(immoi_handle, invocation); > + break; > default: > report_admin_op_error(immoi_handle, invocation, > SA_AIS_ERR_INVALID_PARAM, NULL, > "Unsupported admin op"); > @@ -2202,3 +2264,43 @@ AVD_COMP *AVD_SU::su_get_comp_undergoing > } > return NULL; > } > +bool AVD_SU::su_all_comps_restartable() > +{ > + for (AVD_COMP *comp = list_of_comp; comp; comp = comp->su_comp_next) > { > + if (comp->comp_info.comp_restart == true) > + return false; > + } > + return true; > +} > +bool AVD_SU::is_any_non_restartable_comp_assigned() > +{ > + for (AVD_COMP *comp = list_of_comp; comp; comp = comp->su_comp_next) > { > + if ((comp->comp_info.comp_restart == true) && > + (is_comp_assigned_any_csi(comp) == 
true)) > + return true; > + } > + return false; > +} > +bool AVD_SU::all_pi_comps_restartable() > +{ > + for (AVD_COMP *comp = list_of_comp; comp; comp = comp->su_comp_next) { > + AVD_COMP_TYPE *comptype = > comptype_db->find(Amf::to_string(&comp->saAmfCompType)); > + if ((comp->comp_info.comp_restart == true) && > + ((comptype->saAmfCtCompCategory == > SA_AMF_COMP_SA_AWARE) || > + > (IS_COMP_PROXIED_PI(comptype->saAmfCtCompCategory)))) > + return false; > + } > + return true; > +} > +bool AVD_SU::all_pi_comps_nonrestartable() > +{ > + for (AVD_COMP *comp = list_of_comp; comp; comp = comp->su_comp_next) > { > + AVD_COMP_TYPE *comptype = > comptype_db->find(Amf::to_string(&comp->saAmfCompType)); > + if ((comp->comp_info.comp_restart == false) && > + ((comptype->saAmfCtCompCategory == > SA_AMF_COMP_SA_AWARE) || > + > (IS_COMP_PROXIED_PI(comptype->saAmfCtCompCategory)))) > + return false; > + } > + return true; > +} > + > diff --git a/osaf/services/saf/amf/amfnd/su.cc > b/osaf/services/saf/amf/amfnd/su.cc > --- a/osaf/services/saf/amf/amfnd/su.cc > +++ b/osaf/services/saf/amf/amfnd/su.cc > @@ -668,12 +668,31 @@ uint32_t avnd_evt_su_admin_op_req(AVND_C > > break; > } > + case SA_AMF_ADMIN_RESTART: { > + LOG_NO("Admin Restart request for '%s'", su->name.value); > + su->admin_op_Id = SA_AMF_ADMIN_RESTART; > + set_suRestart_flag(su); > + m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su, > AVND_CKPT_SU_FLAG_CHANGE); > + if ((su_all_comps_restartable(*su) == true) || > + (is_any_non_restartable_comp_assigned(*su) == > false)) { > + rc = avnd_su_curr_info_del(cb, su); > + if (NCSCC_RC_SUCCESS != rc) > + goto done; > + rc = avnd_su_si_unmark(cb, su); > + if (NCSCC_RC_SUCCESS != rc) > + goto done; > + } > + rc = avnd_su_pres_fsm_run(cb, su, 0, > AVND_SU_PRES_FSM_EV_RESTART); > + if (NCSCC_RC_SUCCESS != rc) > + goto done; > + break; > + } > default: > LOG_NO("%s: unsupported adm op %u", __FUNCTION__, > info->oper_id); > rc = NCSCC_RC_FAILURE; > break; > } > - > +done: > TRACE_LEAVE(); > 
return rc; > } ------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/opensaf-devel
