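It might be good to make this timeout value (the host EE instantiation timer, currently hard-coded to 20 seconds via `SA_TIME_ONE_SECOND * 20`) configurable in the plm*.conf file.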
Ack, Mathi. On Tue, Jan 2, 2018 at 4:53 PM, Alex Jones <[email protected]> wrote: > Child EE which is a controller can get shutdown because its parent EE > (host) > has not connected to PLM, yet. > > If the controller is a VM, and the host is a payload, there is a race > condition when instantiating the EEs. If the host doesn't connect to PLM > first, then when the controller EE (child of host EE) connects to PLM, it > see that the host isn't instantiated, and shuts itself down. > > If the controller child EE instantiates before the host has connected to > PLM, > set a 20 second timer. If the host doesn't instantiate within this time, > then > all child EEs will be shut down. > --- > src/plm/common/plms_evt.h | 3 +- > src/plm/plmd/plms_plmc.c | 79 ++++++++++++++++++++++++++++++ > +++++++++++++++++ > src/plm/plmd/plms_utils.c | 11 ++++++- > 3 files changed, 91 insertions(+), 2 deletions(-) > > diff --git a/src/plm/common/plms_evt.h b/src/plm/common/plms_evt.h > index 43f4748..e87c632 100644 > --- a/src/plm/common/plms_evt.h > +++ b/src/plm/common/plms_evt.h > @@ -98,7 +98,8 @@ typedef enum { > typedef enum { > PLMS_TMR_NONE, > PLMS_TMR_EE_INSTANTIATING, > - PLMS_TMR_EE_TERMINATING > + PLMS_TMR_EE_TERMINATING, > + PLMS_TMR_EE_HOST_INSTANTIATED > } PLMS_TMR_EVT_TYPE; > > typedef struct plms_imm_admin_op { > diff --git a/src/plm/plmd/plms_plmc.c b/src/plm/plmd/plms_plmc.c > index 06c8d4b..c310a86 100644 > --- a/src/plm/plmd/plms_plmc.c > +++ b/src/plm/plmd/plms_plmc.c > @@ -50,6 +50,8 @@ static SaUint32T > plms_os_info_resp_mngt_flag_clear(PLMS_ENTITY > *); > static void plms_insted_dep_immi_failure_cbk_call(PLMS_ENTITY *, > PLMS_GROUP_ENTITY *); > static void plms_is_dep_set_cbk_call(PLMS_ENTITY *); > + > +static void plms_ee_stop_host_timer(PLMS_ENTITY *); > /*********************************************************** > ******************* > @brief : Process instantiating event from PLMC. > 1. Do the OS verification irrespective of previous state. 
> @@ -346,6 +348,7 @@ SaUint32T plms_plmc_tcp_connect_process(PLMS_ENTITY > *ent) > if ((SA_PLM_EE_ADMIN_LOCKED_INSTANTIATION == > ent->entity.ee_entity.saPlmEEAdminState) || > ((NULL != ent->parent) && > + ent->parent->entity_type != PLMS_EE_ENTITY && > (plms_is_rdness_state_set(ent->parent, > SA_PLM_READINESS_OUT_OF_SERVICE))) > || > (!plms_min_dep_is_ok(ent))) { > @@ -379,6 +382,19 @@ SaUint32T plms_plmc_tcp_connect_process(PLMS_ENTITY > *ent) > return NCSCC_RC_FAILURE; > } > > + if (ent->parent && ent->parent->entity_type == PLMS_EE_ENTITY && > + plms_is_rdness_state_set(ent->parent, > SA_PLM_READINESS_OUT_OF_SERVICE)) { > + LOG_IN("host EE not instantiated yet: starting timer"); > + ent->tmr.tmr_type = PLMS_TMR_EE_HOST_INSTANTIATED; > + ret_err = plms_timer_start(&ent->tmr.timer_id, > + ent, > + SA_TIME_ONE_SECOND * 20); > + if (ret_err != NCSCC_RC_SUCCESS) { > + LOG_ER("failed to start host EE instantiated > timer"); > + return ret_err; > + } > + } > + > if (plms_is_rdness_state_set(ent, SA_PLM_READINESS_IN_SERVICE)) { > TRACE("Ent %s is already in insvc.", ent->dn_name_str); > return NCSCC_RC_SUCCESS; > @@ -532,6 +548,13 @@ SaUint32T plms_plmc_tcp_connect_process(PLMS_ENTITY > *ent) > ret_err = plms_plmc_unlck_insvc(ent, trk_info, > aff_ent_list_flag, is_set); > } > + > + /* If this is a host EE, stop timer for all child EEs */ > + if (ret_err == NCSCC_RC_SUCCESS && ent->entity_type == > PLMS_EE_ENTITY && > + ent->leftmost_child) { > + plms_ee_stop_host_timer(ent->leftmost_child); > + } > + > TRACE_LEAVE2("Return Val: %d", ret_err); > return ret_err; > } > @@ -1052,6 +1075,12 @@ SaUint32T plms_plmc_get_os_info_response(PLMS_ENTITY > *ent, > to insvc.*/ > ret_err = plms_plmc_unlck_insvc( > ent, trk_info, aff_ent_list_flag, > is_set); > + > + /* If this is a host EE, stop timer for > all child EEs */ > + if (ret_err == NCSCC_RC_SUCCESS && > ent->entity_type == PLMS_EE_ENTITY && > + ent->leftmost_child) { > + plms_ee_stop_host_timer(ent-> > leftmost_child); > + 
} > } > } > } else { > @@ -2658,6 +2687,28 @@ SaUint32T plms_ee_term_failed_tmr_exp(PLMS_ENTITY > *ent) > TRACE_LEAVE2("Return Val: %d", ret_err); > return ret_err; > } > + > +SaUint32T plms_ee_host_instantiate_tmr_exp(PLMS_ENTITY *ent) > +{ > + SaUint32T ret_err = NCSCC_RC_SUCCESS; > + > + TRACE_ENTER2("Entity: %s",ent->dn_name_str); > + > + if (ent->tmr.timer_id) { > + /* Clean up the timer context.*/ > + ent->tmr.timer_id = 0; > + ent->tmr.tmr_type = PLMS_TMR_NONE; > + ent->tmr.context_info = NULL; > + > + ret_err = plms_ee_term(ent, 0, 0); > + } else { > + TRACE("timer was already disabled"); > + } > + > + TRACE_LEAVE2("Return Val: %d",ret_err); > + return ret_err; > +} > + > /*********************************************************** > ******************* > @brief : Isolate the entity which fails to instantiate or > terminate > @param[in] : ent - EE. > @@ -2818,6 +2869,11 @@ SaUint32T plms_mbx_tmr_handler(PLMS_EVT *evt) > ret_err = plms_ee_term_failed_tmr_exp( > (PLMS_ENTITY *)evt->req_evt.plm_tmr.context_info); > break; > + > + case PLMS_TMR_EE_HOST_INSTANTIATED: > + ret_err = plms_ee_host_instantiate_tmr_exp( > + (PLMS_ENTITY *)evt->req_evt.plm_tmr.context_info); > + break; > default: > break; > } > @@ -3228,3 +3284,26 @@ void plms_is_dep_set_cbk_call(PLMS_ENTITY *ent) > } > return; > } > +void plms_ee_stop_host_timer(PLMS_ENTITY *child) > +{ > + TRACE_ENTER(); > + > + do { > + if (!child) > + break; > + > + TRACE("Entity: %s", child->dn_name.value); > + > + plms_ee_stop_host_timer(child->right_sibling); > + > + if (child->tmr.timer_id && > + child->tmr.tmr_type == PLMS_TMR_EE_HOST_INSTANTIATED) { > + plms_timer_stop(child); > + } else if (child->tmr.timer_id) { > + TRACE("another timer is running other than HOST_INSTANTIATED: %i", > + child->tmr.tmr_type); > + } > + } while (false); > + > + TRACE_LEAVE(); > +} > diff --git a/src/plm/plmd/plms_utils.c b/src/plm/plmd/plms_utils.c > index d3479e4..5637cdf 100644 > --- a/src/plm/plmd/plms_utils.c > +++ 
b/src/plm/plmd/plms_utils.c > @@ -2931,6 +2931,8 @@ SaUint32T plms_move_ent_to_insvc(PLMS_ENTITY > *chld_ent, SaUint8T *is_flag_aff) > > /* If my parent is OOS, then forget. Return from here.*/ > if ((NULL != chld_ent->parent) && > + /* If my parent is EE allow me to continue for now (timer going) */ > + chld_ent->parent->entity_type != PLMS_EE_ENTITY && > !plms_is_rdness_state_set(chld_ent->parent, > SA_PLM_READINESS_IN_SERVICE)) { > > @@ -3022,7 +3024,14 @@ void plms_move_chld_ent_to_insvc(PLMS_ENTITY > *chld_ent, > if (plms_is_rdness_state_set(chld_ent, > SA_PLM_READINESS_IN_SERVICE)) { > > LOG_ER("Entity %s is already insvc", > chld_ent->dn_name_str); > - return; > + if (chld_ent->parent->entity_type == PLMS_EE_ENTITY) { > + if (chld_ent->tmr.timer_id && > + chld_ent->tmr.tmr_type == > PLMS_TMR_EE_HOST_INSTANTIATED) { > + TRACE("stopping instantiation timer for > child"); > + plms_timer_stop(chld_ent); > + } > + } > + > } > > /* Traverse the right-node */ > -- > 2.9.5 > > ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/opensaf-devel
