It might be good to make this timeout value configurable in the plm*.conf
file.

Ack,
Mathi.


On Tue, Jan 2, 2018 at 4:53 PM, Alex Jones <[email protected]> wrote:

> A child EE which is a controller can get shut down because its parent EE
> (host) has not connected to PLM yet.
>
> If the controller is a VM, and the host is a payload, there is a race
> condition when instantiating the EEs. If the host doesn't connect to PLM
> first, then when the controller EE (child of host EE) connects to PLM, it
> sees that the host isn't instantiated, and shuts itself down.
>
> If the controller child EE instantiates before the host has connected to
> PLM, set a 20-second timer. If the host doesn't instantiate within this
> time, then all child EEs will be shut down.
> ---
>  src/plm/common/plms_evt.h |  3 +-
>  src/plm/plmd/plms_plmc.c  | 79 ++++++++++++++++++++++++++++++
> +++++++++++++++++
>  src/plm/plmd/plms_utils.c | 11 ++++++-
>  3 files changed, 91 insertions(+), 2 deletions(-)
>
> diff --git a/src/plm/common/plms_evt.h b/src/plm/common/plms_evt.h
> index 43f4748..e87c632 100644
> --- a/src/plm/common/plms_evt.h
> +++ b/src/plm/common/plms_evt.h
> @@ -98,7 +98,8 @@ typedef enum {
>  typedef enum {
>    PLMS_TMR_NONE,
>    PLMS_TMR_EE_INSTANTIATING,
> -  PLMS_TMR_EE_TERMINATING
> +  PLMS_TMR_EE_TERMINATING,
> +  PLMS_TMR_EE_HOST_INSTANTIATED
>  } PLMS_TMR_EVT_TYPE;
>
>  typedef struct plms_imm_admin_op {
> diff --git a/src/plm/plmd/plms_plmc.c b/src/plm/plmd/plms_plmc.c
> index 06c8d4b..c310a86 100644
> --- a/src/plm/plmd/plms_plmc.c
> +++ b/src/plm/plmd/plms_plmc.c
> @@ -50,6 +50,8 @@ static SaUint32T 
> plms_os_info_resp_mngt_flag_clear(PLMS_ENTITY
> *);
>  static void plms_insted_dep_immi_failure_cbk_call(PLMS_ENTITY *,
>                                                   PLMS_GROUP_ENTITY *);
>  static void plms_is_dep_set_cbk_call(PLMS_ENTITY *);
> +
> +static void plms_ee_stop_host_timer(PLMS_ENTITY *);
>  /***********************************************************
> *******************
>  @brief         : Process instantiating event from PLMC.
>                   1. Do the OS verification irrespective of previous state.
> @@ -346,6 +348,7 @@ SaUint32T plms_plmc_tcp_connect_process(PLMS_ENTITY
> *ent)
>         if ((SA_PLM_EE_ADMIN_LOCKED_INSTANTIATION ==
>              ent->entity.ee_entity.saPlmEEAdminState) ||
>             ((NULL != ent->parent) &&
> +               ent->parent->entity_type != PLMS_EE_ENTITY &&
>              (plms_is_rdness_state_set(ent->parent,
>                                        SA_PLM_READINESS_OUT_OF_SERVICE)))
> ||
>             (!plms_min_dep_is_ok(ent))) {
> @@ -379,6 +382,19 @@ SaUint32T plms_plmc_tcp_connect_process(PLMS_ENTITY
> *ent)
>                 return NCSCC_RC_FAILURE;
>         }
>
> +       if (ent->parent && ent->parent->entity_type == PLMS_EE_ENTITY &&
> +               plms_is_rdness_state_set(ent->parent,
> SA_PLM_READINESS_OUT_OF_SERVICE)) {
> +               LOG_IN("host EE not instantiated yet: starting timer");
> +               ent->tmr.tmr_type = PLMS_TMR_EE_HOST_INSTANTIATED;
> +               ret_err = plms_timer_start(&ent->tmr.timer_id,
> +                                               ent,
> +                                               SA_TIME_ONE_SECOND * 20);
> +               if (ret_err != NCSCC_RC_SUCCESS) {
> +                       LOG_ER("failed to start host EE instantiated
> timer");
> +                       return ret_err;
> +               }
> +       }
> +
>         if (plms_is_rdness_state_set(ent, SA_PLM_READINESS_IN_SERVICE)) {
>                 TRACE("Ent %s is already in insvc.", ent->dn_name_str);
>                 return NCSCC_RC_SUCCESS;
> @@ -532,6 +548,13 @@ SaUint32T plms_plmc_tcp_connect_process(PLMS_ENTITY
> *ent)
>                 ret_err = plms_plmc_unlck_insvc(ent, trk_info,
>                                                 aff_ent_list_flag, is_set);
>         }
> +
> +       /* If this is a host EE, stop timer for all child EEs */
> +        if (ret_err == NCSCC_RC_SUCCESS && ent->entity_type ==
> PLMS_EE_ENTITY &&
> +               ent->leftmost_child) {
> +               plms_ee_stop_host_timer(ent->leftmost_child);
> +        }
> +
>         TRACE_LEAVE2("Return Val: %d", ret_err);
>         return ret_err;
>  }
> @@ -1052,6 +1075,12 @@ SaUint32T plms_plmc_get_os_info_response(PLMS_ENTITY
> *ent,
>                                 to insvc.*/
>                                 ret_err = plms_plmc_unlck_insvc(
>                                     ent, trk_info, aff_ent_list_flag,
> is_set);
> +
> +                               /* If this is a host EE, stop timer for
> all child EEs */
> +                               if (ret_err == NCSCC_RC_SUCCESS &&
> ent->entity_type == PLMS_EE_ENTITY &&
> +                                       ent->leftmost_child) {
> +                                       plms_ee_stop_host_timer(ent->
> leftmost_child);
> +                               }
>                         }
>                 }
>         } else {
> @@ -2658,6 +2687,28 @@ SaUint32T plms_ee_term_failed_tmr_exp(PLMS_ENTITY
> *ent)
>         TRACE_LEAVE2("Return Val: %d", ret_err);
>         return ret_err;
>  }
> +
> +SaUint32T plms_ee_host_instantiate_tmr_exp(PLMS_ENTITY *ent)
> +{
> +       SaUint32T ret_err = NCSCC_RC_SUCCESS;
> +
> +       TRACE_ENTER2("Entity: %s",ent->dn_name_str);
> +
> +       if (ent->tmr.timer_id) {
> +               /* Clean up the timer context.*/
> +               ent->tmr.timer_id = 0;
> +               ent->tmr.tmr_type = PLMS_TMR_NONE;
> +               ent->tmr.context_info = NULL;
> +
> +               ret_err = plms_ee_term(ent, 0, 0);
> +       } else {
> +               TRACE("timer was already disabled");
> +       }
> +
> +       TRACE_LEAVE2("Return Val: %d",ret_err);
> +       return ret_err;
> +}
> +
>  /***********************************************************
> *******************
>  @brief         : Isolate the entity which fails to instantiate or
> terminate
>  @param[in]     : ent - EE.
> @@ -2818,6 +2869,11 @@ SaUint32T plms_mbx_tmr_handler(PLMS_EVT *evt)
>                 ret_err = plms_ee_term_failed_tmr_exp(
>                     (PLMS_ENTITY *)evt->req_evt.plm_tmr.context_info);
>                 break;
> +
> +       case PLMS_TMR_EE_HOST_INSTANTIATED:
> +               ret_err = plms_ee_host_instantiate_tmr_exp(
> +               (PLMS_ENTITY *)evt->req_evt.plm_tmr.context_info);
> +               break;
>         default:
>                 break;
>         }
> @@ -3228,3 +3284,26 @@ void plms_is_dep_set_cbk_call(PLMS_ENTITY *ent)
>         }
>         return;
>  }
> +void plms_ee_stop_host_timer(PLMS_ENTITY *child)
> +{
> +  TRACE_ENTER();
> +
> +  do {
> +    if (!child)
> +      break;
> +
> +    TRACE("Entity: %s", child->dn_name.value);
> +
> +    plms_ee_stop_host_timer(child->right_sibling);
> +
> +    if (child->tmr.timer_id &&
> +        child->tmr.tmr_type == PLMS_TMR_EE_HOST_INSTANTIATED) {
> +      plms_timer_stop(child);
> +    } else if (child->tmr.timer_id) {
> +      TRACE("another timer is running other than HOST_INSTANTIATED: %i",
> +             child->tmr.tmr_type);
> +    }
> +  } while (false);
> +
> +  TRACE_LEAVE();
> +}
> diff --git a/src/plm/plmd/plms_utils.c b/src/plm/plmd/plms_utils.c
> index d3479e4..5637cdf 100644
> --- a/src/plm/plmd/plms_utils.c
> +++ b/src/plm/plmd/plms_utils.c
> @@ -2931,6 +2931,8 @@ SaUint32T plms_move_ent_to_insvc(PLMS_ENTITY
> *chld_ent, SaUint8T *is_flag_aff)
>
>         /* If my parent is OOS, then forget. Return from here.*/
>         if ((NULL != chld_ent->parent) &&
> +       /* If my parent is EE allow me to continue for now (timer going) */
> +            chld_ent->parent->entity_type != PLMS_EE_ENTITY &&
>             !plms_is_rdness_state_set(chld_ent->parent,
>                                       SA_PLM_READINESS_IN_SERVICE)) {
>
> @@ -3022,7 +3024,14 @@ void plms_move_chld_ent_to_insvc(PLMS_ENTITY
> *chld_ent,
>         if (plms_is_rdness_state_set(chld_ent,
> SA_PLM_READINESS_IN_SERVICE)) {
>
>                 LOG_ER("Entity %s is already  insvc",
> chld_ent->dn_name_str);
> -               return;
> +                if (chld_ent->parent->entity_type == PLMS_EE_ENTITY) {
> +                        if (chld_ent->tmr.timer_id &&
> +                                chld_ent->tmr.tmr_type ==
> PLMS_TMR_EE_HOST_INSTANTIATED) {
> +                                TRACE("stopping instantiation timer for
> child");
> +                                plms_timer_stop(chld_ent);
> +                        }
> +                }
> +
>         }
>
>         /* Traverse the right-node */
> --
> 2.9.5
>
>
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to