Hi Minh,
                In case, there is discrepancy as in this situations, then we 
can give more importance to SUSI reported from Amfnd. This approach could be 
simpler.
Thanks
-Nagu

> -----Original Message-----
> From: Minh Chau [mailto:[email protected]]
> Sent: 09 May 2017 12:10
> To: [email protected]; [email protected];
> [email protected]; [email protected]
> Cc: [email protected]; Minh Chau
> Subject: [PATCH 2/3] amfd: Add iteration to failover all absent assignments
> [#2416]
> 
> Problem: As described in previous patch: "amfd: Make creation and deletion
> of assignment object as IMM synced call [#2416]"
> 
> The previous patch makes creation and deletion assignment object as IMM
> synced call at the first try. It acts as the prevention of inconsistency 
> between
> AMFND and IMM. However, if the first IMM call fails, a chance of same
> problem can happen.
> 
> This patch continues the approach of absent assignments that is using
> node_fail() Sg Fsm to perform failover. This node_fail() now also is used as a
> clean up method that iterates and remove all absent invalid assignments as
> well as failover valid assignments.
> ---
>  src/amf/amfd/cluster.cc |  7 ++++---
>  src/amf/amfd/sg.cc      | 23 +++++++++++++++++------
>  src/amf/amfd/si.cc      | 20 ++++++++++++++++----
>  src/amf/amfd/su.cc      | 42 +++++++++++++++++++++++++++++++++++++---
> --
>  src/amf/amfd/su.h       |  1 +
>  5 files changed, 75 insertions(+), 18 deletions(-)
> 
> diff --git a/src/amf/amfd/cluster.cc b/src/amf/amfd/cluster.cc index
> ef1a0328a..9ec6746a8 100644
> --- a/src/amf/amfd/cluster.cc
> +++ b/src/amf/amfd/cluster.cc
> @@ -97,11 +97,12 @@ void avd_cluster_tmr_init_evh(AVD_CL_CB *cb,
> AVD_EVT *evt) {
>        continue;
>      }
> 
> -    if (i_sg->any_assignment_absent()) {
> +    while (i_sg->any_assignment_absent()) {
>        // failover with ABSENT SUSI, which had already been removed during
> -      // headless
> +      // headless, until all ABSENT SUSI(s) are failovered successfully
>        i_sg->failover_absent_assignment();
> -    } else if (i_sg->any_assignment_in_progress() == false) {
> +    }
> +    if (i_sg->any_assignment_in_progress() == false) {
>        i_sg->set_fsm_state(AVD_SG_FSM_STABLE);
>      }
> 
> diff --git a/src/amf/amfd/sg.cc b/src/amf/amfd/sg.cc index
> 9b04a423c..74916009c 100644
> --- a/src/amf/amfd/sg.cc
> +++ b/src/amf/amfd/sg.cc
> @@ -2288,16 +2288,27 @@ bool
> avd_sg_validate_headless_cached_rta(AVD_CL_CB *cb) {
> 
>  void AVD_SG::failover_absent_assignment() {
>    TRACE_ENTER2("SG:'%s'", name.c_str());
> +  AVD_SU* failed_su = nullptr;
>    for (const auto &su : list_of_su) {
>      if (su->any_susi_fsm_in(AVD_SU_SI_STATE_ABSENT)) {
> -      node_fail(avd_cb, su);
> -      if (su->is_in_service())
> -        su->set_readiness_state(SA_AMF_READINESS_IN_SERVICE);
> -      if (su->sg_of_su->sg_fsm_state == AVD_SG_FSM_STABLE)
> -        su->sg_of_su->realign(avd_cb, this);
> -      break;
> +      // look up SU has the most absent STANBY assignment to failover first
> +      // TODO: need to verify with NpM and Nway Sg
> +      if (failed_su == nullptr) failed_su = su;
> +      else if (su->count_susi_with(SA_AMF_HA_STANDBY,
> AVD_SU_SI_STATE_ABSENT) >
> +          failed_su->count_susi_with(SA_AMF_HA_STANDBY,
> +              AVD_SU_SI_STATE_ABSENT)) {
> +          failed_su = su;
> +      }
>      }
>    }
> +
> +  if (failed_su != nullptr) {
> +    node_fail(avd_cb, failed_su);
> +    if (failed_su->is_in_service())
> +      failed_su->set_readiness_state(SA_AMF_READINESS_IN_SERVICE);
> +  }
> +  if (sg_fsm_state == AVD_SG_FSM_STABLE)
> +    realign(avd_cb, this);
>    TRACE_LEAVE();
>  }
> 
> diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc index
> 298188a84..6b38a6a11 100644
> --- a/src/amf/amfd/si.cc
> +++ b/src/amf/amfd/si.cc
> @@ -1402,7 +1402,10 @@ void AVD_SI::inc_curr_act_ass() {  }
> 
>  void AVD_SI::dec_curr_act_ass() {
> -  osafassert(saAmfSINumCurrActiveAssignments > 0);
> +  if (saAmfSINumCurrActiveAssignments == 0) {
> +    LOG_WA("Failed to decrease saAmfSINumCurrActiveAssignments");
> +    return;
> +  }
>    saAmfSINumCurrActiveAssignments--;
>    TRACE("%s saAmfSINumCurrActiveAssignments=%u", name.c_str(),
>          saAmfSINumCurrActiveAssignments); @@ -1419,7 +1422,10 @@ void
> AVD_SI::inc_curr_stdby_ass() {  }
> 
>  void AVD_SI::dec_curr_stdby_ass() {
> -  osafassert(saAmfSINumCurrStandbyAssignments > 0);
> +  if (saAmfSINumCurrStandbyAssignments == 0) {
> +    LOG_WA("Failed to decrease saAmfSINumCurrStandbyAssignments");
> +    return;
> +  }
>    saAmfSINumCurrStandbyAssignments--;
>    TRACE("%s saAmfSINumCurrStandbyAssignments=%u", name.c_str(),
>          saAmfSINumCurrStandbyAssignments);
> @@ -1433,7 +1439,10 @@ void AVD_SI::inc_curr_act_dec_std_ass() {
>    TRACE("%s saAmfSINumCurrActiveAssignments=%u", name.c_str(),
>          saAmfSINumCurrActiveAssignments);
> 
> -  osafassert(saAmfSINumCurrStandbyAssignments > 0);
> +  if (saAmfSINumCurrStandbyAssignments == 0) {
> +    LOG_WA("Failed to decrease saAmfSINumCurrStandbyAssignments");
> +    return;
> +  }
>    saAmfSINumCurrStandbyAssignments--;
>    TRACE("%s saAmfSINumCurrStandbyAssignments=%u", name.c_str(),
>          saAmfSINumCurrStandbyAssignments);
> @@ -1448,7 +1457,10 @@ void AVD_SI::inc_curr_stdby_dec_act_ass() {
>          saAmfSINumCurrStandbyAssignments);
>    m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, this,
> AVSV_CKPT_SI_SU_CURR_STBY);
> 
> -  osafassert(saAmfSINumCurrActiveAssignments > 0);
> +  if (saAmfSINumCurrActiveAssignments == 0) {
> +    LOG_WA("Failed to decrease saAmfSINumCurrActiveAssignments");
> +    return;
> +  }
>    saAmfSINumCurrActiveAssignments--;
>    TRACE("%s saAmfSINumCurrActiveAssignments=%u", name.c_str(),
>          saAmfSINumCurrActiveAssignments); diff --git a/src/amf/amfd/su.cc
> b/src/amf/amfd/su.cc index fac1188b5..2a1923c04 100644
> --- a/src/amf/amfd/su.cc
> +++ b/src/amf/amfd/su.cc
> @@ -2127,29 +2127,45 @@ static void
> su_ccb_apply_cb(CcbUtilOperationData_t *opdata) {  }
> 
>  void AVD_SU::inc_curr_act_si() {
> +  if (saAmfSUNumCurrActiveSIs >= sg_of_su->saAmfSGMaxActiveSIsperSU) {
> +    LOG_WA("Failed to increase saAmfSUNumCurrActiveSIs(%u), "
> +        "saAmfSGMaxActiveSIsperSU(%u)", saAmfSUNumCurrActiveSIs,
> +        sg_of_su->saAmfSGMaxActiveSIsperSU);
> +    return;
> +  }
>    saAmfSUNumCurrActiveSIs++;
> -  osafassert(saAmfSUNumCurrActiveSIs <= sg_of_su-
> >saAmfSGMaxActiveSIsperSU);
>    TRACE("%s saAmfSUNumCurrActiveSIs=%u", name.c_str(),
> saAmfSUNumCurrActiveSIs);
>    m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, this,
> AVSV_CKPT_SU_SI_CURR_ACTIVE);  }
> 
>  void AVD_SU::dec_curr_act_si() {
> -  osafassert(saAmfSUNumCurrActiveSIs > 0);
> +  if (saAmfSUNumCurrActiveSIs == 0) {
> +    LOG_WA("Failed to decrease saAmfSUNumCurrActiveSIs");
> +    return;
> +  }
>    saAmfSUNumCurrActiveSIs--;
>    TRACE("%s saAmfSUNumCurrActiveSIs=%u", name.c_str(),
> saAmfSUNumCurrActiveSIs);
>    m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, this,
> AVSV_CKPT_SU_SI_CURR_ACTIVE);  }
> 
>  void AVD_SU::inc_curr_stdby_si() {
> +  if (saAmfSUNumCurrStandbySIs >= sg_of_su-
> >saAmfSGMaxStandbySIsperSU) {
> +    LOG_WA("Failed to increase saAmfSUNumCurrStandbySIs(%u), "
> +        "saAmfSGMaxStandbySIsperSU(%u)", saAmfSUNumCurrStandbySIs,
> +        sg_of_su->saAmfSGMaxStandbySIsperSU);
> +    return;
> +  }
>    saAmfSUNumCurrStandbySIs++;
> -  osafassert(saAmfSUNumCurrStandbySIs <= sg_of_su-
> >saAmfSGMaxStandbySIsperSU);
>    TRACE("%s saAmfSUNumCurrStandbySIs=%u", name.c_str(),
>          saAmfSUNumCurrStandbySIs);
>    m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, this,
> AVSV_CKPT_SU_SI_CURR_STBY);  }
> 
>  void AVD_SU::dec_curr_stdby_si() {
> -  osafassert(saAmfSUNumCurrStandbySIs > 0);
> +  if (saAmfSUNumCurrStandbySIs == 0) {
> +    LOG_WA("Failed to decrease saAmfSUNumCurrStandbySIs");
> +    return;
> +  }
>    saAmfSUNumCurrStandbySIs--;
>    TRACE("%s saAmfSUNumCurrStandbySIs=%u", name.c_str(),
>          saAmfSUNumCurrStandbySIs);
> @@ -2527,6 +2543,22 @@ uint32_t AVD_SU::curr_num_active_sis() {  }
> 
>  /**
> + * @brief    Count number of assignment belonging to *this* SU object
> + *           that has @ha and @fsm.
> + * @param    ha: HA state of searching object
> + *           fsm: fsm state of searching object
> + * @return   count
> + */
> +uint32_t AVD_SU::count_susi_with(SaAmfHAStateT ha, uint32_t fsm) {
> +  uint32_t count = 0;
> +  for (AVD_SU_SI_REL *susi = list_of_susi; susi != nullptr;
> +       susi = susi->su_next)
> +    if ((susi->state == ha) && (susi->fsm == fsm))
> +      count++;
> +  return count;
> +}
> +
> +/**
>   * @brief       This function completes admin operation on SU.
>   *              It responds IMM with the result of admin operation on SU.
>   * @param       ptr to su
> @@ -2732,4 +2764,4 @@ bool AVD_SU::restrict_auto_repair() const
>    }
> 
>    return false;
> -}
> \ No newline at end of file
> +}
> diff --git a/src/amf/amfd/su.h b/src/amf/amfd/su.h index
> 9fece7a13..96c6b803f 100644
> --- a/src/amf/amfd/su.h
> +++ b/src/amf/amfd/su.h
> @@ -143,6 +143,7 @@ class AVD_SU {
>    SaAisErrorT check_su_stability();
>    uint32_t curr_num_standby_sis();
>    uint32_t curr_num_active_sis();
> +  uint32_t count_susi_with(SaAmfHAStateT ha, uint32_t fsm);
>    bool su_any_comp_undergoing_restart_admin_op();
>    AVD_COMP *su_get_comp_undergoing_restart_admin_op();
>    bool su_all_comps_restartable();
> --
> 2.11.0

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to