Hi Praveen,

I attached them to ticket.

Thanks,
Minh

On 19/08/16 21:08, praveen malviya wrote:
> Hi Minh,
> All patches are not received.
> Please attached them in the ticket.
>
> Thanks,
> Praveen
>
> On 18-Aug-16 5:45 AM, Minh Hon Chau wrote:
>>  osaf/services/saf/amf/amfd/include/sg.h |   4 +-
>>  osaf/services/saf/amf/amfd/include/susi.h |   2 +
>>  osaf/services/saf/amf/amfd/ndfsm.cc       |  15 ++++++-
>>  osaf/services/saf/amf/amfd/sg.cc          |  37 ++++++++++++++++++-
>>  osaf/services/saf/amf/amfd/siass.cc       |  59 
>> +++++++++++++++++++++++++++++-
>>  osaf/services/saf/amf/amfd/su.cc          |  12 ++++++
>>  6 files changed, 121 insertions(+), 8 deletions(-)
>>
>>
>> Since headless interuption is unplanned action and writing rta to IMM
>> is currently queued up in AMFD implemenentation. That can result into
>> inappropriate states of SG fsm state, SUSI fsm state, ha state,
>> SUOperationList, etc. Eventually, AMFD will run into SG unstable, false
>> assertion, or even SUSIs become permanently PARTIALLY, which is hard
>> to debug (even harder without trace)
>>
>> This patch adds a validation routine to check headless cached RTAs read
>> from IMM, more validation rule to be added. Also, a TODO is left for
>> discussion about what's a action should be taken if validation is 
>> failed.
>>
>> diff --git a/osaf/services/saf/amf/amfd/include/sg.h 
>> b/osaf/services/saf/amf/amfd/include/sg.h
>> --- a/osaf/services/saf/amf/amfd/include/sg.h
>> +++ b/osaf/services/saf/amf/amfd/include/sg.h
>> @@ -418,7 +418,7 @@ public:
>>      bool any_assignment_absent();
>>      void failover_absent_assignment();
>>      bool ng_using_saAmfSGAdminState;
>> -
>> +    bool headless_validation;
>>      uint32_t term_su_list_in_reverse();
>>         //Runtime calculates value of saAmfSGNumCurrAssignedSUs;
>>      uint32_t curr_assigned_sus() const;
>> @@ -579,7 +579,7 @@ private:
>>  #define m_AVD_CHK_OPLIST(i_su,flag) (flag) = 
>> (i_su)->sg_of_su->in_su_oper_list(i_su)
>>
>>  void avd_sg_read_headless_cached_rta(AVD_CL_CB *cb);
>> -
>> +bool avd_sg_validate_headless_cached_rta(AVD_CL_CB *cb);
>>  extern void avd_sg_delete(AVD_SG *sg);
>>  extern void avd_sg_db_add(AVD_SG *sg);
>>  extern void avd_sg_db_remove(AVD_SG *sg);
>> diff --git a/osaf/services/saf/amf/amfd/include/susi.h 
>> b/osaf/services/saf/amf/amfd/include/susi.h
>> --- a/osaf/services/saf/amf/amfd/include/susi.h
>> +++ b/osaf/services/saf/amf/amfd/include/susi.h
>> @@ -143,6 +143,8 @@ AVD_SU_SI_REL *avd_susi_create(AVD_CL_CB
>>                          AVD_SU_SI_STATE default_fsm = 
>> AVD_SU_SI_STATE_ABSENT);
>>  AVD_SU_SI_REL *avd_susi_find(AVD_CL_CB *cb, const SaNameT *su_name, 
>> const SaNameT *si_name);
>>  void avd_susi_update_fsm(AVD_SU_SI_REL *susi, AVD_SU_SI_STATE 
>> new_fsm_state);
>> +bool avd_susi_validate_headless_cached_rta(AVD_SU_SI_REL *present_susi,
>> +        SaAmfHAStateT ha_fr_imm, AVD_SU_SI_STATE fsm_fr_imm);
>>  void avd_susi_read_headless_cached_rta(AVD_CL_CB *cb);
>>  extern void avd_susi_update(AVD_SU_SI_REL *susi, SaAmfHAStateT 
>> ha_state);
>>
>> diff --git a/osaf/services/saf/amf/amfd/ndfsm.cc 
>> b/osaf/services/saf/amf/amfd/ndfsm.cc
>> --- a/osaf/services/saf/amf/amfd/ndfsm.cc
>> +++ b/osaf/services/saf/amf/amfd/ndfsm.cc
>> @@ -127,13 +127,22 @@ void avd_process_state_info_queue(AVD_CL
>>
>>      // Read cached rta from Imm, the order of calling
>>      // below functions is IMPORTANT.
>> -    // Reading sg must be after reading susi
>> -    // Cleanup compcsi must be after reading sg
>>      if (found_state_info == true) {
>> +        LOG_NO("Enter restore headless cached RTAs from IMM");
>> +        // Read all cached susi, includes ABSENT SUSI with IMM fsm 
>> state
>>          avd_susi_read_headless_cached_rta(cb);
>> +        // Read SUSwitch of SU, validate toggle depends on SUSI fsm 
>> state
>> +        avd_su_read_headless_cached_rta(cb);
>> +        // Read SUOperationList, set ABSENT fsm state for ABSENT SUSI
>>          avd_sg_read_headless_cached_rta(cb);
>> +        // Clean compcsi object of ABSENT SUSI
>>          avd_compcsi_cleanup_imm_object(cb);
>> -        avd_su_read_headless_cached_rta(cb);
>> +        // Last, validate all
>> +        bool valid = avd_sg_validate_headless_cached_rta(cb);
>> +        if (valid)
>> +            LOG_NO("Leave reading headless cached RTAs from IMM: 
>> SUCCESS");
>> +        else
>> +            LOG_ER("Leave reading headless cached RTAs from IMM: 
>> FAILED");
>>      }
>>  done:
>>      TRACE("queue_size after processing: %lu", (unsigned long) 
>> cb->evt_queue.size());
>> diff --git a/osaf/services/saf/amf/amfd/sg.cc 
>> b/osaf/services/saf/amf/amfd/sg.cc
>> --- a/osaf/services/saf/amf/amfd/sg.cc
>> +++ b/osaf/services/saf/amf/amfd/sg.cc
>> @@ -124,7 +124,8 @@ AVD_SG::AVD_SG():
>>          max_assigned_su(nullptr),
>>          min_assigned_su(nullptr),
>>          si_tobe_redistributed(nullptr),
>> -        try_inst_counter(0)
>> +        try_inst_counter(0),
>> +        headless_validation(true)
>>  {
>>      adminOp = static_cast<SaAmfAdminOperationIdT>(0);
>>      memset(&name, 0, sizeof(SaNameT));
>> @@ -2115,6 +2116,9 @@ void avd_sg_read_headless_cached_rta(AVD
>>                      (SaImmAttrValuesT_2 ***)&attributes)) == 
>> SA_AIS_OK) {
>>          sg = sg_db->find(Amf::to_string(&sg_dn));
>>          if (sg && sg->sg_ncs_spec == false) {
>> +            if (sg->headless_validation == false) {
>> +                continue;
>> +            }
>>              // Read sg fsm state
>>              rc = 
>> immutil_getAttr(const_cast<SaImmAttrNameT>("osafAmfSGFsmState"),
>>                      attributes, 0, &imm_sg_fsm_state);
>> @@ -2159,6 +2163,37 @@ done:
>>      TRACE_LEAVE();
>>  }
>>
>> +/**
>> + * @brief  Validate all cached RTAs read from IMM after headless.
>> +           This validation is necessary. If AMFD doesn't have this
>> +           validation routine and the cached RTAs are invalid,
>> +           that would lead into *unpredictably* wrong states, which
>> +           is hard to debug (harder if no trace)
>> + * @param  Control block (AVD_CL_CB).
>> + * @Return true if valid, false otherwise.
>> +*/
>> +bool avd_sg_validate_headless_cached_rta(AVD_CL_CB *cb) {
>> +    TRACE_ENTER();
>> +    bool valid = true;
>> +    for (std::map<std::string, AVD_SG*>::const_iterator it = 
>> sg_db->begin();
>> +            it != sg_db->end(); it++) {
>> +        AVD_SG *i_sg = it->second;
>> +        if (i_sg->sg_ncs_spec == true) {
>> +            continue;
>> +        }
>> +
>> +        if (i_sg->headless_validation == false) {
>> +            //TODO: AMFD should make all SUs of this SG faulty to 
>> remove
>> +            //all assignments, clean up IMM headless cached RTA.
>> +            //Just assert for now
>> +            //osafassert(false);
>> +            valid = false;
>> +        }
>> +    }
>> +    TRACE_LEAVE2("%u", valid);
>> +    return valid;
>> +}
>> +
>>  void AVD_SG::failover_absent_assignment() {
>>
>>      TRACE_ENTER2("SG:'%s'", Amf::to_string(&name).c_str());
>> diff --git a/osaf/services/saf/amf/amfd/siass.cc 
>> b/osaf/services/saf/amf/amfd/siass.cc
>> --- a/osaf/services/saf/amf/amfd/siass.cc
>> +++ b/osaf/services/saf/amf/amfd/siass.cc
>> @@ -214,11 +214,17 @@ void avd_susi_read_headless_cached_rta(A
>>          susi = avd_su_susi_find(cb, su, &si->name);
>>          rc = immutil_getAttr("osafAmfSISUFsmState", attributes, 0, 
>> &imm_susi_fsm);
>>          osafassert(rc == SA_AIS_OK);
>> +        rc = immutil_getAttr("saAmfSISUHAState", attributes, 0, 
>> &imm_ha_state);
>> +        osafassert(rc == SA_AIS_OK);
>>
>>          if (susi) { // FOR PRESENT SUSI found in AMFND(s)
>>              TRACE("SISU:'%s', old(imm) fsm state: %d, new(sync) fsm 
>> state: %d",
>>                  Amf::to_string(&dn).c_str(), imm_susi_fsm, susi->fsm);
>>
>> +            if (avd_susi_validate_headless_cached_rta(susi, 
>> imm_ha_state,
>> +                    imm_susi_fsm) == false) {
>> +                continue;
>> +            }
>>  #if 1
>>              // If remove the below line in this #if block, AMFD will 
>> use
>>              // the synced fsm state, which is latest. That means, in
>> @@ -255,8 +261,6 @@ void avd_susi_read_headless_cached_rta(A
>>
>>          } else { // For ABSENT SUSI
>>              if (su->sg_of_su->sg_ncs_spec == false) {
>> -                rc = immutil_getAttr("saAmfSISUHAState", attributes, 
>> 0, &imm_ha_state);
>> -                osafassert(rc == SA_AIS_OK);
>>                  TRACE("Absent SUSI, ha_state:'%u', fsm_state:'%u'", 
>> imm_ha_state, imm_susi_fsm);
>>                  if (imm_susi_fsm != AVD_SU_SI_STATE_UNASGN) {
>>                      absent_susi = avd_susi_create(avd_cb, si, su, 
>> imm_ha_state, false, AVSV_SUSI_ACT_BASE);
>> @@ -288,6 +292,57 @@ void avd_susi_read_headless_cached_rta(A
>>  done:
>>      TRACE_LEAVE();
>>  }
>> +/**
>> + * Validate cached RTA read from IMM
>> + * @param present_susi
>> + * @param ha_fr_imm: Ha state of @present_susi read from IMM
>> + * @param fsm_fr_imm: Fsm state of @present susi read from IMM
>> + * @return: true of valid, false otherwise
>> + */
>> +bool avd_susi_validate_headless_cached_rta(AVD_SU_SI_REL *present_susi,
>> +        SaAmfHAStateT ha_fr_imm, AVD_SU_SI_STATE fsm_fr_imm) {
>> +    std::string dn = Amf::to_string(&present_susi->si->name) + "," +
>> +            Amf::to_string(&present_susi->su->name);
>> +    TRACE_ENTER2("SISU:'%s'", dn.c_str());
>> +    bool valid = true;
>> +    // rule 1: valid ha state
>> +    if (ha_fr_imm != present_susi->state) {
>> +        if (ha_fr_imm == SA_AMF_HA_QUIESCING ||
>> +            ha_fr_imm == SA_AMF_HA_QUIESCED) {
>> +            // That's fine
>> +            ;
>> +        } else {
>> +            LOG_ER("SISU:'%s', old(imm) ha state: %d, new(sync) ha 
>> state: %d",
>> +            dn.c_str(),    ha_fr_imm, present_susi->state);
>> +            valid = false;
>> +            goto done;
>> +        }
>> +    }
>> +    // rule 2: if ha_fr_imm is QUIESCING, one of relevant entities must
>> +    // have adminState is SHUTTINGDOWN
>> +    if (ha_fr_imm == SA_AMF_HA_QUIESCING) {
>> +        if (present_susi->su->saAmfSUAdminState == 
>> SA_AMF_ADMIN_SHUTTING_DOWN ||
>> +            present_susi->si->saAmfSIAdminState == 
>> SA_AMF_ADMIN_SHUTTING_DOWN ||
>> + present_susi->su->sg_of_su->saAmfSGAdminState == 
>> SA_AMF_ADMIN_SHUTTING_DOWN ||
>> + present_susi->su->su_on_node->saAmfNodeAdminState == 
>> SA_AMF_ADMIN_SHUTTING_DOWN) {
>> +            // That's fine
>> +            ;
>> +        } else {
>> +            LOG_ER("SISU:'%s', ha:'%u', but one of [node/sg/su/si] 
>> is not in SHUTTING_DOWN",
>> +                    dn.c_str(), ha_fr_imm);
>> +            valid = false;
>> +            goto done;
>> +        }
>> +    }
>> +    // TODO: more rules to be added when issue is found in reality 
>> due to writing
>> +    // cached RTA to IMM
>> +done:
>> +    if (valid == false)
>> + present_susi->su->sg_of_su->headless_validation = valid;
>> +
>> +    TRACE_LEAVE2("%u, %u", valid, 
>> present_susi->su->sg_of_su->headless_validation);
>> +    return present_susi->su->sg_of_su->headless_validation;
>> +}
>>  
>> /*****************************************************************************
>>  
>>
>>   * Function: avd_susi_create
>>   *
>> diff --git a/osaf/services/saf/amf/amfd/su.cc 
>> b/osaf/services/saf/amf/amfd/su.cc
>> --- a/osaf/services/saf/amf/amfd/su.cc
>> +++ b/osaf/services/saf/amf/amfd/su.cc
>> @@ -1964,6 +1964,18 @@ void avd_su_read_headless_cached_rta(AVD
>>              rc = 
>> immutil_getAttr(const_cast<SaImmAttrNameT>("osafAmfSUSwitch"),
>>                      attributes, 0, &su_toggle);
>>              osafassert(rc == SA_AIS_OK);
>> +            if (su_toggle == AVSV_SI_TOGGLE_SWITCH) {
>> +                // 2N, if toggle but no pending assignment -> bad state
>> +                if (su->sg_of_su->sg_redundancy_model == 
>> SA_AMF_2N_REDUNDANCY_MODEL &&
>> + su->sg_of_su->any_assignment_in_progress() == false){
>> +                    LOG_ER("SG'%s', osafAmfSUSwitch:'%u', but no 
>> pending assignment",
>> + Amf::to_string(&su->sg_of_su->name).c_str(),
>> +                            su_toggle);
>> +                    su->sg_of_su->headless_validation = false;
>> +                }
>> +                if (su->sg_of_su->headless_validation == false)
>> +                    continue;
>> +            }
>>              su->set_su_switch(su_toggle, false);
>>          }
>>      }
>>
>


------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to