Hi Praveen,
I attached them to ticket.
Thanks,
Minh
On 19/08/16 21:08, praveen malviya wrote:
> Hi Minh,
> All patches are not received.
> Please attached them in the ticket.
>
> Thanks,
> Praveen
>
> On 18-Aug-16 5:45 AM, Minh Hon Chau wrote:
>> osaf/services/saf/amf/amfd/include/sg.h | 4 +-
>> osaf/services/saf/amf/amfd/include/susi.h | 2 +
>> osaf/services/saf/amf/amfd/ndfsm.cc | 15 ++++++-
>> osaf/services/saf/amf/amfd/sg.cc | 37 ++++++++++++++++++-
>> osaf/services/saf/amf/amfd/siass.cc | 59
>> +++++++++++++++++++++++++++++-
>> osaf/services/saf/amf/amfd/su.cc | 12 ++++++
>> 6 files changed, 121 insertions(+), 8 deletions(-)
>>
>>
>> Since headless interuption is unplanned action and writing rta to IMM
>> is currently queued up in AMFD implemenentation. That can result into
>> inappropriate states of SG fsm state, SUSI fsm state, ha state,
>> SUOperationList, etc. Eventually, AMFD will run into SG unstable, false
>> assertion, or even SUSIs become permanently PARTIALLY, which is hard
>> to debug (even harder without trace)
>>
>> This patch adds a validation routine to check headless cached RTAs read
>> from IMM, more validation rule to be added. Also, a TODO is left for
>> discussion about what's a action should be taken if validation is
>> failed.
>>
>> diff --git a/osaf/services/saf/amf/amfd/include/sg.h
>> b/osaf/services/saf/amf/amfd/include/sg.h
>> --- a/osaf/services/saf/amf/amfd/include/sg.h
>> +++ b/osaf/services/saf/amf/amfd/include/sg.h
>> @@ -418,7 +418,7 @@ public:
>> bool any_assignment_absent();
>> void failover_absent_assignment();
>> bool ng_using_saAmfSGAdminState;
>> -
>> + bool headless_validation;
>> uint32_t term_su_list_in_reverse();
>> //Runtime calculates value of saAmfSGNumCurrAssignedSUs;
>> uint32_t curr_assigned_sus() const;
>> @@ -579,7 +579,7 @@ private:
>> #define m_AVD_CHK_OPLIST(i_su,flag) (flag) =
>> (i_su)->sg_of_su->in_su_oper_list(i_su)
>>
>> void avd_sg_read_headless_cached_rta(AVD_CL_CB *cb);
>> -
>> +bool avd_sg_validate_headless_cached_rta(AVD_CL_CB *cb);
>> extern void avd_sg_delete(AVD_SG *sg);
>> extern void avd_sg_db_add(AVD_SG *sg);
>> extern void avd_sg_db_remove(AVD_SG *sg);
>> diff --git a/osaf/services/saf/amf/amfd/include/susi.h
>> b/osaf/services/saf/amf/amfd/include/susi.h
>> --- a/osaf/services/saf/amf/amfd/include/susi.h
>> +++ b/osaf/services/saf/amf/amfd/include/susi.h
>> @@ -143,6 +143,8 @@ AVD_SU_SI_REL *avd_susi_create(AVD_CL_CB
>> AVD_SU_SI_STATE default_fsm =
>> AVD_SU_SI_STATE_ABSENT);
>> AVD_SU_SI_REL *avd_susi_find(AVD_CL_CB *cb, const SaNameT *su_name,
>> const SaNameT *si_name);
>> void avd_susi_update_fsm(AVD_SU_SI_REL *susi, AVD_SU_SI_STATE
>> new_fsm_state);
>> +bool avd_susi_validate_headless_cached_rta(AVD_SU_SI_REL *present_susi,
>> + SaAmfHAStateT ha_fr_imm, AVD_SU_SI_STATE fsm_fr_imm);
>> void avd_susi_read_headless_cached_rta(AVD_CL_CB *cb);
>> extern void avd_susi_update(AVD_SU_SI_REL *susi, SaAmfHAStateT
>> ha_state);
>>
>> diff --git a/osaf/services/saf/amf/amfd/ndfsm.cc
>> b/osaf/services/saf/amf/amfd/ndfsm.cc
>> --- a/osaf/services/saf/amf/amfd/ndfsm.cc
>> +++ b/osaf/services/saf/amf/amfd/ndfsm.cc
>> @@ -127,13 +127,22 @@ void avd_process_state_info_queue(AVD_CL
>>
>> // Read cached rta from Imm, the order of calling
>> // below functions is IMPORTANT.
>> - // Reading sg must be after reading susi
>> - // Cleanup compcsi must be after reading sg
>> if (found_state_info == true) {
>> + LOG_NO("Enter restore headless cached RTAs from IMM");
>> + // Read all cached susi, includes ABSENT SUSI with IMM fsm
>> state
>> avd_susi_read_headless_cached_rta(cb);
>> + // Read SUSwitch of SU, validate toggle depends on SUSI fsm
>> state
>> + avd_su_read_headless_cached_rta(cb);
>> + // Read SUOperationList, set ABSENT fsm state for ABSENT SUSI
>> avd_sg_read_headless_cached_rta(cb);
>> + // Clean compcsi object of ABSENT SUSI
>> avd_compcsi_cleanup_imm_object(cb);
>> - avd_su_read_headless_cached_rta(cb);
>> + // Last, validate all
>> + bool valid = avd_sg_validate_headless_cached_rta(cb);
>> + if (valid)
>> + LOG_NO("Leave reading headless cached RTAs from IMM:
>> SUCCESS");
>> + else
>> + LOG_ER("Leave reading headless cached RTAs from IMM:
>> FAILED");
>> }
>> done:
>> TRACE("queue_size after processing: %lu", (unsigned long)
>> cb->evt_queue.size());
>> diff --git a/osaf/services/saf/amf/amfd/sg.cc
>> b/osaf/services/saf/amf/amfd/sg.cc
>> --- a/osaf/services/saf/amf/amfd/sg.cc
>> +++ b/osaf/services/saf/amf/amfd/sg.cc
>> @@ -124,7 +124,8 @@ AVD_SG::AVD_SG():
>> max_assigned_su(nullptr),
>> min_assigned_su(nullptr),
>> si_tobe_redistributed(nullptr),
>> - try_inst_counter(0)
>> + try_inst_counter(0),
>> + headless_validation(true)
>> {
>> adminOp = static_cast<SaAmfAdminOperationIdT>(0);
>> memset(&name, 0, sizeof(SaNameT));
>> @@ -2115,6 +2116,9 @@ void avd_sg_read_headless_cached_rta(AVD
>> (SaImmAttrValuesT_2 ***)&attributes)) ==
>> SA_AIS_OK) {
>> sg = sg_db->find(Amf::to_string(&sg_dn));
>> if (sg && sg->sg_ncs_spec == false) {
>> + if (sg->headless_validation == false) {
>> + continue;
>> + }
>> // Read sg fsm state
>> rc =
>> immutil_getAttr(const_cast<SaImmAttrNameT>("osafAmfSGFsmState"),
>> attributes, 0, &imm_sg_fsm_state);
>> @@ -2159,6 +2163,37 @@ done:
>> TRACE_LEAVE();
>> }
>>
>> +/**
>> + * @brief Validate all cached RTAs read from IMM after headless.
>> + This validation is necessary. If AMFD doesn't have this
>> + validation routine and the cached RTAs are invalid,
>> + that would lead into *unpredictably* wrong states, which
>> + is hard to debug (harder if no trace)
>> + * @param Control block (AVD_CL_CB).
>> + * @Return true if valid, false otherwise.
>> +*/
>> +bool avd_sg_validate_headless_cached_rta(AVD_CL_CB *cb) {
>> + TRACE_ENTER();
>> + bool valid = true;
>> + for (std::map<std::string, AVD_SG*>::const_iterator it =
>> sg_db->begin();
>> + it != sg_db->end(); it++) {
>> + AVD_SG *i_sg = it->second;
>> + if (i_sg->sg_ncs_spec == true) {
>> + continue;
>> + }
>> +
>> + if (i_sg->headless_validation == false) {
>> + //TODO: AMFD should make all SUs of this SG faulty to
>> remove
>> + //all assignments, clean up IMM headless cached RTA.
>> + //Just assert for now
>> + //osafassert(false);
>> + valid = false;
>> + }
>> + }
>> + TRACE_LEAVE2("%u", valid);
>> + return valid;
>> +}
>> +
>> void AVD_SG::failover_absent_assignment() {
>>
>> TRACE_ENTER2("SG:'%s'", Amf::to_string(&name).c_str());
>> diff --git a/osaf/services/saf/amf/amfd/siass.cc
>> b/osaf/services/saf/amf/amfd/siass.cc
>> --- a/osaf/services/saf/amf/amfd/siass.cc
>> +++ b/osaf/services/saf/amf/amfd/siass.cc
>> @@ -214,11 +214,17 @@ void avd_susi_read_headless_cached_rta(A
>> susi = avd_su_susi_find(cb, su, &si->name);
>> rc = immutil_getAttr("osafAmfSISUFsmState", attributes, 0,
>> &imm_susi_fsm);
>> osafassert(rc == SA_AIS_OK);
>> + rc = immutil_getAttr("saAmfSISUHAState", attributes, 0,
>> &imm_ha_state);
>> + osafassert(rc == SA_AIS_OK);
>>
>> if (susi) { // FOR PRESENT SUSI found in AMFND(s)
>> TRACE("SISU:'%s', old(imm) fsm state: %d, new(sync) fsm
>> state: %d",
>> Amf::to_string(&dn).c_str(), imm_susi_fsm, susi->fsm);
>>
>> + if (avd_susi_validate_headless_cached_rta(susi,
>> imm_ha_state,
>> + imm_susi_fsm) == false) {
>> + continue;
>> + }
>> #if 1
>> // If remove the below line in this #if block, AMFD will
>> use
>> // the synced fsm state, which is latest. That means, in
>> @@ -255,8 +261,6 @@ void avd_susi_read_headless_cached_rta(A
>>
>> } else { // For ABSENT SUSI
>> if (su->sg_of_su->sg_ncs_spec == false) {
>> - rc = immutil_getAttr("saAmfSISUHAState", attributes,
>> 0, &imm_ha_state);
>> - osafassert(rc == SA_AIS_OK);
>> TRACE("Absent SUSI, ha_state:'%u', fsm_state:'%u'",
>> imm_ha_state, imm_susi_fsm);
>> if (imm_susi_fsm != AVD_SU_SI_STATE_UNASGN) {
>> absent_susi = avd_susi_create(avd_cb, si, su,
>> imm_ha_state, false, AVSV_SUSI_ACT_BASE);
>> @@ -288,6 +292,57 @@ void avd_susi_read_headless_cached_rta(A
>> done:
>> TRACE_LEAVE();
>> }
>> +/**
>> + * Validate cached RTA read from IMM
>> + * @param present_susi
>> + * @param ha_fr_imm: Ha state of @present_susi read from IMM
>> + * @param fsm_fr_imm: Fsm state of @present susi read from IMM
>> + * @return: true of valid, false otherwise
>> + */
>> +bool avd_susi_validate_headless_cached_rta(AVD_SU_SI_REL *present_susi,
>> + SaAmfHAStateT ha_fr_imm, AVD_SU_SI_STATE fsm_fr_imm) {
>> + std::string dn = Amf::to_string(&present_susi->si->name) + "," +
>> + Amf::to_string(&present_susi->su->name);
>> + TRACE_ENTER2("SISU:'%s'", dn.c_str());
>> + bool valid = true;
>> + // rule 1: valid ha state
>> + if (ha_fr_imm != present_susi->state) {
>> + if (ha_fr_imm == SA_AMF_HA_QUIESCING ||
>> + ha_fr_imm == SA_AMF_HA_QUIESCED) {
>> + // That's fine
>> + ;
>> + } else {
>> + LOG_ER("SISU:'%s', old(imm) ha state: %d, new(sync) ha
>> state: %d",
>> + dn.c_str(), ha_fr_imm, present_susi->state);
>> + valid = false;
>> + goto done;
>> + }
>> + }
>> + // rule 2: if ha_fr_imm is QUIESCING, one of relevant entities must
>> + // have adminState is SHUTTINGDOWN
>> + if (ha_fr_imm == SA_AMF_HA_QUIESCING) {
>> + if (present_susi->su->saAmfSUAdminState ==
>> SA_AMF_ADMIN_SHUTTING_DOWN ||
>> + present_susi->si->saAmfSIAdminState ==
>> SA_AMF_ADMIN_SHUTTING_DOWN ||
>> + present_susi->su->sg_of_su->saAmfSGAdminState ==
>> SA_AMF_ADMIN_SHUTTING_DOWN ||
>> + present_susi->su->su_on_node->saAmfNodeAdminState ==
>> SA_AMF_ADMIN_SHUTTING_DOWN) {
>> + // That's fine
>> + ;
>> + } else {
>> + LOG_ER("SISU:'%s', ha:'%u', but one of [node/sg/su/si]
>> is not in SHUTTING_DOWN",
>> + dn.c_str(), ha_fr_imm);
>> + valid = false;
>> + goto done;
>> + }
>> + }
>> + // TODO: more rules to be added when issue is found in reality
>> due to writing
>> + // cached RTA to IMM
>> +done:
>> + if (valid == false)
>> + present_susi->su->sg_of_su->headless_validation = valid;
>> +
>> + TRACE_LEAVE2("%u, %u", valid,
>> present_susi->su->sg_of_su->headless_validation);
>> + return present_susi->su->sg_of_su->headless_validation;
>> +}
>>
>> /*****************************************************************************
>>
>>
>> * Function: avd_susi_create
>> *
>> diff --git a/osaf/services/saf/amf/amfd/su.cc
>> b/osaf/services/saf/amf/amfd/su.cc
>> --- a/osaf/services/saf/amf/amfd/su.cc
>> +++ b/osaf/services/saf/amf/amfd/su.cc
>> @@ -1964,6 +1964,18 @@ void avd_su_read_headless_cached_rta(AVD
>> rc =
>> immutil_getAttr(const_cast<SaImmAttrNameT>("osafAmfSUSwitch"),
>> attributes, 0, &su_toggle);
>> osafassert(rc == SA_AIS_OK);
>> + if (su_toggle == AVSV_SI_TOGGLE_SWITCH) {
>> + // 2N, if toggle but no pending assignment -> bad state
>> + if (su->sg_of_su->sg_redundancy_model ==
>> SA_AMF_2N_REDUNDANCY_MODEL &&
>> + su->sg_of_su->any_assignment_in_progress() == false){
>> + LOG_ER("SG'%s', osafAmfSUSwitch:'%u', but no
>> pending assignment",
>> + Amf::to_string(&su->sg_of_su->name).c_str(),
>> + su_toggle);
>> + su->sg_of_su->headless_validation = false;
>> + }
>> + if (su->sg_of_su->headless_validation == false)
>> + continue;
>> + }
>> su->set_su_switch(su_toggle, false);
>> }
>> }
>>
>
------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel