Hi Nagu, This patch is not a full solution for the loss of RTA updates; it only addresses the problem reported in #2210, so I only tested the scenario described in that ticket. It's probably hard to see the loss by just following the test scenario. To simulate the loss, I had to change AVD_SI::set_admin_state in the AMFD code so that it does not update the Admin State. The effect of the fix can be seen in syslog, something like this:
2017-02-17 09:42:24 SC-1 osafamfd[474]: NO Enter restore headless cached RTAs from IMM 2017-02-17 09:42:24 SC-1 osafamfd[474]: WA SISU:'safSi=AmfDemoTwon,safApp=AmfDemoTwon,safSu=SU4,safSg=AmfDemoTwon,safApp=AmfDemoTwon', ha:'3', but one of [node/sg/su/si] is not in LOCKED 2017-02-17 09:42:24 SC-1 osafamfd[474]: NO Leave reading headless cached RTAs from IMM: SUCCESS 2017-02-17 09:42:24 SC-1 osafamfd[474]: NO Node 'SC-1' joined the cluster I will update "Testing commands:" for next times Thanks, Minh On 16/02/17 19:28, Nagendra Kumar wrote: > Ack(Not tested). Please do mention the test cases in " Testing Commands:" > > Thanks > -Nagu > >> -----Original Message----- >> From: Minh Hon Chau [mailto:[email protected]] >> Sent: 07 February 2017 08:56 >> To: [email protected]; Nagendra Kumar; Praveen Malviya; >> [email protected]; [email protected]; >> [email protected] >> Cc: [email protected] >> Subject: [PATCH 1 of 1] amfd: Recover the loss of update Admin State after >> headless [#2210] >> >> src/amf/amfd/siass.cc | 53 >> ++++++++++++++++++++++++++++++++++++++++---------- >> 1 files changed, 42 insertions(+), 11 deletions(-) >> >> >> Both controllers go down at the time AMFD is about update Admin State, >> after headless the admin state read from IMM conflicts with the existing >> assignment. In ticket #2210, saAmfSIAdminState is UNLOCKED while the >> SUSI assignment is QUIESCED. >> >> The patch helps to recover the admin state based on the HA State of >> existing assignment. The loss of update RTA is still open issue for >> many other attributes that are possibly missed to update IMM at the time >> cluster going headless. >> >> diff --git a/src/amf/amfd/siass.cc b/src/amf/amfd/siass.cc >> --- a/src/amf/amfd/siass.cc >> +++ b/src/amf/amfd/siass.cc >> @@ -1,6 +1,7 @@ >> /* -*- OpenSAF -*- >> * >> * (C) Copyright 2008 The OpenSAF Foundation >> + * (C) Copyright 2017 Ericsson AB - All Rights Reserved. 
>> * >> * This program is distributed in the hope that it will be useful, but >> * WITHOUT ANY WARRANTY; without even the implied warranty of >> MERCHANTABILITY >> @@ -346,19 +347,49 @@ bool avd_susi_validate_headless_cached_r >> } >> } >> // rule 2: if ha_fr_imm is QUIESCING, one of relevant entities must >> - // have adminState is SHUTTINGDOWN >> + // have adminState is SHUTTINGDOWN, otherwise re-adjust if >> possible >> if (ha_fr_imm == SA_AMF_HA_QUIESCING) { >> - if (present_susi->su->saAmfSUAdminState == >> SA_AMF_ADMIN_SHUTTING_DOWN || >> - present_susi->si->saAmfSIAdminState == >> SA_AMF_ADMIN_SHUTTING_DOWN || >> - present_susi->su->sg_of_su->saAmfSGAdminState == >> SA_AMF_ADMIN_SHUTTING_DOWN || >> - present_susi->su->su_on_node- >>> saAmfNodeAdminState == SA_AMF_ADMIN_SHUTTING_DOWN) { >> - // That's fine >> - ; >> - } else { >> - LOG_ER("SISU:'%s', ha:'%u', but one of >> [node/sg/su/si] is not in SHUTTING_DOWN", >> + if (present_susi->su->saAmfSUAdminState != >> SA_AMF_ADMIN_SHUTTING_DOWN && >> + present_susi->si->saAmfSIAdminState != >> SA_AMF_ADMIN_SHUTTING_DOWN && >> + present_susi->su->sg_of_su->saAmfSGAdminState != >> SA_AMF_ADMIN_SHUTTING_DOWN && >> + present_susi->su->su_on_node- >>> saAmfNodeAdminState != SA_AMF_ADMIN_SHUTTING_DOWN) { >> + LOG_WA("SISU:'%s', ha:'%u', but one of >> [node/sg/su/si] is not in SHUTTING_DOWN", >> dn.c_str(), ha_fr_imm); >> - valid = false; >> - goto done; >> + if (present_susi->su->sg_of_su->sg_fsm_state == >> AVD_SG_FSM_SU_OPER) >> + present_susi->su- >>> set_admin_state(SA_AMF_ADMIN_SHUTTING_DOWN); >> + else if (present_susi->su->sg_of_su->sg_fsm_state >> == AVD_SG_FSM_SI_OPER) >> + present_susi->si- >>> set_admin_state(SA_AMF_ADMIN_SHUTTING_DOWN); >> + else if (present_susi->su->sg_of_su->sg_fsm_state >> == AVD_SG_FSM_SG_ADMIN) >> + present_susi->su->sg_of_su- >>> set_admin_state(SA_AMF_ADMIN_SHUTTING_DOWN); >> + else { >> + LOG_ER("Failed to adjust the Admin State of >> [sg/su/si] with sg fsm state:'%u'", >> + 
present_susi->su->sg_of_su- >>> sg_fsm_state); >> + valid = false; >> + goto done; >> + } >> + } >> + } >> + // rule 3: if ha_fr_imm is QUIESCED, one of relevant entities must >> + // have adminState is LOCKED, otherwise re-adjust if possible >> + if (ha_fr_imm == SA_AMF_HA_QUIESCED) { >> + if (present_susi->su->saAmfSUAdminState != >> SA_AMF_ADMIN_LOCKED && >> + present_susi->si->saAmfSIAdminState != >> SA_AMF_ADMIN_LOCKED && >> + present_susi->su->sg_of_su->saAmfSGAdminState != >> SA_AMF_ADMIN_LOCKED && >> + present_susi->su->su_on_node- >>> saAmfNodeAdminState != SA_AMF_ADMIN_LOCKED) { >> + LOG_WA("SISU:'%s', ha:'%u', but one of >> [node/sg/su/si] is not in LOCKED", >> + dn.c_str(), ha_fr_imm); >> + if (present_susi->su->sg_of_su->sg_fsm_state == >> AVD_SG_FSM_SU_OPER) >> + present_susi->su- >>> set_admin_state(SA_AMF_ADMIN_LOCKED); >> + else if (present_susi->su->sg_of_su->sg_fsm_state >> == AVD_SG_FSM_SI_OPER) >> + present_susi->si- >>> set_admin_state(SA_AMF_ADMIN_LOCKED); >> + else if (present_susi->su->sg_of_su->sg_fsm_state >> == AVD_SG_FSM_SG_ADMIN) >> + present_susi->su->sg_of_su- >>> set_admin_state(SA_AMF_ADMIN_LOCKED); >> + else { >> + LOG_ER("Failed to adjust the Admin State of >> [sg/su/si] with sg fsm state:'%u'", >> + present_susi->su->sg_of_su- >>> sg_fsm_state); >> + valid = false; >> + goto done; >> + } >> } >> } >> // TODO: more rules to be added when issue is found in reality due to >> writing ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/opensaf-devel
