Hi Nagu,

This patch is not a full solution for the loss of RTA updates; it only
addresses the problem reported in #2210, so I only tested the scenario
described in that ticket.
The loss is probably hard to reproduce by just following the test scenario.
To simulate it, I had to change AVD_SI::set_admin_state in the AMFD code so
that it does not update the Admin State.
With the fix, syslog shows something like this:

2017-02-17 09:42:24 SC-1 osafamfd[474]: NO Enter restore headless cached 
RTAs from IMM
2017-02-17 09:42:24 SC-1 osafamfd[474]: WA 
SISU:'safSi=AmfDemoTwon,safApp=AmfDemoTwon,safSu=SU4,safSg=AmfDemoTwon,safApp=AmfDemoTwon',
 
ha:'3', but one of [node/sg/su/si] is not in LOCKED
2017-02-17 09:42:24 SC-1 osafamfd[474]: NO Leave reading headless cached 
RTAs from IMM: SUCCESS
2017-02-17 09:42:24 SC-1 osafamfd[474]: NO Node 'SC-1' joined the cluster

I will fill in "Testing Commands:" in future patches.

Thanks,
Minh

On 16/02/17 19:28, Nagendra Kumar wrote:
> Ack(Not tested). Please do mention the test cases in " Testing Commands:"
>
> Thanks
> -Nagu
>
>> -----Original Message-----
>> From: Minh Hon Chau [mailto:[email protected]]
>> Sent: 07 February 2017 08:56
>> To: [email protected]; Nagendra Kumar; Praveen Malviya;
>> [email protected]; [email protected];
>> [email protected]
>> Cc: [email protected]
>> Subject: [PATCH 1 of 1] amfd: Recover the loss of update Admin State after
>> headless [#2210]
>>
>>   src/amf/amfd/siass.cc |  53
>> ++++++++++++++++++++++++++++++++++++++++----------
>>   1 files changed, 42 insertions(+), 11 deletions(-)
>>
>>
>> Both controllers go down at the time AMFD is about to update the Admin
>> State; after headless, the admin state read from IMM conflicts with the
>> existing assignment. In ticket #2210, saAmfSIAdminState is UNLOCKED while
>> the SUSI assignment is QUIESCED.
>>
>> The patch helps to recover the admin state based on the HA State of the
>> existing assignment. The loss of RTA updates is still an open issue for
>> many other attributes whose updates to IMM may be missed at the time the
>> cluster goes headless.
>>
>> diff --git a/src/amf/amfd/siass.cc b/src/amf/amfd/siass.cc
>> --- a/src/amf/amfd/siass.cc
>> +++ b/src/amf/amfd/siass.cc
>> @@ -1,6 +1,7 @@
>>   /*      -*- OpenSAF  -*-
>>    *
>>    * (C) Copyright 2008 The OpenSAF Foundation
>> + * (C) Copyright 2017 Ericsson AB - All Rights Reserved.
>>    *
>>    * This program is distributed in the hope that it will be useful, but
>>    * WITHOUT ANY WARRANTY; without even the implied warranty of
>> MERCHANTABILITY
>> @@ -346,19 +347,49 @@ bool avd_susi_validate_headless_cached_r
>>              }
>>      }
>>      // rule 2: if ha_fr_imm is QUIESCING, one of relevant entities must
>> -    // have adminState is SHUTTINGDOWN
>> +    // have adminState is SHUTTINGDOWN, otherwise re-adjust if
>> possible
>>      if (ha_fr_imm == SA_AMF_HA_QUIESCING) {
>> -            if (present_susi->su->saAmfSUAdminState ==
>> SA_AMF_ADMIN_SHUTTING_DOWN ||
>> -                    present_susi->si->saAmfSIAdminState ==
>> SA_AMF_ADMIN_SHUTTING_DOWN ||
>> -                    present_susi->su->sg_of_su->saAmfSGAdminState ==
>> SA_AMF_ADMIN_SHUTTING_DOWN ||
>> -                    present_susi->su->su_on_node-
>>> saAmfNodeAdminState == SA_AMF_ADMIN_SHUTTING_DOWN) {
>> -                    // That's fine
>> -                    ;
>> -            } else {
>> -                    LOG_ER("SISU:'%s', ha:'%u', but one of
>> [node/sg/su/si] is not in SHUTTING_DOWN",
>> +            if (present_susi->su->saAmfSUAdminState !=
>> SA_AMF_ADMIN_SHUTTING_DOWN &&
>> +                    present_susi->si->saAmfSIAdminState !=
>> SA_AMF_ADMIN_SHUTTING_DOWN &&
>> +                    present_susi->su->sg_of_su->saAmfSGAdminState !=
>> SA_AMF_ADMIN_SHUTTING_DOWN &&
>> +                    present_susi->su->su_on_node-
>>> saAmfNodeAdminState != SA_AMF_ADMIN_SHUTTING_DOWN) {
>> +                    LOG_WA("SISU:'%s', ha:'%u', but one of
>> [node/sg/su/si] is not in SHUTTING_DOWN",
>>                                      dn.c_str(), ha_fr_imm);
>> -                    valid = false;
>> -                    goto done;
>> +                    if (present_susi->su->sg_of_su->sg_fsm_state ==
>> AVD_SG_FSM_SU_OPER)
>> +                            present_susi->su-
>>> set_admin_state(SA_AMF_ADMIN_SHUTTING_DOWN);
>> +                    else if (present_susi->su->sg_of_su->sg_fsm_state
>> == AVD_SG_FSM_SI_OPER)
>> +                            present_susi->si-
>>> set_admin_state(SA_AMF_ADMIN_SHUTTING_DOWN);
>> +                    else if (present_susi->su->sg_of_su->sg_fsm_state
>> == AVD_SG_FSM_SG_ADMIN)
>> +                            present_susi->su->sg_of_su-
>>> set_admin_state(SA_AMF_ADMIN_SHUTTING_DOWN);
>> +                    else {
>> +                            LOG_ER("Failed to adjust the Admin State of
>> [sg/su/si] with sg fsm state:'%u'",
>> +                                            present_susi->su->sg_of_su-
>>> sg_fsm_state);
>> +                            valid = false;
>> +                            goto done;
>> +                    }
>> +            }
>> +    }
>> +    // rule 3: if ha_fr_imm is QUIESCED, one of relevant entities must
>> +    // have adminState is LOCKED, otherwise re-adjust if possible
>> +    if (ha_fr_imm == SA_AMF_HA_QUIESCED) {
>> +            if (present_susi->su->saAmfSUAdminState !=
>> SA_AMF_ADMIN_LOCKED &&
>> +                    present_susi->si->saAmfSIAdminState !=
>> SA_AMF_ADMIN_LOCKED &&
>> +                    present_susi->su->sg_of_su->saAmfSGAdminState !=
>> SA_AMF_ADMIN_LOCKED &&
>> +                    present_susi->su->su_on_node-
>>> saAmfNodeAdminState != SA_AMF_ADMIN_LOCKED) {
>> +                    LOG_WA("SISU:'%s', ha:'%u', but one of
>> [node/sg/su/si] is not in LOCKED",
>> +                                    dn.c_str(), ha_fr_imm);
>> +                    if (present_susi->su->sg_of_su->sg_fsm_state ==
>> AVD_SG_FSM_SU_OPER)
>> +                            present_susi->su-
>>> set_admin_state(SA_AMF_ADMIN_LOCKED);
>> +                    else if (present_susi->su->sg_of_su->sg_fsm_state
>> == AVD_SG_FSM_SI_OPER)
>> +                            present_susi->si-
>>> set_admin_state(SA_AMF_ADMIN_LOCKED);
>> +                    else if (present_susi->su->sg_of_su->sg_fsm_state
>> == AVD_SG_FSM_SG_ADMIN)
>> +                            present_susi->su->sg_of_su-
>>> set_admin_state(SA_AMF_ADMIN_LOCKED);
>> +                    else {
>> +                            LOG_ER("Failed to adjust the Admin State of
>> [sg/su/si] with sg fsm state:'%u'",
>> +                                            present_susi->su->sg_of_su-
>>> sg_fsm_state);
>> +                            valid = false;
>> +                            goto done;
>> +                    }
>>              }
>>      }
>>      // TODO: more rules to be added when issue is found in reality due to
>> writing


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to