Hi Minh,

We have not seen this issue earlier. Why CLMA is returning timeout where 
it is busy?
If we are allowing Standby controller to continue tracking the Node, 
then I think (though not fully sure), it will increase one more client 
for validation step and this client does not exist locally. Also we need 
to evaluate the situation where Standby controller is CLM locked or 
disabled.

Thanks,
Praveen




On 17-Nov-16 3:44 AM, minh chau wrote:
> Hi all,
>
> Has anyone had chance to review this patch?
>
> Thanks,
> Minh
>
> On 14/11/16 15:27, Minh Hon Chau wrote:
>>   osaf/services/saf/amf/amfd/clm.cc       |  37
>> +++++++++++++++++++++++---------
>>   osaf/services/saf/amf/amfd/include/cb.h |   1 +
>>   osaf/services/saf/amf/amfd/role.cc      |  16 +++++++-------
>>   3 files changed, 35 insertions(+), 19 deletions(-)
>>
>>
>> In controller failover/switchover, sometimes active AMFD fails to stop
>> CLM track callback. Therefore, when this AMFD become standby, AMFD can
>> continue receiving CLM track callback and trigger the operations which
>> should only be executed in active AMFD.
>>
>> diff --git a/osaf/services/saf/amf/amfd/clm.cc
>> b/osaf/services/saf/amf/amfd/clm.cc
>> --- a/osaf/services/saf/amf/amfd/clm.cc
>> +++ b/osaf/services/saf/amf/amfd/clm.cc
>> @@ -219,7 +219,13 @@ static void clm_track_cb(const SaClmClus
>>           LOG_ER("ClmTrackCallback received in error");
>>           goto done;
>>       }
>> -
>> +    if (avd_cb->avail_state_avd != SA_AMF_HA_ACTIVE) {
>> +        if (avd_cb->is_clm_track_started == true) {
>> +            LOG_NO("Retry to stop clm track with AMFD state(%d)",
>> avd_cb->avail_state_avd);
>> +            avd_clm_track_stop();
>> +        }
>> +        goto done;
>> +    }
>>       /*
>>       ** The CLM cluster can be larger than the AMF cluster thus it is
>> not an
>>       ** error if the corresponding AMF node cannot be found.
>> @@ -394,6 +400,7 @@ SaAisErrorT avd_clm_init(AVD_CL_CB* cb)
>>         cb->clmHandle = 0;
>>       cb->clm_sel_obj = 0;
>> +    cb->is_clm_track_started = false;
>>       TRACE_ENTER();
>>       /*
>>        * TODO: This CLM initialization thread can be re-factored
>> @@ -453,6 +460,8 @@ SaAisErrorT avd_clm_track_start(void)
>>           } else {
>>               LOG_ER("Failed to start cluster tracking %u", error);
>>           }
>> +    } else {
>> +        avd_cb->is_clm_track_started = true;
>>       }
>>       TRACE_LEAVE();
>>       return error;
>> @@ -460,17 +469,23 @@ SaAisErrorT avd_clm_track_start(void)
>>     SaAisErrorT avd_clm_track_stop(void)
>>   {
>> -        SaAisErrorT error = SA_AIS_OK;
>> +    SaAisErrorT error = SA_AIS_OK;
>> +    TRACE_ENTER();
>> +    error = saClmClusterTrackStop(avd_cb->clmHandle);
>> +    if (error != SA_AIS_OK) {
>> +        if (error == SA_AIS_ERR_TRY_AGAIN || error ==
>> SA_AIS_ERR_TIMEOUT ||
>> +                error == SA_AIS_ERR_UNAVAILABLE) {
>> +            LOG_WA("Failed to stop cluster tracking %u", error);
>> +        } else {
>> +            LOG_ER("Failed to stop cluster tracking %u", error);
>> +        }
>> +    } else {
>> +        TRACE("Sucessfully stops cluster tracking");
>> +        avd_cb->is_clm_track_started = false;
>> +    }
>>   -        TRACE_ENTER();
>> -    error = saClmClusterTrackStop(avd_cb->clmHandle);
>> -        if (SA_AIS_OK != error)
>> -                LOG_ER("Failed to stop cluster tracking %u", error);
>> -    else
>> -        TRACE("Sucessfully stops cluster tracking");
>> -
>> -        TRACE_LEAVE();
>> -        return error;
>> +    TRACE_LEAVE();
>> +    return error;
>>   }
>>     void clm_node_terminate(AVD_AVND *node)
>> diff --git a/osaf/services/saf/amf/amfd/include/cb.h
>> b/osaf/services/saf/amf/amfd/include/cb.h
>> --- a/osaf/services/saf/amf/amfd/include/cb.h
>> +++ b/osaf/services/saf/amf/amfd/include/cb.h
>> @@ -210,6 +210,7 @@ typedef struct cl_cb_tag {
>>       /* Clm stuff */
>>       std::atomic<SaClmHandleT> clmHandle;
>>       std::atomic<SaSelectionObjectT> clm_sel_obj;
>> +    bool is_clm_track_started;
>>         bool fully_initialized;
>>       bool swap_switch; /* true - In middle of role switch. */
>> diff --git a/osaf/services/saf/amf/amfd/role.cc
>> b/osaf/services/saf/amf/amfd/role.cc
>> --- a/osaf/services/saf/amf/amfd/role.cc
>> +++ b/osaf/services/saf/amf/amfd/role.cc
>> @@ -1050,9 +1050,7 @@ uint32_t amfd_switch_actv_qsd(AVD_CL_CB
>>       /*  Mark AVD as Quiesced. */
>>       cb->avail_state_avd = SA_AMF_HA_QUIESCED;
>>
>> -    if (avd_clm_track_stop() != SA_AIS_OK) {
>> -        LOG_ER("ClmTrack stop failed");
>> -    }
>> +    avd_clm_track_stop();
>>         /* Go ahead and set mds role as already the NCS SU has been
>> switched */
>>       if (NCSCC_RC_SUCCESS != (rc = avd_mds_set_vdest_role(cb,
>> SA_AMF_HA_QUIESCED))) {
>> @@ -1260,11 +1258,13 @@ uint32_t amfd_switch_stdby_actv(AVD_CL_C
>>       if (NCSCC_RC_SUCCESS != avd_rde_set_role(SA_AMF_HA_ACTIVE)) {
>>           LOG_ER("rde role change failed from stdy -> Active");
>>       }
>> -
>> -    if(avd_clm_track_start() != SA_AIS_OK) {
>> -        LOG_ER("Switch Standby --> Active, clm track start failed");
>> -        avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE, SA_AMF_HA_ACTIVE);
>> -        return NCSCC_RC_FAILURE;
>> +    // reuse clm track start
>> +    if (avd_cb->is_clm_track_started == false) {
>> +        if(avd_clm_track_start() != SA_AIS_OK) {
>> +            LOG_ER("Switch Standby --> Active, clm track start failed");
>> +            avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE,
>> SA_AMF_HA_ACTIVE);
>> +            return NCSCC_RC_FAILURE;
>> +        }
>>       }
>>         /* Send the message to other avd for role change rsp as
>> success */
>>
>

------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to