Hi Nagu, Praveen

This V2 patch has been updated according to your latest comments. The only
difference is that I don't make amfd exit; it logs an error instead.
If the retry also fails and this error is logged, that should be enough to
get our attention so that we can investigate the services internally.
Rebooting the node would cause error reports from applications. Please help
review and let me know if anything in the patch needs to change.

Thanks,
Minh

On 02/12/16 22:33, Minh Hon Chau wrote:
>   osaf/services/saf/amf/amfd/clm.cc       |  35 
> ++++++++++++++++++++++----------
>   osaf/services/saf/amf/amfd/include/cb.h |   1 +
>   osaf/services/saf/amf/amfd/role.cc      |  10 ++++++--
>   3 files changed, 32 insertions(+), 14 deletions(-)
>
>
> V2: Incorporate comments from Praveen and Nagu: send saClmResponse_4
> if a clm cb is received in a non-active amfd, and retry stopping clm
> tracking when the switch over is done. This V2 does not make amfd exit
> if the second attempt fails; it only logs an error, as it should succeed.
>
> During controller failover/switchover, the active AMFD sometimes fails to
> stop the CLM track callback. Therefore, when this AMFD becomes standby, it
> can continue receiving CLM track callbacks and trigger operations which
> should only be executed in the active AMFD.
>
> diff --git a/osaf/services/saf/amf/amfd/clm.cc 
> b/osaf/services/saf/amf/amfd/clm.cc
> --- a/osaf/services/saf/amf/amfd/clm.cc
> +++ b/osaf/services/saf/amf/amfd/clm.cc
> @@ -220,7 +220,11 @@ static void clm_track_cb(const SaClmClus
>               LOG_ER("ClmTrackCallback received in error");
>               goto done;
>       }
> -
> +     if (avd_cb->avail_state_avd != SA_AMF_HA_ACTIVE) {
> +             LOG_WA("Receive clm track cb with AMFD state(%d)", 
> avd_cb->avail_state_avd);
> +             saClmResponse_4(avd_cb->clmHandle, invocation, 
> SA_CLM_CALLBACK_RESPONSE_OK);
> +             goto done;
> +     }
>       /*
>       ** The CLM cluster can be larger than the AMF cluster thus it is not an
>       ** error if the corresponding AMF node cannot be found.
> @@ -395,6 +399,7 @@ SaAisErrorT avd_clm_init(AVD_CL_CB* cb)
>   
>       cb->clmHandle = 0;
>       cb->clm_sel_obj = 0;
> +     cb->is_clm_track_started = false;
>       TRACE_ENTER();
>       /*
>        * TODO: This CLM initialization thread can be re-factored
> @@ -454,6 +459,8 @@ SaAisErrorT avd_clm_track_start(void)
>               } else {
>                       LOG_ER("Failed to start cluster tracking %u", error);
>               }
> +     } else {
> +             avd_cb->is_clm_track_started = true;
>       }
>       TRACE_LEAVE();
>       return error;
> @@ -461,17 +468,23 @@ SaAisErrorT avd_clm_track_start(void)
>   
>   SaAisErrorT avd_clm_track_stop(void)
>   {
> -        SaAisErrorT error = SA_AIS_OK;
> +     SaAisErrorT error = SA_AIS_OK;
> +     TRACE_ENTER();
> +     error = saClmClusterTrackStop(avd_cb->clmHandle);
> +     if (error != SA_AIS_OK) {
> +             if (error == SA_AIS_ERR_TRY_AGAIN || error == 
> SA_AIS_ERR_TIMEOUT ||
> +                             error == SA_AIS_ERR_UNAVAILABLE) {
> +                     LOG_WA("Failed to stop cluster tracking %u", error);
> +             } else {
> +                     LOG_ER("Failed to stop cluster tracking %u", error);
> +             }
> +     } else {
> +             TRACE("Sucessfully stops cluster tracking");
> +             avd_cb->is_clm_track_started = false;
> +     }
>   
> -        TRACE_ENTER();
> -     error = saClmClusterTrackStop(avd_cb->clmHandle);
> -        if (SA_AIS_OK != error)
> -                LOG_ER("Failed to stop cluster tracking %u", error);
> -     else
> -             TRACE("Sucessfully stops cluster tracking");
> -
> -        TRACE_LEAVE();
> -        return error;
> +     TRACE_LEAVE();
> +     return error;
>   }
>   
>   void clm_node_terminate(AVD_AVND *node)
> diff --git a/osaf/services/saf/amf/amfd/include/cb.h 
> b/osaf/services/saf/amf/amfd/include/cb.h
> --- a/osaf/services/saf/amf/amfd/include/cb.h
> +++ b/osaf/services/saf/amf/amfd/include/cb.h
> @@ -215,6 +215,7 @@ typedef struct cl_cb_tag {
>       /* Clm stuff */
>       std::atomic<SaClmHandleT> clmHandle;
>       std::atomic<SaSelectionObjectT> clm_sel_obj;
> +     bool is_clm_track_started;
>   
>       bool fully_initialized;
>       bool swap_switch; /* true - In middle of role switch. */
> diff --git a/osaf/services/saf/amf/amfd/role.cc 
> b/osaf/services/saf/amf/amfd/role.cc
> --- a/osaf/services/saf/amf/amfd/role.cc
> +++ b/osaf/services/saf/amf/amfd/role.cc
> @@ -1055,9 +1055,7 @@ uint32_t amfd_switch_actv_qsd(AVD_CL_CB
>       /*  Mark AVD as Quiesced. */
>       cb->avail_state_avd = SA_AMF_HA_QUIESCED;
>       
> -     if (avd_clm_track_stop() != SA_AIS_OK) {
> -             LOG_ER("ClmTrack stop failed");
> -     }
> +     avd_clm_track_stop();
>   
>       /* Go ahead and set mds role as already the NCS SU has been switched */
>       if (NCSCC_RC_SUCCESS != (rc = avd_mds_set_vdest_role(cb, 
> SA_AMF_HA_QUIESCED))) {
> @@ -1136,6 +1134,12 @@ uint32_t amfd_switch_qsd_stdby(AVD_CL_CB
>               avd_pg_node_csi_del_all(cb, avnd);
>       }
>   
> +     if (cb->is_clm_track_started == true) {
> +             if (avd_clm_track_stop() != SA_AIS_OK) {
> +                     LOG_ER("Failed to stop cluster tracking after switch 
> over");
> +             }
> +     }
> +
>       LOG_NO("Controller switch over done");
>       saflog(LOG_NOTICE, amfSvcUsrName, "Controller switch over done at %x", 
> cb->node_id_avd);
>   
>


------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to