Hi Mahesh,

I have few comments, started with [Vu]:

General: some places use tabs, some other places use spaces.

Regards, Vu

> -----Original Message-----
> From: [email protected] [mailto:[email protected]]
> Sent: Tuesday, July 26, 2016 3:43 PM
> To: [email protected]; [email protected]
> Cc: [email protected]
> Subject: [PATCH 1 of 3] lga: agent Cluster Membership (CLM) integration
> [#1638] V1
> 
>  osaf/libs/agents/saf/lga/lga.h             |   2 +
>  osaf/libs/agents/saf/lga/lga_api.c         |  37 ++++++++++++++
>  osaf/libs/agents/saf/lga/lga_mds.c         |  75
> ++++++++++++++++++++++++++++++
>  osaf/libs/agents/saf/lga/lga_util.c        |   2 +
>  osaf/libs/common/logsv/include/lgsv_defs.h |   6 ++-
>  osaf/libs/common/logsv/include/lgsv_msg.h  |  17 +++++-
>  6 files changed, 134 insertions(+), 5 deletions(-)
> 
> 
> Description:
> 
> Form CLM integration is supported from Log Service A.02.02.
> 
> At-least a A.02.02 LGA client will check CLM membership status of client's
> node.
>  old LGA clients A.02.01 are always clm member.
> 
> This patch enhanced the log service for Unavailability of the Log Service
API
> on a
> Non-Member Node which will fail with SA_AIS_ERR_UNAVAILABLE.
> 
> After this patch the Log Service does not provide service to processes on
> cluster nodes that are not
> in the cluster membership.
> 
> If the node rejoins the cluster membership, processes executing on the
node
> will be
> able to reinitialize new library handles and use the entire set of Log
Service
> APIs that
> operate on these new handles; however, invocation of APIs that operate on
> handles
> acquired by any process before the node left the membership will continue
> to fail with
> SA_AIS_ERR_UNAVAILABLE (or with the special treatment described above
> for
> asynchronous calls) with the exception of saLogFinalize(), which is used
to
> free the
> library handles and all resources associated with these handles. Hence, it
is
> recommended
> for the processes to finalize the library handles as soon as the processes
> detect that the node left the membership.
> 
> Detailed README will be provide soon.
> 
> Following are expected Log Service API behavior :
> 
> Case1: On Non-Member Node,  Log Service API will fail with code
> SA_AIS_ERR_UNAVAILABLE (31)
> Case2: On Member Node after recovered from Non-Member Node, Log
> Service API will fail with code SA_AIS_ERR_UNAVAILABLE (31)
> Case3: Non-Member Node + (Headless) Log Service API will fail with code
> SA_AIS_ERR_UNAVAILABLE (31)
> Case4: On Non-Member Node + (Headless) + (Head Joined) Log Service API
> will fail with code SA_AIS_ERR_UNAVAILABLE (31)
> 
> diff --git a/osaf/libs/agents/saf/lga/lga.h
b/osaf/libs/agents/saf/lga/lga.h
> --- a/osaf/libs/agents/saf/lga/lga.h
> +++ b/osaf/libs/agents/saf/lga/lga.h
> @@ -77,6 +77,7 @@ typedef struct lga_client_hdl_rec {
>                                    * Set when client is initialized an all
>                                    * streams are recovered
>                                    */
> +     bool is_stale_client;  /* Status of client based on the CLM status
of
> node.*/
>  } lga_client_hdl_rec_t;
> 
>  /* States of the server */
> @@ -118,6 +119,7 @@ typedef struct {
>       /* LGS LGA sync params */
>       int lgs_sync_awaited;
>       NCS_SEL_OBJ lgs_sync_sel;
> +     SaClmClusterChangesT clm_node_state; /*Reflects CLM status of
> this node(for future use).*/
>  } lga_cb_t;
> 
>  /* lga_saf_api.c */
> diff --git a/osaf/libs/agents/saf/lga/lga_api.c
> b/osaf/libs/agents/saf/lga/lga_api.c
> --- a/osaf/libs/agents/saf/lga/lga_api.c
> +++ b/osaf/libs/agents/saf/lga/lga_api.c
> @@ -325,6 +325,14 @@ SaAisErrorT saLogSelectionObjectGet(SaLo
>               goto done;
>       }
> 
> +     /*Check CLM membership of node.*/
> +        if (hdl_rec->is_stale_client == true) {
> +             TRACE("Node not CLM member or stale client");
> +             ncshm_give_hdl(logHandle);
> +             rc = SA_AIS_ERR_UNAVAILABLE;
> +                goto done;
> +        }
> +
>       /* Obtain the selection object from the IPC queue */
>       sel_obj = m_NCS_IPC_GET_SEL_OBJ(&hdl_rec->mbx);
> 
> @@ -381,6 +389,14 @@ SaAisErrorT saLogDispatch(SaLogHandleT l
>               goto done;
>       }
> 
> +     /*Check CLM membership of node.*/
> +         if (hdl_rec->is_stale_client == true) {
> +                 TRACE("Node not CLM member or stale client");
> +                 ncshm_give_hdl(logHandle);
> +                 rc = SA_AIS_ERR_UNAVAILABLE;
> +                 goto done;
> +        }
> +
>       if ((rc = lga_hdl_cbk_dispatch(&lga_cb, hdl_rec, dispatchFlags)) !=
> SA_AIS_OK)
>               TRACE("LGA_DISPATCH_FAILURE");
> 
> @@ -776,6 +792,13 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl
>               ais_rc = SA_AIS_ERR_BAD_HANDLE;
>               goto done;
>       }
> +
> +     /*Check CLM membership of node.*/
> +        if (hdl_rec->is_stale_client == true) {
> +             TRACE("%s Node not CLM member or stale client",
> __FUNCTION__);
> +             ais_rc = SA_AIS_ERR_UNAVAILABLE;
> +             goto done_give_hdl;
> +        }
> 
>       /***
>        * Handle states
> @@ -1202,6 +1225,13 @@ SaAisErrorT saLogWriteLogAsync(SaLogStre
>               goto done_give_hdl_stream;
>       }
> 
> +     /*Check CLM membership of node.*/
> +     if (hdl_rec->is_stale_client == true) {
> +             TRACE("%s Node not CLM member or stale client",
> __FUNCTION__);
> +             ais_rc = SA_AIS_ERR_UNAVAILABLE;
> +             goto done_give_hdl_all;
> +     }
[Vu] The variable `is_state_client` is set in the MDS thread and referred to
in LOG API calling thread. So, seems there is an timing issue here.
E.g: 
saLogWriteLogAsync() passes above CLM membership check but after that, right
before sending message to MDS layer, 
the process receive `node leave` event. If this is the case,
saLogWriteLogAsync() returns OK.

> +
>       if ((hdl_rec->reg_cbk.saLogWriteLogCallback == NULL) && (ackFlags
> == SA_LOG_RECORD_WRITE_ACK)) {
>               TRACE("%s: Write Callback not registered", __FUNCTION__);
>               ais_rc = SA_AIS_ERR_INIT;
> @@ -1350,6 +1380,13 @@ SaAisErrorT saLogStreamClose(SaLogStream
>               goto done_give_hdl_stream;
>       }
> 
> +     /*Check CLM membership of node.*/
> +         if (hdl_rec->is_stale_client == true) {
> +                 TRACE("Node not CLM member or stale client");
> +                 ais_rc = SA_AIS_ERR_UNAVAILABLE;
> +                 goto rmv_stream;
> +        }
> +
>       if (is_lga_state(LGA_NO_SERVER)) {
>               /* No server is available. Remove the stream from client
> database.
>                * Server side will manage to release resources of this
stream
> when up.
> diff --git a/osaf/libs/agents/saf/lga/lga_mds.c
> b/osaf/libs/agents/saf/lga/lga_mds.c
> --- a/osaf/libs/agents/saf/lga/lga_mds.c
> +++ b/osaf/libs/agents/saf/lga/lga_mds.c
> @@ -476,6 +476,47 @@ static uint32_t lga_lgs_msg_proc(lga_cb_
>                       }
>                       break;
> 
> +             case LGSV_CLM_NODE_STATUS_CALLBACK:
> +                     {
> +                             lga_client_hdl_rec_t *lga_hdl_rec;
> +
> +
>       TRACE_2("LGSV_CLM_NODE_STATUS_CALLBACK
> clm_node_status:%d",
> +                                             lgsv_msg-
> >info.cbk_info.clm_node_status_cbk.clm_node_status);
> +
> +                             /** Create the chan hdl record here before
> +                              ** queing this message onto the priority
> queue
> +                              ** so that the dispatch by the application
to
> fetch
> +                              ** the callback is instantaneous.
> +                              **/
[Vu] These comments is not relevant to below code. Should remove it.
> +
> +                             /** Lookup the hdl rec by client_id  **/
> +                             if (NULL == (lga_hdl_rec =
> +
>       lga_find_hdl_rec_by_regid(cb, lgsv_msg-
> >info.cbk_info.lgs_client_id))) {
> +                                     TRACE("regid not found");
> +                                     lga_msg_destroy(lgsv_msg);
> +                                     TRACE_LEAVE();
> +                                     return NCSCC_RC_FAILURE;
> +                             }
[Vu] Node leave is an event at node level. I don't think above check is
necessary or at least not returning ERR, but keep going to next check.
And as the event is at node level, consider to put `clm_node_state` and `
is_stale_client` to an global data type.

> +                                cb->clm_node_state = lgsv_msg-
> >info.cbk_info.clm_node_status_cbk.clm_node_status;
> +                                //A client becomes stale if Node loses
CLM Membership.
> +                                if (cb->clm_node_state !=
SA_CLM_NODE_JOINED) {
> +                                     /*If the node rejoins the cluster
> membership, processes executing on the node will be
> +                                       able to reinitialize new library
> handles and use the entire set of Log Service APIs that
> +                                       operate on these new handles;
> however, invocation of APIs that operate on handles
> +                                       acquired by any process before the
> node left the membership will continue to fail with
> +                                       SA_AIS_ERR_UNAVAILABLE (or with
> the special treatment described above for
> +                                       asynchronous calls) with the
> exception of saLogFinalize(), which is used to free the
> +                                       library handles and all resources
> associated with these handles. Hence, it is recommended
> +                                       for the processes to finalize the
> library handles as soon as the processes
> +                                       detect that the node left the
> membership.*/
> +                                     lga_hdl_rec->is_stale_client = true;
> +                                        TRACE("CLM_NODE callback
is_stale_client: %d
> clm_node_state: %d",
> +
lga_hdl_rec->is_stale_client, cb-
> >clm_node_state);
> +                                }
> +                             lga_msg_destroy(lgsv_msg);
> +                     }
> +                        break;
> +
>               default:
>                       TRACE("unknown type %d", lgsv_msg-
> >info.cbk_info.type);
>                       lga_msg_destroy(lgsv_msg);
> @@ -820,6 +861,35 @@ static uint32_t lga_dec_write_cbk_msg(NC
>  }
> 
> 
> /*************************************************************
> ***************
> +  Name          : lga_dec_clm_node_status_cbk_msg
> +
> +  Description   : This routine decodes  message
> +
> +  Arguments     : NCS_UBAID *msg,
> +                  LGSV_MSG *msg
> +
> +  Return Values : uint32_t
> +
> +  Notes         : None.
> +*************************************************************
> *****************/
> +static uint32_t lga_dec_clm_node_status_cbk_msg(NCS_UBAID *uba,
> lgsv_msg_t *msg)
> +{
> +     uint8_t *p8;
> +     uint32_t total_bytes = 0;
> +     logsv_lga_clm_status_cbk_t *param = &msg-
> >info.cbk_info.clm_node_status_cbk;
> +     uint8_t local_data[100];
> +
> +     osafassert(uba != NULL);
> +
> +     p8 = ncs_dec_flatten_space(uba, local_data, 4);
> +        param->clm_node_status = ncs_decode_32bit(&p8);
> +        ncs_dec_skip_space(uba, 4);
> +        total_bytes += 4;
> +
> +        return total_bytes;
> +}
> +
> +/************************************************************
> ****************
>    Name          : lga_dec_lstr_open_sync_rsp_msg
> 
>    Description   : This routine decodes a log stream open sync response
> message
> @@ -936,6 +1006,11 @@ static uint32_t lga_mds_dec(struct ncsmd
>                                       msg->info.cbk_info.lgs_client_id);
>                               total_bytes += lga_dec_write_cbk_msg(uba,
> msg);
>                               break;
> +                     case LGSV_CLM_NODE_STATUS_CALLBACK:
> +                             TRACE_2("decode clm node status message,
> lgs_client_id=%d",
> +                                             msg-
> >info.cbk_info.lgs_client_id);
> +                             total_bytes +=
> lga_dec_clm_node_status_cbk_msg(uba, msg);
> +                                break;
>                       default:
>                               TRACE_2("Unknown callback type = %d!",
> msg->info.cbk_info.type);
>                               break;
> diff --git a/osaf/libs/agents/saf/lga/lga_util.c
> b/osaf/libs/agents/saf/lga/lga_util.c
> --- a/osaf/libs/agents/saf/lga/lga_util.c
> +++ b/osaf/libs/agents/saf/lga/lga_util.c
> @@ -53,6 +53,7 @@ static unsigned int lga_create(void)
> 
>       osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>       lga_cb.lgs_sync_awaited = 0;
> +     lga_cb.clm_node_state = SA_CLM_NODE_JOINED;
>       osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
> 
>       /* No longer needed */
> @@ -702,6 +703,7 @@ lga_client_hdl_rec_t *lga_hdl_rec_add(lg
>       **/
>       rec->lgs_client_id = client_id;
> 
> +     rec->is_stale_client = false;
>       /***
>        * Initiate the recovery flags
>        * The setting means that the client is initialized and that there
is
> diff --git a/osaf/libs/common/logsv/include/lgsv_defs.h
> b/osaf/libs/common/logsv/include/lgsv_defs.h
> --- a/osaf/libs/common/logsv/include/lgsv_defs.h
> +++ b/osaf/libs/common/logsv/include/lgsv_defs.h
> @@ -20,7 +20,11 @@
> 
>  #define LOG_RELEASE_CODE 'A'
>  #define LOG_MAJOR_VERSION 2
> -#define LOG_MINOR_VERSION 1
> +#define LOG_MINOR_VERSION 2
> +
> +#define LOG_RELEASE_CODE_0 'A'
> +#define LOG_MAJOR_VERSION_0 2
> +#define LOG_MINOR_VERSION_0 1
[Vu] These three macros are only used in ` is_client_clm_member()`. Consider
to remove them, but use internal variable SaVersionT instead.
> 
>  // Waiting time in library for sync send, unit 10ms
>  #define LGS_WAIT_TIME 1000
> diff --git a/osaf/libs/common/logsv/include/lgsv_msg.h
> b/osaf/libs/common/logsv/include/lgsv_msg.h
> --- a/osaf/libs/common/logsv/include/lgsv_msg.h
> +++ b/osaf/libs/common/logsv/include/lgsv_msg.h
> @@ -45,6 +45,7 @@ typedef enum {
>  /* LGSV Callback enums */
>  typedef enum {
>       LGSV_WRITE_LOG_CALLBACK_IND = 0,
> +     LGSV_CLM_NODE_STATUS_CALLBACK = 1,
>       LGSV_LGS_CBK_MAX
>  } lgsv_cbk_msg_type_t;
> 
> @@ -121,12 +122,20 @@ typedef struct {
>       SaAisErrorT error;
>  } lgsv_write_log_callback_ind_t;
> 
> -/* wrapper structure for all the callbacks */
> +/*CLM node status callback structure for LGS*/
> +typedef struct logsv_loga_clm_status_param_tag {
> +     uint32_t clm_node_status;
> +} logsv_lga_clm_status_cbk_t;
> +
> +
>  typedef struct {
> -     lgsv_cbk_msg_type_t type;       /* callback type */
> -     uint32_t lgs_client_id; /* lgs client_id */
> -     SaInvocationT inv;      /* invocation value */
> +     lgsv_cbk_msg_type_t type;       /* callback type */
> +     uint32_t lgs_client_id; /* lgs client_id */
> +     SaInvocationT inv;      /* invocation value */
> +     /*      union {*/
>       lgsv_write_log_callback_ind_t write_cbk;
> +     logsv_lga_clm_status_cbk_t clm_node_status_cbk;
> +     /*      } param; */
[Vu] Remove 02 above comments.
>  } lgsv_cbk_info_t;
> 
>  /* API Response parameter definitions */


------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to