Hi

My comments [Lennart]

I will continue reviewing patch 2

Thanks
Lennart

> -----Original Message-----
> From: mahesh.va...@oracle.com [mailto:mahesh.va...@oracle.com]
> Sent: den 2 augusti 2016 07:18
> To: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au>; Lennart Lund
> <lennart.l...@ericsson.com>; Anders Widell <anders.wid...@ericsson.com>
> Cc: opensaf-devel@lists.sourceforge.net
> Subject: [PATCH 1 of 3] lga: agent Cluster Membership (CLM) integration
> [#1638] V2
> 
>  osaf/libs/agents/saf/lga/lga.h             |   2 +
>  osaf/libs/agents/saf/lga/lga_api.c         |  47 ++++++++++++++++++++
>  osaf/libs/agents/saf/lga/lga_mds.c         |  69
> ++++++++++++++++++++++++++++++
>  osaf/libs/agents/saf/lga/lga_util.c        |   2 +
>  osaf/libs/common/logsv/include/lgsv_defs.h |   6 ++-
>  osaf/libs/common/logsv/include/lgsv_msg.h  |  17 +++++-
>  6 files changed, 138 insertions(+), 5 deletions(-)
> 
> 
> V2 patch:
> 
> Incorporated Anders Widell  and   Vu    review comments ,
> for more details see the review comments posted on V1 patch.
> Description:
> 
> Form CLM integration is supported from Log Service A.02.02.
> 
> At-least a A.02.02 LGA client will check CLM membership status of client's
> node.
>  old LGA clients A.02.01 are always clm member.
> 
> This patch enhanced the log service for Unavailability of the Log Service API
> on a
> Non-Member Node which will fail with SA_AIS_ERR_UNAVAILABLE.
> 
> After this patch the Log Service does not provide service to processes on
> cluster nodes that are not
> in the cluster membership.
> 
> If the node rejoins the cluster membership, processes executing on the
> node will be
> able to reinitialize new library handles and use the entire set of Log Service
> APIs that
> operate on these new handles; however, invocation of APIs that operate on
> handles
> acquired by any process before the node left the membership will continue
> to fail with
> SA_AIS_ERR_UNAVAILABLE (or with the special treatment described above
> for
> asynchronous calls) with the exception of saLogFinalize(), which is used to
> free the
> library handles and all resources associated with these handles. Hence, it is
> recommended
> for the processes to finalize the library handles as soon as the processes
> detect that the node left the membership.
> 
> Detailed README will be provide soon.
> 
> Following are expected Log Service API behavior :
> 
> Case1: On Non-Member Node,  Log Service API will fail with code
> SA_AIS_ERR_UNAVAILABLE (31)
> Case2: On Member Node after recovered from Non-Member Node, Log
> Service API will fail with code SA_AIS_ERR_UNAVAILABLE (31)
> Case3: Non-Member Node + (Headless) Log Service API will fail with code
> SA_AIS_ERR_UNAVAILABLE (31)
> Case4: On Non-Member Node + (Headless) + (Head Joined) Log Service API
> will fail with code SA_AIS_ERR_UNAVAILABLE (31)

[Lennart] I assume you will move this info about the implementation/feature to 
the README file
> 
> diff --git a/osaf/libs/agents/saf/lga/lga.h b/osaf/libs/agents/saf/lga/lga.h
> --- a/osaf/libs/agents/saf/lga/lga.h
> +++ b/osaf/libs/agents/saf/lga/lga.h
> @@ -77,6 +77,7 @@ typedef struct lga_client_hdl_rec {
>                                    * Set when client is initialized an all
>                                    * streams are recovered
>                                    */
> +     bool is_stale_client;  /* Status of client based on the CLM status of
> node.*/
>  } lga_client_hdl_rec_t;
> 
>  /* States of the server */
> @@ -118,6 +119,7 @@ typedef struct {
>       /* LGS LGA sync params */
>       int lgs_sync_awaited;
>       NCS_SEL_OBJ lgs_sync_sel;
> +     SaClmClusterChangesT clm_node_state; /*Reflects CLM status of
> this node(for future use).*/

[Lennart] This type is defined in saClm.h but saClm.h is included indirectly 
via ncsencdec_pub.h, saClm.h should be included here directly

>  } lga_cb_t;
> 
>  /* lga_saf_api.c */
> diff --git a/osaf/libs/agents/saf/lga/lga_api.c
> b/osaf/libs/agents/saf/lga/lga_api.c
> --- a/osaf/libs/agents/saf/lga/lga_api.c
> +++ b/osaf/libs/agents/saf/lga/lga_api.c
> @@ -324,6 +324,16 @@ SaAisErrorT saLogSelectionObjectGet(SaLo
>               rc = SA_AIS_ERR_BAD_HANDLE;
>               goto done;
>       }
> +
> +     osaf_mutex_lock_ordie(&lga_cb.cb_lock);
> +     /*Check CLM membership of node.*/
> +     if (hdl_rec->is_stale_client == true) {
> +             TRACE("Node not CLM member or stale client");
> +             ncshm_give_hdl(logHandle);
> +             rc = SA_AIS_ERR_UNAVAILABLE;
> +             goto done;
> +     }
> +     osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
> 
>       /* Obtain the selection object from the IPC queue */
>       sel_obj = m_NCS_IPC_GET_SEL_OBJ(&hdl_rec->mbx);
> @@ -381,6 +391,16 @@ SaAisErrorT saLogDispatch(SaLogHandleT l
>               goto done;
>       }
> 
> +     osaf_mutex_lock_ordie(&lga_cb.cb_lock);
> +     /*Check CLM membership of node.*/
> +     if (hdl_rec->is_stale_client == true) {
> +             TRACE("Node not CLM member or stale client");
> +             ncshm_give_hdl(logHandle);
> +             rc = SA_AIS_ERR_UNAVAILABLE;
> +             goto done;
> +     }
> +     osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
[Lennart] Is it nessecary to lock this whole section or is it only 
is_stale_client that has to be protected?
If "goto done;" is executed the mutex will not be unlocked! Also see comment 
for lgs_mdc.c (mutex not used).
This applies to all API:s checking is_stale_client
> +
>       if ((rc = lga_hdl_cbk_dispatch(&lga_cb, hdl_rec, dispatchFlags)) !=
> SA_AIS_OK)
>               TRACE("LGA_DISPATCH_FAILURE");
> 
> @@ -777,6 +797,15 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl
>               goto done;
>       }
> 
> +     osaf_mutex_lock_ordie(&lga_cb.cb_lock);
> +     /*Check CLM membership of node.*/
> +     if (hdl_rec->is_stale_client == true) {
> +             TRACE("%s Node not CLM member or stale client",
> __FUNCTION__);
> +             ais_rc = SA_AIS_ERR_UNAVAILABLE;
> +             goto done_give_hdl;
> +     }
> +     osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
> +
>       /***
>        * Handle states
>        * Synchronize with mds and recovery thread (mutex)
> @@ -1202,6 +1231,15 @@ SaAisErrorT saLogWriteLogAsync(SaLogStre
>               goto done_give_hdl_stream;
>       }
> 
> +     osaf_mutex_lock_ordie(&lga_cb.cb_lock);
> +     /*Check CLM membership of node.*/
> +     if (hdl_rec->is_stale_client == true) {
> +             TRACE("%s Node not CLM member or stale client",
> __FUNCTION__);
> +             ais_rc = SA_AIS_ERR_UNAVAILABLE;
> +             goto done_give_hdl_all;
> +     }
> +     osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
> +
>       if ((hdl_rec->reg_cbk.saLogWriteLogCallback == NULL) && (ackFlags
> == SA_LOG_RECORD_WRITE_ACK)) {
>               TRACE("%s: Write Callback not registered", __FUNCTION__);
>               ais_rc = SA_AIS_ERR_INIT;
> @@ -1350,6 +1388,15 @@ SaAisErrorT saLogStreamClose(SaLogStream
>               goto done_give_hdl_stream;
>       }
> 
> +     osaf_mutex_lock_ordie(&lga_cb.cb_lock);
> +     /*Check CLM membership of node.*/
> +     if (hdl_rec->is_stale_client == true) {
> +             TRACE("Node not CLM member or stale client");
> +             ais_rc = SA_AIS_ERR_UNAVAILABLE;
> +             goto rmv_stream;
> +     }
> +     osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
> +
>       if (is_lga_state(LGA_NO_SERVER)) {
>               /* No server is available. Remove the stream from client
> database.
>                * Server side will manage to release resources of this stream
> when up.
> diff --git a/osaf/libs/agents/saf/lga/lga_mds.c
> b/osaf/libs/agents/saf/lga/lga_mds.c
> --- a/osaf/libs/agents/saf/lga/lga_mds.c
> +++ b/osaf/libs/agents/saf/lga/lga_mds.c
> @@ -476,6 +476,41 @@ static uint32_t lga_lgs_msg_proc(lga_cb_
>                       }
>                       break;
> 
> +             case LGSV_CLM_NODE_STATUS_CALLBACK:
> +                     {
> +                             lga_client_hdl_rec_t *lga_hdl_rec;
> +
> +
>       TRACE_2("LGSV_CLM_NODE_STATUS_CALLBACK
> clm_node_status:%d",
> +                                             lgsv_msg-
> >info.cbk_info.clm_node_status_cbk.clm_node_status);
> +
> +                             /** Lookup the hdl rec by client_id  **/
> +                             if (NULL == (lga_hdl_rec =
> +
>       lga_find_hdl_rec_by_regid(cb, lgsv_msg-
> >info.cbk_info.lgs_client_id))) {
> +                                     TRACE("regid not found");
> +                                     lga_msg_destroy(lgsv_msg);
> +                                     TRACE_LEAVE();
> +                                     return NCSCC_RC_FAILURE;
> +                             }
> +                             cb->clm_node_state = lgsv_msg-
> >info.cbk_info.clm_node_status_cbk.clm_node_status;
> +                             //A client becomes stale if Node loses CLM
> Membership.
> +                             if (cb->clm_node_state !=
> SA_CLM_NODE_JOINED) {
[Lennart] clm_node_state not protected by mutex. See lga_util.c

> +                                     /*If the node rejoins the cluster
> membership, processes executing on the node will be
> +                                       able to reinitialize new library
> handles and use the entire set of Log Service APIs that
> +                                       operate on these new handles;
> however, invocation of APIs that operate on handles
> +                                       acquired by any process before the
> node left the membership will continue to fail with
> +                                       SA_AIS_ERR_UNAVAILABLE (or with
> the special treatment described above for
> +                                       asynchronous calls) with the
> exception of saLogFinalize(), which is used to free the
> +                                       library handles and all resources
> associated with these handles. Hence, it is recommended
> +                                       for the processes to finalize the
> library handles as soon as the processes
> +                                       detect that the node left the
> membership.*/
> +                                     lga_hdl_rec->is_stale_client = true;

[Lennart] Not protected by mutex. See lga_ais.c

> +                                     TRACE("CLM_NODE callback
> is_stale_client: %d clm_node_state: %d",
> +                                                     lga_hdl_rec-
> >is_stale_client, cb->clm_node_state);
> +                             }
> +                             lga_msg_destroy(lgsv_msg);
> +                     }
> +                     break;
> +
>               default:
>                       TRACE("unknown type %d", lgsv_msg-
> >info.cbk_info.type);
>                       lga_msg_destroy(lgsv_msg);
> @@ -820,6 +855,35 @@ static uint32_t lga_dec_write_cbk_msg(NC
>  }
> 
> 
> /**********************************************************
> ******************
> +Name          : lga_dec_clm_node_status_cbk_msg
> +
> +Description   : This routine decodes  message
> +
> +Arguments     : NCS_UBAID *msg,
> +LGSV_MSG *msg
> +
> +Return Values : uint32_t
> +
> +Notes         : None.
> +
> **********************************************************
> ********************/
> +static uint32_t lga_dec_clm_node_status_cbk_msg(NCS_UBAID *uba,
> lgsv_msg_t *msg)
> +{
> +     uint8_t *p8;
> +     uint32_t total_bytes = 0;
> +     logsv_lga_clm_status_cbk_t *param = &msg-
> >info.cbk_info.clm_node_status_cbk;
> +     uint8_t local_data[100];
> +
> +     osafassert(uba != NULL);
> +
> +     p8 = ncs_dec_flatten_space(uba, local_data, 4);
> +     param->clm_node_status = ncs_decode_32bit(&p8);
> +     ncs_dec_skip_space(uba, 4);
> +     total_bytes += 4;
> +
> +     return total_bytes;
> +}
> +
> +/*********************************************************
> *******************
>    Name          : lga_dec_lstr_open_sync_rsp_msg
> 
>    Description   : This routine decodes a log stream open sync response
> message
> @@ -936,6 +1000,11 @@ static uint32_t lga_mds_dec(struct ncsmd
>                                       msg->info.cbk_info.lgs_client_id);
>                               total_bytes += lga_dec_write_cbk_msg(uba,
> msg);
>                               break;
> +                     case LGSV_CLM_NODE_STATUS_CALLBACK:
> +                             TRACE_2("decode clm node status message,
> lgs_client_id=%d",
> +                                             msg-
> >info.cbk_info.lgs_client_id);
> +                             total_bytes +=
> lga_dec_clm_node_status_cbk_msg(uba, msg);
> +                             break;
>                       default:
>                               TRACE_2("Unknown callback type = %d!",
> msg->info.cbk_info.type);
>                               break;
> diff --git a/osaf/libs/agents/saf/lga/lga_util.c
> b/osaf/libs/agents/saf/lga/lga_util.c
> --- a/osaf/libs/agents/saf/lga/lga_util.c
> +++ b/osaf/libs/agents/saf/lga/lga_util.c
> @@ -53,6 +53,7 @@ static unsigned int lga_create(void)
> 
>       osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>       lga_cb.lgs_sync_awaited = 0;
> +     lga_cb.clm_node_state = SA_CLM_NODE_JOINED;
[Lennart] Protected by mutex here but not in lga_mds.c

>       osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
> 
>       /* No longer needed */
> @@ -702,6 +703,7 @@ lga_client_hdl_rec_t *lga_hdl_rec_add(lg
>       **/
>       rec->lgs_client_id = client_id;
> 
> +     rec->is_stale_client = false;
>       /***
>        * Initiate the recovery flags
>        * The setting means that the client is initialized and that there is
> diff --git a/osaf/libs/common/logsv/include/lgsv_defs.h
> b/osaf/libs/common/logsv/include/lgsv_defs.h
> --- a/osaf/libs/common/logsv/include/lgsv_defs.h
> +++ b/osaf/libs/common/logsv/include/lgsv_defs.h
> @@ -20,7 +20,11 @@
> 
>  #define LOG_RELEASE_CODE 'A'
>  #define LOG_MAJOR_VERSION 2
> -#define LOG_MINOR_VERSION 1
> +#define LOG_MINOR_VERSION 2
> +
> +#define LOG_RELEASE_CODE_0 'A'
> +#define LOG_MAJOR_VERSION_0 2
> +#define LOG_MINOR_VERSION_0 1
> 
>  // Waiting time in library for sync send, unit 10ms
>  #define LGS_WAIT_TIME 1000
> diff --git a/osaf/libs/common/logsv/include/lgsv_msg.h
> b/osaf/libs/common/logsv/include/lgsv_msg.h
> --- a/osaf/libs/common/logsv/include/lgsv_msg.h
> +++ b/osaf/libs/common/logsv/include/lgsv_msg.h
> @@ -45,6 +45,7 @@ typedef enum {
>  /* LGSV Callback enums */
>  typedef enum {
>       LGSV_WRITE_LOG_CALLBACK_IND = 0,
> +     LGSV_CLM_NODE_STATUS_CALLBACK = 1,
>       LGSV_LGS_CBK_MAX
>  } lgsv_cbk_msg_type_t;
> 
> @@ -121,12 +122,20 @@ typedef struct {
>       SaAisErrorT error;
>  } lgsv_write_log_callback_ind_t;
> 
> -/* wrapper structure for all the callbacks */
> +/*CLM node status callback structure for LGS*/
> +typedef struct logsv_loga_clm_status_param_tag {
> +     uint32_t clm_node_status;
> +} logsv_lga_clm_status_cbk_t;
> +
> +/* wrapper structure for all the callbacks */
>  typedef struct {
> -     lgsv_cbk_msg_type_t type;       /* callback type */
> -     uint32_t lgs_client_id; /* lgs client_id */
> -     SaInvocationT inv;      /* invocation value */
> +     lgsv_cbk_msg_type_t type;       /* callback type */
> +     uint32_t lgs_client_id; /* lgs client_id */
> +     SaInvocationT inv;      /* invocation value */
> +     /*      union {*/
>       lgsv_write_log_callback_ind_t write_cbk;
> +     logsv_lga_clm_status_cbk_t clm_node_status_cbk;
> +     /*      } param; */
>  } lgsv_cbk_info_t;
> 
>  /* API Response parameter definitions */

------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to