Hi Vu,

I incorporated most of your  comments ,
please find my response with [AVM]   &  code change in  V2 patch ( lga: 
agent Cluster Membership (CLM) integration [#1638] V2)

-AVM

On 7/29/2016 9:20 AM, Vu Minh Nguyen wrote:
> Hi Mahesh,
>
> I have few comments, started with [Vu]:
>
> General: some places use tabs, some other places use spaces.
>
> Regards, Vu
>
>> -----Original Message-----
>> From: [email protected] [mailto:[email protected]]
>> Sent: Tuesday, July 26, 2016 3:43 PM
>> To: [email protected]; [email protected]
>> Cc: [email protected]
>> Subject: [PATCH 1 of 3] lga: agent Cluster Membership (CLM) integration
>> [#1638] V1
>>
>>   osaf/libs/agents/saf/lga/lga.h             |   2 +
>>   osaf/libs/agents/saf/lga/lga_api.c         |  37 ++++++++++++++
>>   osaf/libs/agents/saf/lga/lga_mds.c         |  75
>> ++++++++++++++++++++++++++++++
>>   osaf/libs/agents/saf/lga/lga_util.c        |   2 +
>>   osaf/libs/common/logsv/include/lgsv_defs.h |   6 ++-
>>   osaf/libs/common/logsv/include/lgsv_msg.h  |  17 +++++-
>>   6 files changed, 134 insertions(+), 5 deletions(-)
>>
>>
>> Description:
>>
>> Form CLM integration is supported from Log Service A.02.02.
>>
>> At-least a A.02.02 LGA client will check CLM membership status of client's
>> node.
>>   old LGA clients A.02.01 are always clm member.
>>
>> This patch enhanced the log service for Unavailability of the Log Service
> API
>> on a
>> Non-Member Node which will fail with SA_AIS_ERR_UNAVAILABLE.
>>
>> After this patch the Log Service does not provide service to processes on
>> cluster nodes that are not
>> in the cluster membership.
>>
>> If the node rejoins the cluster membership, processes executing on the
> node
>> will be
>> able to reinitialize new library handles and use the entire set of Log
> Service
>> APIs that
>> operate on these new handles; however, invocation of APIs that operate on
>> handles
>> acquired by any process before the node left the membership will continue
>> to fail with
>> SA_AIS_ERR_UNAVAILABLE (or with the special treatment described above
>> for
>> asynchronous calls) with the exception of saLogFinalize(), which is used
> to
>> free the
>> library handles and all resources associated with these handles. Hence, it
> is
>> recommended
>> for the processes to finalize the library handles as soon as the processes
>> detect that the node left the membership.
>>
>> Detailed README will be provide soon.
>>
>> Following are expected Log Service API behavior :
>>
>> Case1: On Non-Member Node,  Log Service API will fail with code
>> SA_AIS_ERR_UNAVAILABLE (31)
>> Case2: On Member Node after recovered from Non-Member Node, Log
>> Service API will fail with code SA_AIS_ERR_UNAVAILABLE (31)
>> Case3: Non-Member Node + (Headless) Log Service API will fail with code
>> SA_AIS_ERR_UNAVAILABLE (31)
>> Case4: On Non-Member Node + (Headless) + (Head Joined) Log Service API
>> will fail with code SA_AIS_ERR_UNAVAILABLE (31)
>>
>> diff --git a/osaf/libs/agents/saf/lga/lga.h
> b/osaf/libs/agents/saf/lga/lga.h
>> --- a/osaf/libs/agents/saf/lga/lga.h
>> +++ b/osaf/libs/agents/saf/lga/lga.h
>> @@ -77,6 +77,7 @@ typedef struct lga_client_hdl_rec {
>>                                   * Set when client is initialized an all
>>                                   * streams are recovered
>>                                   */
>> +    bool is_stale_client;  /* Status of client based on the CLM status
> of
>> node.*/
>>   } lga_client_hdl_rec_t;
>>
>>   /* States of the server */
>> @@ -118,6 +119,7 @@ typedef struct {
>>      /* LGS LGA sync params */
>>      int lgs_sync_awaited;
>>      NCS_SEL_OBJ lgs_sync_sel;
>> +    SaClmClusterChangesT clm_node_state; /*Reflects CLM status of
>> this node(for future use).*/
>>   } lga_cb_t;
>>
>>   /* lga_saf_api.c */
>> diff --git a/osaf/libs/agents/saf/lga/lga_api.c
>> b/osaf/libs/agents/saf/lga/lga_api.c
>> --- a/osaf/libs/agents/saf/lga/lga_api.c
>> +++ b/osaf/libs/agents/saf/lga/lga_api.c
>> @@ -325,6 +325,14 @@ SaAisErrorT saLogSelectionObjectGet(SaLo
>>              goto done;
>>      }
>>
>> +    /*Check CLM membership of node.*/
>> +        if (hdl_rec->is_stale_client == true) {
>> +            TRACE("Node not CLM member or stale client");
>> +            ncshm_give_hdl(logHandle);
>> +            rc = SA_AIS_ERR_UNAVAILABLE;
>> +                goto done;
>> +        }
>> +
>>      /* Obtain the selection object from the IPC queue */
>>      sel_obj = m_NCS_IPC_GET_SEL_OBJ(&hdl_rec->mbx);
>>
>> @@ -381,6 +389,14 @@ SaAisErrorT saLogDispatch(SaLogHandleT l
>>              goto done;
>>      }
>>
>> +    /*Check CLM membership of node.*/
>> +         if (hdl_rec->is_stale_client == true) {
>> +                 TRACE("Node not CLM member or stale client");
>> +                 ncshm_give_hdl(logHandle);
>> +                 rc = SA_AIS_ERR_UNAVAILABLE;
>> +                 goto done;
>> +        }
>> +
>>      if ((rc = lga_hdl_cbk_dispatch(&lga_cb, hdl_rec, dispatchFlags)) !=
>> SA_AIS_OK)
>>              TRACE("LGA_DISPATCH_FAILURE");
>>
>> @@ -776,6 +792,13 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl
>>              ais_rc = SA_AIS_ERR_BAD_HANDLE;
>>              goto done;
>>      }
>> +
>> +    /*Check CLM membership of node.*/
>> +        if (hdl_rec->is_stale_client == true) {
>> +            TRACE("%s Node not CLM member or stale client",
>> __FUNCTION__);
>> +            ais_rc = SA_AIS_ERR_UNAVAILABLE;
>> +            goto done_give_hdl;
>> +        }
>>
>>      /***
>>       * Handle states
>> @@ -1202,6 +1225,13 @@ SaAisErrorT saLogWriteLogAsync(SaLogStre
>>              goto done_give_hdl_stream;
>>      }
>>
>> +    /*Check CLM membership of node.*/
>> +    if (hdl_rec->is_stale_client == true) {
>> +            TRACE("%s Node not CLM member or stale client",
>> __FUNCTION__);
>> +            ais_rc = SA_AIS_ERR_UNAVAILABLE;
>> +            goto done_give_hdl_all;
>> +    }
> [Vu] The variable `is_state_client` is set in the MDS thread and referred to
> in LOG API calling thread. So, seems there is an timing issue here.
> E.g:
> saLogWriteLogAsync() passes above CLM membership check but after that, right
> before sending message to MDS layer,
> the process receive `node leave` event. If this is the case,
> saLogWriteLogAsync() returns OK.
[AVM]  osaf_mutex_lock_ordie(&lga_cb.cb_lock);  & 
osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
              is being taken now.
>
>> +
>>      if ((hdl_rec->reg_cbk.saLogWriteLogCallback == NULL) && (ackFlags
>> == SA_LOG_RECORD_WRITE_ACK)) {
>>              TRACE("%s: Write Callback not registered", __FUNCTION__);
>>              ais_rc = SA_AIS_ERR_INIT;
>> @@ -1350,6 +1380,13 @@ SaAisErrorT saLogStreamClose(SaLogStream
>>              goto done_give_hdl_stream;
>>      }
>>
>> +    /*Check CLM membership of node.*/
>> +         if (hdl_rec->is_stale_client == true) {
>> +                 TRACE("Node not CLM member or stale client");
>> +                 ais_rc = SA_AIS_ERR_UNAVAILABLE;
>> +                 goto rmv_stream;
>> +        }
>> +
>>      if (is_lga_state(LGA_NO_SERVER)) {
>>              /* No server is available. Remove the stream from client
>> database.
>>               * Server side will manage to release resources of this
> stream
>> when up.
>> diff --git a/osaf/libs/agents/saf/lga/lga_mds.c
>> b/osaf/libs/agents/saf/lga/lga_mds.c
>> --- a/osaf/libs/agents/saf/lga/lga_mds.c
>> +++ b/osaf/libs/agents/saf/lga/lga_mds.c
>> @@ -476,6 +476,47 @@ static uint32_t lga_lgs_msg_proc(lga_cb_
>>                      }
>>                      break;
>>
>> +            case LGSV_CLM_NODE_STATUS_CALLBACK:
>> +                    {
>> +                            lga_client_hdl_rec_t *lga_hdl_rec;
>> +
>> +
>>      TRACE_2("LGSV_CLM_NODE_STATUS_CALLBACK
>> clm_node_status:%d",
>> +                                            lgsv_msg-
>>> info.cbk_info.clm_node_status_cbk.clm_node_status);
>> +
>> +                            /** Create the chan hdl record here before
>> +                             ** queing this message onto the priority
>> queue
>> +                             ** so that the dispatch by the application
> to
>> fetch
>> +                             ** the callback is instantaneous.
>> +                             **/
> [Vu] These comments is not relevant to below code. Should remove it.
[AVM] Done.
>> +
>> +                            /** Lookup the hdl rec by client_id  **/
>> +                            if (NULL == (lga_hdl_rec =
>> +
>>      lga_find_hdl_rec_by_regid(cb, lgsv_msg-
>>> info.cbk_info.lgs_client_id))) {
>> +                                    TRACE("regid not found");
>> +                                    lga_msg_destroy(lgsv_msg);
>> +                                    TRACE_LEAVE();
>> +                                    return NCSCC_RC_FAILURE;
>> +                            }
> [Vu] Node leave is an event at node level. I don't think above check is
> necessary or at least not returning ERR, but keep going to next check.
> And as the event is at node level, consider to put `clm_node_state` and `
> is_stale_client` to an global data type.
[AVM]  MDS is a single thread for sending/receiving  LGA/LGS message  is 
received/send from LGS/LGA
             so their is no possibility of lga_hdl_rec  not availability 
, unless an abnormal issue , so reporting error is ok.
>
>> +                                cb->clm_node_state = lgsv_msg-
>>> info.cbk_info.clm_node_status_cbk.clm_node_status;
>> +                                //A client becomes stale if Node loses
> CLM Membership.
>> +                                if (cb->clm_node_state !=
> SA_CLM_NODE_JOINED) {
>> +                                    /*If the node rejoins the cluster
>> membership, processes executing on the node will be
>> +                                      able to reinitialize new library
>> handles and use the entire set of Log Service APIs that
>> +                                      operate on these new handles;
>> however, invocation of APIs that operate on handles
>> +                                      acquired by any process before the
>> node left the membership will continue to fail with
>> +                                      SA_AIS_ERR_UNAVAILABLE (or with
>> the special treatment described above for
>> +                                      asynchronous calls) with the
>> exception of saLogFinalize(), which is used to free the
>> +                                      library handles and all resources
>> associated with these handles. Hence, it is recommended
>> +                                      for the processes to finalize the
>> library handles as soon as the processes
>> +                                      detect that the node left the
>> membership.*/
>> +                                    lga_hdl_rec->is_stale_client = true;
>> +                                        TRACE("CLM_NODE callback
> is_stale_client: %d
>> clm_node_state: %d",
>> +
> lga_hdl_rec->is_stale_client, cb-
>>> clm_node_state);
>> +                                }
>> +                            lga_msg_destroy(lgsv_msg);
>> +                    }
>> +                        break;
>> +
>>              default:
>>                      TRACE("unknown type %d", lgsv_msg-
>>> info.cbk_info.type);
>>                      lga_msg_destroy(lgsv_msg);
>> @@ -820,6 +861,35 @@ static uint32_t lga_dec_write_cbk_msg(NC
>>   }
>>
>>
>> /*************************************************************
>> ***************
>> +  Name          : lga_dec_clm_node_status_cbk_msg
>> +
>> +  Description   : This routine decodes  message
>> +
>> +  Arguments     : NCS_UBAID *msg,
>> +                  LGSV_MSG *msg
>> +
>> +  Return Values : uint32_t
>> +
>> +  Notes         : None.
>> +*************************************************************
>> *****************/
>> +static uint32_t lga_dec_clm_node_status_cbk_msg(NCS_UBAID *uba,
>> lgsv_msg_t *msg)
>> +{
>> +    uint8_t *p8;
>> +    uint32_t total_bytes = 0;
>> +    logsv_lga_clm_status_cbk_t *param = &msg-
>>> info.cbk_info.clm_node_status_cbk;
>> +    uint8_t local_data[100];
>> +
>> +    osafassert(uba != NULL);
>> +
>> +    p8 = ncs_dec_flatten_space(uba, local_data, 4);
>> +        param->clm_node_status = ncs_decode_32bit(&p8);
>> +        ncs_dec_skip_space(uba, 4);
>> +        total_bytes += 4;
>> +
>> +        return total_bytes;
>> +}
>> +
>> +/************************************************************
>> ****************
>>     Name          : lga_dec_lstr_open_sync_rsp_msg
>>
>>     Description   : This routine decodes a log stream open sync response
>> message
>> @@ -936,6 +1006,11 @@ static uint32_t lga_mds_dec(struct ncsmd
>>                                      msg->info.cbk_info.lgs_client_id);
>>                              total_bytes += lga_dec_write_cbk_msg(uba,
>> msg);
>>                              break;
>> +                    case LGSV_CLM_NODE_STATUS_CALLBACK:
>> +                            TRACE_2("decode clm node status message,
>> lgs_client_id=%d",
>> +                                            msg-
>>> info.cbk_info.lgs_client_id);
>> +                            total_bytes +=
>> lga_dec_clm_node_status_cbk_msg(uba, msg);
>> +                                break;
>>                      default:
>>                              TRACE_2("Unknown callback type = %d!",
>> msg->info.cbk_info.type);
>>                              break;
>> diff --git a/osaf/libs/agents/saf/lga/lga_util.c
>> b/osaf/libs/agents/saf/lga/lga_util.c
>> --- a/osaf/libs/agents/saf/lga/lga_util.c
>> +++ b/osaf/libs/agents/saf/lga/lga_util.c
>> @@ -53,6 +53,7 @@ static unsigned int lga_create(void)
>>
>>      osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>>      lga_cb.lgs_sync_awaited = 0;
>> +    lga_cb.clm_node_state = SA_CLM_NODE_JOINED;
>>      osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>>
>>      /* No longer needed */
>> @@ -702,6 +703,7 @@ lga_client_hdl_rec_t *lga_hdl_rec_add(lg
>>        **/
>>      rec->lgs_client_id = client_id;
>>
>> +    rec->is_stale_client = false;
>>      /***
>>       * Initiate the recovery flags
>>       * The setting means that the client is initialized and that there
> is
>> diff --git a/osaf/libs/common/logsv/include/lgsv_defs.h
>> b/osaf/libs/common/logsv/include/lgsv_defs.h
>> --- a/osaf/libs/common/logsv/include/lgsv_defs.h
>> +++ b/osaf/libs/common/logsv/include/lgsv_defs.h
>> @@ -20,7 +20,11 @@
>>
>>   #define LOG_RELEASE_CODE 'A'
>>   #define LOG_MAJOR_VERSION 2
>> -#define LOG_MINOR_VERSION 1
>> +#define LOG_MINOR_VERSION 2
>> +
>> +#define LOG_RELEASE_CODE_0 'A'
>> +#define LOG_MAJOR_VERSION_0 2
>> +#define LOG_MINOR_VERSION_0 1
> [Vu] These three macros are only used in ` is_client_clm_member()`. Consider
> to remove them, but use internal variable SaVersionT instead.
[AVM]  This is required  as reference for application which doesn't 
required to validate Cluster Member ship
              for example as Middle ware service ( Opensaf service).
>>   // Waiting time in library for sync send, unit 10ms
>>   #define LGS_WAIT_TIME 1000
>> diff --git a/osaf/libs/common/logsv/include/lgsv_msg.h
>> b/osaf/libs/common/logsv/include/lgsv_msg.h
>> --- a/osaf/libs/common/logsv/include/lgsv_msg.h
>> +++ b/osaf/libs/common/logsv/include/lgsv_msg.h
>> @@ -45,6 +45,7 @@ typedef enum {
>>   /* LGSV Callback enums */
>>   typedef enum {
>>      LGSV_WRITE_LOG_CALLBACK_IND = 0,
>> +    LGSV_CLM_NODE_STATUS_CALLBACK = 1,
>>      LGSV_LGS_CBK_MAX
>>   } lgsv_cbk_msg_type_t;
>>
>> @@ -121,12 +122,20 @@ typedef struct {
>>      SaAisErrorT error;
>>   } lgsv_write_log_callback_ind_t;
>>
>> -/* wrapper structure for all the callbacks */
>> +/*CLM node status callback structure for LGS*/
>> +typedef struct logsv_loga_clm_status_param_tag {
>> +    uint32_t clm_node_status;
>> +} logsv_lga_clm_status_cbk_t;
>> +
>> +
>>   typedef struct {
>> -    lgsv_cbk_msg_type_t type;       /* callback type */
>> -    uint32_t lgs_client_id; /* lgs client_id */
>> -    SaInvocationT inv;      /* invocation value */
>> +    lgsv_cbk_msg_type_t type;       /* callback type */
>> +    uint32_t lgs_client_id; /* lgs client_id */
>> +    SaInvocationT inv;      /* invocation value */
>> +    /*      union {*/
>>      lgsv_write_log_callback_ind_t write_cbk;
>> +    logsv_lga_clm_status_cbk_t clm_node_status_cbk;
>> +    /*      } param; */
> [Vu] Remove 02 above comments.
[AVM] Corrected the comment.
>>   } lgsv_cbk_info_t;
>>
>>   /* API Response parameter definitions */


------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to