Hi Lennart,

Thanks for the comments.

One of your major comment is  `lga_hdl_rec->is_stale_client` &  
`cb->clm_node_state` is not protected in
lga_mds.c , but if you see the  lga_mds_rcv()  , lga_lgs_msg_proc() it 
self is protected , so all the chances in lga_lgs_msg_proc() are protected.
I understand that the existing code is little bit miss leading.

The other comments I will address in V3 Patch.
==========================================================================
static uint32_t lga_mds_rcv(struct ncsmds_callback_info *mds_cb_info)
{
         lgsv_msg_t *lgsv_msg = (lgsv_msg_t 
*)mds_cb_info->info.receive.i_msg;
         uint32_t rc;

         osaf_mutex_lock_ordie(&lga_cb.cb_lock);

         /* process the message */
         rc = lga_lgs_msg_proc(&lga_cb, lgsv_msg, 
mds_cb_info->info.receive.i_priority);
         if (rc != NCSCC_RC_SUCCESS) {
                 TRACE_2("lga_lgs_msg_proc returned: %d", rc);
         }

         osaf_mutex_unlock_ordie(&lga_cb.cb_lock);

         return rc;
}
==========================================================================

-AVM

On 8/2/2016 4:21 PM, Lennart Lund wrote:
> Hi
>
> My comments [Lennart]
>
> I will continue reviewing patch 2
>
> Thanks
> Lennart
>
>> -----Original Message-----
>> From: mahesh.va...@oracle.com [mailto:mahesh.va...@oracle.com]
>> Sent: den 2 augusti 2016 07:18
>> To: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au>; Lennart Lund
>> <lennart.l...@ericsson.com>; Anders Widell <anders.wid...@ericsson.com>
>> Cc: opensaf-devel@lists.sourceforge.net
>> Subject: [PATCH 1 of 3] lga: agent Cluster Membership (CLM) integration
>> [#1638] V2
>>
>>   osaf/libs/agents/saf/lga/lga.h             |   2 +
>>   osaf/libs/agents/saf/lga/lga_api.c         |  47 ++++++++++++++++++++
>>   osaf/libs/agents/saf/lga/lga_mds.c         |  69
>> ++++++++++++++++++++++++++++++
>>   osaf/libs/agents/saf/lga/lga_util.c        |   2 +
>>   osaf/libs/common/logsv/include/lgsv_defs.h |   6 ++-
>>   osaf/libs/common/logsv/include/lgsv_msg.h  |  17 +++++-
>>   6 files changed, 138 insertions(+), 5 deletions(-)
>>
>>
>> V2 patch:
>>
>> Incorporated Anders Widell  and   Vu    review comments ,
>> for more details see the review comments posted on V1 patch.
>> Description:
>>
>> Form CLM integration is supported from Log Service A.02.02.
>>
>> At-least a A.02.02 LGA client will check CLM membership status of client's
>> node.
>>   old LGA clients A.02.01 are always clm member.
>>
>> This patch enhanced the log service for Unavailability of the Log Service API
>> on a
>> Non-Member Node which will fail with SA_AIS_ERR_UNAVAILABLE.
>>
>> After this patch the Log Service does not provide service to processes on
>> cluster nodes that are not
>> in the cluster membership.
>>
>> If the node rejoins the cluster membership, processes executing on the
>> node will be
>> able to reinitialize new library handles and use the entire set of Log 
>> Service
>> APIs that
>> operate on these new handles; however, invocation of APIs that operate on
>> handles
>> acquired by any process before the node left the membership will continue
>> to fail with
>> SA_AIS_ERR_UNAVAILABLE (or with the special treatment described above
>> for
>> asynchronous calls) with the exception of saLogFinalize(), which is used to
>> free the
>> library handles and all resources associated with these handles. Hence, it is
>> recommended
>> for the processes to finalize the library handles as soon as the processes
>> detect that the node left the membership.
>>
>> Detailed README will be provide soon.
>>
>> Following are expected Log Service API behavior :
>>
>> Case1: On Non-Member Node,  Log Service API will fail with code
>> SA_AIS_ERR_UNAVAILABLE (31)
>> Case2: On Member Node after recovered from Non-Member Node, Log
>> Service API will fail with code SA_AIS_ERR_UNAVAILABLE (31)
>> Case3: Non-Member Node + (Headless) Log Service API will fail with code
>> SA_AIS_ERR_UNAVAILABLE (31)
>> Case4: On Non-Member Node + (Headless) + (Head Joined) Log Service API
>> will fail with code SA_AIS_ERR_UNAVAILABLE (31)
> [Lennart] I assume you will move this info about the implementation/feature 
> to the README file
[AVM] Ok , I will add these as expected as behavior to README.
>> diff --git a/osaf/libs/agents/saf/lga/lga.h b/osaf/libs/agents/saf/lga/lga.h
>> --- a/osaf/libs/agents/saf/lga/lga.h
>> +++ b/osaf/libs/agents/saf/lga/lga.h
>> @@ -77,6 +77,7 @@ typedef struct lga_client_hdl_rec {
>>                                   * Set when client is initialized an all
>>                                   * streams are recovered
>>                                   */
>> +    bool is_stale_client;  /* Status of client based on the CLM status of
>> node.*/
>>   } lga_client_hdl_rec_t;
>>
>>   /* States of the server */
>> @@ -118,6 +119,7 @@ typedef struct {
>>      /* LGS LGA sync params */
>>      int lgs_sync_awaited;
>>      NCS_SEL_OBJ lgs_sync_sel;
>> +    SaClmClusterChangesT clm_node_state; /*Reflects CLM status of
>> this node(for future use).*/
> [Lennart] This type is defined in saClm.h but saClm.h is included indirectly 
> via ncsencdec_pub.h, saClm.h should be included here directly
[AVM] Done
>
>>   } lga_cb_t;
>>
>>   /* lga_saf_api.c */
>> diff --git a/osaf/libs/agents/saf/lga/lga_api.c
>> b/osaf/libs/agents/saf/lga/lga_api.c
>> --- a/osaf/libs/agents/saf/lga/lga_api.c
>> +++ b/osaf/libs/agents/saf/lga/lga_api.c
>> @@ -324,6 +324,16 @@ SaAisErrorT saLogSelectionObjectGet(SaLo
>>              rc = SA_AIS_ERR_BAD_HANDLE;
>>              goto done;
>>      }
>> +
>> +    osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>> +    /*Check CLM membership of node.*/
>> +    if (hdl_rec->is_stale_client == true) {
>> +            TRACE("Node not CLM member or stale client");
>> +            ncshm_give_hdl(logHandle);
>> +            rc = SA_AIS_ERR_UNAVAILABLE;
>> +            goto done;
>> +    }
>> +    osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>>
>>      /* Obtain the selection object from the IPC queue */
>>      sel_obj = m_NCS_IPC_GET_SEL_OBJ(&hdl_rec->mbx);
>> @@ -381,6 +391,16 @@ SaAisErrorT saLogDispatch(SaLogHandleT l
>>              goto done;
>>      }
>>
>> +    osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>> +    /*Check CLM membership of node.*/
>> +    if (hdl_rec->is_stale_client == true) {
>> +            TRACE("Node not CLM member or stale client");
>> +            ncshm_give_hdl(logHandle);
>> +            rc = SA_AIS_ERR_UNAVAILABLE;
>> +            goto done;
>> +    }
>> +    osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
> [Lennart] Is it nessecary to lock this whole section or is it only 
> is_stale_client that has to be protected?
> If "goto done;" is executed the mutex will not be unlocked! Also see comment 
> for lgs_mdc.c (mutex not used).
> This applies to all API:s checking is_stale_client
[AVM] Corrected Unlock on go to done.   in lga_mds.c lga_lgs_msg_proc()  
it self is protected with  mutex_lock.
>> +
>>      if ((rc = lga_hdl_cbk_dispatch(&lga_cb, hdl_rec, dispatchFlags)) !=
>> SA_AIS_OK)
>>              TRACE("LGA_DISPATCH_FAILURE");
>>
>> @@ -777,6 +797,15 @@ SaAisErrorT saLogStreamOpen_2(SaLogHandl
>>              goto done;
>>      }
>>
>> +    osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>> +    /*Check CLM membership of node.*/
>> +    if (hdl_rec->is_stale_client == true) {
>> +            TRACE("%s Node not CLM member or stale client",
>> __FUNCTION__);
>> +            ais_rc = SA_AIS_ERR_UNAVAILABLE;
>> +            goto done_give_hdl;
>> +    }
>> +    osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>> +
>>      /***
>>       * Handle states
>>       * Synchronize with mds and recovery thread (mutex)
>> @@ -1202,6 +1231,15 @@ SaAisErrorT saLogWriteLogAsync(SaLogStre
>>              goto done_give_hdl_stream;
>>      }
>>
>> +    osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>> +    /*Check CLM membership of node.*/
>> +    if (hdl_rec->is_stale_client == true) {
>> +            TRACE("%s Node not CLM member or stale client",
>> __FUNCTION__);
>> +            ais_rc = SA_AIS_ERR_UNAVAILABLE;
>> +            goto done_give_hdl_all;
>> +    }
>> +    osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>> +
>>      if ((hdl_rec->reg_cbk.saLogWriteLogCallback == NULL) && (ackFlags
>> == SA_LOG_RECORD_WRITE_ACK)) {
>>              TRACE("%s: Write Callback not registered", __FUNCTION__);
>>              ais_rc = SA_AIS_ERR_INIT;
>> @@ -1350,6 +1388,15 @@ SaAisErrorT saLogStreamClose(SaLogStream
>>              goto done_give_hdl_stream;
>>      }
>>
>> +    osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>> +    /*Check CLM membership of node.*/
>> +    if (hdl_rec->is_stale_client == true) {
>> +            TRACE("Node not CLM member or stale client");
>> +            ais_rc = SA_AIS_ERR_UNAVAILABLE;
>> +            goto rmv_stream;
>> +    }
>> +    osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>> +
>>      if (is_lga_state(LGA_NO_SERVER)) {
>>              /* No server is available. Remove the stream from client
>> database.
>>               * Server side will manage to release resources of this stream
>> when up.
>> diff --git a/osaf/libs/agents/saf/lga/lga_mds.c
>> b/osaf/libs/agents/saf/lga/lga_mds.c
>> --- a/osaf/libs/agents/saf/lga/lga_mds.c
>> +++ b/osaf/libs/agents/saf/lga/lga_mds.c
>> @@ -476,6 +476,41 @@ static uint32_t lga_lgs_msg_proc(lga_cb_
>>                      }
>>                      break;
>>
>> +            case LGSV_CLM_NODE_STATUS_CALLBACK:
>> +                    {
>> +                            lga_client_hdl_rec_t *lga_hdl_rec;
>> +
>> +
>>      TRACE_2("LGSV_CLM_NODE_STATUS_CALLBACK
>> clm_node_status:%d",
>> +                                            lgsv_msg-
>>> info.cbk_info.clm_node_status_cbk.clm_node_status);
>> +
>> +                            /** Lookup the hdl rec by client_id  **/
>> +                            if (NULL == (lga_hdl_rec =
>> +
>>      lga_find_hdl_rec_by_regid(cb, lgsv_msg-
>>> info.cbk_info.lgs_client_id))) {
>> +                                    TRACE("regid not found");
>> +                                    lga_msg_destroy(lgsv_msg);
>> +                                    TRACE_LEAVE();
>> +                                    return NCSCC_RC_FAILURE;
>> +                            }
>> +                            cb->clm_node_state = lgsv_msg-
>>> info.cbk_info.clm_node_status_cbk.clm_node_status;
>> +                            //A client becomes stale if Node loses CLM
>> Membership.
>> +                            if (cb->clm_node_state !=
>> SA_CLM_NODE_JOINED) {
> [Lennart] clm_node_state not protected by mutex. See lga_util.c
   [AVM] In lga_mds.c  lga_lgs_msg_proc()  it self is protected with 
mutex_lock in function lga_mds_rcv().
>
>> +                                    /*If the node rejoins the cluster
>> membership, processes executing on the node will be
>> +                                      able to reinitialize new library
>> handles and use the entire set of Log Service APIs that
>> +                                      operate on these new handles;
>> however, invocation of APIs that operate on handles
>> +                                      acquired by any process before the
>> node left the membership will continue to fail with
>> +                                      SA_AIS_ERR_UNAVAILABLE (or with
>> the special treatment described above for
>> +                                      asynchronous calls) with the
>> exception of saLogFinalize(), which is used to free the
>> +                                      library handles and all resources
>> associated with these handles. Hence, it is recommended
>> +                                      for the processes to finalize the
>> library handles as soon as the processes
>> +                                      detect that the node left the
>> membership.*/
>> +                                    lga_hdl_rec->is_stale_client = true;
> [Lennart] Not protected by mutex. See lga_ais.c
   [AVM] In lga_mds.c  lga_lgs_msg_proc()  it self is protected with 
mutex_lock in function lga_mds_rcv().
>
>> +                                    TRACE("CLM_NODE callback
>> is_stale_client: %d clm_node_state: %d",
>> +                                                    lga_hdl_rec-
>>> is_stale_client, cb->clm_node_state);
>> +                            }
>> +                            lga_msg_destroy(lgsv_msg);
>> +                    }
>> +                    break;
>> +
>>              default:
>>                      TRACE("unknown type %d", lgsv_msg-
>>> info.cbk_info.type);
>>                      lga_msg_destroy(lgsv_msg);
>> @@ -820,6 +855,35 @@ static uint32_t lga_dec_write_cbk_msg(NC
>>   }
>>
>>
>> /**********************************************************
>> ******************
>> +Name          : lga_dec_clm_node_status_cbk_msg
>> +
>> +Description   : This routine decodes  message
>> +
>> +Arguments     : NCS_UBAID *msg,
>> +LGSV_MSG *msg
>> +
>> +Return Values : uint32_t
>> +
>> +Notes         : None.
>> +
>> **********************************************************
>> ********************/
>> +static uint32_t lga_dec_clm_node_status_cbk_msg(NCS_UBAID *uba,
>> lgsv_msg_t *msg)
>> +{
>> +    uint8_t *p8;
>> +    uint32_t total_bytes = 0;
>> +    logsv_lga_clm_status_cbk_t *param = &msg-
>>> info.cbk_info.clm_node_status_cbk;
>> +    uint8_t local_data[100];
>> +
>> +    osafassert(uba != NULL);
>> +
>> +    p8 = ncs_dec_flatten_space(uba, local_data, 4);
>> +    param->clm_node_status = ncs_decode_32bit(&p8);
>> +    ncs_dec_skip_space(uba, 4);
>> +    total_bytes += 4;
>> +
>> +    return total_bytes;
>> +}
>> +
>> +/*********************************************************
>> *******************
>>     Name          : lga_dec_lstr_open_sync_rsp_msg
>>
>>     Description   : This routine decodes a log stream open sync response
>> message
>> @@ -936,6 +1000,11 @@ static uint32_t lga_mds_dec(struct ncsmd
>>                                      msg->info.cbk_info.lgs_client_id);
>>                              total_bytes += lga_dec_write_cbk_msg(uba,
>> msg);
>>                              break;
>> +                    case LGSV_CLM_NODE_STATUS_CALLBACK:
>> +                            TRACE_2("decode clm node status message,
>> lgs_client_id=%d",
>> +                                            msg-
>>> info.cbk_info.lgs_client_id);
>> +                            total_bytes +=
>> lga_dec_clm_node_status_cbk_msg(uba, msg);
>> +                            break;
>>                      default:
>>                              TRACE_2("Unknown callback type = %d!",
>> msg->info.cbk_info.type);
>>                              break;
>> diff --git a/osaf/libs/agents/saf/lga/lga_util.c
>> b/osaf/libs/agents/saf/lga/lga_util.c
>> --- a/osaf/libs/agents/saf/lga/lga_util.c
>> +++ b/osaf/libs/agents/saf/lga/lga_util.c
>> @@ -53,6 +53,7 @@ static unsigned int lga_create(void)
>>
>>      osaf_mutex_lock_ordie(&lga_cb.cb_lock);
>>      lga_cb.lgs_sync_awaited = 0;
>> +    lga_cb.clm_node_state = SA_CLM_NODE_JOINED;
> [Lennart] Protected by mutex here but not in lga_mds.c
   [AVM] In lga_mds.c  lga_lgs_msg_proc()  it self is protected with 
mutex_lock in function lga_mds_rcv().
>
>>      osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
>>
>>      /* No longer needed */
>> @@ -702,6 +703,7 @@ lga_client_hdl_rec_t *lga_hdl_rec_add(lg
>>        **/
>>      rec->lgs_client_id = client_id;
>>
>> +    rec->is_stale_client = false;
>>      /***
>>       * Initiate the recovery flags
>>       * The setting means that the client is initialized and that there is
>> diff --git a/osaf/libs/common/logsv/include/lgsv_defs.h
>> b/osaf/libs/common/logsv/include/lgsv_defs.h
>> --- a/osaf/libs/common/logsv/include/lgsv_defs.h
>> +++ b/osaf/libs/common/logsv/include/lgsv_defs.h
>> @@ -20,7 +20,11 @@
>>
>>   #define LOG_RELEASE_CODE 'A'
>>   #define LOG_MAJOR_VERSION 2
>> -#define LOG_MINOR_VERSION 1
>> +#define LOG_MINOR_VERSION 2
>> +
>> +#define LOG_RELEASE_CODE_0 'A'
>> +#define LOG_MAJOR_VERSION_0 2
>> +#define LOG_MINOR_VERSION_0 1
>>
>>   // Waiting time in library for sync send, unit 10ms
>>   #define LGS_WAIT_TIME 1000
>> diff --git a/osaf/libs/common/logsv/include/lgsv_msg.h
>> b/osaf/libs/common/logsv/include/lgsv_msg.h
>> --- a/osaf/libs/common/logsv/include/lgsv_msg.h
>> +++ b/osaf/libs/common/logsv/include/lgsv_msg.h
>> @@ -45,6 +45,7 @@ typedef enum {
>>   /* LGSV Callback enums */
>>   typedef enum {
>>      LGSV_WRITE_LOG_CALLBACK_IND = 0,
>> +    LGSV_CLM_NODE_STATUS_CALLBACK = 1,
>>      LGSV_LGS_CBK_MAX
>>   } lgsv_cbk_msg_type_t;
>>
>> @@ -121,12 +122,20 @@ typedef struct {
>>      SaAisErrorT error;
>>   } lgsv_write_log_callback_ind_t;
>>
>> -/* wrapper structure for all the callbacks */
>> +/*CLM node status callback structure for LGS*/
>> +typedef struct logsv_loga_clm_status_param_tag {
>> +    uint32_t clm_node_status;
>> +} logsv_lga_clm_status_cbk_t;
>> +
>> +/* wrapper structure for all the callbacks */
>>   typedef struct {
>> -    lgsv_cbk_msg_type_t type;       /* callback type */
>> -    uint32_t lgs_client_id; /* lgs client_id */
>> -    SaInvocationT inv;      /* invocation value */
>> +    lgsv_cbk_msg_type_t type;       /* callback type */
>> +    uint32_t lgs_client_id; /* lgs client_id */
>> +    SaInvocationT inv;      /* invocation value */
>> +    /*      union {*/
>>      lgsv_write_log_callback_ind_t write_cbk;
>> +    logsv_lga_clm_status_cbk_t clm_node_status_cbk;
>> +    /*      } param; */
>>   } lgsv_cbk_info_t;
>>
>>   /* API Response parameter definitions */


------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to