Hi,

I was facing some test case fails in some scenarios , I am looking in to 
it ,
ether the testcase issue or changes causing the problem , I will get 
back to you soon.

-AVM

On 12/23/2015 2:45 PM, Nhat Pham wrote:
> Hi Mahesh,
>
> Would you have comment about the patch?
>
> Best regards,
> Nhat Pham
>
> -----Original Message-----
> From: A V Mahesh [mailto:[email protected]]
> Sent: Wednesday, December 9, 2015 1:19 PM
> To: Nhat Pham <[email protected]>; [email protected]
> Cc: [email protected]
> Subject: Re: [PATCH 1 of 1] cpsv: improve handling unlink and close
> non-collocated checkpoint [#1616]
>
> Hi,
>
> Ok got it , I will review both cases
>
> -AVM
>
> On 12/9/2015 11:34 AM, Nhat Pham wrote:
>> Hi Mahesh,
>>
>> For 'several problem', I mean 3 use cases where:
>> 1,2 : the checkpoint replicas are not deleted immediately even no
>> client exists
>> 3: the checkpoint is deleted although there is a client using the
>> checkpoint.
>>
>> The patch only addresses the problem in these 3 use cases.
>>
>> Best regards,
>> Nhat Pham
>>
>> -----Original Message-----
>> From: A V Mahesh [mailto:[email protected]]
>> Sent: Wednesday, December 9, 2015 12:21 PM
>> To: Nhat Pham <[email protected]>; [email protected]
>> Cc: [email protected]
>> Subject: Re: [PATCH 1 of 1] cpsv: improve handling unlink and close
>> non-collocated checkpoint [#1616]
>>
>> Hi Nhat
>>
>>    >>There are several problems relating to closing and unlinking
>> non-collocated checkpoint.
>>
>> I can see only one problem unlinked non-collocated checkpoint is not
>> getting deleted immediate even No client exist for that non-collocated
>> checkpoint.
>>
>> I see 1,2 ,3 are  use-case of  non-collocated checkpoint , in all
>> cases the the non-collocated checkpoint is not getting deleted
>> immediately is that you mean by `several problems` ?
>>
>> Please let me know is any other portable exist and it is being
>> addressed in this patch , so that I can look the patch in that point of view
>> as well .
>>
>> -AVM
>>
>>
>> On 12/9/2015 8:06 AM, Nhat Pham wrote:
>>>     osaf/services/saf/cpsv/cpnd/cpnd_evt.c  |  51 +++++++++++++------------
>>>     osaf/services/saf/cpsv/cpnd/cpnd_proc.c |  66
>> ++++++++++++++++++--------------
>>>     2 files changed, 64 insertions(+), 53 deletions(-)
>>>
>>>
>>> Problem:
>>> --------
>>> There are several problems relating to closing and unlinking
>> non-collocated checkpoint.
>>> 1. A non-collocated checkpoint is firstly created on SC-2. It is
>>> closed on
>> SC-2. It is opened on PL-3.
>>> It is unlinked. It is closes on PL-3. The replicas on SCs are not
>>> destroyed although the checkpoint is unlinked and no client is using it.
>>>
>>> 2. A non-collocated checkpoint is firstly created on PL-3. It is
>>> closed on
>> PL-3. It is opened on SC-2.
>>> It is unlinked. It is closes on SC-2. The replicas on SCs and PL-3
>>> are not destroyed although the checkpoint is unlinked and no client
>>> is using
>> it.
>>> 3. A non-collocated checkpoint is firstly created on PL-3. It is
>>> closed on
>> PL-3. It is opened on PL-4.
>>> It is unlinked. The replicas on SCs and PL-3 are destroyed although
>>> the
>> checkpoint is using on PL-4.
>>> Solution:
>>> ---------
>>> The main cause of above problems is to use checking if non-collocated
>>> replica is on PL to decide destroying the replicas. This mechanism is
>>> not correct in some cases. The solution is use another mechanism
>>> which checks if there is any client using the checkpoint on the
>>> cluster by
>> verifying if the retention duration timer is active or not.
>>> Test:
>>> -----
>>> Following test cases were executed for both non-collocated and
>>> collocated checkpoint to verify the solution:
>>> 1. verify_unlink_ckpt_created_on_sc_before_close_it_from_sc
>>> 2. verify_unlink_ckpt_created_on_sc_before_close_it_from_pl
>>> 3. verify_unlink_ckpt_created_on_sc_after_close_it
>>> 4. verify_unlink_ckpt_created_on_pl_before_close_it_from_pl
>>> 5. verify_unlink_ckpt_created_on_pl_before_close_it_from_sc
>>> 6. verify_unlink_ckpt_created_on_pl_before_close_it_from_other_pl
>>> 7. verify_unlink_ckpt_created_on_pl_after_close_it
>>>
>>> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
>>> b/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
>>> --- a/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
>>> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
>>> @@ -26,7 +26,7 @@
>>>
>>>     #include "cpnd.h"
>>>
>>> -extern uint32_t cpnd_ckpt_non_collocated_rplica_close(CPND_CB *cb,
>>> CPND_CKPT_NODE *cp_node, SaAisErrorT *error);
>>> +extern uint32_t cpnd_proc_rdset_start(CPND_CB *cb, CPND_CKPT_NODE
>>> +*cp_node);
>>>     extern uint32_t cpnd_proc_non_colloc_rt_expiry(CPND_CB *cb,
>>> SaCkptCheckpointHandleT ckpt_id);
>>>
>>>     static uint32_t cpnd_evt_proc_cb_dump(CPND_CB *cb); @@ -1194,8
>>> +1194,7 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
>>>
>> /*********************************************************************
>> ******
>> *
>>>      * Name          : cpnd_evt_proc_ckpt_unlink_info
>>>      *
>>> - * Description   : Function to process check point unlink
>>> - *                 from Applications.
>>> + * Description   : Function to process checkpoint unlink event from CPD
>>>      *
>>>      * Arguments     : CPND_CB *cb - CPND CB pointer
>>>      *                 CPSV_EVT *evt - Received Event structure
>>> @@ -1209,10 +1208,11 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
>>>     {
>>>             uint32_t rc = NCSCC_RC_SUCCESS;
>>>             CPND_CKPT_NODE *cp_node = NULL;
>>> -   SaAisErrorT error;
>>> +   SaAisErrorT error = SA_AIS_OK;
>>>             CPSV_SEND_INFO sinfo_cpa;
>>>             CPSV_EVT send_evt;
>>>             bool sinfo_cpa_flag = false;
>>> +   bool destroy_replica = false;
>>>
>>>             TRACE_ENTER();
>>>             memset(&send_evt, '\0', sizeof(CPSV_EVT)); @@ -1220,25 +1220,35
>>> @@ static uint32_t cpnd_evt_proc_ckpt_unlin
>>>             if (cp_node == NULL) {
>>>                     TRACE_4("cpnd ckpt node get failed for
>> ckpt_id:%llx",evt->info.ckpt_ulink.ckpt_id);
>>>                     rc = NCSCC_RC_FAILURE;
>>> -           send_evt.info.cpa.info.ulinkRsp.error =
>> SA_AIS_ERR_NOT_EXIST;
>>> +           error = SA_AIS_ERR_NOT_EXIST;
>>>                     goto agent_rsp;
>>>             }
>>>
>>>             sinfo_cpa = cp_node->cpa_sinfo;
>>>             sinfo_cpa_flag = cp_node->cpa_sinfo_flag;
>>> +
>>>             if (cp_node->is_close == true) {
>>> -           send_evt.info.cpa.info.ulinkRsp.error = SA_AIS_OK;
>>> +           /* For non-collocated checkpoint if retention duration timer
>> is active
>>> +            * (i.e the checkpoint is not opened by any client in
>> cluster) the replica
>>> +            * should be destroyed in this case */
>>> +           if
>> (!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags))
>> {
>>> +                   if (cp_node->ret_tmr.is_active) {
>>> +                           TRACE_1("cpnd destroy replica ckpt_id:%llx -
>> No client opens the non-collocated checkpoint ",
>>> +                                   cp_node->ckpt_id);
>>> +                           destroy_replica = true;
>>> +                   }
>>> +           }
>>> +           /* For collocated checkpoint, there is no client opening the
>> checkpoint on this
>>> +            * node. The replica should be destroyed. */
>>> +           else
>>> +                   destroy_replica = true;
>>> +   }
>>> +
>>> +   if (destroy_replica == true) {
>>>                     /* check timer is present,if yes...stop the timer and
>> destroy shm_info and the node */
>>>                     if (cp_node->ret_tmr.is_active)
>>>                             cpnd_tmr_stop(&cp_node->ret_tmr);
>>>
>>> -           if
>> (!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags))
>> {
>>> -                   if
>> (cpnd_is_noncollocated_replica_present_on_payload(cb, cp_node)) {
>>> -                           rc = NCSCC_RC_SUCCESS;
>>> -                           goto agent_rsp;
>>> -                   }
>>> -           }
>>> -
>>>                     rc = cpnd_ckpt_replica_destroy(cb, cp_node, &error);
>>>                     if (rc == NCSCC_RC_FAILURE) {
>>>                             TRACE_4("cpnd ckpt replica destroy failed for
>> ckpt_id:%llx,error
>>> %u",cp_node->ckpt_id, error); @@ -1260,8 +1270,6 @@ static uint32_t
>>> cpnd_evt_proc_ckpt_unlin
>>>
>>>                     }
>>>                     TRACE_4("cpnd proc ckpt unlink set for
>>> ckpt_id:%llx",cp_node->ckpt_id);
>>> -
>>> -           send_evt.info.cpa.info.ulinkRsp.error = SA_AIS_OK;
>>>             }
>>>
>>>      agent_rsp:
>>> @@ -1269,6 +1277,7 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
>>>             if (sinfo_cpa_flag == 1) {
>>>                     send_evt.type = CPSV_EVT_TYPE_CPA;
>>>                     send_evt.info.cpa.type = CPA_EVT_ND2A_CKPT_UNLINK_RSP;
>>> +           send_evt.info.cpa.info.ulinkRsp.error = error;
>>>                     rc = cpnd_mds_send_rsp(cb, &sinfo_cpa, &send_evt);
>>>
>>>             }
>>> @@ -1767,7 +1776,6 @@ static uint32_t cpnd_evt_proc_ckpt_activ
>>>     static uint32_t cpnd_evt_proc_ckpt_rdset_info(CPND_CB *cb,
>>> CPND_EVT
>> *evt, CPSV_SEND_INFO *sinfo)
>>>     {
>>>             CPND_CKPT_NODE *cp_node = NULL;
>>> -   SaAisErrorT error = SA_AIS_OK;
>>>
>>>             TRACE_ENTER();
>>>             /* get cp_node from ckpt_info_db */ @@ -1791,14 +1799,9 @@ 
>>> static
>>> uint32_t cpnd_evt_proc_ckpt_rdset
>>>             }
>>>
>>>             if (evt->info.rdset.type == CPSV_CKPT_RDSET_START) {
>>> -           if
>> (!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags))
>> {
>>> -                   if (cpnd_ckpt_non_collocated_rplica_close(cb,
>> cp_node, &error) == NCSCC_RC_FAILURE) {
>>> -                           TRACE_4("cpnd ckpt relica close failed for
>> client_hdl:%llx,ckpt_id:%llx",evt->info.closeReq.client_hdl,
>> cp_node->ckpt_id);
>>> -
>>> -                   }
>>> -                   TRACE_LEAVE();
>>> -                   return NCSCC_RC_SUCCESS;
>>> -           }
>>> +           cpnd_proc_rdset_start(cb, cp_node);
>>> +           TRACE_LEAVE();
>>> +           return NCSCC_RC_SUCCESS;
>>>             }
>>>
>>>             /* if timer already started on one of the node then what to 
>>> do!!!
>>> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
>>> b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
>>> --- a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
>>> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
>>> @@ -2297,53 +2297,61 @@ uint32_t cpnd_ckpt_replica_close(CPND_CB
>>>     }
>>>
>>>
>> /*********************************************************************
>> ******
>> *************
>>> - * Name          : cpnd_ckpt_non_collocated_rplica_close
>>> + * Name          : cpnd_proc_rdset_start
>>>      *
>>> - * Description   : This is the function close the non_collocated Ckpt
>> Replica
>>> + * Description   : This is the function process the event
>> CPSV_CKPT_RDSET_START
>>> + *                 This event is only applicable for non-collocated
>> checkpoint
>>>      * Arguments     : cb       - CPND Control Block pointer
>>>      *                 cp_node  - pointer to checkpoint node
>>>      *
>>>      * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE
>>>
>>> *********************************************************************
>>> *
>>> *******************/
>>>
>>> -uint32_t cpnd_ckpt_non_collocated_rplica_close(CPND_CB *cb,
>>> CPND_CKPT_NODE *cp_node, SaAisErrorT *error)
>>> +uint32_t cpnd_proc_rdset_start(CPND_CB *cb, CPND_CKPT_NODE *cp_node)
>>>     {
>>>             SaTimeT presentTime;
>>> +   SaAisErrorT error = SA_AIS_OK;
>>>             uint32_t rc = NCSCC_RC_SUCCESS;
>>>
>>>             TRACE_ENTER();
>>> -   if (cp_node->ckpt_lcl_ref_cnt == 0) {
>>>
>>> -           cp_node->is_close = true;
>>> -           cpnd_restart_set_close_flag(cb, cp_node);
>>> +   if
>> (m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags))
>> {
>>> +           TRACE_LEAVE();
>>> +           return NCSCC_RC_SUCCESS;
>>> +   }
>>>
>>> -           if (cp_node->is_unlink != true &&
>>> -
>> (m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionD
>> uratio
>> n) != 0)) {
>>> -                   m_GET_TIME_STAMP(presentTime);
>>> -                   cpnd_restart_update_timer(cb, cp_node, presentTime);
>>> +   if (cp_node->ckpt_lcl_ref_cnt != 0) {
>>> +           LOG_ER("cpnd receives CPND_EVT_D2ND_RDSET_INFO with START
>> while ckpt_lcl_ref_cnt = %d", cp_node->ckpt_lcl_ref_cnt);
>>> +           TRACE_LEAVE();
>>> +           return NCSCC_RC_FAILURE;
>>> +   }
>>>
>>> -                   cp_node->ret_tmr.type =
>> CPND_TMR_TYPE_NON_COLLOC_RETENTION;
>>> -                   cp_node->ret_tmr.uarg = cb->cpnd_cb_hdl_id;
>>> -                   cp_node->ret_tmr.ckpt_id = cp_node->ckpt_id;
>>> -                   cpnd_tmr_start(&cp_node->ret_tmr,
>>> -
>> m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDu
>> ration
>> ));
>>> -                   TRACE_1("cpnd ckpt ret tmr success
>> ckpt_id:%llx",cp_node->ckpt_id);
>>> -           } else {
>>> -                   /* Check for Non-Collocated Replica */
>>> -                   if
>> (cpnd_is_noncollocated_replica_present_on_payload(cb, cp_node)) {
>>> -                           return NCSCC_RC_SUCCESS;
>>> -                   }
>>> -                   rc = cpnd_ckpt_replica_destroy(cb, cp_node, error);
>>> -                   if (rc == NCSCC_RC_FAILURE) {
>>> -                           TRACE_4("cpnd ckpt replica destroy failed
>> ckpt_id:%llx",cp_node->ckpt_id);
>>> -                           return NCSCC_RC_FAILURE;
>>> -                   }
>>> -                   TRACE_1("cpnd ckpt replica destroy failed
>> ckpt_id:%llx",cp_node->ckpt_id);
>>> +   cp_node->is_close = true;
>>> +   cpnd_restart_set_close_flag(cb, cp_node);
>>>
>>> -                   cpnd_restart_shm_ckpt_free(cb, cp_node);
>>> -                   cpnd_ckpt_node_destroy(cb, cp_node);
>>> +   if (cp_node->is_unlink != true &&
>>> +
>> (m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionD
>> uratio
>> n) != 0)) {
>>> +           m_GET_TIME_STAMP(presentTime);
>>> +           cpnd_restart_update_timer(cb, cp_node, presentTime);
>>> +
>>> +           cp_node->ret_tmr.type = CPND_TMR_TYPE_NON_COLLOC_RETENTION;
>>> +           cp_node->ret_tmr.uarg = cb->cpnd_cb_hdl_id;
>>> +           cp_node->ret_tmr.ckpt_id = cp_node->ckpt_id;
>>> +           cpnd_tmr_start(&cp_node->ret_tmr,
>>> +
>> m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDu
>> ration
>> ));
>>> +           TRACE_1("cpnd ckpt ret tmr success
>> ckpt_id:%llx",cp_node->ckpt_id);
>>> +   } else {
>>> +           rc = cpnd_ckpt_replica_destroy(cb, cp_node, &error);
>>> +           if (rc == NCSCC_RC_FAILURE) {
>>> +                   LOG_ER("cpnd ckpt replica destroy failed
>> ckpt_id:%llx, error:%d",cp_node->ckpt_id, error);
>>> +                   return NCSCC_RC_FAILURE;
>>>                     }
>>> +           TRACE_1("cpnd ckpt replica destroy success
>>> +ckpt_id:%llx",cp_node->ckpt_id);
>>> +
>>> +           cpnd_restart_shm_ckpt_free(cb, cp_node);
>>> +           cpnd_ckpt_node_destroy(cb, cp_node);
>>>             }
>>> +
>>>             TRACE_LEAVE();
>>>             return NCSCC_RC_SUCCESS;
>>>     }
>


------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to