Hi Mahesh,

For 'several problem', I mean 3 use cases where:
1,2 : the checkpoint replicas are not deleted immediately even no client
exists
3: the checkpoint is deleted although there is a client using the
checkpoint.

The patch only addresses the problem in these 3 use cases.

Best regards,
Nhat Pham

-----Original Message-----
From: A V Mahesh [mailto:[email protected]]
Sent: Wednesday, December 9, 2015 12:21 PM
To: Nhat Pham <[email protected]>; [email protected]
Cc: [email protected]
Subject: Re: [PATCH 1 of 1] cpsv: improve handling unlink and close
non-collocated checkpoint [#1616]

Hi Nhat

 >>There are several problems relating to closing and unlinking
non-collocated checkpoint.

I can see only one problem unlinked non-collocated checkpoint is not getting
deleted immediate even No client exist for that non-collocated checkpoint.

I see 1,2 ,3 are  use-case of  non-collocated checkpoint , in all cases the
the non-collocated checkpoint is not getting deleted immediately is that you
mean by `several problems` ?

Please let me know is any other portable exist and it is being addressed in
this patch , so that I can look the patch in that point of view as well .

-AVM


On 12/9/2015 8:06 AM, Nhat Pham wrote:
>   osaf/services/saf/cpsv/cpnd/cpnd_evt.c  |  51 +++++++++++++------------
>   osaf/services/saf/cpsv/cpnd/cpnd_proc.c |  66
++++++++++++++++++--------------
>   2 files changed, 64 insertions(+), 53 deletions(-)
>
>
> Problem:
> --------
> There are several problems relating to closing and unlinking
non-collocated checkpoint.
>
> 1. A non-collocated checkpoint is firstly created on SC-2. It is closed on
SC-2. It is opened on PL-3.
> It is unlinked. It is closes on PL-3. The replicas on SCs are not
> destroyed although the checkpoint is unlinked and no client is using it.
>
> 2. A non-collocated checkpoint is firstly created on PL-3. It is closed on
PL-3. It is opened on SC-2.
> It is unlinked. It is closes on SC-2. The replicas on SCs and PL-3 are
> not destroyed although the checkpoint is unlinked and no client is using
it.
>
> 3. A non-collocated checkpoint is firstly created on PL-3. It is closed on
PL-3. It is opened on PL-4.
> It is unlinked. The replicas on SCs and PL-3 are destroyed although the
checkpoint is using on PL-4.
>
> Solution:
> ---------
> The main cause of above problems is to use checking if non-collocated
> replica is on PL to decide destroying the replicas. This mechanism is
> not correct in some cases. The solution is use another mechanism which
> checks if there is any client using the checkpoint on the cluster by
verifying if the retention duration timer is active or not.
>
> Test:
> -----
> Following test cases were executed for both non-collocated and
> collocated checkpoint to verify the solution:
> 1. verify_unlink_ckpt_created_on_sc_before_close_it_from_sc
> 2. verify_unlink_ckpt_created_on_sc_before_close_it_from_pl
> 3. verify_unlink_ckpt_created_on_sc_after_close_it
> 4. verify_unlink_ckpt_created_on_pl_before_close_it_from_pl
> 5. verify_unlink_ckpt_created_on_pl_before_close_it_from_sc
> 6. verify_unlink_ckpt_created_on_pl_before_close_it_from_other_pl
> 7. verify_unlink_ckpt_created_on_pl_after_close_it
>
> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
> b/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
> --- a/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
> @@ -26,7 +26,7 @@
>
>   #include "cpnd.h"
>
> -extern uint32_t cpnd_ckpt_non_collocated_rplica_close(CPND_CB *cb,
> CPND_CKPT_NODE *cp_node, SaAisErrorT *error);
> +extern uint32_t cpnd_proc_rdset_start(CPND_CB *cb, CPND_CKPT_NODE
> +*cp_node);
>   extern uint32_t cpnd_proc_non_colloc_rt_expiry(CPND_CB *cb,
> SaCkptCheckpointHandleT ckpt_id);
>
>   static uint32_t cpnd_evt_proc_cb_dump(CPND_CB *cb); @@ -1194,8
> +1194,7 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
>
/***************************************************************************
*
>    * Name          : cpnd_evt_proc_ckpt_unlink_info
>    *
> - * Description   : Function to process check point unlink
> - *                 from Applications.
> + * Description   : Function to process checkpoint unlink event from CPD
>    *
>    * Arguments     : CPND_CB *cb - CPND CB pointer
>    *                 CPSV_EVT *evt - Received Event structure
> @@ -1209,10 +1208,11 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
>   {
>       uint32_t rc = NCSCC_RC_SUCCESS;
>       CPND_CKPT_NODE *cp_node = NULL;
> -     SaAisErrorT error;
> +     SaAisErrorT error = SA_AIS_OK;
>       CPSV_SEND_INFO sinfo_cpa;
>       CPSV_EVT send_evt;
>       bool sinfo_cpa_flag = false;
> +     bool destroy_replica = false;
>
>       TRACE_ENTER();
>       memset(&send_evt, '\0', sizeof(CPSV_EVT)); @@ -1220,25 +1220,35 @@
> static uint32_t cpnd_evt_proc_ckpt_unlin
>       if (cp_node == NULL) {
>               TRACE_4("cpnd ckpt node get failed for
ckpt_id:%llx",evt->info.ckpt_ulink.ckpt_id);
>               rc = NCSCC_RC_FAILURE;
> -             send_evt.info.cpa.info.ulinkRsp.error =
SA_AIS_ERR_NOT_EXIST;
> +             error = SA_AIS_ERR_NOT_EXIST;
>               goto agent_rsp;
>       }
>
>       sinfo_cpa = cp_node->cpa_sinfo;
>       sinfo_cpa_flag = cp_node->cpa_sinfo_flag;
> +
>       if (cp_node->is_close == true) {
> -             send_evt.info.cpa.info.ulinkRsp.error = SA_AIS_OK;
> +             /* For non-collocated checkpoint if retention duration timer
is active
> +              * (i.e the checkpoint is not opened by any client in
cluster) the replica
> +              * should be destroyed in this case */
> +             if
(!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) {
> +                     if (cp_node->ret_tmr.is_active) {
> +                             TRACE_1("cpnd destroy replica ckpt_id:%llx -
No client opens the non-collocated checkpoint ",
> +                                     cp_node->ckpt_id);
> +                             destroy_replica = true;
> +                     }
> +             }
> +             /* For collocated checkpoint, there is no client opening the
checkpoint on this
> +              * node. The replica should be destroyed. */
> +             else
> +                     destroy_replica = true;
> +     }
> +
> +     if (destroy_replica == true) {
>               /* check timer is present,if yes...stop the timer and
destroy shm_info and the node */
>               if (cp_node->ret_tmr.is_active)
>                       cpnd_tmr_stop(&cp_node->ret_tmr);
>
> -             if
(!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) {
> -                     if
(cpnd_is_noncollocated_replica_present_on_payload(cb, cp_node)) {
> -                             rc = NCSCC_RC_SUCCESS;
> -                             goto agent_rsp;
> -                     }
> -             }
> -
>               rc = cpnd_ckpt_replica_destroy(cb, cp_node, &error);
>               if (rc == NCSCC_RC_FAILURE) {
>                       TRACE_4("cpnd ckpt replica destroy failed for
ckpt_id:%llx,error
> %u",cp_node->ckpt_id, error); @@ -1260,8 +1270,6 @@ static uint32_t
> cpnd_evt_proc_ckpt_unlin
>
>               }
>               TRACE_4("cpnd proc ckpt unlink set for
> ckpt_id:%llx",cp_node->ckpt_id);
> -
> -             send_evt.info.cpa.info.ulinkRsp.error = SA_AIS_OK;
>       }
>
>    agent_rsp:
> @@ -1269,6 +1277,7 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
>       if (sinfo_cpa_flag == 1) {
>               send_evt.type = CPSV_EVT_TYPE_CPA;
>               send_evt.info.cpa.type = CPA_EVT_ND2A_CKPT_UNLINK_RSP;
> +             send_evt.info.cpa.info.ulinkRsp.error = error;
>               rc = cpnd_mds_send_rsp(cb, &sinfo_cpa, &send_evt);
>
>       }
> @@ -1767,7 +1776,6 @@ static uint32_t cpnd_evt_proc_ckpt_activ
>   static uint32_t cpnd_evt_proc_ckpt_rdset_info(CPND_CB *cb, CPND_EVT
*evt, CPSV_SEND_INFO *sinfo)
>   {
>       CPND_CKPT_NODE *cp_node = NULL;
> -     SaAisErrorT error = SA_AIS_OK;
>
>       TRACE_ENTER();
>       /* get cp_node from ckpt_info_db */ @@ -1791,14 +1799,9 @@ static
> uint32_t cpnd_evt_proc_ckpt_rdset
>       }
>
>       if (evt->info.rdset.type == CPSV_CKPT_RDSET_START) {
> -             if
(!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) {
> -                     if (cpnd_ckpt_non_collocated_rplica_close(cb,
cp_node, &error) == NCSCC_RC_FAILURE) {
> -                             TRACE_4("cpnd ckpt relica close failed for
client_hdl:%llx,ckpt_id:%llx",evt->info.closeReq.client_hdl,
cp_node->ckpt_id);
> -
> -                     }
> -                     TRACE_LEAVE();
> -                     return NCSCC_RC_SUCCESS;
> -             }
> +             cpnd_proc_rdset_start(cb, cp_node);
> +             TRACE_LEAVE();
> +             return NCSCC_RC_SUCCESS;
>       }
>
>       /* if timer already started on one of the node then what to do!!!
> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
> b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
> --- a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
> @@ -2297,53 +2297,61 @@ uint32_t cpnd_ckpt_replica_close(CPND_CB
>   }
>
>
/***************************************************************************
*************
> - * Name          : cpnd_ckpt_non_collocated_rplica_close
> + * Name          : cpnd_proc_rdset_start
>    *
> - * Description   : This is the function close the non_collocated Ckpt
Replica
> + * Description   : This is the function process the event
CPSV_CKPT_RDSET_START
> + *                 This event is only applicable for non-collocated
checkpoint
>    * Arguments     : cb       - CPND Control Block pointer
>    *                 cp_node  - pointer to checkpoint node
>    *
>    * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE
>
> **********************************************************************
> *******************/
>
> -uint32_t cpnd_ckpt_non_collocated_rplica_close(CPND_CB *cb,
> CPND_CKPT_NODE *cp_node, SaAisErrorT *error)
> +uint32_t cpnd_proc_rdset_start(CPND_CB *cb, CPND_CKPT_NODE *cp_node)
>   {
>       SaTimeT presentTime;
> +     SaAisErrorT error = SA_AIS_OK;
>       uint32_t rc = NCSCC_RC_SUCCESS;
>
>       TRACE_ENTER();
> -     if (cp_node->ckpt_lcl_ref_cnt == 0) {
>
> -             cp_node->is_close = true;
> -             cpnd_restart_set_close_flag(cb, cp_node);
> +     if
(m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) {
> +             TRACE_LEAVE();
> +             return NCSCC_RC_SUCCESS;
> +     }
>
> -             if (cp_node->is_unlink != true &&
> -
(m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuratio
n) != 0)) {
> -                     m_GET_TIME_STAMP(presentTime);
> -                     cpnd_restart_update_timer(cb, cp_node, presentTime);
> +     if (cp_node->ckpt_lcl_ref_cnt != 0) {
> +             LOG_ER("cpnd receives CPND_EVT_D2ND_RDSET_INFO with START
while ckpt_lcl_ref_cnt = %d", cp_node->ckpt_lcl_ref_cnt);
> +             TRACE_LEAVE();
> +             return NCSCC_RC_FAILURE;
> +     }
>
> -                     cp_node->ret_tmr.type =
CPND_TMR_TYPE_NON_COLLOC_RETENTION;
> -                     cp_node->ret_tmr.uarg = cb->cpnd_cb_hdl_id;
> -                     cp_node->ret_tmr.ckpt_id = cp_node->ckpt_id;
> -                     cpnd_tmr_start(&cp_node->ret_tmr,
> -
m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuration
));
> -                     TRACE_1("cpnd ckpt ret tmr success
ckpt_id:%llx",cp_node->ckpt_id);
> -             } else {
> -                     /* Check for Non-Collocated Replica */
> -                     if
(cpnd_is_noncollocated_replica_present_on_payload(cb, cp_node)) {
> -                             return NCSCC_RC_SUCCESS;
> -                     }
> -                     rc = cpnd_ckpt_replica_destroy(cb, cp_node, error);
> -                     if (rc == NCSCC_RC_FAILURE) {
> -                             TRACE_4("cpnd ckpt replica destroy failed
ckpt_id:%llx",cp_node->ckpt_id);
> -                             return NCSCC_RC_FAILURE;
> -                     }
> -                     TRACE_1("cpnd ckpt replica destroy failed
ckpt_id:%llx",cp_node->ckpt_id);
> +     cp_node->is_close = true;
> +     cpnd_restart_set_close_flag(cb, cp_node);
>
> -                     cpnd_restart_shm_ckpt_free(cb, cp_node);
> -                     cpnd_ckpt_node_destroy(cb, cp_node);
> +     if (cp_node->is_unlink != true &&
> +
(m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuratio
n) != 0)) {
> +             m_GET_TIME_STAMP(presentTime);
> +             cpnd_restart_update_timer(cb, cp_node, presentTime);
> +
> +             cp_node->ret_tmr.type = CPND_TMR_TYPE_NON_COLLOC_RETENTION;
> +             cp_node->ret_tmr.uarg = cb->cpnd_cb_hdl_id;
> +             cp_node->ret_tmr.ckpt_id = cp_node->ckpt_id;
> +             cpnd_tmr_start(&cp_node->ret_tmr,
> +
m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuration
));
> +             TRACE_1("cpnd ckpt ret tmr success
ckpt_id:%llx",cp_node->ckpt_id);
> +     } else {
> +             rc = cpnd_ckpt_replica_destroy(cb, cp_node, &error);
> +             if (rc == NCSCC_RC_FAILURE) {
> +                     LOG_ER("cpnd ckpt replica destroy failed
ckpt_id:%llx, error:%d",cp_node->ckpt_id, error);
> +                     return NCSCC_RC_FAILURE;
>               }
> +             TRACE_1("cpnd ckpt replica destroy success
> +ckpt_id:%llx",cp_node->ckpt_id);
> +
> +             cpnd_restart_shm_ckpt_free(cb, cp_node);
> +             cpnd_ckpt_node_destroy(cb, cp_node);
>       }
> +
>       TRACE_LEAVE();
>       return NCSCC_RC_SUCCESS;
>   }



------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to