Hi Nhat

 >>There are several problems relating to closing and unlinking 
non-collocated checkpoint.

I can see only one problem: an unlinked non-collocated checkpoint is not 
getting deleted immediately,
even when no client exists for that non-collocated checkpoint.

I see that 1, 2 and 3 are use cases of a non-collocated checkpoint; in all cases 
the non-collocated checkpoint
is not getting deleted immediately. Is that what you mean by `several problems`?

Please let me know if any other problem exists and is being addressed 
in this patch,
so that I can look at the patch from that point of view as well.

-AVM


On 12/9/2015 8:06 AM, Nhat Pham wrote:
>   osaf/services/saf/cpsv/cpnd/cpnd_evt.c  |  51 +++++++++++++------------
>   osaf/services/saf/cpsv/cpnd/cpnd_proc.c |  66 
> ++++++++++++++++++--------------
>   2 files changed, 64 insertions(+), 53 deletions(-)
>
>
> Problem:
> --------
> There are several problems relating to closing and unlinking non-collocated 
> checkpoint.
>
> 1. A non-collocated checkpoint is first created on SC-2. It is closed on 
> SC-2. It is opened on PL-3.
> It is unlinked. It is closed on PL-3. The replicas on SCs are not destroyed 
> although the checkpoint is
> unlinked and no client is using it.
>
> 2. A non-collocated checkpoint is first created on PL-3. It is closed on 
> PL-3. It is opened on SC-2.
> It is unlinked. It is closed on SC-2. The replicas on SCs and PL-3 are not 
> destroyed although the
> checkpoint is unlinked and no client is using it.
>
> 3. A non-collocated checkpoint is first created on PL-3. It is closed on 
> PL-3. It is opened on PL-4.
> It is unlinked. The replicas on SCs and PL-3 are destroyed although the 
> checkpoint is still in use on PL-4.
>
> Solution:
> ---------
> The main cause of the above problems is that the decision to destroy the 
> replicas is based on checking whether the non-collocated replica is on a PL.
> This mechanism is not correct in some cases. The 
> solution is to use another
> mechanism which checks whether any client is using the checkpoint in the 
> cluster by verifying whether
> the retention duration timer is active or not.
>
> Test:
> -----
> The following test cases were executed for both non-collocated and collocated 
> checkpoints to verify
> the solution:
> 1. verify_unlink_ckpt_created_on_sc_before_close_it_from_sc
> 2. verify_unlink_ckpt_created_on_sc_before_close_it_from_pl
> 3. verify_unlink_ckpt_created_on_sc_after_close_it
> 4. verify_unlink_ckpt_created_on_pl_before_close_it_from_pl
> 5. verify_unlink_ckpt_created_on_pl_before_close_it_from_sc
> 6. verify_unlink_ckpt_created_on_pl_before_close_it_from_other_pl
> 7. verify_unlink_ckpt_created_on_pl_after_close_it
>
> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_evt.c 
> b/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
> --- a/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
> @@ -26,7 +26,7 @@
>   
>   #include "cpnd.h"
>   
> -extern uint32_t cpnd_ckpt_non_collocated_rplica_close(CPND_CB *cb, 
> CPND_CKPT_NODE *cp_node, SaAisErrorT *error);
> +extern uint32_t cpnd_proc_rdset_start(CPND_CB *cb, CPND_CKPT_NODE *cp_node);
>   extern uint32_t cpnd_proc_non_colloc_rt_expiry(CPND_CB *cb, 
> SaCkptCheckpointHandleT ckpt_id);
>   
>   static uint32_t cpnd_evt_proc_cb_dump(CPND_CB *cb);
> @@ -1194,8 +1194,7 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
>   
> /****************************************************************************
>    * Name          : cpnd_evt_proc_ckpt_unlink_info
>    *
> - * Description   : Function to process check point unlink
> - *                 from Applications.
> + * Description   : Function to process checkpoint unlink event from CPD
>    *
>    * Arguments     : CPND_CB *cb - CPND CB pointer
>    *                 CPSV_EVT *evt - Received Event structure
> @@ -1209,10 +1208,11 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
>   {
>       uint32_t rc = NCSCC_RC_SUCCESS;
>       CPND_CKPT_NODE *cp_node = NULL;
> -     SaAisErrorT error;
> +     SaAisErrorT error = SA_AIS_OK;
>       CPSV_SEND_INFO sinfo_cpa;
>       CPSV_EVT send_evt;
>       bool sinfo_cpa_flag = false;
> +     bool destroy_replica = false;
>   
>       TRACE_ENTER();
>       memset(&send_evt, '\0', sizeof(CPSV_EVT));
> @@ -1220,25 +1220,35 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
>       if (cp_node == NULL) {
>               TRACE_4("cpnd ckpt node get failed for 
> ckpt_id:%llx",evt->info.ckpt_ulink.ckpt_id);
>               rc = NCSCC_RC_FAILURE;
> -             send_evt.info.cpa.info.ulinkRsp.error = SA_AIS_ERR_NOT_EXIST;
> +             error = SA_AIS_ERR_NOT_EXIST;
>               goto agent_rsp;
>       }
>   
>       sinfo_cpa = cp_node->cpa_sinfo;
>       sinfo_cpa_flag = cp_node->cpa_sinfo_flag;
> +
>       if (cp_node->is_close == true) {
> -             send_evt.info.cpa.info.ulinkRsp.error = SA_AIS_OK;
> +             /* For non-collocated checkpoint if retention duration timer is 
> active
> +              * (i.e the checkpoint is not opened by any client in cluster) 
> the replica
> +              * should be destroyed in this case */
> +             if 
> (!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) {
> +                     if (cp_node->ret_tmr.is_active) {
> +                             TRACE_1("cpnd destroy replica ckpt_id:%llx - No 
> client opens the non-collocated checkpoint ",
> +                                     cp_node->ckpt_id);
> +                             destroy_replica = true;
> +                     }
> +             }
> +             /* For collocated checkpoint, there is no client opening the 
> checkpoint on this
> +              * node. The replica should be destroyed. */
> +             else
> +                     destroy_replica = true;
> +     }
> +
> +     if (destroy_replica == true) {
>               /* check timer is present,if yes...stop the timer and destroy 
> shm_info and the node */
>               if (cp_node->ret_tmr.is_active)
>                       cpnd_tmr_stop(&cp_node->ret_tmr);
>   
> -             if 
> (!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) {
> -                     if 
> (cpnd_is_noncollocated_replica_present_on_payload(cb, cp_node)) {
> -                             rc = NCSCC_RC_SUCCESS;
> -                             goto agent_rsp;
> -                     }
> -             }
> -
>               rc = cpnd_ckpt_replica_destroy(cb, cp_node, &error);
>               if (rc == NCSCC_RC_FAILURE) {
>                       TRACE_4("cpnd ckpt replica destroy failed for 
> ckpt_id:%llx,error %u",cp_node->ckpt_id, error);
> @@ -1260,8 +1270,6 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
>   
>               }
>               TRACE_4("cpnd proc ckpt unlink set for 
> ckpt_id:%llx",cp_node->ckpt_id);
> -
> -             send_evt.info.cpa.info.ulinkRsp.error = SA_AIS_OK;
>       }
>   
>    agent_rsp:
> @@ -1269,6 +1277,7 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
>       if (sinfo_cpa_flag == 1) {
>               send_evt.type = CPSV_EVT_TYPE_CPA;
>               send_evt.info.cpa.type = CPA_EVT_ND2A_CKPT_UNLINK_RSP;
> +             send_evt.info.cpa.info.ulinkRsp.error = error;
>               rc = cpnd_mds_send_rsp(cb, &sinfo_cpa, &send_evt);
>   
>       }
> @@ -1767,7 +1776,6 @@ static uint32_t cpnd_evt_proc_ckpt_activ
>   static uint32_t cpnd_evt_proc_ckpt_rdset_info(CPND_CB *cb, CPND_EVT *evt, 
> CPSV_SEND_INFO *sinfo)
>   {
>       CPND_CKPT_NODE *cp_node = NULL;
> -     SaAisErrorT error = SA_AIS_OK;
>   
>       TRACE_ENTER();
>       /* get cp_node from ckpt_info_db */
> @@ -1791,14 +1799,9 @@ static uint32_t cpnd_evt_proc_ckpt_rdset
>       }
>   
>       if (evt->info.rdset.type == CPSV_CKPT_RDSET_START) {
> -             if 
> (!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) {
> -                     if (cpnd_ckpt_non_collocated_rplica_close(cb, cp_node, 
> &error) == NCSCC_RC_FAILURE) {
> -                             TRACE_4("cpnd ckpt relica close failed for 
> client_hdl:%llx,ckpt_id:%llx",evt->info.closeReq.client_hdl, 
> cp_node->ckpt_id);
> -
> -                     }
> -                     TRACE_LEAVE();
> -                     return NCSCC_RC_SUCCESS;
> -             }
> +             cpnd_proc_rdset_start(cb, cp_node);
> +             TRACE_LEAVE();
> +             return NCSCC_RC_SUCCESS;
>       }
>   
>       /* if timer already started on one of the node then what to do!!!
> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c 
> b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
> --- a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
> @@ -2297,53 +2297,61 @@ uint32_t cpnd_ckpt_replica_close(CPND_CB
>   }
>   
>   
> /****************************************************************************************
> - * Name          : cpnd_ckpt_non_collocated_rplica_close
> + * Name          : cpnd_proc_rdset_start
>    *
> - * Description   : This is the function close the non_collocated Ckpt Replica
> + * Description   : This is the function process the event 
> CPSV_CKPT_RDSET_START
> + *                 This event is only applicable for non-collocated 
> checkpoint
>    * Arguments     : cb       - CPND Control Block pointer
>    *                 cp_node  - pointer to checkpoint node
>    *
>    * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE
>    
> *****************************************************************************************/
>   
> -uint32_t cpnd_ckpt_non_collocated_rplica_close(CPND_CB *cb, CPND_CKPT_NODE 
> *cp_node, SaAisErrorT *error)
> +uint32_t cpnd_proc_rdset_start(CPND_CB *cb, CPND_CKPT_NODE *cp_node)
>   {
>       SaTimeT presentTime;
> +     SaAisErrorT error = SA_AIS_OK;
>       uint32_t rc = NCSCC_RC_SUCCESS;
>   
>       TRACE_ENTER();
> -     if (cp_node->ckpt_lcl_ref_cnt == 0) {
>   
> -             cp_node->is_close = true;
> -             cpnd_restart_set_close_flag(cb, cp_node);
> +     if 
> (m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) {
> +             TRACE_LEAVE();
> +             return NCSCC_RC_SUCCESS;
> +     }
>   
> -             if (cp_node->is_unlink != true &&
> -                 
> (m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuration)
>  != 0)) {
> -                     m_GET_TIME_STAMP(presentTime);
> -                     cpnd_restart_update_timer(cb, cp_node, presentTime);
> +     if (cp_node->ckpt_lcl_ref_cnt != 0) {
> +             LOG_ER("cpnd receives CPND_EVT_D2ND_RDSET_INFO with START while 
> ckpt_lcl_ref_cnt = %d", cp_node->ckpt_lcl_ref_cnt);
> +             TRACE_LEAVE();
> +             return NCSCC_RC_FAILURE;
> +     }
>   
> -                     cp_node->ret_tmr.type = 
> CPND_TMR_TYPE_NON_COLLOC_RETENTION;
> -                     cp_node->ret_tmr.uarg = cb->cpnd_cb_hdl_id;
> -                     cp_node->ret_tmr.ckpt_id = cp_node->ckpt_id;
> -                     cpnd_tmr_start(&cp_node->ret_tmr,
> -                                    
> m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuration));
> -                     TRACE_1("cpnd ckpt ret tmr success 
> ckpt_id:%llx",cp_node->ckpt_id);
> -             } else {
> -                     /* Check for Non-Collocated Replica */
> -                     if 
> (cpnd_is_noncollocated_replica_present_on_payload(cb, cp_node)) {
> -                             return NCSCC_RC_SUCCESS;
> -                     }
> -                     rc = cpnd_ckpt_replica_destroy(cb, cp_node, error);
> -                     if (rc == NCSCC_RC_FAILURE) {
> -                             TRACE_4("cpnd ckpt replica destroy failed 
> ckpt_id:%llx",cp_node->ckpt_id);
> -                             return NCSCC_RC_FAILURE;
> -                     }
> -                     TRACE_1("cpnd ckpt replica destroy failed 
> ckpt_id:%llx",cp_node->ckpt_id);
> +     cp_node->is_close = true;
> +     cpnd_restart_set_close_flag(cb, cp_node);
>   
> -                     cpnd_restart_shm_ckpt_free(cb, cp_node);
> -                     cpnd_ckpt_node_destroy(cb, cp_node);
> +     if (cp_node->is_unlink != true &&
> +         
> (m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuration)
>  != 0)) {
> +             m_GET_TIME_STAMP(presentTime);
> +             cpnd_restart_update_timer(cb, cp_node, presentTime);
> +
> +             cp_node->ret_tmr.type = CPND_TMR_TYPE_NON_COLLOC_RETENTION;
> +             cp_node->ret_tmr.uarg = cb->cpnd_cb_hdl_id;
> +             cp_node->ret_tmr.ckpt_id = cp_node->ckpt_id;
> +             cpnd_tmr_start(&cp_node->ret_tmr,
> +                            
> m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuration));
> +             TRACE_1("cpnd ckpt ret tmr success 
> ckpt_id:%llx",cp_node->ckpt_id);
> +     } else {
> +             rc = cpnd_ckpt_replica_destroy(cb, cp_node, &error);
> +             if (rc == NCSCC_RC_FAILURE) {
> +                     LOG_ER("cpnd ckpt replica destroy failed ckpt_id:%llx, 
> error:%d",cp_node->ckpt_id, error);
> +                     return NCSCC_RC_FAILURE;
>               }
> +             TRACE_1("cpnd ckpt replica destroy success 
> ckpt_id:%llx",cp_node->ckpt_id);
> +
> +             cpnd_restart_shm_ckpt_free(cb, cp_node);
> +             cpnd_ckpt_node_destroy(cb, cp_node);
>       }
> +
>       TRACE_LEAVE();
>       return NCSCC_RC_SUCCESS;
>   }


------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to