Hi Mahesh, By 'several problems', I mean 3 use cases where: 1, 2: the checkpoint replicas are not deleted immediately even though no client exists; 3: the checkpoint is deleted although there is a client using the checkpoint.
The patch only addresses the problem in these 3 use cases. Best regards, Nhat Pham -----Original Message----- From: A V Mahesh [mailto:[email protected]] Sent: Wednesday, December 9, 2015 12:21 PM To: Nhat Pham <[email protected]>; [email protected] Cc: [email protected] Subject: Re: [PATCH 1 of 1] cpsv: improve handling unlink and close non-collocated checkpoint [#1616] Hi Nhat >>There are several problems relating to closing and unlinking non-collocated checkpoint. I can see only one problem: an unlinked non-collocated checkpoint is not getting deleted immediately even when no client exists for that non-collocated checkpoint. I see 1, 2, 3 are use cases of non-collocated checkpoint; in all cases the non-collocated checkpoint is not getting deleted immediately. Is that what you mean by `several problems`? Please let me know if any other problem exists and is being addressed in this patch, so that I can look at the patch from that point of view as well. -AVM On 12/9/2015 8:06 AM, Nhat Pham wrote: > osaf/services/saf/cpsv/cpnd/cpnd_evt.c | 51 +++++++++++++------------ > osaf/services/saf/cpsv/cpnd/cpnd_proc.c | 66 ++++++++++++++++++-------------- > 2 files changed, 64 insertions(+), 53 deletions(-) > > > Problem: > -------- > There are several problems relating to closing and unlinking non-collocated checkpoint. > > 1. A non-collocated checkpoint is firstly created on SC-2. It is closed on SC-2. It is opened on PL-3. > It is unlinked. It is closed on PL-3. The replicas on SCs are not > destroyed although the checkpoint is unlinked and no client is using it. > > 2. A non-collocated checkpoint is firstly created on PL-3. It is closed on PL-3. It is opened on SC-2. > It is unlinked. It is closed on SC-2. The replicas on SCs and PL-3 are > not destroyed although the checkpoint is unlinked and no client is using it. > > 3. A non-collocated checkpoint is firstly created on PL-3. It is closed on PL-3. It is opened on PL-4. > It is unlinked. 
The replicas on SCs and PL-3 are destroyed although the checkpoint is using on PL-4. > > Solution: > --------- > The main cause of above problems is to use checking if non-collocated > replica is on PL to decide destroying the replicas. This mechanism is > not correct in some cases. The solution is use another mechanism which > checks if there is any client using the checkpoint on the cluster by verifying if the retention duration timer is active or not. > > Test: > ----- > Following test cases were executed for both non-collocated and > collocated checkpoint to verify the solution: > 1. verify_unlink_ckpt_created_on_sc_before_close_it_from_sc > 2. verify_unlink_ckpt_created_on_sc_before_close_it_from_pl > 3. verify_unlink_ckpt_created_on_sc_after_close_it > 4. verify_unlink_ckpt_created_on_pl_before_close_it_from_pl > 5. verify_unlink_ckpt_created_on_pl_before_close_it_from_sc > 6. verify_unlink_ckpt_created_on_pl_before_close_it_from_other_pl > 7. verify_unlink_ckpt_created_on_pl_after_close_it > > diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_evt.c > b/osaf/services/saf/cpsv/cpnd/cpnd_evt.c > --- a/osaf/services/saf/cpsv/cpnd/cpnd_evt.c > +++ b/osaf/services/saf/cpsv/cpnd/cpnd_evt.c > @@ -26,7 +26,7 @@ > > #include "cpnd.h" > > -extern uint32_t cpnd_ckpt_non_collocated_rplica_close(CPND_CB *cb, > CPND_CKPT_NODE *cp_node, SaAisErrorT *error); > +extern uint32_t cpnd_proc_rdset_start(CPND_CB *cb, CPND_CKPT_NODE > +*cp_node); > extern uint32_t cpnd_proc_non_colloc_rt_expiry(CPND_CB *cb, > SaCkptCheckpointHandleT ckpt_id); > > static uint32_t cpnd_evt_proc_cb_dump(CPND_CB *cb); @@ -1194,8 > +1194,7 @@ static uint32_t cpnd_evt_proc_ckpt_unlin > /*************************************************************************** * > * Name : cpnd_evt_proc_ckpt_unlink_info > * > - * Description : Function to process check point unlink > - * from Applications. 
> + * Description : Function to process checkpoint unlink event from CPD > * > * Arguments : CPND_CB *cb - CPND CB pointer > * CPSV_EVT *evt - Received Event structure > @@ -1209,10 +1208,11 @@ static uint32_t cpnd_evt_proc_ckpt_unlin > { > uint32_t rc = NCSCC_RC_SUCCESS; > CPND_CKPT_NODE *cp_node = NULL; > - SaAisErrorT error; > + SaAisErrorT error = SA_AIS_OK; > CPSV_SEND_INFO sinfo_cpa; > CPSV_EVT send_evt; > bool sinfo_cpa_flag = false; > + bool destroy_replica = false; > > TRACE_ENTER(); > memset(&send_evt, '\0', sizeof(CPSV_EVT)); @@ -1220,25 +1220,35 @@ > static uint32_t cpnd_evt_proc_ckpt_unlin > if (cp_node == NULL) { > TRACE_4("cpnd ckpt node get failed for ckpt_id:%llx",evt->info.ckpt_ulink.ckpt_id); > rc = NCSCC_RC_FAILURE; > - send_evt.info.cpa.info.ulinkRsp.error = SA_AIS_ERR_NOT_EXIST; > + error = SA_AIS_ERR_NOT_EXIST; > goto agent_rsp; > } > > sinfo_cpa = cp_node->cpa_sinfo; > sinfo_cpa_flag = cp_node->cpa_sinfo_flag; > + > if (cp_node->is_close == true) { > - send_evt.info.cpa.info.ulinkRsp.error = SA_AIS_OK; > + /* For non-collocated checkpoint if retention duration timer is active > + * (i.e the checkpoint is not opened by any client in cluster) the replica > + * should be destroyed in this case */ > + if (!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) { > + if (cp_node->ret_tmr.is_active) { > + TRACE_1("cpnd destroy replica ckpt_id:%llx - No client opens the non-collocated checkpoint ", > + cp_node->ckpt_id); > + destroy_replica = true; > + } > + } > + /* For collocated checkpoint, there is no client opening the checkpoint on this > + * node. The replica should be destroyed. 
*/ > + else > + destroy_replica = true; > + } > + > + if (destroy_replica == true) { > /* check timer is present,if yes...stop the timer and destroy shm_info and the node */ > if (cp_node->ret_tmr.is_active) > cpnd_tmr_stop(&cp_node->ret_tmr); > > - if (!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) { > - if (cpnd_is_noncollocated_replica_present_on_payload(cb, cp_node)) { > - rc = NCSCC_RC_SUCCESS; > - goto agent_rsp; > - } > - } > - > rc = cpnd_ckpt_replica_destroy(cb, cp_node, &error); > if (rc == NCSCC_RC_FAILURE) { > TRACE_4("cpnd ckpt replica destroy failed for ckpt_id:%llx,error > %u",cp_node->ckpt_id, error); @@ -1260,8 +1270,6 @@ static uint32_t > cpnd_evt_proc_ckpt_unlin > > } > TRACE_4("cpnd proc ckpt unlink set for > ckpt_id:%llx",cp_node->ckpt_id); > - > - send_evt.info.cpa.info.ulinkRsp.error = SA_AIS_OK; > } > > agent_rsp: > @@ -1269,6 +1277,7 @@ static uint32_t cpnd_evt_proc_ckpt_unlin > if (sinfo_cpa_flag == 1) { > send_evt.type = CPSV_EVT_TYPE_CPA; > send_evt.info.cpa.type = CPA_EVT_ND2A_CKPT_UNLINK_RSP; > + send_evt.info.cpa.info.ulinkRsp.error = error; > rc = cpnd_mds_send_rsp(cb, &sinfo_cpa, &send_evt); > > } > @@ -1767,7 +1776,6 @@ static uint32_t cpnd_evt_proc_ckpt_activ > static uint32_t cpnd_evt_proc_ckpt_rdset_info(CPND_CB *cb, CPND_EVT *evt, CPSV_SEND_INFO *sinfo) > { > CPND_CKPT_NODE *cp_node = NULL; > - SaAisErrorT error = SA_AIS_OK; > > TRACE_ENTER(); > /* get cp_node from ckpt_info_db */ @@ -1791,14 +1799,9 @@ static > uint32_t cpnd_evt_proc_ckpt_rdset > } > > if (evt->info.rdset.type == CPSV_CKPT_RDSET_START) { > - if (!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) { > - if (cpnd_ckpt_non_collocated_rplica_close(cb, cp_node, &error) == NCSCC_RC_FAILURE) { > - TRACE_4("cpnd ckpt relica close failed for client_hdl:%llx,ckpt_id:%llx",evt->info.closeReq.client_hdl, cp_node->ckpt_id); > - > - } > - TRACE_LEAVE(); > - return NCSCC_RC_SUCCESS; > - } > + cpnd_proc_rdset_start(cb, cp_node); > + 
TRACE_LEAVE(); > + return NCSCC_RC_SUCCESS; > } > > /* if timer already started on one of the node then what to do!!! > diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c > b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c > --- a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c > +++ b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c > @@ -2297,53 +2297,61 @@ uint32_t cpnd_ckpt_replica_close(CPND_CB > } > > /*************************************************************************** ************* > - * Name : cpnd_ckpt_non_collocated_rplica_close > + * Name : cpnd_proc_rdset_start > * > - * Description : This is the function close the non_collocated Ckpt Replica > + * Description : This is the function process the event CPSV_CKPT_RDSET_START > + * This event is only applicable for non-collocated checkpoint > * Arguments : cb - CPND Control Block pointer > * cp_node - pointer to checkpoint node > * > * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE > > ********************************************************************** > *******************/ > > -uint32_t cpnd_ckpt_non_collocated_rplica_close(CPND_CB *cb, > CPND_CKPT_NODE *cp_node, SaAisErrorT *error) > +uint32_t cpnd_proc_rdset_start(CPND_CB *cb, CPND_CKPT_NODE *cp_node) > { > SaTimeT presentTime; > + SaAisErrorT error = SA_AIS_OK; > uint32_t rc = NCSCC_RC_SUCCESS; > > TRACE_ENTER(); > - if (cp_node->ckpt_lcl_ref_cnt == 0) { > > - cp_node->is_close = true; > - cpnd_restart_set_close_flag(cb, cp_node); > + if (m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) { > + TRACE_LEAVE(); > + return NCSCC_RC_SUCCESS; > + } > > - if (cp_node->is_unlink != true && > - (m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuratio n) != 0)) { > - m_GET_TIME_STAMP(presentTime); > - cpnd_restart_update_timer(cb, cp_node, presentTime); > + if (cp_node->ckpt_lcl_ref_cnt != 0) { > + LOG_ER("cpnd receives CPND_EVT_D2ND_RDSET_INFO with START while ckpt_lcl_ref_cnt = %d", cp_node->ckpt_lcl_ref_cnt); > + TRACE_LEAVE(); > 
+ return NCSCC_RC_FAILURE; > + } > > - cp_node->ret_tmr.type = CPND_TMR_TYPE_NON_COLLOC_RETENTION; > - cp_node->ret_tmr.uarg = cb->cpnd_cb_hdl_id; > - cp_node->ret_tmr.ckpt_id = cp_node->ckpt_id; > - cpnd_tmr_start(&cp_node->ret_tmr, > - m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuration )); > - TRACE_1("cpnd ckpt ret tmr success ckpt_id:%llx",cp_node->ckpt_id); > - } else { > - /* Check for Non-Collocated Replica */ > - if (cpnd_is_noncollocated_replica_present_on_payload(cb, cp_node)) { > - return NCSCC_RC_SUCCESS; > - } > - rc = cpnd_ckpt_replica_destroy(cb, cp_node, error); > - if (rc == NCSCC_RC_FAILURE) { > - TRACE_4("cpnd ckpt replica destroy failed ckpt_id:%llx",cp_node->ckpt_id); > - return NCSCC_RC_FAILURE; > - } > - TRACE_1("cpnd ckpt replica destroy failed ckpt_id:%llx",cp_node->ckpt_id); > + cp_node->is_close = true; > + cpnd_restart_set_close_flag(cb, cp_node); > > - cpnd_restart_shm_ckpt_free(cb, cp_node); > - cpnd_ckpt_node_destroy(cb, cp_node); > + if (cp_node->is_unlink != true && > + (m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuratio n) != 0)) { > + m_GET_TIME_STAMP(presentTime); > + cpnd_restart_update_timer(cb, cp_node, presentTime); > + > + cp_node->ret_tmr.type = CPND_TMR_TYPE_NON_COLLOC_RETENTION; > + cp_node->ret_tmr.uarg = cb->cpnd_cb_hdl_id; > + cp_node->ret_tmr.ckpt_id = cp_node->ckpt_id; > + cpnd_tmr_start(&cp_node->ret_tmr, > + m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuration )); > + TRACE_1("cpnd ckpt ret tmr success ckpt_id:%llx",cp_node->ckpt_id); > + } else { > + rc = cpnd_ckpt_replica_destroy(cb, cp_node, &error); > + if (rc == NCSCC_RC_FAILURE) { > + LOG_ER("cpnd ckpt replica destroy failed ckpt_id:%llx, error:%d",cp_node->ckpt_id, error); > + return NCSCC_RC_FAILURE; > } > + TRACE_1("cpnd ckpt replica destroy success > +ckpt_id:%llx",cp_node->ckpt_id); > + > + cpnd_restart_shm_ckpt_free(cb, cp_node); > + cpnd_ckpt_node_destroy(cb, cp_node); > } > + > 
TRACE_LEAVE(); > return NCSCC_RC_SUCCESS; > } ------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/opensaf-devel
