Hi Mahesh, Would you have comment about the patch?
Best regards, Nhat Pham -----Original Message----- From: A V Mahesh [mailto:[email protected]] Sent: Wednesday, December 9, 2015 1:19 PM To: Nhat Pham <[email protected]>; [email protected] Cc: [email protected] Subject: Re: [PATCH 1 of 1] cpsv: improve handling unlink and close non-collocated checkpoint [#1616] Hi, Ok got it , I will review both cases -AVM On 12/9/2015 11:34 AM, Nhat Pham wrote: > Hi Mahesh, > > For 'several problem', I mean 3 use cases where: > 1,2 : the checkpoint replicas are not deleted immediately even no > client exists > 3: the checkpoint is deleted although there is a client using the > checkpoint. > > The patch only addresses the problem in these 3 use cases. > > Best regards, > Nhat Pham > > -----Original Message----- > From: A V Mahesh [mailto:[email protected]] > Sent: Wednesday, December 9, 2015 12:21 PM > To: Nhat Pham <[email protected]>; [email protected] > Cc: [email protected] > Subject: Re: [PATCH 1 of 1] cpsv: improve handling unlink and close > non-collocated checkpoint [#1616] > > Hi Nhat > > >>There are several problems relating to closing and unlinking > non-collocated checkpoint. > > I can see only one problem unlinked non-collocated checkpoint is not > getting deleted immediate even No client exist for that non-collocated > checkpoint. > > I see 1,2 ,3 are use-case of non-collocated checkpoint , in all > cases the the non-collocated checkpoint is not getting deleted > immediately is that you mean by `several problems` ? > > Please let me know is any other portable exist and it is being > addressed in this patch , so that I can look the patch in that point of view > as well . > > -AVM > > > On 12/9/2015 8:06 AM, Nhat Pham wrote: >> osaf/services/saf/cpsv/cpnd/cpnd_evt.c | 51 +++++++++++++------------ >> osaf/services/saf/cpsv/cpnd/cpnd_proc.c | 66 > ++++++++++++++++++-------------- >> 2 files changed, 64 insertions(+), 53 deletions(-) >> >> >> Problem: >> -------- >> There are several problems relating to closing and unlinking > non-collocated checkpoint. >> 1. A non-collocated checkpoint is firstly created on SC-2. It is >> closed on > SC-2. It is opened on PL-3. >> It is unlinked. It is closes on PL-3. The replicas on SCs are not >> destroyed although the checkpoint is unlinked and no client is using it. >> >> 2. A non-collocated checkpoint is firstly created on PL-3. It is >> closed on > PL-3. It is opened on SC-2. >> It is unlinked. It is closes on SC-2. The replicas on SCs and PL-3 >> are not destroyed although the checkpoint is unlinked and no client >> is using > it. >> 3. A non-collocated checkpoint is firstly created on PL-3. It is >> closed on > PL-3. It is opened on PL-4. >> It is unlinked. The replicas on SCs and PL-3 are destroyed although >> the > checkpoint is using on PL-4. >> Solution: >> --------- >> The main cause of above problems is to use checking if non-collocated >> replica is on PL to decide destroying the replicas. This mechanism is >> not correct in some cases. The solution is use another mechanism >> which checks if there is any client using the checkpoint on the >> cluster by > verifying if the retention duration timer is active or not. >> Test: >> ----- >> Following test cases were executed for both non-collocated and >> collocated checkpoint to verify the solution: >> 1. verify_unlink_ckpt_created_on_sc_before_close_it_from_sc >> 2. verify_unlink_ckpt_created_on_sc_before_close_it_from_pl >> 3. verify_unlink_ckpt_created_on_sc_after_close_it >> 4. verify_unlink_ckpt_created_on_pl_before_close_it_from_pl >> 5. verify_unlink_ckpt_created_on_pl_before_close_it_from_sc >> 6. verify_unlink_ckpt_created_on_pl_before_close_it_from_other_pl >> 7. verify_unlink_ckpt_created_on_pl_after_close_it >> >> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_evt.c >> b/osaf/services/saf/cpsv/cpnd/cpnd_evt.c >> --- a/osaf/services/saf/cpsv/cpnd/cpnd_evt.c >> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_evt.c >> @@ -26,7 +26,7 @@ >> >> #include "cpnd.h" >> >> -extern uint32_t cpnd_ckpt_non_collocated_rplica_close(CPND_CB *cb, >> CPND_CKPT_NODE *cp_node, SaAisErrorT *error); >> +extern uint32_t cpnd_proc_rdset_start(CPND_CB *cb, CPND_CKPT_NODE >> +*cp_node); >> extern uint32_t cpnd_proc_non_colloc_rt_expiry(CPND_CB *cb, >> SaCkptCheckpointHandleT ckpt_id); >> >> static uint32_t cpnd_evt_proc_cb_dump(CPND_CB *cb); @@ -1194,8 >> +1194,7 @@ static uint32_t cpnd_evt_proc_ckpt_unlin >> > /********************************************************************* > ****** > * >> * Name : cpnd_evt_proc_ckpt_unlink_info >> * >> - * Description : Function to process check point unlink >> - * from Applications. >> + * Description : Function to process checkpoint unlink event from CPD >> * >> * Arguments : CPND_CB *cb - CPND CB pointer >> * CPSV_EVT *evt - Received Event structure >> @@ -1209,10 +1208,11 @@ static uint32_t cpnd_evt_proc_ckpt_unlin >> { >> uint32_t rc = NCSCC_RC_SUCCESS; >> CPND_CKPT_NODE *cp_node = NULL; >> - SaAisErrorT error; >> + SaAisErrorT error = SA_AIS_OK; >> CPSV_SEND_INFO sinfo_cpa; >> CPSV_EVT send_evt; >> bool sinfo_cpa_flag = false; >> + bool destroy_replica = false; >> >> TRACE_ENTER(); >> memset(&send_evt, '\0', sizeof(CPSV_EVT)); @@ -1220,25 +1220,35 >> @@ static uint32_t cpnd_evt_proc_ckpt_unlin >> if (cp_node == NULL) { >> TRACE_4("cpnd ckpt node get failed for > ckpt_id:%llx",evt->info.ckpt_ulink.ckpt_id); >> rc = NCSCC_RC_FAILURE; >> - send_evt.info.cpa.info.ulinkRsp.error = > SA_AIS_ERR_NOT_EXIST; >> + error = SA_AIS_ERR_NOT_EXIST; >> goto agent_rsp; >> } >> >> sinfo_cpa = cp_node->cpa_sinfo; >> sinfo_cpa_flag = cp_node->cpa_sinfo_flag; >> + >> if (cp_node->is_close == true) { >> - send_evt.info.cpa.info.ulinkRsp.error = SA_AIS_OK; >> + /* For non-collocated checkpoint if retention duration timer > is active >> + * (i.e the checkpoint is not opened by any client in > cluster) the replica >> + * should be destroyed in this case */ >> + if > (!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) > { >> + if (cp_node->ret_tmr.is_active) { >> + TRACE_1("cpnd destroy replica ckpt_id:%llx - > No client opens the non-collocated checkpoint ", >> + cp_node->ckpt_id); >> + destroy_replica = true; >> + } >> + } >> + /* For collocated checkpoint, there is no client opening the > checkpoint on this >> + * node. The replica should be destroyed. */ >> + else >> + destroy_replica = true; >> + } >> + >> + if (destroy_replica == true) { >> /* check timer is present,if yes...stop the timer and > destroy shm_info and the node */ >> if (cp_node->ret_tmr.is_active) >> cpnd_tmr_stop(&cp_node->ret_tmr); >> >> - if > (!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) > { >> - if > (cpnd_is_noncollocated_replica_present_on_payload(cb, cp_node)) { >> - rc = NCSCC_RC_SUCCESS; >> - goto agent_rsp; >> - } >> - } >> - >> rc = cpnd_ckpt_replica_destroy(cb, cp_node, &error); >> if (rc == NCSCC_RC_FAILURE) { >> TRACE_4("cpnd ckpt replica destroy failed for > ckpt_id:%llx,error >> %u",cp_node->ckpt_id, error); @@ -1260,8 +1270,6 @@ static uint32_t >> cpnd_evt_proc_ckpt_unlin >> >> } >> TRACE_4("cpnd proc ckpt unlink set for >> ckpt_id:%llx",cp_node->ckpt_id); >> - >> - send_evt.info.cpa.info.ulinkRsp.error = SA_AIS_OK; >> } >> >> agent_rsp: >> @@ -1269,6 +1277,7 @@ static uint32_t cpnd_evt_proc_ckpt_unlin >> if (sinfo_cpa_flag == 1) { >> send_evt.type = CPSV_EVT_TYPE_CPA; >> send_evt.info.cpa.type = CPA_EVT_ND2A_CKPT_UNLINK_RSP; >> + send_evt.info.cpa.info.ulinkRsp.error = error; >> rc = cpnd_mds_send_rsp(cb, &sinfo_cpa, &send_evt); >> >> } >> @@ -1767,7 +1776,6 @@ static uint32_t cpnd_evt_proc_ckpt_activ >> static uint32_t cpnd_evt_proc_ckpt_rdset_info(CPND_CB *cb, >> CPND_EVT > *evt, CPSV_SEND_INFO *sinfo) >> { >> CPND_CKPT_NODE *cp_node = NULL; >> - SaAisErrorT error = SA_AIS_OK; >> >> TRACE_ENTER(); >> /* get cp_node from ckpt_info_db */ @@ -1791,14 +1799,9 @@ static >> uint32_t cpnd_evt_proc_ckpt_rdset >> } >> >> if (evt->info.rdset.type == CPSV_CKPT_RDSET_START) { >> - if > (!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) > { >> - if (cpnd_ckpt_non_collocated_rplica_close(cb, > cp_node, &error) == NCSCC_RC_FAILURE) { >> - TRACE_4("cpnd ckpt relica close failed for > client_hdl:%llx,ckpt_id:%llx",evt->info.closeReq.client_hdl, > cp_node->ckpt_id); >> - >> - } >> - TRACE_LEAVE(); >> - return NCSCC_RC_SUCCESS; >> - } >> + cpnd_proc_rdset_start(cb, cp_node); >> + TRACE_LEAVE(); >> + return NCSCC_RC_SUCCESS; >> } >> >> /* if timer already started on one of the node then what to do!!! >> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c >> b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c >> --- a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c >> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c >> @@ -2297,53 +2297,61 @@ uint32_t cpnd_ckpt_replica_close(CPND_CB >> } >> >> > /********************************************************************* > ****** > ************* >> - * Name : cpnd_ckpt_non_collocated_rplica_close >> + * Name : cpnd_proc_rdset_start >> * >> - * Description : This is the function close the non_collocated Ckpt > Replica >> + * Description : This is the function process the event > CPSV_CKPT_RDSET_START >> + * This event is only applicable for non-collocated > checkpoint >> * Arguments : cb - CPND Control Block pointer >> * cp_node - pointer to checkpoint node >> * >> * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE >> >> ********************************************************************* >> * >> *******************/ >> >> -uint32_t cpnd_ckpt_non_collocated_rplica_close(CPND_CB *cb, >> CPND_CKPT_NODE *cp_node, SaAisErrorT *error) >> +uint32_t cpnd_proc_rdset_start(CPND_CB *cb, CPND_CKPT_NODE *cp_node) >> { >> SaTimeT presentTime; >> + SaAisErrorT error = SA_AIS_OK; >> uint32_t rc = NCSCC_RC_SUCCESS; >> >> TRACE_ENTER(); >> - if (cp_node->ckpt_lcl_ref_cnt == 0) { >> >> - cp_node->is_close = true; >> - cpnd_restart_set_close_flag(cb, cp_node); >> + if > (m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) > { >> + TRACE_LEAVE(); >> + return NCSCC_RC_SUCCESS; >> + } >> >> - if (cp_node->is_unlink != true && >> - > (m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionD > uratio > n) != 0)) { >> - m_GET_TIME_STAMP(presentTime); >> - cpnd_restart_update_timer(cb, cp_node, presentTime); >> + if (cp_node->ckpt_lcl_ref_cnt != 0) { >> + LOG_ER("cpnd receives CPND_EVT_D2ND_RDSET_INFO with START > while ckpt_lcl_ref_cnt = %d", cp_node->ckpt_lcl_ref_cnt); >> + TRACE_LEAVE(); >> + return NCSCC_RC_FAILURE; >> + } >> >> - cp_node->ret_tmr.type = > CPND_TMR_TYPE_NON_COLLOC_RETENTION; >> - cp_node->ret_tmr.uarg = cb->cpnd_cb_hdl_id; >> - cp_node->ret_tmr.ckpt_id = cp_node->ckpt_id; >> - cpnd_tmr_start(&cp_node->ret_tmr, >> - > m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDu > ration > )); >> - TRACE_1("cpnd ckpt ret tmr success > ckpt_id:%llx",cp_node->ckpt_id); >> - } else { >> - /* Check for Non-Collocated Replica */ >> - if > (cpnd_is_noncollocated_replica_present_on_payload(cb, cp_node)) { >> - return NCSCC_RC_SUCCESS; >> - } >> - rc = cpnd_ckpt_replica_destroy(cb, cp_node, error); >> - if (rc == NCSCC_RC_FAILURE) { >> - TRACE_4("cpnd ckpt replica destroy failed > ckpt_id:%llx",cp_node->ckpt_id); >> - return NCSCC_RC_FAILURE; >> - } >> - TRACE_1("cpnd ckpt replica destroy failed > ckpt_id:%llx",cp_node->ckpt_id); >> + cp_node->is_close = true; >> + cpnd_restart_set_close_flag(cb, cp_node); >> >> - cpnd_restart_shm_ckpt_free(cb, cp_node); >> - cpnd_ckpt_node_destroy(cb, cp_node); >> + if (cp_node->is_unlink != true && >> + > (m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionD > uratio > n) != 0)) { >> + m_GET_TIME_STAMP(presentTime); >> + cpnd_restart_update_timer(cb, cp_node, presentTime); >> + >> + cp_node->ret_tmr.type = CPND_TMR_TYPE_NON_COLLOC_RETENTION; >> + cp_node->ret_tmr.uarg = cb->cpnd_cb_hdl_id; >> + cp_node->ret_tmr.ckpt_id = cp_node->ckpt_id; >> + cpnd_tmr_start(&cp_node->ret_tmr, >> + > m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDu > ration > )); >> + TRACE_1("cpnd ckpt ret tmr success > ckpt_id:%llx",cp_node->ckpt_id); >> + } else { >> + rc = cpnd_ckpt_replica_destroy(cb, cp_node, &error); >> + if (rc == NCSCC_RC_FAILURE) { >> + LOG_ER("cpnd ckpt replica destroy failed > ckpt_id:%llx, error:%d",cp_node->ckpt_id, error); >> + return NCSCC_RC_FAILURE; >> } >> + TRACE_1("cpnd ckpt replica destroy success >> +ckpt_id:%llx",cp_node->ckpt_id); >> + >> + cpnd_restart_shm_ckpt_free(cb, cp_node); >> + cpnd_ckpt_node_destroy(cb, cp_node); >> } >> + >> TRACE_LEAVE(); >> return NCSCC_RC_SUCCESS; >> } > ------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/opensaf-devel
