Hi Nhat
>>There are several problems relating to closing and unlinking
non-collocated checkpoint.
I can see only one problem unlinked non-collocated checkpoint is not
getting deleted immediate
even No client exist for that non-collocated checkpoint.
I see 1,2 ,3 are use-case of non-collocated checkpoint , in all cases
the the non-collocated checkpoint
is not getting deleted immediately is that you mean by `several problems` ?
Please let me know is any other portable exist and it is being addressed
in this patch ,
so that I can look the patch in that point of view as well .
-AVM
On 12/9/2015 8:06 AM, Nhat Pham wrote:
> osaf/services/saf/cpsv/cpnd/cpnd_evt.c | 51 +++++++++++++------------
> osaf/services/saf/cpsv/cpnd/cpnd_proc.c | 66
> ++++++++++++++++++--------------
> 2 files changed, 64 insertions(+), 53 deletions(-)
>
>
> Problem:
> --------
> There are several problems relating to closing and unlinking non-collocated
> checkpoint.
>
> 1. A non-collocated checkpoint is firstly created on SC-2. It is closed on
> SC-2. It is opened on PL-3.
> It is unlinked. It is closes on PL-3. The replicas on SCs are not destroyed
> although the checkpoint is
> unlinked and no client is using it.
>
> 2. A non-collocated checkpoint is firstly created on PL-3. It is closed on
> PL-3. It is opened on SC-2.
> It is unlinked. It is closes on SC-2. The replicas on SCs and PL-3 are not
> destroyed although the
> checkpoint is unlinked and no client is using it.
>
> 3. A non-collocated checkpoint is firstly created on PL-3. It is closed on
> PL-3. It is opened on PL-4.
> It is unlinked. The replicas on SCs and PL-3 are destroyed although the
> checkpoint is using on PL-4.
>
> Solution:
> ---------
> The main cause of above problems is to use checking if non-collocated replica
> is on PL to decide
> destroying the replicas. This mechanism is not correct in some cases. The
> solution is use another
> mechanism which checks if there is any client using the checkpoint on the
> cluster by verifying if
> the retention duration timer is active or not.
>
> Test:
> -----
> Following test cases were executed for both non-collocated and collocated
> checkpoint to verify
> the solution:
> 1. verify_unlink_ckpt_created_on_sc_before_close_it_from_sc
> 2. verify_unlink_ckpt_created_on_sc_before_close_it_from_pl
> 3. verify_unlink_ckpt_created_on_sc_after_close_it
> 4. verify_unlink_ckpt_created_on_pl_before_close_it_from_pl
> 5. verify_unlink_ckpt_created_on_pl_before_close_it_from_sc
> 6. verify_unlink_ckpt_created_on_pl_before_close_it_from_other_pl
> 7. verify_unlink_ckpt_created_on_pl_after_close_it
>
> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
> b/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
> --- a/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
> @@ -26,7 +26,7 @@
>
> #include "cpnd.h"
>
> -extern uint32_t cpnd_ckpt_non_collocated_rplica_close(CPND_CB *cb,
> CPND_CKPT_NODE *cp_node, SaAisErrorT *error);
> +extern uint32_t cpnd_proc_rdset_start(CPND_CB *cb, CPND_CKPT_NODE *cp_node);
> extern uint32_t cpnd_proc_non_colloc_rt_expiry(CPND_CB *cb,
> SaCkptCheckpointHandleT ckpt_id);
>
> static uint32_t cpnd_evt_proc_cb_dump(CPND_CB *cb);
> @@ -1194,8 +1194,7 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
>
> /****************************************************************************
> * Name : cpnd_evt_proc_ckpt_unlink_info
> *
> - * Description : Function to process check point unlink
> - * from Applications.
> + * Description : Function to process checkpoint unlink event from CPD
> *
> * Arguments : CPND_CB *cb - CPND CB pointer
> * CPSV_EVT *evt - Received Event structure
> @@ -1209,10 +1208,11 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
> {
> uint32_t rc = NCSCC_RC_SUCCESS;
> CPND_CKPT_NODE *cp_node = NULL;
> - SaAisErrorT error;
> + SaAisErrorT error = SA_AIS_OK;
> CPSV_SEND_INFO sinfo_cpa;
> CPSV_EVT send_evt;
> bool sinfo_cpa_flag = false;
> + bool destroy_replica = false;
>
> TRACE_ENTER();
> memset(&send_evt, '\0', sizeof(CPSV_EVT));
> @@ -1220,25 +1220,35 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
> if (cp_node == NULL) {
> TRACE_4("cpnd ckpt node get failed for
> ckpt_id:%llx",evt->info.ckpt_ulink.ckpt_id);
> rc = NCSCC_RC_FAILURE;
> - send_evt.info.cpa.info.ulinkRsp.error = SA_AIS_ERR_NOT_EXIST;
> + error = SA_AIS_ERR_NOT_EXIST;
> goto agent_rsp;
> }
>
> sinfo_cpa = cp_node->cpa_sinfo;
> sinfo_cpa_flag = cp_node->cpa_sinfo_flag;
> +
> if (cp_node->is_close == true) {
> - send_evt.info.cpa.info.ulinkRsp.error = SA_AIS_OK;
> + /* For non-collocated checkpoint if retention duration timer is
> active
> + * (i.e the checkpoint is not opened by any client in cluster)
> the replica
> + * should be destroyed in this case */
> + if
> (!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) {
> + if (cp_node->ret_tmr.is_active) {
> + TRACE_1("cpnd destroy replica ckpt_id:%llx - No
> client opens the non-collocated checkpoint ",
> + cp_node->ckpt_id);
> + destroy_replica = true;
> + }
> + }
> + /* For collocated checkpoint, there is no client opening the
> checkpoint on this
> + * node. The replica should be destroyed. */
> + else
> + destroy_replica = true;
> + }
> +
> + if (destroy_replica == true) {
> /* check timer is present,if yes...stop the timer and destroy
> shm_info and the node */
> if (cp_node->ret_tmr.is_active)
> cpnd_tmr_stop(&cp_node->ret_tmr);
>
> - if
> (!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) {
> - if
> (cpnd_is_noncollocated_replica_present_on_payload(cb, cp_node)) {
> - rc = NCSCC_RC_SUCCESS;
> - goto agent_rsp;
> - }
> - }
> -
> rc = cpnd_ckpt_replica_destroy(cb, cp_node, &error);
> if (rc == NCSCC_RC_FAILURE) {
> TRACE_4("cpnd ckpt replica destroy failed for
> ckpt_id:%llx,error %u",cp_node->ckpt_id, error);
> @@ -1260,8 +1270,6 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
>
> }
> TRACE_4("cpnd proc ckpt unlink set for
> ckpt_id:%llx",cp_node->ckpt_id);
> -
> - send_evt.info.cpa.info.ulinkRsp.error = SA_AIS_OK;
> }
>
> agent_rsp:
> @@ -1269,6 +1277,7 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
> if (sinfo_cpa_flag == 1) {
> send_evt.type = CPSV_EVT_TYPE_CPA;
> send_evt.info.cpa.type = CPA_EVT_ND2A_CKPT_UNLINK_RSP;
> + send_evt.info.cpa.info.ulinkRsp.error = error;
> rc = cpnd_mds_send_rsp(cb, &sinfo_cpa, &send_evt);
>
> }
> @@ -1767,7 +1776,6 @@ static uint32_t cpnd_evt_proc_ckpt_activ
> static uint32_t cpnd_evt_proc_ckpt_rdset_info(CPND_CB *cb, CPND_EVT *evt,
> CPSV_SEND_INFO *sinfo)
> {
> CPND_CKPT_NODE *cp_node = NULL;
> - SaAisErrorT error = SA_AIS_OK;
>
> TRACE_ENTER();
> /* get cp_node from ckpt_info_db */
> @@ -1791,14 +1799,9 @@ static uint32_t cpnd_evt_proc_ckpt_rdset
> }
>
> if (evt->info.rdset.type == CPSV_CKPT_RDSET_START) {
> - if
> (!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) {
> - if (cpnd_ckpt_non_collocated_rplica_close(cb, cp_node,
> &error) == NCSCC_RC_FAILURE) {
> - TRACE_4("cpnd ckpt relica close failed for
> client_hdl:%llx,ckpt_id:%llx",evt->info.closeReq.client_hdl,
> cp_node->ckpt_id);
> -
> - }
> - TRACE_LEAVE();
> - return NCSCC_RC_SUCCESS;
> - }
> + cpnd_proc_rdset_start(cb, cp_node);
> + TRACE_LEAVE();
> + return NCSCC_RC_SUCCESS;
> }
>
> /* if timer already started on one of the node then what to do!!!
> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
> b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
> --- a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
> @@ -2297,53 +2297,61 @@ uint32_t cpnd_ckpt_replica_close(CPND_CB
> }
>
>
> /****************************************************************************************
> - * Name : cpnd_ckpt_non_collocated_rplica_close
> + * Name : cpnd_proc_rdset_start
> *
> - * Description : This is the function close the non_collocated Ckpt Replica
> + * Description : This is the function process the event
> CPSV_CKPT_RDSET_START
> + * This event is only applicable for non-collocated
> checkpoint
> * Arguments : cb - CPND Control Block pointer
> * cp_node - pointer to checkpoint node
> *
> * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE
>
> *****************************************************************************************/
>
> -uint32_t cpnd_ckpt_non_collocated_rplica_close(CPND_CB *cb, CPND_CKPT_NODE
> *cp_node, SaAisErrorT *error)
> +uint32_t cpnd_proc_rdset_start(CPND_CB *cb, CPND_CKPT_NODE *cp_node)
> {
> SaTimeT presentTime;
> + SaAisErrorT error = SA_AIS_OK;
> uint32_t rc = NCSCC_RC_SUCCESS;
>
> TRACE_ENTER();
> - if (cp_node->ckpt_lcl_ref_cnt == 0) {
>
> - cp_node->is_close = true;
> - cpnd_restart_set_close_flag(cb, cp_node);
> + if
> (m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) {
> + TRACE_LEAVE();
> + return NCSCC_RC_SUCCESS;
> + }
>
> - if (cp_node->is_unlink != true &&
> -
> (m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuration)
> != 0)) {
> - m_GET_TIME_STAMP(presentTime);
> - cpnd_restart_update_timer(cb, cp_node, presentTime);
> + if (cp_node->ckpt_lcl_ref_cnt != 0) {
> + LOG_ER("cpnd receives CPND_EVT_D2ND_RDSET_INFO with START while
> ckpt_lcl_ref_cnt = %d", cp_node->ckpt_lcl_ref_cnt);
> + TRACE_LEAVE();
> + return NCSCC_RC_FAILURE;
> + }
>
> - cp_node->ret_tmr.type =
> CPND_TMR_TYPE_NON_COLLOC_RETENTION;
> - cp_node->ret_tmr.uarg = cb->cpnd_cb_hdl_id;
> - cp_node->ret_tmr.ckpt_id = cp_node->ckpt_id;
> - cpnd_tmr_start(&cp_node->ret_tmr,
> -
> m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuration));
> - TRACE_1("cpnd ckpt ret tmr success
> ckpt_id:%llx",cp_node->ckpt_id);
> - } else {
> - /* Check for Non-Collocated Replica */
> - if
> (cpnd_is_noncollocated_replica_present_on_payload(cb, cp_node)) {
> - return NCSCC_RC_SUCCESS;
> - }
> - rc = cpnd_ckpt_replica_destroy(cb, cp_node, error);
> - if (rc == NCSCC_RC_FAILURE) {
> - TRACE_4("cpnd ckpt replica destroy failed
> ckpt_id:%llx",cp_node->ckpt_id);
> - return NCSCC_RC_FAILURE;
> - }
> - TRACE_1("cpnd ckpt replica destroy failed
> ckpt_id:%llx",cp_node->ckpt_id);
> + cp_node->is_close = true;
> + cpnd_restart_set_close_flag(cb, cp_node);
>
> - cpnd_restart_shm_ckpt_free(cb, cp_node);
> - cpnd_ckpt_node_destroy(cb, cp_node);
> + if (cp_node->is_unlink != true &&
> +
> (m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuration)
> != 0)) {
> + m_GET_TIME_STAMP(presentTime);
> + cpnd_restart_update_timer(cb, cp_node, presentTime);
> +
> + cp_node->ret_tmr.type = CPND_TMR_TYPE_NON_COLLOC_RETENTION;
> + cp_node->ret_tmr.uarg = cb->cpnd_cb_hdl_id;
> + cp_node->ret_tmr.ckpt_id = cp_node->ckpt_id;
> + cpnd_tmr_start(&cp_node->ret_tmr,
> +
> m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuration));
> + TRACE_1("cpnd ckpt ret tmr success
> ckpt_id:%llx",cp_node->ckpt_id);
> + } else {
> + rc = cpnd_ckpt_replica_destroy(cb, cp_node, &error);
> + if (rc == NCSCC_RC_FAILURE) {
> + LOG_ER("cpnd ckpt replica destroy failed ckpt_id:%llx,
> error:%d",cp_node->ckpt_id, error);
> + return NCSCC_RC_FAILURE;
> }
> + TRACE_1("cpnd ckpt replica destroy success
> ckpt_id:%llx",cp_node->ckpt_id);
> +
> + cpnd_restart_shm_ckpt_free(cb, cp_node);
> + cpnd_ckpt_node_destroy(cb, cp_node);
> }
> +
> TRACE_LEAVE();
> return NCSCC_RC_SUCCESS;
> }
------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel