Hi,

Ok got it , I will review both cases

-AVM

On 12/9/2015 11:34 AM, Nhat Pham wrote:
> Hi Mahesh,
>
> For 'several problem', I mean 3 use cases where:
> 1,2 : the checkpoint replicas are not deleted immediately even no client
> exists
> 3: the checkpoint is deleted although there is a client using the
> checkpoint.
>
> The patch only addresses the problem in these 3 use cases.
>
> Best regards,
> Nhat Pham
>
> -----Original Message-----
> From: A V Mahesh [mailto:[email protected]]
> Sent: Wednesday, December 9, 2015 12:21 PM
> To: Nhat Pham <[email protected]>; [email protected]
> Cc: [email protected]
> Subject: Re: [PATCH 1 of 1] cpsv: improve handling unlink and close
> non-collocated checkpoint [#1616]
>
> Hi Nhat
>
>   >>There are several problems relating to closing and unlinking
> non-collocated checkpoint.
>
> I can see only one problem unlinked non-collocated checkpoint is not getting
> deleted immediate even No client exist for that non-collocated checkpoint.
>
> I see 1,2 ,3 are  use-case of  non-collocated checkpoint , in all cases the
> the non-collocated checkpoint is not getting deleted immediately is that you
> mean by `several problems` ?
>
> Please let me know is any other portable exist and it is being addressed in
> this patch , so that I can look the patch in that point of view as well .
>
> -AVM
>
>
> On 12/9/2015 8:06 AM, Nhat Pham wrote:
>>    osaf/services/saf/cpsv/cpnd/cpnd_evt.c  |  51 +++++++++++++------------
>>    osaf/services/saf/cpsv/cpnd/cpnd_proc.c |  66
> ++++++++++++++++++--------------
>>    2 files changed, 64 insertions(+), 53 deletions(-)
>>
>>
>> Problem:
>> --------
>> There are several problems relating to closing and unlinking
> non-collocated checkpoint.
>> 1. A non-collocated checkpoint is firstly created on SC-2. It is closed on
> SC-2. It is opened on PL-3.
>> It is unlinked. It is closes on PL-3. The replicas on SCs are not
>> destroyed although the checkpoint is unlinked and no client is using it.
>>
>> 2. A non-collocated checkpoint is firstly created on PL-3. It is closed on
> PL-3. It is opened on SC-2.
>> It is unlinked. It is closes on SC-2. The replicas on SCs and PL-3 are
>> not destroyed although the checkpoint is unlinked and no client is using
> it.
>> 3. A non-collocated checkpoint is firstly created on PL-3. It is closed on
> PL-3. It is opened on PL-4.
>> It is unlinked. The replicas on SCs and PL-3 are destroyed although the
> checkpoint is using on PL-4.
>> Solution:
>> ---------
>> The main cause of above problems is to use checking if non-collocated
>> replica is on PL to decide destroying the replicas. This mechanism is
>> not correct in some cases. The solution is use another mechanism which
>> checks if there is any client using the checkpoint on the cluster by
> verifying if the retention duration timer is active or not.
>> Test:
>> -----
>> Following test cases were executed for both non-collocated and
>> collocated checkpoint to verify the solution:
>> 1. verify_unlink_ckpt_created_on_sc_before_close_it_from_sc
>> 2. verify_unlink_ckpt_created_on_sc_before_close_it_from_pl
>> 3. verify_unlink_ckpt_created_on_sc_after_close_it
>> 4. verify_unlink_ckpt_created_on_pl_before_close_it_from_pl
>> 5. verify_unlink_ckpt_created_on_pl_before_close_it_from_sc
>> 6. verify_unlink_ckpt_created_on_pl_before_close_it_from_other_pl
>> 7. verify_unlink_ckpt_created_on_pl_after_close_it
>>
>> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
>> b/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
>> --- a/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
>> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
>> @@ -26,7 +26,7 @@
>>
>>    #include "cpnd.h"
>>
>> -extern uint32_t cpnd_ckpt_non_collocated_rplica_close(CPND_CB *cb,
>> CPND_CKPT_NODE *cp_node, SaAisErrorT *error);
>> +extern uint32_t cpnd_proc_rdset_start(CPND_CB *cb, CPND_CKPT_NODE
>> +*cp_node);
>>    extern uint32_t cpnd_proc_non_colloc_rt_expiry(CPND_CB *cb,
>> SaCkptCheckpointHandleT ckpt_id);
>>
>>    static uint32_t cpnd_evt_proc_cb_dump(CPND_CB *cb); @@ -1194,8
>> +1194,7 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
>>
> /***************************************************************************
> *
>>     * Name          : cpnd_evt_proc_ckpt_unlink_info
>>     *
>> - * Description   : Function to process check point unlink
>> - *                 from Applications.
>> + * Description   : Function to process checkpoint unlink event from CPD
>>     *
>>     * Arguments     : CPND_CB *cb - CPND CB pointer
>>     *                 CPSV_EVT *evt - Received Event structure
>> @@ -1209,10 +1208,11 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
>>    {
>>      uint32_t rc = NCSCC_RC_SUCCESS;
>>      CPND_CKPT_NODE *cp_node = NULL;
>> -    SaAisErrorT error;
>> +    SaAisErrorT error = SA_AIS_OK;
>>      CPSV_SEND_INFO sinfo_cpa;
>>      CPSV_EVT send_evt;
>>      bool sinfo_cpa_flag = false;
>> +    bool destroy_replica = false;
>>
>>      TRACE_ENTER();
>>      memset(&send_evt, '\0', sizeof(CPSV_EVT)); @@ -1220,25 +1220,35 @@
>> static uint32_t cpnd_evt_proc_ckpt_unlin
>>      if (cp_node == NULL) {
>>              TRACE_4("cpnd ckpt node get failed for
> ckpt_id:%llx",evt->info.ckpt_ulink.ckpt_id);
>>              rc = NCSCC_RC_FAILURE;
>> -            send_evt.info.cpa.info.ulinkRsp.error =
> SA_AIS_ERR_NOT_EXIST;
>> +            error = SA_AIS_ERR_NOT_EXIST;
>>              goto agent_rsp;
>>      }
>>
>>      sinfo_cpa = cp_node->cpa_sinfo;
>>      sinfo_cpa_flag = cp_node->cpa_sinfo_flag;
>> +
>>      if (cp_node->is_close == true) {
>> -            send_evt.info.cpa.info.ulinkRsp.error = SA_AIS_OK;
>> +            /* For non-collocated checkpoint if retention duration timer
> is active
>> +             * (i.e the checkpoint is not opened by any client in
> cluster) the replica
>> +             * should be destroyed in this case */
>> +            if
> (!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) {
>> +                    if (cp_node->ret_tmr.is_active) {
>> +                            TRACE_1("cpnd destroy replica ckpt_id:%llx -
> No client opens the non-collocated checkpoint ",
>> +                                    cp_node->ckpt_id);
>> +                            destroy_replica = true;
>> +                    }
>> +            }
>> +            /* For collocated checkpoint, there is no client opening the
> checkpoint on this
>> +             * node. The replica should be destroyed. */
>> +            else
>> +                    destroy_replica = true;
>> +    }
>> +
>> +    if (destroy_replica == true) {
>>              /* check timer is present,if yes...stop the timer and
> destroy shm_info and the node */
>>              if (cp_node->ret_tmr.is_active)
>>                      cpnd_tmr_stop(&cp_node->ret_tmr);
>>
>> -            if
> (!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) {
>> -                    if
> (cpnd_is_noncollocated_replica_present_on_payload(cb, cp_node)) {
>> -                            rc = NCSCC_RC_SUCCESS;
>> -                            goto agent_rsp;
>> -                    }
>> -            }
>> -
>>              rc = cpnd_ckpt_replica_destroy(cb, cp_node, &error);
>>              if (rc == NCSCC_RC_FAILURE) {
>>                      TRACE_4("cpnd ckpt replica destroy failed for
> ckpt_id:%llx,error
>> %u",cp_node->ckpt_id, error); @@ -1260,8 +1270,6 @@ static uint32_t
>> cpnd_evt_proc_ckpt_unlin
>>
>>              }
>>              TRACE_4("cpnd proc ckpt unlink set for
>> ckpt_id:%llx",cp_node->ckpt_id);
>> -
>> -            send_evt.info.cpa.info.ulinkRsp.error = SA_AIS_OK;
>>      }
>>
>>     agent_rsp:
>> @@ -1269,6 +1277,7 @@ static uint32_t cpnd_evt_proc_ckpt_unlin
>>      if (sinfo_cpa_flag == 1) {
>>              send_evt.type = CPSV_EVT_TYPE_CPA;
>>              send_evt.info.cpa.type = CPA_EVT_ND2A_CKPT_UNLINK_RSP;
>> +            send_evt.info.cpa.info.ulinkRsp.error = error;
>>              rc = cpnd_mds_send_rsp(cb, &sinfo_cpa, &send_evt);
>>
>>      }
>> @@ -1767,7 +1776,6 @@ static uint32_t cpnd_evt_proc_ckpt_activ
>>    static uint32_t cpnd_evt_proc_ckpt_rdset_info(CPND_CB *cb, CPND_EVT
> *evt, CPSV_SEND_INFO *sinfo)
>>    {
>>      CPND_CKPT_NODE *cp_node = NULL;
>> -    SaAisErrorT error = SA_AIS_OK;
>>
>>      TRACE_ENTER();
>>      /* get cp_node from ckpt_info_db */ @@ -1791,14 +1799,9 @@ static
>> uint32_t cpnd_evt_proc_ckpt_rdset
>>      }
>>
>>      if (evt->info.rdset.type == CPSV_CKPT_RDSET_START) {
>> -            if
> (!m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) {
>> -                    if (cpnd_ckpt_non_collocated_rplica_close(cb,
> cp_node, &error) == NCSCC_RC_FAILURE) {
>> -                            TRACE_4("cpnd ckpt relica close failed for
> client_hdl:%llx,ckpt_id:%llx",evt->info.closeReq.client_hdl,
> cp_node->ckpt_id);
>> -
>> -                    }
>> -                    TRACE_LEAVE();
>> -                    return NCSCC_RC_SUCCESS;
>> -            }
>> +            cpnd_proc_rdset_start(cb, cp_node);
>> +            TRACE_LEAVE();
>> +            return NCSCC_RC_SUCCESS;
>>      }
>>
>>      /* if timer already started on one of the node then what to do!!!
>> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
>> b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
>> --- a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
>> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
>> @@ -2297,53 +2297,61 @@ uint32_t cpnd_ckpt_replica_close(CPND_CB
>>    }
>>
>>
> /***************************************************************************
> *************
>> - * Name          : cpnd_ckpt_non_collocated_rplica_close
>> + * Name          : cpnd_proc_rdset_start
>>     *
>> - * Description   : This is the function close the non_collocated Ckpt
> Replica
>> + * Description   : This is the function process the event
> CPSV_CKPT_RDSET_START
>> + *                 This event is only applicable for non-collocated
> checkpoint
>>     * Arguments     : cb       - CPND Control Block pointer
>>     *                 cp_node  - pointer to checkpoint node
>>     *
>>     * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE
>>
>> **********************************************************************
>> *******************/
>>
>> -uint32_t cpnd_ckpt_non_collocated_rplica_close(CPND_CB *cb,
>> CPND_CKPT_NODE *cp_node, SaAisErrorT *error)
>> +uint32_t cpnd_proc_rdset_start(CPND_CB *cb, CPND_CKPT_NODE *cp_node)
>>    {
>>      SaTimeT presentTime;
>> +    SaAisErrorT error = SA_AIS_OK;
>>      uint32_t rc = NCSCC_RC_SUCCESS;
>>
>>      TRACE_ENTER();
>> -    if (cp_node->ckpt_lcl_ref_cnt == 0) {
>>
>> -            cp_node->is_close = true;
>> -            cpnd_restart_set_close_flag(cb, cp_node);
>> +    if
> (m_CPND_IS_COLLOCATED_ATTR_SET(cp_node->create_attrib.creationFlags)) {
>> +            TRACE_LEAVE();
>> +            return NCSCC_RC_SUCCESS;
>> +    }
>>
>> -            if (cp_node->is_unlink != true &&
>> -
> (m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuratio
> n) != 0)) {
>> -                    m_GET_TIME_STAMP(presentTime);
>> -                    cpnd_restart_update_timer(cb, cp_node, presentTime);
>> +    if (cp_node->ckpt_lcl_ref_cnt != 0) {
>> +            LOG_ER("cpnd receives CPND_EVT_D2ND_RDSET_INFO with START
> while ckpt_lcl_ref_cnt = %d", cp_node->ckpt_lcl_ref_cnt);
>> +            TRACE_LEAVE();
>> +            return NCSCC_RC_FAILURE;
>> +    }
>>
>> -                    cp_node->ret_tmr.type =
> CPND_TMR_TYPE_NON_COLLOC_RETENTION;
>> -                    cp_node->ret_tmr.uarg = cb->cpnd_cb_hdl_id;
>> -                    cp_node->ret_tmr.ckpt_id = cp_node->ckpt_id;
>> -                    cpnd_tmr_start(&cp_node->ret_tmr,
>> -
> m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuration
> ));
>> -                    TRACE_1("cpnd ckpt ret tmr success
> ckpt_id:%llx",cp_node->ckpt_id);
>> -            } else {
>> -                    /* Check for Non-Collocated Replica */
>> -                    if
> (cpnd_is_noncollocated_replica_present_on_payload(cb, cp_node)) {
>> -                            return NCSCC_RC_SUCCESS;
>> -                    }
>> -                    rc = cpnd_ckpt_replica_destroy(cb, cp_node, error);
>> -                    if (rc == NCSCC_RC_FAILURE) {
>> -                            TRACE_4("cpnd ckpt replica destroy failed
> ckpt_id:%llx",cp_node->ckpt_id);
>> -                            return NCSCC_RC_FAILURE;
>> -                    }
>> -                    TRACE_1("cpnd ckpt replica destroy failed
> ckpt_id:%llx",cp_node->ckpt_id);
>> +    cp_node->is_close = true;
>> +    cpnd_restart_set_close_flag(cb, cp_node);
>>
>> -                    cpnd_restart_shm_ckpt_free(cb, cp_node);
>> -                    cpnd_ckpt_node_destroy(cb, cp_node);
>> +    if (cp_node->is_unlink != true &&
>> +
> (m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuratio
> n) != 0)) {
>> +            m_GET_TIME_STAMP(presentTime);
>> +            cpnd_restart_update_timer(cb, cp_node, presentTime);
>> +
>> +            cp_node->ret_tmr.type = CPND_TMR_TYPE_NON_COLLOC_RETENTION;
>> +            cp_node->ret_tmr.uarg = cb->cpnd_cb_hdl_id;
>> +            cp_node->ret_tmr.ckpt_id = cp_node->ckpt_id;
>> +            cpnd_tmr_start(&cp_node->ret_tmr,
>> +
> m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(cp_node->create_attrib.retentionDuration
> ));
>> +            TRACE_1("cpnd ckpt ret tmr success
> ckpt_id:%llx",cp_node->ckpt_id);
>> +    } else {
>> +            rc = cpnd_ckpt_replica_destroy(cb, cp_node, &error);
>> +            if (rc == NCSCC_RC_FAILURE) {
>> +                    LOG_ER("cpnd ckpt replica destroy failed
> ckpt_id:%llx, error:%d",cp_node->ckpt_id, error);
>> +                    return NCSCC_RC_FAILURE;
>>              }
>> +            TRACE_1("cpnd ckpt replica destroy success
>> +ckpt_id:%llx",cp_node->ckpt_id);
>> +
>> +            cpnd_restart_shm_ckpt_free(cb, cp_node);
>> +            cpnd_ckpt_node_destroy(cb, cp_node);
>>      }
>> +
>>      TRACE_LEAVE();
>>      return NCSCC_RC_SUCCESS;
>>    }
>


------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to