Hi Ted,

I'm not clear about the scenario in which the problem happens.

According to the code, the cpd informs the cpnds and cpas about the active
replica via the following messages:
- CPND_EVT_D2ND_CKPT_ACTIVE_SET
- CPA_EVT_D2A_ACT_CKPT_INFO_BCAST_SEND

Best regards,
Nhat Pham

646 static uint32_t cpd_evt_proc_active_set(CPD_CB *cb, CPD_EVT *evt,
CPSV_SEND_INFO *sinfo)
 647 {
 648         CPD_CKPT_INFO_NODE *ckpt_node = NULL;
 649         SaAisErrorT rc = SA_AIS_OK;
 650         CPSV_EVT send_evt;
 651         uint32_t proc_rc = NCSCC_RC_SUCCESS;
 652
 653         TRACE_ENTER();
 654         rc = cpd_proc_active_set(cb, evt->info.arep_set.ckpt_id,
evt->info.arep_set.mds_dest, &ckpt_node);
 655         if (rc != SA_AIS_OK)
 656                 goto send_rsp;
 657
 658         /* REDUNDANCY  A2S  */
 659         cpd_a2s_ckpt_arep_set(cb, ckpt_node);
 660
 661  send_rsp:
 662         memset(&send_evt, 0, sizeof(CPSV_EVT));
 663         send_evt.type = CPSV_EVT_TYPE_CPND;
 664         send_evt.info.cpnd.type = CPND_EVT_D2ND_CKPT_ACTIVE_SET_ACK;
 665         send_evt.info.cpnd.info.arep_ack.error = rc;
 666         proc_rc = cpd_mds_send_rsp(cb, sinfo, &send_evt);
 667
 668         if (rc == SA_AIS_OK) {
 669                 /* Broadcast the Active Replica info to all CPNDs */
 670                 memset(&send_evt, 0, sizeof(CPSV_EVT));
 671                 send_evt.type = CPSV_EVT_TYPE_CPND;
 672                 send_evt.info.cpnd.type =
CPND_EVT_D2ND_CKPT_ACTIVE_SET;
 673                 send_evt.info.cpnd.info.active_set.ckpt_id =
evt->info.arep_set.ckpt_id;
 674                 send_evt.info.cpnd.info.active_set.mds_dest =
evt->info.arep_set.mds_dest;
 675                 proc_rc = cpd_mds_bcast_send(cb, &send_evt,
NCSMDS_SVC_ID_CPND);
 676         }
 677
 678         /*Broadcast the active MDS_DEST info of ckpt to all CPA's */
 679         if (rc == SA_AIS_OK) {
 680                 memset(&send_evt, 0, sizeof(CPSV_EVT));
 681                 send_evt.type = CPSV_EVT_TYPE_CPA;
 682                 send_evt.info.cpa.type =
CPA_EVT_D2A_ACT_CKPT_INFO_BCAST_SEND;
 683                 send_evt.info.cpa.info.ackpt_info.ckpt_id =
evt->info.arep_set.ckpt_id;
 684                 send_evt.info.cpa.info.ackpt_info.mds_dest =
evt->info.arep_set.mds_dest;
 685                 proc_rc = cpd_mds_bcast_send(cb, &send_evt,
NCSMDS_SVC_ID_CPA);
 686                 TRACE_2("cpd ckpt active set success for
ckpt_id:%llx,mds_dest:%"PRIu64,evt->info.arep_set.ckpt_id,
 687                                evt->info.arep_set.mds_dest);
 688         }
 689
 690         TRACE_LEAVE2("Ret val %d",proc_rc);
 691         return proc_rc;
 692 }

-----Original Message-----
From: A V Mahesh [mailto:[email protected]]
Sent: Tuesday, October 13, 2015 2:21 PM
To: Yao Cheng LIANG <[email protected]>; [email protected]
Subject: Re: [users] issue with Checkpint when setting active replica

Ok

I will look in to it.

-AVM

On 10/13/2015 11:36 AM, Yao Cheng LIANG wrote:
> Below is what CPD does. It can easily be seen that it does not inform the
other cpnds:
> -------------------
> uint32_t cpd_proc_active_set(CPD_CB *cb, SaCkptCheckpointHandleT ckpt_id,
MDS_DEST mds_dest,
>                         CPD_CKPT_INFO_NODE **ckpt_node)
> {
>       SaAisErrorT rc = SA_AIS_OK;
>       CPD_REP_KEY_INFO key_info;
>       CPD_CKPT_REPLOC_INFO *rep_info = NULL;
>       CPD_CPND_INFO_NODE *cpnd_info_node;
>       SaNameT node_name;
>
>       memset(&node_name, 0, sizeof(SaNameT));
>       memset(&key_info, 0, sizeof(CPD_REP_KEY_INFO));
>
>       cpd_ckpt_node_get(&cb->ckpt_tree, &ckpt_id, ckpt_node);
>       if ((*ckpt_node) == NULL) {
>               TRACE_4("cpd ckpt info node get failed for
ckpt_id:%llx",ckpt_id);
>
>               return SA_AIS_ERR_NOT_EXIST;
>       }
> /* Update the Active Replica Info */
>
>       if ((*ckpt_node)->is_active_exists) {
>               if (((*ckpt_node)->active_dest) != mds_dest) {
>                       cpd_cpnd_info_node_get(&cb->cpnd_tree,
&((*ckpt_node)->active_dest), &cpnd_info_node);
>                       if (cpnd_info_node) {
>                               key_info.ckpt_name =
(*ckpt_node)->ckpt_name;
>                               key_info.node_name =
cpnd_info_node->node_name;
>                               cpd_ckpt_reploc_get(&cb->ckpt_reploc_tree,
&key_info, &rep_info);
>                               if (rep_info) {
>                                       rep_info->rep_type = 2;
>                               }
>                               memset(&key_info, 0,
sizeof(CPD_REP_KEY_INFO));
>                       }
>               }
>       }
>
>       (*ckpt_node)->is_active_exists = true;
>       (*ckpt_node)->active_dest = mds_dest;
>
>       if (mds_dest) {
>               cpd_cpnd_info_node_get(&cb->cpnd_tree, &mds_dest,
&cpnd_info_node);
>               if (!cpnd_info_node) {
>                       TRACE_4("cpd cpnd node does not exit for
mds_dest:%"PRIu64,mds_dest);
>                       TRACE_LEAVE();
>                       return rc;
>               }
>               key_info.ckpt_name = (*ckpt_node)->ckpt_name;
>               key_info.node_name = cpnd_info_node->node_name;
>               /*  key_info.node_name.length =
m_NCS_OS_NTOHS(cpnd_info_node->node_name.length); */
>               cpd_ckpt_reploc_get(&cb->ckpt_reploc_tree, &key_info,
&rep_info);
>               if (rep_info) {
>                       rep_info->rep_type = 1;
>               }
>       }
>       TRACE_LEAVE();
>       return rc;
> }
> ------------------------------------
>
> Br,
>
> Ted
>
> -----Original Message-----
> From: Yao Cheng LIANG
> Sent: Tuesday, October 13, 2015 1:13 PM
> To: 'A V Mahesh'; [email protected]
> Cc: Yao Cheng LIANG
> Subject: RE: issue with Checkpint when setting active replica
>
> Dear AVM,
>
> It does not need a test application. If you go through the processing of
activating a replica, it is easy to find what I have described. Code review is
another way of finding issues, isn't it?
>
> Br,
>
> Ted
>
> -----Original Message-----
> From: A V Mahesh [mailto:[email protected]]
> Sent: Tuesday, October 13, 2015 12:10 PM
> To: Yao Cheng LIANG; [email protected]
> Subject: Re: issue with Checkpint when setting active replica
>
> Hi Ted,
>
> Please raise a ticket by attaching the test application and reproducible
steps.
>
> -AVM
>
>
> On 10/13/2015 9:17 AM, Yao Cheng LIANG wrote:
>> Dear all,
>>
>> I am reading the 4.6.0 checkpoint service code, and found a possible bug when
setting the active replica. The current implementation only works the first time a
replica is set active, as it only updates the relevant data structures in cpd
and this cpnd, but does not inform the other cpnds which have a cpa that opened the
checkpoint. Suppose multiple cpnds have opened a checkpoint, say
a, b, c, d, ...; now "a" is active, so "b", "c", "d" all know that
"a" is "active". Now suppose "b" calls "saCkptActiveReplicaSet": the 4.6.0
implementation only updates the cpd about the new "active" replica, but the cpd does
not inform "a", "c", "d", so "a", "c", "d" still think "a" is active. This
is apparently wrong.
>>
>> Br,
>>
>> Ted


----------------------------------------------------------------------------
--
_______________________________________________
Opensaf-users mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-users



------------------------------------------------------------------------------
_______________________________________________
Opensaf-users mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-users

Reply via email to