Hi Ted,
I'm not clear about the scenario in which the problem happens.
According to the code, the cpd informs the cpnds and the cpas about the active
replica via the following messages:
- CPND_EVT_D2ND_CKPT_ACTIVE_SET
- CPA_EVT_D2A_ACT_CKPT_INFO_BCAST_SEND
Best regards,
Nhat Pham
646 static uint32_t cpd_evt_proc_active_set(CPD_CB *cb, CPD_EVT *evt,
CPSV_SEND_INFO *sinfo)
647 {
648 CPD_CKPT_INFO_NODE *ckpt_node = NULL;
649 SaAisErrorT rc = SA_AIS_OK;
650 CPSV_EVT send_evt;
651 uint32_t proc_rc = NCSCC_RC_SUCCESS;
652
653 TRACE_ENTER();
654 rc = cpd_proc_active_set(cb, evt->info.arep_set.ckpt_id,
evt->info.arep_set.mds_dest, &ckpt_node);
655 if (rc != SA_AIS_OK)
656 goto send_rsp;
657
658 /* REDUNDANCY A2S */
659 cpd_a2s_ckpt_arep_set(cb, ckpt_node);
660
661 send_rsp:
662 memset(&send_evt, 0, sizeof(CPSV_EVT));
663 send_evt.type = CPSV_EVT_TYPE_CPND;
664 send_evt.info.cpnd.type = CPND_EVT_D2ND_CKPT_ACTIVE_SET_ACK;
665 send_evt.info.cpnd.info.arep_ack.error = rc;
666 proc_rc = cpd_mds_send_rsp(cb, sinfo, &send_evt);
667
668 if (rc == SA_AIS_OK) {
669 /* Broadcast the Active Replica info to all CPNDs */
670 memset(&send_evt, 0, sizeof(CPSV_EVT));
671 send_evt.type = CPSV_EVT_TYPE_CPND;
672 send_evt.info.cpnd.type =
CPND_EVT_D2ND_CKPT_ACTIVE_SET;
673 send_evt.info.cpnd.info.active_set.ckpt_id =
evt->info.arep_set.ckpt_id;
674 send_evt.info.cpnd.info.active_set.mds_dest =
evt->info.arep_set.mds_dest;
675 proc_rc = cpd_mds_bcast_send(cb, &send_evt,
NCSMDS_SVC_ID_CPND);
676 }
677
678 /*Broadcast the active MDS_DEST info of ckpt to all CPA's */
679 if (rc == SA_AIS_OK) {
680 memset(&send_evt, 0, sizeof(CPSV_EVT));
681 send_evt.type = CPSV_EVT_TYPE_CPA;
682 send_evt.info.cpa.type =
CPA_EVT_D2A_ACT_CKPT_INFO_BCAST_SEND;
683 send_evt.info.cpa.info.ackpt_info.ckpt_id =
evt->info.arep_set.ckpt_id;
684 send_evt.info.cpa.info.ackpt_info.mds_dest =
evt->info.arep_set.mds_dest;
685 proc_rc = cpd_mds_bcast_send(cb, &send_evt,
NCSMDS_SVC_ID_CPA);
686 TRACE_2("cpd ckpt active set success for
ckpt_id:%llx,mds_dest:%"PRIu64,evt->info.arep_set.ckpt_id,
687 evt->info.arep_set.mds_dest);
688 }
689
690 TRACE_LEAVE2("Ret val %d",proc_rc);
691 return proc_rc;
692 }
-----Original Message-----
From: A V Mahesh [mailto:[email protected]]
Sent: Tuesday, October 13, 2015 2:21 PM
To: Yao Cheng LIANG <[email protected]>; [email protected]
Subject: Re: [users] issue with Checkpint when setting active replica
Ok
I will look in to it.
-AVM
On 10/13/2015 11:36 AM, Yao Cheng LIANG wrote:
> Below is what CPD does. It can easily be seen that it does not inform the other
cpnds:
> -------------------
> uint32_t cpd_proc_active_set(CPD_CB *cb, SaCkptCheckpointHandleT ckpt_id,
MDS_DEST mds_dest,
> CPD_CKPT_INFO_NODE **ckpt_node)
> {
> SaAisErrorT rc = SA_AIS_OK;
> CPD_REP_KEY_INFO key_info;
> CPD_CKPT_REPLOC_INFO *rep_info = NULL;
> CPD_CPND_INFO_NODE *cpnd_info_node;
> SaNameT node_name;
>
> memset(&node_name, 0, sizeof(SaNameT));
> memset(&key_info, 0, sizeof(CPD_REP_KEY_INFO));
>
> cpd_ckpt_node_get(&cb->ckpt_tree, &ckpt_id, ckpt_node);
> if ((*ckpt_node) == NULL) {
> TRACE_4("cpd ckpt info node get failed for
ckpt_id:%llx",ckpt_id);
>
> return SA_AIS_ERR_NOT_EXIST;
> }
> /* Update the Active Replica Info */
>
> if ((*ckpt_node)->is_active_exists) {
> if (((*ckpt_node)->active_dest) != mds_dest) {
> cpd_cpnd_info_node_get(&cb->cpnd_tree,
&((*ckpt_node)->active_dest), &cpnd_info_node);
> if (cpnd_info_node) {
> key_info.ckpt_name =
(*ckpt_node)->ckpt_name;
> key_info.node_name =
cpnd_info_node->node_name;
> cpd_ckpt_reploc_get(&cb->ckpt_reploc_tree,
&key_info, &rep_info);
> if (rep_info) {
> rep_info->rep_type = 2;
> }
> memset(&key_info, 0,
sizeof(CPD_REP_KEY_INFO));
> }
> }
> }
>
> (*ckpt_node)->is_active_exists = true;
> (*ckpt_node)->active_dest = mds_dest;
>
> if (mds_dest) {
> cpd_cpnd_info_node_get(&cb->cpnd_tree, &mds_dest,
&cpnd_info_node);
> if (!cpnd_info_node) {
> TRACE_4("cpd cpnd node does not exit for
mds_dest:%"PRIu64,mds_dest);
> TRACE_LEAVE();
> return rc;
> }
> key_info.ckpt_name = (*ckpt_node)->ckpt_name;
> key_info.node_name = cpnd_info_node->node_name;
> /* key_info.node_name.length =
m_NCS_OS_NTOHS(cpnd_info_node->node_name.length); */
> cpd_ckpt_reploc_get(&cb->ckpt_reploc_tree, &key_info,
&rep_info);
> if (rep_info) {
> rep_info->rep_type = 1;
> }
> }
> TRACE_LEAVE();
> return rc;
> }
> ------------------------------------
>
> Br,
>
> Ted
>
> -----Original Message-----
> From: Yao Cheng LIANG
> Sent: Tuesday, October 13, 2015 1:13 PM
> To: 'A V Mahesh'; [email protected]
> Cc: Yao Cheng LIANG
> Subject: RE: issue with Checkpint when setting active replica
>
> Dear AVM,
>
> It does not need a test application. If you go through the processing of
activating a replica, it is easy to see what I have described. Code review is
another approach to finding issues, isn't it?
>
> Br,
>
> Ted
>
> -----Original Message-----
> From: A V Mahesh [mailto:[email protected]]
> Sent: Tuesday, October 13, 2015 12:10 PM
> To: Yao Cheng LIANG; [email protected]
> Subject: Re: issue with Checkpint when setting active replica
>
> Hi Ted,
>
> Please raise a ticket and attach the test application and the steps to
reproduce the problem.
>
> -AVM
>
>
> On 10/13/2015 9:17 AM, Yao Cheng LIANG wrote:
>> Dear all,
>>
>> I am reading the 4.6.0 checkpoint service code, and found a possible bug when
setting the active replica. The current implementation only works the first time a
replica is set active, as it only updates the relevant data structures in the cpd
and this cpnd, but does not inform the other cpnds whose cpas have opened the
checkpoint. Suppose there are multiple cpnds that have opened a checkpoint, say
they are a, b, c, d, ..., and "a" is active, so "b", "c", "d" all know that
"a" is "active". Now suppose "b" calls "saCkptActiveReplicaSet": the 4.6.0
implementation only updates the cpd with the new "active" replica, but the cpd does
not inform "a", "c", "d", so "a", "c", "d" still think "a" is active, which
is apparently wrong.
>>
>> Br,
>>
>> Ted
----------------------------------------------------------------------------
--
_______________________________________________
Opensaf-users mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-users
------------------------------------------------------------------------------
_______________________________________________
Opensaf-users mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-users