Thanks Praveen, I have the same thought as you: the buffered
su_si_assignment should be sent when AMFD is in APP_STATE, since that is
the state in which AMFD handles non-NCS SUs.
I'm reusing the set_leds msg, which signals a veteran node to resend
its su_si_assignment msg; the patch is attached.
Another thing that could be improved in this situation is that
su_si_assignment also has to be buffered while the veteran node is waiting
for set_leds, if the susi assignment is done right after MDSNCS_UP/NEW_ACTIVE.
I'll update this part later.
@Nagu: I just did some tests and it seems to work. Please apply this patch
on top of the latest version of part 1. Sorry for the interruption :)
Thanks all
Minh
Here is the diff
diff --git a/osaf/services/saf/amf/amfd/cluster.cc
b/osaf/services/saf/amf/amfd/cluster.cc
--- a/osaf/services/saf/amf/amfd/cluster.cc
+++ b/osaf/services/saf/amf/amfd/cluster.cc
@@ -54,6 +54,7 @@ void avd_cluster_tmr_init_evh(AVD_CL_CB
{
TRACE_ENTER();
AVD_SU *su = nullptr;
+ AVD_AVND *node = nullptr;
saflog(LOG_NOTICE, amfSvcUsrName, "Cluster startup timeout,
assigning SIs to SUs");
osafassert(evt->info.tmr.type == AVD_TMR_CL_INIT);
@@ -72,6 +73,15 @@ void avd_cluster_tmr_init_evh(AVD_CL_CB
cb->init_state = AVD_APP_STATE;
m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(cb, cb, AVSV_CKPT_AVD_CB_CONFIG);
+ // Resend set_leds to veteran node
+
+ for (std::map<std::string, AVD_AVND *>::const_iterator it =
node_name_db->begin();
+ it != node_name_db->end(); it++) {
+ node = it->second;
+ if (node->veteran)
+ avd_snd_set_leds_msg(cb, node);
+ }
+
/* call the realignment routine for each of the SGs in the
* system that are not NCS specific.
*/
diff --git a/osaf/services/saf/amf/amfd/include/node.h
b/osaf/services/saf/amf/amfd/include/node.h
--- a/osaf/services/saf/amf/amfd/include/node.h
+++ b/osaf/services/saf/amf/amfd/include/node.h
@@ -142,6 +142,7 @@ class AVD_AVND {
AVD_AMF_NG *admin_ng; /* points to the nodegroup on which admin
operation is going on.*/
uint16_t node_up_msg_count; /* to count of node_up msg that director
had received from this node */
bool reboot;
+ bool veteran;
//Member functions.
void node_sus_termstate_set(bool term_state) const;
diff --git a/osaf/services/saf/amf/amfd/ndfsm.cc
b/osaf/services/saf/amf/amfd/ndfsm.cc
--- a/osaf/services/saf/amf/amfd/ndfsm.cc
+++ b/osaf/services/saf/amf/amfd/ndfsm.cc
@@ -415,7 +415,7 @@ void avd_node_up_evh(AVD_CL_CB *cb, AVD_
// this node is already up
avd_node_state_set(avnd, AVD_AVND_STATE_PRESENT);
avd_node_oper_state_set(avnd, SA_AMF_OPERATIONAL_ENABLED);
-
+ avnd->veteran = true;
// Update readiness state of all SUs which are waiting for
node
// oper state
for (const auto& su : avnd->list_of_ncs_su) {
@@ -677,6 +677,7 @@ void avd_mds_avnd_down_evh(AVD_CL_CB *cb
node->recvr_fail_sw = false;
node->node_info.initialViewNumber = 0;
node->node_info.member = SA_FALSE;
+ node->veteran = false;
}
}
diff --git a/osaf/services/saf/amf/amfd/node.cc
b/osaf/services/saf/amf/amfd/node.cc
--- a/osaf/services/saf/amf/amfd/node.cc
+++ b/osaf/services/saf/amf/amfd/node.cc
@@ -153,6 +153,7 @@ AVD_AVND *avd_node_new(const std::string
node = new AVD_AVND(dn);
node->node_up_msg_count = 0;
node->reboot = false;
+ node->veteran = false;
return node;
}
diff --git a/osaf/services/saf/amf/amfnd/di.cc
b/osaf/services/saf/amf/amfnd/di.cc
--- a/osaf/services/saf/amf/amfnd/di.cc
+++ b/osaf/services/saf/amf/amfnd/di.cc
@@ -1270,7 +1270,6 @@ void avnd_diq_rec_del(AVND_CB *cb, AVND_
/* stop the AvD msg response timer */
if (m_AVND_TMR_IS_ACTIVE(rec->resp_tmr)) {
m_AVND_TMR_MSG_RESP_STOP(cb, *rec);
- avnd_diq_rec_send_buffered_msg(cb);
/* resend pg start track */
avnd_di_resend_pg_start_track(cb);
}
diff --git a/osaf/services/saf/amf/amfnd/term.cc
b/osaf/services/saf/amf/amfnd/term.cc
--- a/osaf/services/saf/amf/amfnd/term.cc
+++ b/osaf/services/saf/amf/amfnd/term.cc
@@ -217,12 +217,14 @@ uint32_t avnd_evt_avd_set_leds_evh(AVND_
cb->rcv_msg_id = info->msg_id;
if (cb->led_state == AVND_LED_STATE_GREEN) {
- /* Nothing to be done we have already got this msg */
+ // Resend buffered headless msg if NoRed MW SU is assigned ACTIVE
+ avnd_diq_rec_send_buffered_msg(cb);
goto done;
}
cb->led_state = AVND_LED_STATE_GREEN;
+
/* Notify the NIS script/deamon that we have fully come up */
rc = nid_notify(const_cast<char*>("AMFND"), NCSCC_RC_SUCCESS,
nullptr);
On 23/08/16 23:01, praveen malviya wrote:
On 23-Aug-16 5:41 PM, praveen malviya wrote:
In susi_success_su_oper() there is a check on the readiness state of the SU:
only if it is in service does AMFD perform the switchover. Why are SUs of a
node that is not synced becoming IN_SERVICE?
Also, the AMFND where the locked SU is hosted should send the buffered
message only when its NCS SUs are assigned. Such a check can be included in
avnd_diq_rec_send_buffered_msg().
I think I missed the headless state here: the NCS SUs are already assigned.
What we need is for AMFND to send the buffered assignments after cluster
timer expiry and node sync timer expiry. In the headless case, after the
node sync timer expires, only valid nodes will be present in the system, so
failover/switchover to them will not be an issue. At the same time, the
cluster timer expiry needs to be honored, because the AMFD code for
application assignments works in APP state, which is linked to
cluster timer expiry.
-Could we hold any assignment messages received before cluster timer
expiry and node sync timer expiry, and process them after expiry?
-Another way could be that AMFND sends the buffered assignments only after
expiry of these timers. What if AMFND starts a timer (with the larger value
of the cluster timer and the node sync timer) on receiving AMFD up, and on
expiry of this timer sends the buffered assignment message?
Thanks,
Praveen
Thanks,
Praveen
On 23-Aug-16 4:33 PM, minh chau wrote:
Hi Nagu,
I see in the trace you provided that SU2/SU3 become IN_SERVICE late. If
there's a delay in PL4 joining the cluster after headless in your test, then
you could also see it in the latest patches (longDN rebased version).
I'm looking into this issue.
Thanks.
Minh
On 23/08/16 20:24, Nagendra Kumar wrote:
Please ignore TC #2, my mistake.
Thanks
-Nagu
-----Original Message-----
From: Nagendra Kumar
Sent: 23 August 2016 15:49
To: Minh Hon Chau; hans.nordeb...@ericsson.com; Praveen Malviya;
gary....@dektech.com.au; long.hb.ngu...@dektech.com.au
Cc: opensaf-devel@lists.sourceforge.net
Subject: RE: [PATCH 2 of 2] AMFND: Admin operation continuation if
csi
callback completes during headless [#1725 part 1] V1
Please consider previous TC as TC #1
TC #2: Same configuration as TC #1. Logs attached in the ticket TC
#2.
Steps:
1. Same as step #1 of TC #1.
2. After locking SU1, keep delay in
avnd_evt_avd_info_su_si_assign_evh and
stop SC-1 and SC-2.
3. Start SC-1 and SC-2. SU1 is still in quiesced state. Ideally, it
should have no
assignment and SU3 should have got the assignment.
safSISU=safSu=SU3\,safSg=AmfDemo_2N\,safApp=AmfDemo1,safSi=AmfDe
mo1,safApp=AmfDemo1
saAmfSISUHAState=STANDBY(2)
safSISU=safSu=SU2\,safSg=AmfDemo_2N\,safApp=AmfDemo1,safSi=AmfDe
mo1,safApp=AmfDemo1
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=PL-
4\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed4,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=SC-
1\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed1,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=SC-1\,safSg=2N\,safApp=OpenSAF,safSi=SC-
2N,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=SC-
2\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed3,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=SC-2\,safSg=2N\,safApp=OpenSAF,safSi=SC-
2N,safApp=OpenSAF
saAmfSISUHAState=STANDBY(2)
safSISU=safSu=PL-
3\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed2,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
After that, PL-3 rebooted, as shown in the following logs:
Aug 23 15:31:52 PM_PL-3 osafamfwd[18056]: TIMEOUT receiving AMF
health check request, generating core for amfnd Aug 23 15:31:52
PM_PL-3
osafamfwd[18056]: Last received healthcheck cnt=82 at Tue Aug 23
15:30:52
2016 Aug 23 15:31:52 PM_PL-3 osafamfwd[18056]: Rebooting OpenSAF
NodeId = 0 EE Name = No EE Mapped, Reason: AMFND unresponsive,
AMFWDOG initiated system reboot, OwnNodeId = 131855, SupervisionTime
= 60 Aug 23 15:31:52 PM_PL-3 opensaf_reboot: Rebooting local node;
timeout=60
Thanks
-Nagu
-----Original Message-----
From: Nagendra Kumar
Sent: 23 August 2016 15:19
To: Minh Hon Chau; hans.nordeb...@ericsson.com; Praveen Malviya;
gary....@dektech.com.au; long.hb.ngu...@dektech.com.au
Cc: opensaf-devel@lists.sourceforge.net
Subject: RE: [PATCH 2 of 2] AMFND: Admin operation continuation
if csi
callback completes during headless [#1725 part 1] V1
Please note that it is on change set 7846:31417997c82f and I have
applied patch of ticket #1894.
Thanks
-Nagu
-----Original Message-----
From: Nagendra Kumar
Sent: 23 August 2016 15:15
To: Minh Hon Chau; hans.nordeb...@ericsson.com; Praveen Malviya;
gary....@dektech.com.au; long.hb.ngu...@dektech.com.au
Cc: opensaf-devel@lists.sourceforge.net
Subject: RE: [PATCH 2 of 2] AMFND: Admin operation continuation if
csi callback completes during headless [#1725 part 1] V1
Hi Minh,
The following SU lock case is not working. This issue will
exist
for all the flows, so please check.
Configuration and traces are attached to the ticket.
Steps:
1. Start SC-1, SC-2, PL-3 and PL-4. Run the following command:
immcfg -f /tmp/AppConfig-2N-1725.xml amf-adm unlock-in
safSu=SU1,safSg=AmfDemo_2N,safApp=AmfDemo1
amf-adm unlock-in safSu=SU2,safSg=AmfDemo_2N,safApp=AmfDemo1
amf-adm unlock-in safSu=SU3,safSg=AmfDemo_2N,safApp=AmfDemo1
amf-adm unlock safSu=SU1,safSg=AmfDemo_2N,safApp=AmfDemo1
amf-adm unlock safSu=SU2,safSg=AmfDemo_2N,safApp=AmfDemo1
amf-adm unlock safSu=SU3,safSg=AmfDemo_2N,safApp=AmfDemo1
Assignments are:
PM_SC-1:/home/nagu/views/staging-1725 # /etc/init.d/opensafd
status
safSISU=safSu=SC-
1\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed1,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=SC-1\,safSg=2N\,safApp=OpenSAF,safSi=SC-
2N,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=SC-
2\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed2,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=SC-2\,safSg=2N\,safApp=OpenSAF,safSi=SC-
2N,safApp=OpenSAF
saAmfSISUHAState=STANDBY(2)
safSISU=safSu=PL-
4\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed3,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=PL-
3\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed4,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=SU2\,safSg=AmfDemo_2N\,safApp=AmfDemo1,safSi=AmfDe
mo1,safApp=AmfDemo1
saAmfSISUHAState=STANDBY(2)
safSISU=safSu=SU1\,safSg=AmfDemo_2N\,safApp=AmfDemo1,safSi=AmfDe
mo1,safApp=AmfDemo1
saAmfSISUHAState=ACTIVE(1)
2. Issue lock on SU1.
amf-adm lock safSu=SU1,safSg=AmfDemo_2N,safApp=AmfDemo1
And keep gdb in csi_set callback. Stop SC-1 and SC-2.
Send Ok from csi_set callback.
3. Start SC-1 and SC-2.
4. Assignment to components of SU2 is not given, and the assignment of
SU2 still shows Standby.
PM_SC-1:/home/nagu/views/staging-1725 # /etc/init.d/opensafd
status
safSISU=safSu=SU2\,safSg=AmfDemo_2N\,safApp=AmfDemo1,safSi=AmfDe
mo1,safApp=AmfDemo1
saAmfSISUHAState=STANDBY(2)
safSISU=safSu=SC-2\,safSg=2N\,safApp=OpenSAF,safSi=SC-
2N,safApp=OpenSAF
saAmfSISUHAState=STANDBY(2)
safSISU=safSu=SC-
1\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed1,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=PL-
4\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed3,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=PL-
3\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed2,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=SC-
2\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed4,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
safSISU=safSu=SC-1\,safSg=2N\,safApp=OpenSAF,safSi=SC-
2N,safApp=OpenSAF
saAmfSISUHAState=ACTIVE(1)
Thanks
-Nagu
-----Original Message-----
From: Minh Hon Chau [mailto:minh.c...@dektech.com.au]
Sent: 05 August 2016 02:50
To: hans.nordeb...@ericsson.com; Nagendra Kumar; Praveen Malviya;
gary....@dektech.com.au; long.hb.ngu...@dektech.com.au;
minh.c...@dektech.com.au
Cc: opensaf-devel@lists.sourceforge.net
Subject: [PATCH 2 of 2] AMFND: Admin operation continuation if csi
callback completes during headless [#1725 part 1] V1
osaf/services/saf/amf/amfnd/di.cc | 199
+++++++++++++++++-
---
--
--
osaf/services/saf/amf/amfnd/include/avnd_di.h | 1 +
2 files changed, 134 insertions(+), 66 deletions(-)
The patch buffers susi_resp_msg during the headless stage and resends
it to AMFD after headless.
diff --git a/osaf/services/saf/amf/amfnd/di.cc
b/osaf/services/saf/amf/amfnd/di.cc
--- a/osaf/services/saf/amf/amfnd/di.cc
+++ b/osaf/services/saf/amf/amfnd/di.cc
@@ -804,11 +804,6 @@ uint32_t avnd_di_susi_resp_send(AVND_CB
if (cb->term_state ==
AVND_TERM_STATE_OPENSAF_SHUTDOWN_STARTED)
return rc;
- if (cb->is_avd_down == true) {
- m_AVND_SU_ALL_SI_RESET(su);
- return rc;
- }
-
// should be in assignment pending state to be here
osafassert(m_AVND_SU_IS_ASSIGN_PEND(su));
@@ -819,64 +814,76 @@ uint32_t avnd_di_susi_resp_send(AVND_CB
TRACE_ENTER2("Sending Resp su=%s, si=%s, curr_state=%u,
prv_state=%u", su->name.value, curr_si->name.value,curr_si-
curr_state,curr_si->prv_state);
/* populate the susi resp msg */
msg.info.avd = new AVSV_DND_MSG();
- msg.type = AVND_MSG_AVD;
- msg.info.avd->msg_type = AVSV_N2D_INFO_SU_SI_ASSIGN_MSG;
- msg.info.avd->msg_info.n2d_su_si_assign.msg_id = ++(cb-
snd_msg_id);
- msg.info.avd->msg_info.n2d_su_si_assign.node_id = cb-
node_info.nodeId;
- if (si) {
- msg.info.avd->msg_info.n2d_su_si_assign.single_csi =
- ((si->single_csi_add_rem_in_si ==
AVSV_SUSI_ACT_BASE)
?
false : true);
- }
- TRACE("curr_assign_state '%u'",
curr_si->curr_assign_state);
- msg.info.avd->msg_info.n2d_su_si_assign.msg_act =
- (m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNED(curr_si)
||
-
m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNING(curr_si))
?
- ((!curr_si->prv_state) ? AVSV_SUSI_ACT_ASGN :
AVSV_SUSI_ACT_MOD) : AVSV_SUSI_ACT_DEL;
- msg.info.avd->msg_info.n2d_su_si_assign.su_name = su->name;
- if (si) {
- msg.info.avd->msg_info.n2d_su_si_assign.si_name =
si-
name;
- if (AVSV_SUSI_ACT_ASGN ==
si->single_csi_add_rem_in_si) {
- TRACE("si->curr_assign_state '%u'", curr_si-
curr_assign_state);
-
msg.info.avd->msg_info.n2d_su_si_assign.msg_act =
-
(m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNED(curr_si) ||
-
m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNING(curr_si)) ?
- AVSV_SUSI_ACT_ASGN :
AVSV_SUSI_ACT_DEL;
- }
- }
- msg.info.avd->msg_info.n2d_su_si_assign.ha_state =
- (SA_AMF_HA_QUIESCING == curr_si->curr_state) ?
SA_AMF_HA_QUIESCED : curr_si->curr_state;
- msg.info.avd->msg_info.n2d_su_si_assign.error =
- (m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNED(curr_si)
||
-
m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_REMOVED(curr_si))
?
NCSCC_RC_SUCCESS : NCSCC_RC_FAILURE;
+ msg.type = AVND_MSG_AVD;
+ msg.info.avd->msg_type = AVSV_N2D_INFO_SU_SI_ASSIGN_MSG;
+ msg.info.avd->msg_info.n2d_su_si_assign.node_id = cb-
node_info.nodeId;
+ if (si) {
+ msg.info.avd->msg_info.n2d_su_si_assign.single_csi =
+ ((si->single_csi_add_rem_in_si ==
AVSV_SUSI_ACT_BASE) ? false : true);
+ }
+ TRACE("curr_assign_state '%u'", curr_si->curr_assign_state);
+ msg.info.avd->msg_info.n2d_su_si_assign.msg_act =
+
(m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNED(curr_si) ||
+
m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNING(curr_si)) ?
+ ((!curr_si->prv_state) ?
AVSV_SUSI_ACT_ASGN : AVSV_SUSI_ACT_MOD) : AVSV_SUSI_ACT_DEL;
+ msg.info.avd->msg_info.n2d_su_si_assign.su_name = su->name;
+ if (si) {
+ msg.info.avd->msg_info.n2d_su_si_assign.si_name = si-
name;
+ if (AVSV_SUSI_ACT_ASGN == si->single_csi_add_rem_in_si) {
+ TRACE("si->curr_assign_state '%u'", curr_si-
curr_assign_state);
+ msg.info.avd-
msg_info.n2d_su_si_assign.msg_act =
+
(m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNED(curr_si) ||
+
m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNING(curr_si)) ?
+ AVSV_SUSI_ACT_ASGN :
AVSV_SUSI_ACT_DEL;
+ }
+ }
+ msg.info.avd->msg_info.n2d_su_si_assign.ha_state =
+ (SA_AMF_HA_QUIESCING == curr_si->curr_state) ?
SA_AMF_HA_QUIESCED : curr_si->curr_state;
+ msg.info.avd->msg_info.n2d_su_si_assign.error =
+
(m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNED(curr_si) ||
+
m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_REMOVED(curr_si)) ?
+NCSCC_RC_SUCCESS : NCSCC_RC_FAILURE;
- if (msg.info.avd->msg_info.n2d_su_si_assign.msg_act ==
AVSV_SUSI_ACT_ASGN)
- osafassert(si);
+ if (msg.info.avd->msg_info.n2d_su_si_assign.msg_act ==
AVSV_SUSI_ACT_ASGN)
+ osafassert(si);
- /* send the msg to AvD */
- TRACE("Sending. msg_id'%u', node_id'%u', msg_act'%u',
su'%s',
si'%s',
ha_state'%u', error'%u', single_csi'%u'",
- msg.info.avd->msg_info.n2d_su_si_assign.msg_id,
msg.info.avd-
msg_info.n2d_su_si_assign.node_id,
- msg.info.avd->msg_info.n2d_su_si_assign.msg_act,
msg.info.avd-
msg_info.n2d_su_si_assign.su_name.value,
- msg.info.avd->msg_info.n2d_su_si_assign.si_name.value,
msg.info.avd->msg_info.n2d_su_si_assign.ha_state,
- msg.info.avd->msg_info.n2d_su_si_assign.error,
msg.info.avd-
msg_info.n2d_su_si_assign.single_csi);
+ /* send the msg to AvD */
+ TRACE("Sending. msg_id'%u', node_id'%u', msg_act'%u', su'%s',
si'%s', ha_state'%u', error'%u', single_csi'%u'",
+ msg.info.avd->msg_info.n2d_su_si_assign.msg_id,
msg.info.avd->msg_info.n2d_su_si_assign.node_id,
+ msg.info.avd->msg_info.n2d_su_si_assign.msg_act,
msg.info.avd->msg_info.n2d_su_si_assign.su_name.value,
+ msg.info.avd->msg_info.n2d_su_si_assign.si_name.value,
msg.info.avd->msg_info.n2d_su_si_assign.ha_state,
+ msg.info.avd->msg_info.n2d_su_si_assign.error,
+msg.info.avd->msg_info.n2d_su_si_assign.single_csi);
- if ((su->si_list.n_nodes > 1) && (si == nullptr)) {
- if
(msg.info.avd->msg_info.n2d_su_si_assign.msg_act ==
AVSV_SUSI_ACT_DEL)
- LOG_NO("Removed 'all SIs' from '%s'",
su->name.value);
+ if ((su->si_list.n_nodes > 1) && (si == nullptr)) {
+ if (msg.info.avd->msg_info.n2d_su_si_assign.msg_act ==
AVSV_SUSI_ACT_DEL)
+ LOG_NO("Removed 'all SIs' from '%s'", su-
name.value);
- if
(msg.info.avd->msg_info.n2d_su_si_assign.msg_act ==
AVSV_SUSI_ACT_MOD)
- LOG_NO("Assigned 'all SIs' %s of '%s'",
- ha_state[msg.info.avd-
msg_info.n2d_su_si_assign.ha_state],
- su->name.value);
- }
+ if (msg.info.avd->msg_info.n2d_su_si_assign.msg_act ==
AVSV_SUSI_ACT_MOD)
+ LOG_NO("Assigned 'all SIs' %s of '%s'",
+ ha_state[msg.info.avd-
msg_info.n2d_su_si_assign.ha_state],
+ su->name.value);
+ }
- rc = avnd_di_msg_send(cb, &msg);
- if (NCSCC_RC_SUCCESS == rc)
- msg.info.avd = 0;
-
- /* we have completed the SU SI msg processing */
- if (su_assign_state_is_stable(su))
- m_AVND_SU_ASSIGN_PEND_RESET(su);
- m_AVND_SU_ALL_SI_RESET(su);
+ if (cb->is_avd_down == true) {
+ // We are in headless, buffer this msg
+ msg.info.avd->msg_info.n2d_su_si_assign.msg_id = 0;
+ if (avnd_diq_rec_add(cb, &msg) == nullptr) {
+ rc = NCSCC_RC_FAILURE;
+ }
+ m_AVND_SU_ALL_SI_RESET(su);
+ LOG_NO("avnd_di_susi_resp_send() deferred as AMF
director is offline");
+ } else {
+ // We are in normal cluster, send msg to director
+ msg.info.avd->msg_info.n2d_su_si_assign.msg_id = ++(cb-
snd_msg_id);
+ /* send the msg to AvD */
+ rc = avnd_di_msg_send(cb, &msg);
+ if (NCSCC_RC_SUCCESS == rc)
+ msg.info.avd = 0;
+ /* we have completed the SU SI msg processing */
+ if (su_assign_state_is_stable(su)) {
+ m_AVND_SU_ASSIGN_PEND_RESET(su);
+ }
+ m_AVND_SU_ALL_SI_RESET(su);
+ }
/* free the contents of avnd message */
avnd_msg_content_free(cb, &msg); @@ -1255,14 +1262,7 @@ void
avnd_diq_rec_del(AVND_CB *cb, AVND_
/* stop the AvD msg response timer */
if (m_AVND_TMR_IS_ACTIVE(rec->resp_tmr)) {
m_AVND_TMR_MSG_RESP_STOP(cb, *rec);
- // Resend msgs from queue because amfd dropped during
sync
- if ((cb->dnd_list.head != nullptr)) {
- TRACE("retransmit message to amfd");
- AVND_DND_MSG_LIST *pending_rec = 0;
- for (pending_rec = cb->dnd_list.head; pending_rec !=
nullptr; pending_rec = pending_rec->next) {
- avnd_diq_rec_send(cb, pending_rec);
- }
- }
+ avnd_diq_rec_send_buffered_msg(cb);
/* resend pg start track */
avnd_di_resend_pg_start_track(cb);
}
@@ -1275,6 +1275,73 @@ void avnd_diq_rec_del(AVND_CB *cb,
AVND_
TRACE_LEAVE();
return;
}
+/************************************************************
****************
+ Name : avnd_diq_rec_send_buffered_msg
+
+ Description : Resend buffered msg
+
+ Arguments : cb - ptr to the AvND control block
+
+ Return Values : None.
+
+ Notes : None.
+*************************************************************
**********
+*******/ void avnd_diq_rec_send_buffered_msg(AVND_CB *cb) {
+ TRACE_ENTER();
+ // Resend msgs from queue because amfnd dropped during
headless
+ // or headless-synchronization
+ if ((cb->dnd_list.head != nullptr)) {
+ AVND_DND_MSG_LIST *pending_rec = 0;
+ TRACE("Attach msg_id of buffered msg");
+ bool found = true;
+ while (found) {
+ found = false;
+ for (pending_rec = cb->dnd_list.head; pending_rec !=
nullptr; pending_rec = pending_rec->next) {
+ if (pending_rec->msg.type ==
AVND_MSG_AVD) {
+ // At this moment, only oper_state
msg needs to report to director
+ if (pending_rec->msg.info.avd-
msg_type == AVSV_N2D_INFO_SU_SI_ASSIGN_MSG &&
+ pending_rec->msg.info.avd-
msg_info.n2d_su_si_assign.msg_id == 0) {
+ m_AVND_DIQ_REC_POP(cb,
pending_rec); #if 0
+ // only resend if this SUSI
does exist
+ AVND_SU *su =
m_AVND_SUDB_REC_GET(cb->sudb,
+ pending_rec-
msg.info.avd->msg_info.n2d_su_si_assign.su_name);
+ if (su != nullptr && su-
si_list.n_nodes > 0) { #endif
+ pending_rec-
msg.info.avd->msg_info.n2d_su_si_assign.msg_id =
++(cb->snd_msg_id);
+
m_AVND_DIQ_REC_PUSH(cb, pending_rec);
+ LOG_NO("Found and
resend buffered su_si_assign msg for SU:'%s', "
+
"SI:'%s', ha_state:'%u', msg_act:'%u', single_csi:'%u', "
+
"error:'%u', msg_id:'%u'",
+
pending_rec->msg.info.avd-
msg_info.n2d_su_si_assign.su_name.value,
+
pending_rec->msg.info.avd-
msg_info.n2d_su_si_assign.si_name.value,
+
pending_rec->msg.info.avd->msg_info.n2d_su_si_assign.ha_state,
+
pending_rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_act,
+
pending_rec->msg.info.avd->msg_info.n2d_su_si_assign.single_csi,
+
pending_rec->msg.info.avd->msg_info.n2d_su_si_assign.error,
+
pending_rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id);
+
+#if 0
+ } else {
+
avnd_msg_content_free(cb, &pending_rec->msg);
+ delete pending_rec;
+ pending_rec = cb-
dnd_list.head;
+ }
+#endif
+ found = true;
+ }
+ }
+ }
+ }
+ TRACE("retransmit message to amfd");
+ for (pending_rec = cb->dnd_list.head; pending_rec !=
nullptr;
pending_rec = pending_rec->next) {
+ avnd_diq_rec_send(cb, pending_rec);
+ }
+ }
+ TRACE_LEAVE();
+ return;
+}
/*************************************************************
***************
Name : avnd_diq_rec_send
diff --git a/osaf/services/saf/amf/amfnd/include/avnd_di.h
b/osaf/services/saf/amf/amfnd/include/avnd_di.h
--- a/osaf/services/saf/amf/amfnd/include/avnd_di.h
+++ b/osaf/services/saf/amf/amfnd/include/avnd_di.h
@@ -79,6 +79,7 @@ void avnd_di_msg_ack_process(struct avnd void
avnd_diq_del(struct avnd_cb_tag *); AVND_DND_MSG_LIST
*avnd_diq_rec_add(struct avnd_cb_tag *cb, AVND_MSG *msg); void
avnd_diq_rec_del(struct avnd_cb_tag *cb, AVND_DND_MSG_LIST
*rec);
+void avnd_diq_rec_send_buffered_msg(struct avnd_cb_tag *cb);
uint32_t avnd_diq_rec_send(struct avnd_cb_tag *cb,
AVND_DND_MSG_LIST
*rec); uint32_t avnd_di_reg_su_rsp_snd(struct avnd_cb_tag *cb,
SaNameT *su_name, uint32_t ret_code); uint32_t
avnd_di_ack_nack_msg_send(struct avnd_cb_tag *cb, uint32_t rcv_id,
uint32_t view_num);
------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel
diff --git a/osaf/services/saf/amf/amfd/cluster.cc b/osaf/services/saf/amf/amfd/cluster.cc
--- a/osaf/services/saf/amf/amfd/cluster.cc
+++ b/osaf/services/saf/amf/amfd/cluster.cc
@@ -54,6 +54,7 @@ void avd_cluster_tmr_init_evh(AVD_CL_CB
{
TRACE_ENTER();
AVD_SU *su = nullptr;
+ AVD_AVND *node = nullptr;
saflog(LOG_NOTICE, amfSvcUsrName, "Cluster startup timeout, assigning SIs to SUs");
osafassert(evt->info.tmr.type == AVD_TMR_CL_INIT);
@@ -72,6 +73,15 @@ void avd_cluster_tmr_init_evh(AVD_CL_CB
cb->init_state = AVD_APP_STATE;
m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(cb, cb, AVSV_CKPT_AVD_CB_CONFIG);
+ // Resend set_leds to veteran node
+
+ for (std::map<std::string, AVD_AVND *>::const_iterator it = node_name_db->begin();
+ it != node_name_db->end(); it++) {
+ node = it->second;
+ if (node->veteran)
+ avd_snd_set_leds_msg(cb, node);
+ }
+
/* call the realignment routine for each of the SGs in the
* system that are not NCS specific.
*/
diff --git a/osaf/services/saf/amf/amfd/include/node.h b/osaf/services/saf/amf/amfd/include/node.h
--- a/osaf/services/saf/amf/amfd/include/node.h
+++ b/osaf/services/saf/amf/amfd/include/node.h
@@ -142,6 +142,7 @@ class AVD_AVND {
AVD_AMF_NG *admin_ng; /* points to the nodegroup on which admin operation is going on.*/
uint16_t node_up_msg_count; /* to count of node_up msg that director had received from this node */
bool reboot;
+ bool veteran;
//Member functions.
void node_sus_termstate_set(bool term_state) const;
diff --git a/osaf/services/saf/amf/amfd/ndfsm.cc b/osaf/services/saf/amf/amfd/ndfsm.cc
--- a/osaf/services/saf/amf/amfd/ndfsm.cc
+++ b/osaf/services/saf/amf/amfd/ndfsm.cc
@@ -415,7 +415,7 @@ void avd_node_up_evh(AVD_CL_CB *cb, AVD_
// this node is already up
avd_node_state_set(avnd, AVD_AVND_STATE_PRESENT);
avd_node_oper_state_set(avnd, SA_AMF_OPERATIONAL_ENABLED);
-
+ avnd->veteran = true;
// Update readiness state of all SUs which are waiting for node
// oper state
for (const auto& su : avnd->list_of_ncs_su) {
@@ -677,6 +677,7 @@ void avd_mds_avnd_down_evh(AVD_CL_CB *cb
node->recvr_fail_sw = false;
node->node_info.initialViewNumber = 0;
node->node_info.member = SA_FALSE;
+ node->veteran = false;
}
}
diff --git a/osaf/services/saf/amf/amfd/node.cc b/osaf/services/saf/amf/amfd/node.cc
--- a/osaf/services/saf/amf/amfd/node.cc
+++ b/osaf/services/saf/amf/amfd/node.cc
@@ -153,6 +153,7 @@ AVD_AVND *avd_node_new(const std::string
node = new AVD_AVND(dn);
node->node_up_msg_count = 0;
node->reboot = false;
+ node->veteran = false;
return node;
}
diff --git a/osaf/services/saf/amf/amfnd/di.cc b/osaf/services/saf/amf/amfnd/di.cc
--- a/osaf/services/saf/amf/amfnd/di.cc
+++ b/osaf/services/saf/amf/amfnd/di.cc
@@ -1270,7 +1270,6 @@ void avnd_diq_rec_del(AVND_CB *cb, AVND_
/* stop the AvD msg response timer */
if (m_AVND_TMR_IS_ACTIVE(rec->resp_tmr)) {
m_AVND_TMR_MSG_RESP_STOP(cb, *rec);
- avnd_diq_rec_send_buffered_msg(cb);
/* resend pg start track */
avnd_di_resend_pg_start_track(cb);
}
diff --git a/osaf/services/saf/amf/amfnd/term.cc b/osaf/services/saf/amf/amfnd/term.cc
--- a/osaf/services/saf/amf/amfnd/term.cc
+++ b/osaf/services/saf/amf/amfnd/term.cc
@@ -217,12 +217,14 @@ uint32_t avnd_evt_avd_set_leds_evh(AVND_
cb->rcv_msg_id = info->msg_id;
if (cb->led_state == AVND_LED_STATE_GREEN) {
- /* Nothing to be done we have already got this msg */
+ // Resend buffered headless msg if NoRed MW SU is assigned ACTIVE
+ avnd_diq_rec_send_buffered_msg(cb);
goto done;
}
cb->led_state = AVND_LED_STATE_GREEN;
+
/* Notify the NIS script/deamon that we have fully come up */
rc = nid_notify(const_cast<char*>("AMFND"), NCSCC_RC_SUCCESS, nullptr);
------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel