Hi Hung, Please find my comments inline, marked with [Zoran].
From: Hung Nguyen [mailto:hung.d.ngu...@dektech.com.au] Sent: Wednesday, February 03, 2016 12:36 PM To: Zoran Milinkovic; reddy.neelaka...@oracle.com Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [devel] [PATCH 3 of 5] imm: add IMMD support for cloud resilience feature [#1625] Hi Zoran, Please find my comments inline. BR, Hung Nguyen - DEK Technologies -------------------------------------------------------------------------------- From: Zoran Milinkovic zoran.milinko...@ericsson.com<mailto:zoran.milinko...@ericsson.com> Sent: Tuesday, December 22, 2015 9:14PM To: Neelakanta Reddy reddy.neelaka...@oracle.com<mailto:reddy.neelaka...@oracle.com> Cc: Opensaf-devel opensaf-devel@lists.sourceforge.net<mailto:opensaf-devel@lists.sourceforge.net> Subject: [devel] [PATCH 3 of 5] imm: add IMMD support for cloud resilience feature [#1625] osaf/services/saf/immsv/immd/immd_amf.c | 1 - osaf/services/saf/immsv/immd/immd_cb.h | 7 +- osaf/services/saf/immsv/immd/immd_db.c | 10 +- osaf/services/saf/immsv/immd/immd_evt.c | 191 +++++++++++++++++++++++++---- osaf/services/saf/immsv/immd/immd_main.c | 33 ++++- osaf/services/saf/immsv/immd/immd_mbcsv.c | 3 + osaf/services/saf/immsv/immd/immd_proc.c | 40 +++++- osaf/services/saf/immsv/immd/immd_sbevt.c | 13 +- 8 files changed, 247 insertions(+), 51 deletions(-) The patch contains IMMD code that is needed for supporting cloud resilience feature. 
diff --git a/osaf/services/saf/immsv/immd/immd_amf.c b/osaf/services/saf/immsv/immd/immd_amf.c --- a/osaf/services/saf/immsv/immd/immd_amf.c +++ b/osaf/services/saf/immsv/immd/immd_amf.c @@ -15,7 +15,6 @@ * */ -#include <logtrace.h> #include <nid_start_util.h> #include "immd.h" #include "immsv.h" diff --git a/osaf/services/saf/immsv/immd/immd_cb.h b/osaf/services/saf/immsv/immd/immd_cb.h --- a/osaf/services/saf/immsv/immd/immd_cb.h +++ b/osaf/services/saf/immsv/immd/immd_cb.h @@ -137,7 +137,10 @@ typedef struct immd_cb_tag { bool m2PbeCanLoad; /* true => 2PBE Loading arbitration completed */ bool m2PbeExtraWait; /* true => Used only to prolong wait if both SCs have been introduced but one has not yet replied. */ - bool nid_started; /**< true if started by NID */ + bool nid_started; /* true if started by NID */ + SaUint16T mScAbsenceAllowed; /* Non zero if "headless Hydra" allowed (loss of both IMMDs/SCs). + Value is number of seconds of SC absence tolerated. */ + MDS_DEST payload_coord_dest; /* IMMND coord may be at payload if mScAbsenceAllowed is nonzero */ } IMMD_CB; uint32_t immd_immnd_info_tree_init(IMMD_CB *cb); @@ -171,4 +174,6 @@ uint32_t immd_mds_change_role(IMMD_CB *c void immd_proc_immd_reset(IMMD_CB *cb, bool active); +uint32_t immd_immnd_info_node_cardinality(NCS_PATRICIA_TREE *immnd_tree); + #endif diff --git a/osaf/services/saf/immsv/immd/immd_db.c b/osaf/services/saf/immsv/immd/immd_db.c --- a/osaf/services/saf/immsv/immd/immd_db.c +++ b/osaf/services/saf/immsv/immd/immd_db.c @@ -256,9 +256,8 @@ uint32_t immd_cb_db_init(IMMD_CB *cb) } cb->mRim = SA_IMM_INIT_FROM_FILE; - cb->mIs2Pbe = false; - cb->m2PbeCanLoad = true; - if(cb->ha_state == SA_AMF_HA_ACTIVE) { + cb->m2PbeCanLoad = !cb->mIs2Pbe; + if((cb->ha_state == SA_AMF_HA_ACTIVE) && !cb->mScAbsenceAllowed) { cb->is_loading = true; } @@ -410,3 +409,8 @@ void immd_db_purge_fevs(IMMD_CB *cb) } TRACE_LEAVE(); } + +uint32_t immd_immnd_info_node_cardinality(NCS_PATRICIA_TREE *immnd_tree) +{ + return 
ncs_patricia_tree_size(immnd_tree); +} diff --git a/osaf/services/saf/immsv/immd/immd_evt.c b/osaf/services/saf/immsv/immd/immd_evt.c --- a/osaf/services/saf/immsv/immd/immd_evt.c +++ b/osaf/services/saf/immsv/immd/immd_evt.c @@ -366,8 +366,8 @@ static void immd_start_sync_ok(IMMD_CB * sync_evt.info.immnd.info.ctrl.rulingEpoch = cb->mRulingEpoch; sync_evt.info.immnd.info.ctrl.fevsMsgStart = cb->fevsSendCount; sync_evt.info.immnd.info.ctrl.nodeId = node_info->immnd_key; - sync_evt.info.immnd.info.ctrl.canBeCoord = node_info->isOnController; - sync_evt.info.immnd.info.ctrl.ndExecPid = node_info->immnd_execPid; + sync_evt.info.immnd.info.ctrl.canBeCoord = (node_info->isOnController)?1:(cb->mScAbsenceAllowed)?4:0; + sync_evt.info.immnd.info.ctrl.ndExecPid = (sync_evt.info.immnd.info.ctrl.canBeCoord==4)?(cb->mScAbsenceAllowed):node_info->immnd_execPid; sync_evt.info.immnd.info.ctrl.isCoord = node_info->isCoord; sync_evt.info.immnd.info.ctrl.syncStarted = node_info->syncStarted; sync_evt.info.immnd.info.ctrl.nodeEpoch = node_info->epoch; @@ -415,7 +415,8 @@ static void immd_abort_sync_ok(IMMD_CB * sync_evt.info.immnd.info.ctrl.rulingEpoch = cb->mRulingEpoch; sync_evt.info.immnd.info.ctrl.fevsMsgStart = cb->fevsSendCount; sync_evt.info.immnd.info.ctrl.ndExecPid = node_info->immnd_execPid; - sync_evt.info.immnd.info.ctrl.canBeCoord = node_info->isOnController; + sync_evt.info.immnd.info.ctrl.canBeCoord = (node_info->isOnController)?1:(cb->mScAbsenceAllowed)?4:0; + sync_evt.info.immnd.info.ctrl.ndExecPid = (sync_evt.info.immnd.info.ctrl.canBeCoord==4)?(cb->mScAbsenceAllowed):node_info->immnd_execPid; sync_evt.info.immnd.info.ctrl.isCoord = node_info->isCoord; sync_evt.info.immnd.info.ctrl.syncStarted = node_info->syncStarted; sync_evt.info.immnd.info.ctrl.nodeEpoch = node_info->epoch; @@ -458,7 +459,8 @@ static void immd_prto_purge_mutations(IM sync_evt.info.immnd.info.ctrl.rulingEpoch = cb->mRulingEpoch; sync_evt.info.immnd.info.ctrl.fevsMsgStart = cb->fevsSendCount; 
sync_evt.info.immnd.info.ctrl.ndExecPid = node_info->immnd_execPid; - sync_evt.info.immnd.info.ctrl.canBeCoord = node_info->isOnController; + sync_evt.info.immnd.info.ctrl.canBeCoord = (node_info->isOnController)?1:(cb->mScAbsenceAllowed)?4:0; + sync_evt.info.immnd.info.ctrl.ndExecPid = (sync_evt.info.immnd.info.ctrl.canBeCoord==4)?(cb->mScAbsenceAllowed):node_info->immnd_execPid; sync_evt.info.immnd.info.ctrl.isCoord = node_info->isCoord; sync_evt.info.immnd.info.ctrl.syncStarted = node_info->syncStarted; sync_evt.info.immnd.info.ctrl.nodeEpoch = node_info->epoch; @@ -489,8 +491,8 @@ static int immd_dump_ok(IMMD_CB *cb, SaU dump_evt.info.immnd.type = IMMND_EVT_D2ND_DUMP_OK; dump_evt.info.immnd.info.ctrl.rulingEpoch = cb->mRulingEpoch; dump_evt.info.immnd.info.ctrl.nodeId = node_info->immnd_key; - dump_evt.info.immnd.info.ctrl.canBeCoord = node_info->isOnController; - dump_evt.info.immnd.info.ctrl.ndExecPid = node_info->immnd_execPid; + dump_evt.info.immnd.info.ctrl.canBeCoord = (node_info->isOnController)?1:(cb->mScAbsenceAllowed)?4:0; + dump_evt.info.immnd.info.ctrl.ndExecPid = (dump_evt.info.immnd.info.ctrl.canBeCoord==4)?(cb->mScAbsenceAllowed):node_info->immnd_execPid; dump_evt.info.immnd.info.ctrl.isCoord = node_info->isCoord; dump_evt.info.immnd.info.ctrl.fevsMsgStart = cb->fevsSendCount; dump_evt.info.immnd.info.ctrl.syncStarted = node_info->syncStarted; @@ -562,9 +564,10 @@ static void immd_req_sync(IMMD_CB *cb, I rqsync_evt.info.immnd.type = IMMND_EVT_D2ND_SYNC_REQ; rqsync_evt.info.immnd.info.ctrl.nodeId = node_info->immnd_key; rqsync_evt.info.immnd.info.ctrl.rulingEpoch = cb->mRulingEpoch; - rqsync_evt.info.immnd.info.ctrl.canBeCoord = node_info->isOnController; + rqsync_evt.info.immnd.info.ctrl.canBeCoord = (node_info->isOnController)?1:(cb->mScAbsenceAllowed)?4:0; + rqsync_evt.info.immnd.info.ctrl.ndExecPid = node_info->immnd_execPid; + rqsync_evt.info.immnd.info.ctrl.ndExecPid = 
(rqsync_evt.info.immnd.info.ctrl.canBeCoord==4)?(cb->mScAbsenceAllowed):node_info->immnd_execPid; rqsync_evt.info.immnd.info.ctrl.fevsMsgStart = cb->fevsSendCount; - rqsync_evt.info.immnd.info.ctrl.ndExecPid = node_info->immnd_execPid; rqsync_evt.info.immnd.info.ctrl.isCoord = node_info->isCoord; rqsync_evt.info.immnd.info.ctrl.syncStarted = node_info->syncStarted; rqsync_evt.info.immnd.info.ctrl.nodeEpoch = node_info->epoch; @@ -588,7 +591,7 @@ static void immd_req_sync(IMMD_CB *cb, I if (cb->immnd_coord == cb->node_id) { /*Coord immnd is local, i.e. at active SC. */ if (!(cb->is_loc_immnd_up)) { - LOG_ER("No coordinator IMMND known - ignoring sync request"); + LOG_ER("No coordinator IMMND known (case A) - ignoring sync request"); goto done; } @@ -604,22 +607,36 @@ static void immd_req_sync(IMMD_CB *cb, I proc_rc, node_info->immnd_key); goto done; } - } else { /*Coord immnd is at remote, i.e. at standby SC. */ - - if (!(cb->is_rem_immnd_up)) { - LOG_WA("No coordinator IMMND known - ignoring sync request"); - goto done; - } - + } + + if (cb->is_rem_immnd_up && (cb->immd_remote_id == immd_get_slot_and_subslot_id_from_node_id(cb->immnd_coord))) { + /*Coord immnd is at remote, i.e. at standby SC. */ TRACE_5("Send-3 SYNC_REQ to remote coord IMMND at standby SC, dest:%" PRIu64, cb->rem_immnd_dest); proc_rc = immd_mds_msg_send(cb, NCSMDS_SVC_ID_IMMND, cb->rem_immnd_dest, &rqsync_evt); if (proc_rc != NCSCC_RC_SUCCESS) { LOG_WA("Failed to send rqsync message err:%u to coord IMMND " "at standby dest:%" PRIu64, proc_rc, cb->rem_immnd_dest); - goto done; } + goto done; } + LOG_IN("ABT coord at payload ? 
cb->immnd_coord:%x cb->mScAbsenceAllowed: %u dest:%" PRIu64, + cb->immnd_coord, cb->mScAbsenceAllowed, cb->payload_coord_dest); + + if(cb->immnd_coord && cb->mScAbsenceAllowed) { [Hung] Should be if(cb->payload_coord_dest && cb->mScAbsenceAllowed) [Zoran] Fixed + LOG_NO("Sc Absence Allowed is configured (%u) => IMMND coord at payload node:%x dest%" PRIu64, + cb->mScAbsenceAllowed, cb->immnd_coord, cb->payload_coord_dest); + TRACE_5("Send-4 SYNC_REQ to remote coord IMMND at payload, dest:%" PRIu64, cb->payload_coord_dest); + proc_rc = immd_mds_msg_send(cb, NCSMDS_SVC_ID_IMMND, cb->payload_coord_dest, &rqsync_evt); + if (proc_rc != NCSCC_RC_SUCCESS) { + LOG_WA("Failed to send rqsync message err:%u to coord IMMND " + "at PAYLOAD dest:%" PRIu64, proc_rc, cb->payload_coord_dest); + } + goto done; + } + + LOG_WA("No coordinator IMMND known (case B) - ignoring sync request"); + done: TRACE_LEAVE(); } @@ -651,29 +668,33 @@ static void immd_kill_node(IMMD_CB *cb, TRACE_LEAVE(); } -static void immd_accept_node(IMMD_CB *cb, IMMD_IMMND_INFO_NODE *node_info, bool doReply) +static uint16_t accepted_nodes = 0; [Hung] 'accepted_nodes' should be decreased in 'immd_evt_proc_mds_evt' after calling 'immd_process_immnd_down' (3 occurrences) [Zoran] This is implemented a different way as we discussed in emails. accepted_nodes is used only when SC starts, and decreasing is not necessary. 
The solution is implemented in IF statement in immd_accepted_node() like if(cb->mScAbsenceAllowed && !cb->immnd_coord && doReply) {++accepted_nodes;} + +static void immd_accept_node(IMMD_CB *cb, IMMD_IMMND_INFO_NODE *node_info, bool doReply, bool knownVeteran) { uint32_t proc_rc = NCSCC_RC_SUCCESS; IMMSV_EVT accept_evt; IMMD_MBCSV_MSG mbcp_msg; bool isOnController = node_info->isOnController; bool fsParamMbcp = false; + int32_t mds_attached_nodes = (int32_t)immd_immnd_info_node_cardinality(&cb->immnd_tree); + LOG_NO("Attached Nodes:%u Accepted nodes:%u KnownVeteran:%u doReply:%u", mds_attached_nodes, accepted_nodes, knownVeteran, doReply); TRACE_ENTER(); memset(&accept_evt, 0, sizeof(IMMSV_EVT)); memset(&mbcp_msg, 0, sizeof(IMMD_MBCSV_MSG)); + if(cb->mScAbsenceAllowed && doReply) {++accepted_nodes;} accept_evt.type = IMMSV_EVT_TYPE_IMMND; accept_evt.info.immnd.type = IMMND_EVT_D2ND_INTRO_RSP; accept_evt.info.immnd.info.ctrl.nodeId = node_info->immnd_key; accept_evt.info.immnd.info.ctrl.rulingEpoch = cb->mRulingEpoch; - accept_evt.info.immnd.info.ctrl.canBeCoord = isOnController; - accept_evt.info.immnd.info.ctrl.ndExecPid = node_info->immnd_execPid; + accept_evt.info.immnd.info.ctrl.canBeCoord = (node_info->isOnController)?1:0; /* ScAbsenceAllowed case handled below*/ + accept_evt.info.immnd.info.ctrl.ndExecPid = /*(accept_evt.info.immnd.info.ctrl.canBeCoord==4)?(cb->mScAbsenceAllowed):*/node_info->immnd_execPid; accept_evt.info.immnd.info.ctrl.fevsMsgStart = cb->fevsSendCount; accept_evt.info.immnd.info.ctrl.nodeEpoch = node_info->epoch; /* Sending back pbeEnabled from IMMD to IMMNDs not really needed.*/ accept_evt.info.immnd.info.ctrl.pbeEnabled = (cb->mRim == SA_IMM_KEEP_REPOSITORY); - if (isOnController) { if(cb->immnd_coord == 0) {/* No coord */ if(cb->mIs2Pbe) { @@ -681,12 +702,21 @@ static void immd_accept_node(IMMD_CB *cb " Cluster is loading. 
2PBE configured => Wait.", node_info->immnd_key, cb->node_id); accept_evt.info.immnd.info.ctrl.canBeCoord = 2; /* 2PBE => order preload. */ - } else { + } else if(!(cb->mScAbsenceAllowed) || knownVeteran || (mds_attached_nodes == 1) || [Hung] 'knownVeteran' can not be true when 'isOnController' is true. We can remove 'knownVeteran' here. [Zoran] knownVeteran is removed from IF statement. + (accepted_nodes > (mds_attached_nodes - 2))) { + /* ABT possibly need timeout conditionj ALSO. But we dont want another config variable */ LOG_NO("First IMMND on SC found at %x this IMMD at %x." " Cluster is loading, *not* 2PBE => designating that IMMND as coordinator", node_info->immnd_key, cb->node_id); cb->immnd_coord = node_info->immnd_key; node_info->isCoord = true; + } else if(cb->mScAbsenceAllowed && doReply) { + LOG_NO("Postponing acceptance of SC IMMND until %u nodes introduced.", mds_attached_nodes); + doReply = false; + --accepted_nodes; + } else { + LOG_NO("PROBLEM CASE (?) ScAbsenceAllowed: %u; accepted_nodes:%u; mds_attached_nodess:%u", + cb->mScAbsenceAllowed, accepted_nodes, mds_attached_nodes); } } else { /* Coord already exists. An SC is joining the cluster. */ @@ -695,9 +725,16 @@ static void immd_accept_node(IMMD_CB *cb accept_evt.info.immnd.info.ctrl.canBeCoord = 3; } else { /* 1PBE or 0PBE joining SC must sync */ - accept_evt.info.immnd.info.ctrl.canBeCoord = 1; + accept_evt.info.immnd.info.ctrl.canBeCoord = (node_info->isOnController)?1:(cb->mScAbsenceAllowed)?4:0; } } + } else if(cb->immnd_coord == 0 && cb->mScAbsenceAllowed && knownVeteran) { + LOG_NO("First Veteran IMMND found (payload) at %x this IMMD at %x." 
+ " Apparent IMMD lapse, *not* 2PBE => designating that IMMND as coordinator", + node_info->immnd_key, cb->node_id); + cb->immnd_coord = node_info->immnd_key; + cb->payload_coord_dest = node_info->immnd_dest; + node_info->isCoord = true; } if (node_info->isCoord) { @@ -734,6 +771,15 @@ static void immd_accept_node(IMMD_CB *cb if (doReply) { /*If doReply is false then this was only an epoch refresh from an IMMND. Send reply on intro (accept) message back to sending IMMND */ + + if(cb->mScAbsenceAllowed) { + osafassert(accept_evt.info.immnd.info.ctrl.canBeCoord < 2); /* Not 2PBE */ + accept_evt.info.immnd.info.ctrl.canBeCoord = 4; /* Allow all nodes including payloads to be coord */ + accept_evt.info.immnd.info.ctrl.ndExecPid = cb->mScAbsenceAllowed; + /* ExecPid not realy used by IMMND as receiver on reply to its intro request. + Here we overload the use of the ndExecPid field to transport the ScAbsenceAllowed value.*/ + } + proc_rc = immd_mds_msg_send(cb, NCSMDS_SVC_ID_IMMND, node_info->immnd_dest, &accept_evt); if (proc_rc != NCSCC_RC_SUCCESS) { LOG_ER("Failed to send accept message to IMMND %x", node_info->immnd_key); @@ -786,6 +832,17 @@ static void immd_accept_node(IMMD_CB *cb goto done; } } + + if(cb->mScAbsenceAllowed && cb->payload_coord_dest) { [Hung] The condition should be if(cb->mScAbsenceAllowed && cb->payload_coord_dest && !node_info->isCoord) to avoid sending to the new payload-based coordinator twice. We already send response to new payload-based coordinator just right before 'if(isOnController)'. [Zoran] Done as you said. + /* SC absence allowed and coord is configured at payload. 
*/ + TRACE("Payload intro sent to IMMND coord at payload (%x)", cb->immnd_coord); + proc_rc = immd_mds_msg_send(cb, NCSMDS_SVC_ID_IMMND, cb->payload_coord_dest, &accept_evt); + if (proc_rc != NCSCC_RC_SUCCESS) { + LOG_WA("Failed to send immnd-payload accept message to IMMND at " + "payload %" PRIu64 " error:%u", cb->payload_coord_dest, proc_rc); + goto done; + } + } } } else { /* Not doReply => epoch refresh => probably a sync => reset sync request. */ /*Reset any syncRequester to normal. */ @@ -1294,6 +1351,7 @@ static uint32_t immd_evt_proc_immnd_intr IMMD_IMMND_INFO_NODE *node_info = NULL; int oldPid, newPid; int oldEpoch, newEpoch; + bool veteranImmndNode = false; TRACE_ENTER(); @@ -1321,7 +1379,7 @@ static uint32_t immd_evt_proc_immnd_intr if (node_info->syncStarted) { osafassert(oldPid == newPid); osafassert(node_info->isCoord); - osafassert(node_info->isOnController); + osafassert(node_info->isOnController || cb->mScAbsenceAllowed); if(node_info->epoch != cb->mRulingEpoch) { LOG_ER("immd_evt_proc_immnd_intro: syncStarted true for node with " "strange epoch node_info->epoch(%u) != cb->mRulingEpoc(%u)", @@ -1345,8 +1403,11 @@ static uint32_t immd_evt_proc_immnd_intr node_info->immnd_key, node_info->immnd_execPid, node_info->epoch, node_info->syncRequested); if (evt->info.ctrl_msg.refresh) { - TRACE_5("ONLY A REFRESH OF epoch for %x, newE:%u RulngE:%u", - node_info->immnd_key, node_info->epoch, cb->mRulingEpoch); + if(evt->info.ctrl_msg.refresh==1) { + TRACE_5("ONLY A REFRESH OF epoch for %x, newE:%u RulngE:%u", + node_info->immnd_key, node_info->epoch, cb->mRulingEpoch); + } + if (cb->mRulingEpoch < node_info->epoch) { cb->mRulingEpoch = node_info->epoch; LOG_NO("Ruling epoch changed to:%u", cb->mRulingEpoch); @@ -1363,8 +1424,71 @@ static uint32_t immd_evt_proc_immnd_intr } } - immd_accept_node(cb, node_info, false); - goto done; + if(evt->info.ctrl_msg.refresh==2) { + /* Refresh from up and running IMMND perspective but not from restarted IMMDs perspective. 
+ IMMNDs update IMMDs with current global counters. There is a potential race here to + worry about. Probably need a timer mechanism to ensure that restarted IMMDs do not + start responding to requests involving increment of these counters, before some + lagard IMMND provides the latest counter values. The up-side here is that all "veteran" + IMMNDs should have the same value on all counters. + + What we need to guard against is some IMMNDs that have also restarted during the IMMD + outage. If these truly restarted IMMNDs introduce themselves before any veteran (non + restarted) IMMND re-introduces itself (resets the counters), there could in theory be + trouble. But we should be saved by the fact that truly restarted IMMNDs are waiting to + be loaded or synced. They should not be generating fevs messages before that. + */ + + if(!(cb->mScAbsenceAllowed)) { + LOG_WA("ABSENT_SC_ALLOWED is NOT configured yet IMMND reports absent IMMD - ignoring"); + goto done; + } + + veteranImmndNode = true; + + if(cb->fevsSendCount < evt->info.ctrl_msg.fevs_count) { + LOG_NO("Refresh of fevs count from %llu to %llu from %x.", cb->fevsSendCount, + evt->info.ctrl_msg.fevs_count, node_info->immnd_key); + cb->fevsSendCount = evt->info.ctrl_msg.fevs_count; + } else { + LOG_IN("Ignoring refresh of fevs count from %x. Local:%llu >= Refresh:%llu.", + node_info->immnd_key, cb->fevsSendCount, evt->info.ctrl_msg.fevs_count); + } + + if(cb->admo_id_count < evt->info.ctrl_msg.admo_id_count) { + LOG_NO("Refresh of admoId count from %u to %u from %x.", cb->admo_id_count, + evt->info.ctrl_msg.admo_id_count, node_info->immnd_key); + cb->admo_id_count = evt->info.ctrl_msg.admo_id_count; + } else { + LOG_IN("Ignoring refresh of admoId count from %x. 
Local:%u >= Refresh:%u.", + node_info->immnd_key, cb->admo_id_count, evt->info.ctrl_msg.admo_id_count); + } + + if(cb->ccb_id_count < evt->info.ctrl_msg.ccb_id_count) { + LOG_NO("Refresh of ccbId count from %u to %u from %x.", cb->ccb_id_count, + evt->info.ctrl_msg.ccb_id_count, node_info->immnd_key); + cb->ccb_id_count = evt->info.ctrl_msg.ccb_id_count; + } else { + LOG_IN("Ignoring refresh of ccbId count from %x. Local:%u >= Refresh:%u.", + node_info->immnd_key, cb->ccb_id_count, evt->info.ctrl_msg.ccb_id_count); + } + + if(cb->impl_count < evt->info.ctrl_msg.impl_count) { + LOG_NO("Refresh of impl count from %u to %u from %x.", cb->impl_count, + evt->info.ctrl_msg.impl_count, node_info->immnd_key); + cb->impl_count = evt->info.ctrl_msg.impl_count; + } else { + LOG_IN("Ignoring refresh of impl count from %x. Local:%u >= Refresh:%u.", + node_info->immnd_key, cb->impl_count, evt->info.ctrl_msg.impl_count); + } + + /* Fall down into reception at new IMMD */ + } else { + /* Regular old refresh, basically just statistics and tracing. */ + immd_accept_node(cb, node_info, false, false); + goto done; + } + } /* Determine type of node. */ @@ -1386,6 +1510,10 @@ static uint32_t immd_evt_proc_immnd_intr LOG_IN("New IMMND process is on PAYLOAD at:%x", node_info->immnd_key); } + if(evt->info.ctrl_msg.refresh==2) { + goto accept_node; + } + /* Check for consistent file/dir/pbe configuration. If problem is found then node is not accepted and no reply is sent for the intro request from that node. But first check if node to be introduced is of older @@ -1407,7 +1535,7 @@ static uint32_t immd_evt_proc_immnd_intr if(evt->info.ctrl_msg.pbeFile.size > 1) { node_info->pbeConfigured = true; } - } + } if(!(node_info->pbeConfigured)) { /* New node does not have pbe configured. 
*/ if(cb->mIs2Pbe) { @@ -1589,7 +1717,7 @@ static uint32_t immd_evt_proc_immnd_intr accept_node: - immd_accept_node(cb, node_info, true); + immd_accept_node(cb, node_info, true, veteranImmndNode); done: @@ -2518,7 +2646,7 @@ static uint32_t immd_evt_proc_mds_evt(IM TRACE_5("Located STDBY IMMND = %x node_id:%x", immd_get_slot_and_subslot_id_from_node_id(mds_info->node_id), mds_info->node_id); - immd_accept_node(cb, node_info, true); + immd_accept_node(cb, node_info, true, false); /* <==== Can not be sc-absence veteran if on sc. */ } /* Break out of while-1. We found */ break; @@ -2539,6 +2667,7 @@ static uint32_t immd_evt_proc_mds_evt(IM if (mds_info->svc_id == NCSMDS_SVC_ID_IMMND) { phy_slot_sub_slot = immd_get_slot_and_subslot_id_from_mds_dest(mds_info->dest); immd_immnd_info_node_find_add(&cb->immnd_tree, &mds_info->dest, &node_info, &add_flag); + LOG_IN("node with dest ADDED %" PRIu64, mds_info->dest); if (m_IMMND_IS_ON_SCXB(cb->immd_self_id, immd_get_slot_and_subslot_id_from_mds_dest(mds_info->dest))) { diff --git a/osaf/services/saf/immsv/immd/immd_main.c b/osaf/services/saf/immsv/immd/immd_main.c --- a/osaf/services/saf/immsv/immd/immd_main.c +++ b/osaf/services/saf/immsv/immd/immd_main.c @@ -217,21 +217,23 @@ int main(int argc, char *argv[]) struct pollfd fds[4]; const int peerMaxWaitMin = 5; /*5 sec*/ const char * peerWaitStr = getenv("IMMSV_2PBE_PEER_SC_MAX_WAIT"); + const char * absentScStr = getenv("IMMSV_SC_ABSENCE_ALLOWED"); int32_t timeout = (-1); int32_t total_wait = (-1); int64_t start_time = 0LL; uint32_t print_at_secs = 1LL; int term_fd; + uint16_t scAbsenceAllowed = 0; daemonize(argc, argv); - if (immd_initialize() != NCSCC_RC_SUCCESS) { - TRACE("initialize_immd failed"); - goto done; + if(absentScStr) { + scAbsenceAllowed = atoi(absentScStr); + if(!scAbsenceAllowed) { + LOG_WA("SC_ABSENCE_ALLOWED malconfigured: '%s'", absentScStr); + } } - daemon_sigterm_install(&term_fd); - if(peerWaitStr) { int32_t peerMaxWait = atoi(peerWaitStr); 
if(peerMaxWait < peerMaxWaitMin) { @@ -247,9 +249,28 @@ int main(int argc, char *argv[]) start_time = m_NCS_GET_TIME_MS; immd_cb->mIs2Pbe = true; /* Redundant PBE */ - immd_cb->m2PbeCanLoad = false; /* Not ready to load yet */ + + if(scAbsenceAllowed) { + LOG_ER("SC_ABSENCE_ALLOWED is *incompatible* with 2PBE - 2PBE overrides"); + scAbsenceAllowed = 0; + } } + if(scAbsenceAllowed) { + LOG_NO("******* SC_ABSENCE_ALLOWED (Headless Hydra) is configured: %u ***********", + scAbsenceAllowed); + LOG_NO("Waiting 3 seconds to allow IMMND MDS attachments to get processed."); + sleep(3); + } + + immd_cb->mScAbsenceAllowed = scAbsenceAllowed; + + if (immd_initialize() != NCSCC_RC_SUCCESS) { + TRACE("initialize_immd failed"); + goto done; + } [Hung] I think the mds initialization (immd_initialize) must be done before the sleep. Otherwise, the sleep is useless here. [Zoran] I think this is ok. 3 seconds are for payloads to process MDS messages when the cluster goes headless. + + daemon_sigterm_install(&term_fd); /* Get file descriptor for mailbox */ mbx_fd = ncs_ipc_get_sel_obj(&immd_cb->mbx); diff --git a/osaf/services/saf/immsv/immd/immd_mbcsv.c b/osaf/services/saf/immsv/immd/immd_mbcsv.c --- a/osaf/services/saf/immsv/immd/immd_mbcsv.c +++ b/osaf/services/saf/immsv/immd/immd_mbcsv.c @@ -1115,6 +1115,9 @@ static uint32_t mbcsv_dec_sync_resp(IMMD if (node_info->isCoord) { cb->immnd_coord = node_info->immnd_key; + if(!node_info->isOnController && cb->mScAbsenceAllowed) { + cb->payload_coord_dest = node_info->immnd_dest; + } } ptr = ncs_dec_flatten_space(&arg->info.decode.i_uba, data, sizeof(uint8_t)); diff --git a/osaf/services/saf/immsv/immd/immd_proc.c b/osaf/services/saf/immsv/immd/immd_proc.c --- a/osaf/services/saf/immsv/immd/immd_proc.c +++ b/osaf/services/saf/immsv/immd/immd_proc.c @@ -126,6 +126,7 @@ void immd_proc_immd_reset(IMMD_CB *cb, b cb->mRulingEpoch = 0; cb->immnd_coord = 0; + cb->payload_coord_dest = 0L; cb->fevsSendCount = 0LL; cb->locPbe.epoch = 0; @@ -242,6 
+243,11 @@ bool immd_proc_elect_coord(IMMD_CB *cb, } else { /* Re-elect local coord. See #578 */ if(immnd_info_node->immnd_key != cb->node_id) { + if(cb->mScAbsenceAllowed) { + LOG_WA("ScAbsenceAllowed(%u), failover after SC-absence => coord at payload", + cb->mScAbsenceAllowed); + break; + } LOG_ER("Changing IMMND coord while old coord is still up!"); /* Could theoretically happen if remote IMMD is down, i.e. failover, but MDS has not yet provided IMMND DOWN for that @@ -270,7 +276,8 @@ bool immd_proc_elect_coord(IMMD_CB *cb, one step higher than the coord epoch. */ } else { - /* Try to elect a new coord. */ + /* Try to elect a new coord. ABT Do I need to reset payload_coord_dest earlier ? */ + cb->payload_coord_dest = 0LL; memset(&key, 0, sizeof(MDS_DEST)); immd_immnd_info_node_getnext(&cb->immnd_tree, &key, &immnd_info_node); while (immnd_info_node) { @@ -284,8 +291,33 @@ bool immd_proc_elect_coord(IMMD_CB *cb, immd_immnd_info_node_getnext(&cb->immnd_tree, &key, &immnd_info_node); } + if (!immnd_info_node && cb->mScAbsenceAllowed) { + /* If SC absence is allowed and no SC based IMMND is available + then elect an IMMND coord at a payload. Note this means that + an IMMND at a payload may be elected coord even if one or both + SCs are available, but no synced IMMND is avaialble at any SC. 
+ */ + memset(&key, 0, sizeof(MDS_DEST)); + immd_immnd_info_node_getnext(&cb->immnd_tree, &key, &immnd_info_node); + while (immnd_info_node) { + key = immnd_info_node->immnd_dest; + if (immnd_info_node->epoch == cb->mRulingEpoch) { + /*We found a new candidate for cordinator */ + immnd_info_node->isCoord = true; + cb->payload_coord_dest = immnd_info_node->immnd_dest; + LOG_NO("Coord elected at payload:%x", immnd_info_node->immnd_key); + break; + } else { + LOG_IN("Payload %x rejected as coord, epoch(%u) != rulingEpoch(%u)", + immnd_info_node->immnd_key, immnd_info_node->epoch, cb->mRulingEpoch); + } + immd_immnd_info_node_getnext(&cb->immnd_tree, &key, &immnd_info_node); + } + } + if (!immnd_info_node) { - LOG_ER("Failed to find candidate for new IMMND coordinator"); + LOG_ER("Failed to find candidate for new IMMND coordinator (ScAbsenceAllowed:%u RulingEpoch:%u", + cb->mScAbsenceAllowed, cb->mRulingEpoch); TRACE_LEAVE(); immd_proc_immd_reset(cb, true); @@ -320,7 +352,7 @@ bool immd_proc_elect_coord(IMMD_CB *cb, send_evt.info.immnd.type = IMMND_EVT_D2ND_INTRO_RSP; send_evt.info.immnd.info.ctrl.nodeId = immnd_info_node->immnd_key; send_evt.info.immnd.info.ctrl.rulingEpoch = cb->mRulingEpoch; - send_evt.info.immnd.info.ctrl.canBeCoord = immnd_info_node->isOnController; + send_evt.info.immnd.info.ctrl.canBeCoord = (immnd_info_node->isOnController)?1:(cb->mScAbsenceAllowed)?4:0; send_evt.info.immnd.info.ctrl.ndExecPid = immnd_info_node->immnd_execPid; send_evt.info.immnd.info.ctrl.isCoord = true; send_evt.info.immnd.info.ctrl.fevsMsgStart = cb->fevsSendCount; @@ -604,7 +636,9 @@ uint32_t immd_process_immnd_down(IMMD_CB } immnd_info->isCoord = 0; immnd_info->isOnController = 0; + immnd_info->epoch = 0; /* needed ? 
*/ cb->immnd_coord = 0; + cb->payload_coord_dest = 0L; coord_exists = immd_proc_elect_coord(cb, false); } } else { diff --git a/osaf/services/saf/immsv/immd/immd_sbevt.c b/osaf/services/saf/immsv/immd/immd_sbevt.c --- a/osaf/services/saf/immsv/immd/immd_sbevt.c +++ b/osaf/services/saf/immsv/immd/immd_sbevt.c [Hung] In immd_process_node_accept(), we also need to set/clear 'payload_coord_dest' when standby IMMD receives a warm-sync for new coordinator. if (immnd_info_node->isCoord) { if (!immnd_info_node->isOnController) { cb->payload_coord_dest = immnd_info_node->immnd_dest; } else { cb->payload_coord_dest = 0LL; } } [Zoran] Added in IF statement like: if(ctrl->isCoord) { SaImmRepositoryInitModeT oldRim = cb->mRim; cb->immnd_coord = immnd_info_node->immnd_key; + cb->payload_coord_dest = (immnd_info_node->isOnController) ? 0LL : immnd_info_node->immnd_dest; Thanks, Zoran @@ -137,7 +137,7 @@ uint32_t immd_process_node_accept(IMMD_C ctrl->nodeId, ctrl->nodeEpoch, ctrl->canBeCoord, ctrl->isCoord, ctrl->syncStarted, ctrl->rulingEpoch, ctrl->pbeEnabled); - if((ctrl->canBeCoord > 1) && !(immd_cb->mIs2Pbe)) { + if((ctrl->canBeCoord > 1) && (ctrl->canBeCoord < 4) && !(immd_cb->mIs2Pbe)) { LOG_ER("Active IMMD has 2PBE enabled, yet this standby is not enabled for 2PBE - exiting"); exit(1); } else if((cb->immnd_coord == 0) && immd_cb->mIs2Pbe && (ctrl->canBeCoord == 1)) { @@ -175,7 +175,7 @@ uint32_t immd_process_node_accept(IMMD_C immnd_info_node->epoch = ctrl->nodeEpoch; } - if (!(immnd_info_node->isOnController) && ctrl->canBeCoord) { + if (!(immnd_info_node->isOnController) && ctrl->canBeCoord && (ctrl->canBeCoord < 4)) { immnd_info_node->isOnController = true; TRACE_5("Corrected isOnController status for immnd node info"); @@ -215,7 +215,7 @@ uint32_t immd_process_node_accept(IMMD_C } } - if(!(ctrl->canBeCoord)) { /* payload node */ + if(!(ctrl->canBeCoord) || (ctrl->canBeCoord== 4)) { /* payload node */ /* Remove the node-id from the list of detached payloads. 
*/ IMMD_IMMND_DETACHED_NODE *detached_node = cb->detached_nodes; IMMD_IMMND_DETACHED_NODE **prev = &(cb->detached_nodes); @@ -246,11 +246,12 @@ uint32_t immd_process_node_accept(IMMD_C TRACE("Standby receiving FS params: %s %s %s", ctrl->dir.buf, ctrl->xmlFile.buf, ctrl->pbeFile.buf); - if(ctrl->dir.size && cb->mDir==NULL && ctrl->canBeCoord) { + if(ctrl->dir.size && cb->mDir==NULL && (ctrl->canBeCoord && (ctrl->canBeCoord < 4))) { TRACE("cb->mDir set to %s in standby", ctrl->dir.buf); cb->mDir = ctrl->dir.buf; /*steal*/ } else if(ctrl->dir.size && cb->mDir) { /* Should not get here since fs params sent only once.*/ + LOG_NO("ABT Should not get here since fs params sent only once"); if(strcmp(cb->mDir, ctrl->dir.buf)) { LOG_WA("SBY: Discrepancy on IMM directory: %s != %s", cb->mDir, ctrl->dir.buf); @@ -261,7 +262,7 @@ uint32_t immd_process_node_accept(IMMD_C ctrl->dir.size=0; - if(ctrl->xmlFile.size && cb->mFile==NULL && ctrl->canBeCoord) { + if(ctrl->xmlFile.size && cb->mFile==NULL && (ctrl->canBeCoord && (ctrl->canBeCoord < 4))) { TRACE("cb->mFile set to %s in standby",ctrl->xmlFile.buf ); cb->mFile = ctrl->xmlFile.buf; /*steal*/ } else if(ctrl->xmlFile.size && cb->mFile) { @@ -276,7 +277,7 @@ uint32_t immd_process_node_accept(IMMD_C ctrl->xmlFile.size=0; - if(ctrl->pbeFile.size && cb->mPbeFile==NULL && ctrl->canBeCoord) { + if(ctrl->pbeFile.size && cb->mPbeFile==NULL && (ctrl->canBeCoord && (ctrl->canBeCoord < 4))) { TRACE("cb->mPbeFile set to %s in standby", ctrl->pbeFile.buf); cb->mPbeFile = ctrl->pbeFile.buf; /*steal*/ } else if(ctrl->pbeFile.size && cb->mPbeFile) { ------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net<mailto:Opensaf-devel@lists.sourceforge.net> https://lists.sourceforge.net/lists/listinfo/opensaf-devel ------------------------------------------------------------------------------ Site24x7 APM Insight: Get 
Deep Visibility into Application Performance APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month Monitor end-to-end web transactions and take corrective actions now Troubleshoot faster and improve end-user experience. Signup Now! http://pubads.g.doubleclick.net/gampad/clk?id=272487151&iu=/4140 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel