osaf/services/saf/immsv/immd/immd_amf.c   |    1 -
 osaf/services/saf/immsv/immd/immd_cb.h    |    7 +-
 osaf/services/saf/immsv/immd/immd_db.c    |   10 +-
 osaf/services/saf/immsv/immd/immd_evt.c   |  191 +++++++++++++++++++++++++----
 osaf/services/saf/immsv/immd/immd_main.c  |   33 ++++-
 osaf/services/saf/immsv/immd/immd_mbcsv.c |    3 +
 osaf/services/saf/immsv/immd/immd_proc.c  |   40 +++++-
 osaf/services/saf/immsv/immd/immd_sbevt.c |   13 +-
 8 files changed, 247 insertions(+), 51 deletions(-)


This patch contains the IMMD code needed to support the cloud resilience
feature.

diff --git a/osaf/services/saf/immsv/immd/immd_amf.c 
b/osaf/services/saf/immsv/immd/immd_amf.c
--- a/osaf/services/saf/immsv/immd/immd_amf.c
+++ b/osaf/services/saf/immsv/immd/immd_amf.c
@@ -15,7 +15,6 @@
  *
  */
 
-#include <logtrace.h>
 #include <nid_start_util.h>
 #include "immd.h"
 #include "immsv.h"
diff --git a/osaf/services/saf/immsv/immd/immd_cb.h 
b/osaf/services/saf/immsv/immd/immd_cb.h
--- a/osaf/services/saf/immsv/immd/immd_cb.h
+++ b/osaf/services/saf/immsv/immd/immd_cb.h
@@ -137,7 +137,10 @@ typedef struct immd_cb_tag {
        bool m2PbeCanLoad;      /* true => 2PBE Loading arbitration completed */
        bool m2PbeExtraWait;    /* true => Used only to prolong wait if both SCs
                                   have been introduced but one has not yet 
replied. */
-       bool nid_started;       /**< true if started by NID */
+       bool nid_started;       /* true if started by NID */
+       SaUint16T mScAbsenceAllowed; /* Non zero if "headless Hydra" allowed 
(loss of both IMMDs/SCs).
+                                      Value is number of seconds of SC absence 
tolerated. */
+       MDS_DEST payload_coord_dest; /* IMMND coord may be at payload if 
mScAbsenceAllowed is nonzero */
 } IMMD_CB;
 
 uint32_t immd_immnd_info_tree_init(IMMD_CB *cb);
@@ -171,4 +174,6 @@ uint32_t immd_mds_change_role(IMMD_CB *c
 
 void immd_proc_immd_reset(IMMD_CB *cb, bool active);
 
+uint32_t immd_immnd_info_node_cardinality(NCS_PATRICIA_TREE *immnd_tree);
+
 #endif
diff --git a/osaf/services/saf/immsv/immd/immd_db.c 
b/osaf/services/saf/immsv/immd/immd_db.c
--- a/osaf/services/saf/immsv/immd/immd_db.c
+++ b/osaf/services/saf/immsv/immd/immd_db.c
@@ -256,9 +256,8 @@ uint32_t immd_cb_db_init(IMMD_CB *cb)
        }
 
        cb->mRim = SA_IMM_INIT_FROM_FILE;
-       cb->mIs2Pbe = false;
-       cb->m2PbeCanLoad = true;
-       if(cb->ha_state == SA_AMF_HA_ACTIVE) {
+       cb->m2PbeCanLoad = !cb->mIs2Pbe;
+       if((cb->ha_state == SA_AMF_HA_ACTIVE) && !cb->mScAbsenceAllowed) {
                cb->is_loading = true;
        }
 
@@ -410,3 +409,8 @@ void immd_db_purge_fevs(IMMD_CB *cb)
        }
        TRACE_LEAVE();
 }
+
+uint32_t immd_immnd_info_node_cardinality(NCS_PATRICIA_TREE *immnd_tree)
+{ /* Thin wrapper: returns whatever ncs_patricia_tree_size() reports for immnd_tree (presumably the number of IMMND info nodes in the tree - TODO confirm). */
+       return  ncs_patricia_tree_size(immnd_tree);
+}
diff --git a/osaf/services/saf/immsv/immd/immd_evt.c 
b/osaf/services/saf/immsv/immd/immd_evt.c
--- a/osaf/services/saf/immsv/immd/immd_evt.c
+++ b/osaf/services/saf/immsv/immd/immd_evt.c
@@ -366,8 +366,8 @@ static void immd_start_sync_ok(IMMD_CB *
        sync_evt.info.immnd.info.ctrl.rulingEpoch = cb->mRulingEpoch;
        sync_evt.info.immnd.info.ctrl.fevsMsgStart = cb->fevsSendCount;
        sync_evt.info.immnd.info.ctrl.nodeId = node_info->immnd_key;
-       sync_evt.info.immnd.info.ctrl.canBeCoord = node_info->isOnController;
-       sync_evt.info.immnd.info.ctrl.ndExecPid = node_info->immnd_execPid;
+       sync_evt.info.immnd.info.ctrl.canBeCoord = 
(node_info->isOnController)?1:(cb->mScAbsenceAllowed)?4:0;
+       sync_evt.info.immnd.info.ctrl.ndExecPid = 
(sync_evt.info.immnd.info.ctrl.canBeCoord==4)?(cb->mScAbsenceAllowed):node_info->immnd_execPid;
        sync_evt.info.immnd.info.ctrl.isCoord = node_info->isCoord;
        sync_evt.info.immnd.info.ctrl.syncStarted = node_info->syncStarted;
        sync_evt.info.immnd.info.ctrl.nodeEpoch = node_info->epoch;
@@ -415,7 +415,8 @@ static void immd_abort_sync_ok(IMMD_CB *
        sync_evt.info.immnd.info.ctrl.rulingEpoch = cb->mRulingEpoch;
        sync_evt.info.immnd.info.ctrl.fevsMsgStart = cb->fevsSendCount;
        sync_evt.info.immnd.info.ctrl.ndExecPid = node_info->immnd_execPid;
-       sync_evt.info.immnd.info.ctrl.canBeCoord = node_info->isOnController;
+       sync_evt.info.immnd.info.ctrl.canBeCoord = 
(node_info->isOnController)?1:(cb->mScAbsenceAllowed)?4:0;
+       sync_evt.info.immnd.info.ctrl.ndExecPid = 
(sync_evt.info.immnd.info.ctrl.canBeCoord==4)?(cb->mScAbsenceAllowed):node_info->immnd_execPid;
        sync_evt.info.immnd.info.ctrl.isCoord = node_info->isCoord;
        sync_evt.info.immnd.info.ctrl.syncStarted = node_info->syncStarted;
        sync_evt.info.immnd.info.ctrl.nodeEpoch = node_info->epoch;
@@ -458,7 +459,8 @@ static void immd_prto_purge_mutations(IM
        sync_evt.info.immnd.info.ctrl.rulingEpoch = cb->mRulingEpoch;
        sync_evt.info.immnd.info.ctrl.fevsMsgStart = cb->fevsSendCount;
        sync_evt.info.immnd.info.ctrl.ndExecPid = node_info->immnd_execPid;
-       sync_evt.info.immnd.info.ctrl.canBeCoord = node_info->isOnController;
+       sync_evt.info.immnd.info.ctrl.canBeCoord = 
(node_info->isOnController)?1:(cb->mScAbsenceAllowed)?4:0;
+       sync_evt.info.immnd.info.ctrl.ndExecPid = 
(sync_evt.info.immnd.info.ctrl.canBeCoord==4)?(cb->mScAbsenceAllowed):node_info->immnd_execPid;
        sync_evt.info.immnd.info.ctrl.isCoord = node_info->isCoord;
        sync_evt.info.immnd.info.ctrl.syncStarted = node_info->syncStarted;
        sync_evt.info.immnd.info.ctrl.nodeEpoch = node_info->epoch;
@@ -489,8 +491,8 @@ static int immd_dump_ok(IMMD_CB *cb, SaU
        dump_evt.info.immnd.type = IMMND_EVT_D2ND_DUMP_OK;
        dump_evt.info.immnd.info.ctrl.rulingEpoch = cb->mRulingEpoch;
        dump_evt.info.immnd.info.ctrl.nodeId = node_info->immnd_key;
-       dump_evt.info.immnd.info.ctrl.canBeCoord = node_info->isOnController;
-       dump_evt.info.immnd.info.ctrl.ndExecPid = node_info->immnd_execPid;
+       dump_evt.info.immnd.info.ctrl.canBeCoord = 
(node_info->isOnController)?1:(cb->mScAbsenceAllowed)?4:0;
+       dump_evt.info.immnd.info.ctrl.ndExecPid = 
(dump_evt.info.immnd.info.ctrl.canBeCoord==4)?(cb->mScAbsenceAllowed):node_info->immnd_execPid;
        dump_evt.info.immnd.info.ctrl.isCoord = node_info->isCoord;
        dump_evt.info.immnd.info.ctrl.fevsMsgStart = cb->fevsSendCount;
        dump_evt.info.immnd.info.ctrl.syncStarted = node_info->syncStarted;
@@ -562,9 +564,10 @@ static void immd_req_sync(IMMD_CB *cb, I
        rqsync_evt.info.immnd.type = IMMND_EVT_D2ND_SYNC_REQ;
        rqsync_evt.info.immnd.info.ctrl.nodeId = node_info->immnd_key;
        rqsync_evt.info.immnd.info.ctrl.rulingEpoch = cb->mRulingEpoch;
-       rqsync_evt.info.immnd.info.ctrl.canBeCoord = node_info->isOnController;
+       rqsync_evt.info.immnd.info.ctrl.canBeCoord = 
(node_info->isOnController)?1:(cb->mScAbsenceAllowed)?4:0;
+       rqsync_evt.info.immnd.info.ctrl.ndExecPid = node_info->immnd_execPid;
+       rqsync_evt.info.immnd.info.ctrl.ndExecPid = 
(rqsync_evt.info.immnd.info.ctrl.canBeCoord==4)?(cb->mScAbsenceAllowed):node_info->immnd_execPid;
        rqsync_evt.info.immnd.info.ctrl.fevsMsgStart = cb->fevsSendCount;
-       rqsync_evt.info.immnd.info.ctrl.ndExecPid = node_info->immnd_execPid;
        rqsync_evt.info.immnd.info.ctrl.isCoord = node_info->isCoord;
        rqsync_evt.info.immnd.info.ctrl.syncStarted = node_info->syncStarted;
        rqsync_evt.info.immnd.info.ctrl.nodeEpoch = node_info->epoch;
@@ -588,7 +591,7 @@ static void immd_req_sync(IMMD_CB *cb, I
 
        if (cb->immnd_coord == cb->node_id) {   /*Coord immnd is local, i.e. at 
active SC. */
                if (!(cb->is_loc_immnd_up)) {
-                       LOG_ER("No coordinator IMMND known - ignoring sync 
request");
+                       LOG_ER("No coordinator IMMND known (case A) - ignoring 
sync request");
                        goto done;
                }
 
@@ -604,22 +607,36 @@ static void immd_req_sync(IMMD_CB *cb, I
                               proc_rc, node_info->immnd_key);
                        goto done;
                }
-       } else {                /*Coord immnd is at remote, i.e. at standby SC. 
*/
-
-               if (!(cb->is_rem_immnd_up)) {
-                       LOG_WA("No coordinator IMMND known - ignoring sync 
request");
-                       goto done;
-               }
-
+       }
+
+       if (cb->is_rem_immnd_up && (cb->immd_remote_id == 
immd_get_slot_and_subslot_id_from_node_id(cb->immnd_coord))) {
+               /*Coord immnd is at remote, i.e. at standby SC. */
                TRACE_5("Send-3 SYNC_REQ to remote coord IMMND at standby SC, 
dest:%" PRIu64, cb->rem_immnd_dest);
                proc_rc = immd_mds_msg_send(cb, NCSMDS_SVC_ID_IMMND, 
cb->rem_immnd_dest, &rqsync_evt);
                if (proc_rc != NCSCC_RC_SUCCESS) {
                        LOG_WA("Failed to send rqsync message err:%u to coord 
IMMND "
                               "at standby dest:%" PRIu64, proc_rc, 
cb->rem_immnd_dest);
-                       goto done;
                }
+               goto done;
        }
 
+       LOG_IN("ABT coord at payload ? cb->immnd_coord:%x 
cb->mScAbsenceAllowed: %u dest:%" PRIu64,
+               cb->immnd_coord, cb->mScAbsenceAllowed, cb->payload_coord_dest);
+
+       if(cb->immnd_coord && cb->mScAbsenceAllowed) {
+               LOG_NO("Sc Absence Allowed is configured (%u) => IMMND coord at 
payload node:%x dest%" PRIu64,
+                       cb->mScAbsenceAllowed, cb->immnd_coord, 
cb->payload_coord_dest);
+               TRACE_5("Send-4 SYNC_REQ to remote coord IMMND at payload, 
dest:%" PRIu64, cb->payload_coord_dest);
+               proc_rc = immd_mds_msg_send(cb, NCSMDS_SVC_ID_IMMND, 
cb->payload_coord_dest, &rqsync_evt);
+               if (proc_rc != NCSCC_RC_SUCCESS) {
+                       LOG_WA("Failed to send rqsync message err:%u to coord 
IMMND "
+                               "at PAYLOAD dest:%" PRIu64, proc_rc, 
cb->payload_coord_dest);
+               }
+               goto done;
+       }
+
+       LOG_WA("No coordinator IMMND known (case B) - ignoring sync request");
+
  done:
        TRACE_LEAVE();
 }
@@ -651,29 +668,33 @@ static void immd_kill_node(IMMD_CB *cb, 
        TRACE_LEAVE();
 }
 
-static void immd_accept_node(IMMD_CB *cb, IMMD_IMMND_INFO_NODE *node_info, 
bool doReply)
+static uint16_t accepted_nodes = 0;
+
+static void immd_accept_node(IMMD_CB *cb, IMMD_IMMND_INFO_NODE *node_info, 
bool doReply, bool knownVeteran)
 {
        uint32_t proc_rc = NCSCC_RC_SUCCESS;
        IMMSV_EVT accept_evt;
        IMMD_MBCSV_MSG mbcp_msg;
        bool isOnController = node_info->isOnController;
        bool fsParamMbcp = false;
+       int32_t mds_attached_nodes = 
(int32_t)immd_immnd_info_node_cardinality(&cb->immnd_tree);
+       LOG_NO("Attached Nodes:%u Accepted nodes:%u KnownVeteran:%u 
doReply:%u", mds_attached_nodes, accepted_nodes, knownVeteran, doReply);
        TRACE_ENTER();
 
        memset(&accept_evt, 0, sizeof(IMMSV_EVT));
        memset(&mbcp_msg, 0, sizeof(IMMD_MBCSV_MSG));
+       if(cb->mScAbsenceAllowed && doReply) {++accepted_nodes;}
 
        accept_evt.type = IMMSV_EVT_TYPE_IMMND;
        accept_evt.info.immnd.type = IMMND_EVT_D2ND_INTRO_RSP;
        accept_evt.info.immnd.info.ctrl.nodeId = node_info->immnd_key;
        accept_evt.info.immnd.info.ctrl.rulingEpoch = cb->mRulingEpoch;
-       accept_evt.info.immnd.info.ctrl.canBeCoord = isOnController;
-       accept_evt.info.immnd.info.ctrl.ndExecPid = node_info->immnd_execPid;
+       accept_evt.info.immnd.info.ctrl.canBeCoord = 
(node_info->isOnController)?1:0; /* ScAbsenceAllowed case handled below*/
+       accept_evt.info.immnd.info.ctrl.ndExecPid = 
/*(accept_evt.info.immnd.info.ctrl.canBeCoord==4)?(cb->mScAbsenceAllowed):*/node_info->immnd_execPid;
        accept_evt.info.immnd.info.ctrl.fevsMsgStart = cb->fevsSendCount;
        accept_evt.info.immnd.info.ctrl.nodeEpoch = node_info->epoch;
        /* Sending back pbeEnabled from IMMD to IMMNDs not really needed.*/
        accept_evt.info.immnd.info.ctrl.pbeEnabled = (cb->mRim == 
SA_IMM_KEEP_REPOSITORY);
-
        if (isOnController) {
                if(cb->immnd_coord == 0) {/* No coord */                        
                        if(cb->mIs2Pbe) {
@@ -681,12 +702,21 @@ static void immd_accept_node(IMMD_CB *cb
                                        " Cluster is loading. 2PBE configured 
=> Wait.",
                                        node_info->immnd_key, cb->node_id);
                                accept_evt.info.immnd.info.ctrl.canBeCoord = 2; 
/* 2PBE => order preload. */
-                       } else {
+                       } else if(!(cb->mScAbsenceAllowed) || knownVeteran || 
(mds_attached_nodes == 1) ||
+                                  (accepted_nodes > (mds_attached_nodes - 2))) 
{
+                               /* ABT possibly need timeout condition ALSO. 
But we don't want another config variable */
                                LOG_NO("First IMMND on SC found at %x this IMMD 
at %x."
                                        " Cluster is loading, *not* 2PBE => 
designating that IMMND as coordinator",
                                        node_info->immnd_key, cb->node_id);
                                cb->immnd_coord = node_info->immnd_key;
                                node_info->isCoord = true;
+                       } else if(cb->mScAbsenceAllowed && doReply) {
+                               LOG_NO("Postponing acceptance of SC IMMND until 
%u nodes introduced.", mds_attached_nodes);
+                               doReply = false;
+                               --accepted_nodes;
+                       } else {
+                              LOG_NO("PROBLEM CASE (?) ScAbsenceAllowed: %u; 
accepted_nodes:%u; mds_attached_nodess:%u",
+                                     cb->mScAbsenceAllowed, accepted_nodes, 
mds_attached_nodes);
                        }
                } else {
                        /* Coord already exists. An SC is joining the cluster. 
*/
@@ -695,9 +725,16 @@ static void immd_accept_node(IMMD_CB *cb
                                accept_evt.info.immnd.info.ctrl.canBeCoord = 3; 
                        } else {
                                /* 1PBE or 0PBE joining SC must sync */
-                               accept_evt.info.immnd.info.ctrl.canBeCoord = 1;
+                               accept_evt.info.immnd.info.ctrl.canBeCoord = 
(node_info->isOnController)?1:(cb->mScAbsenceAllowed)?4:0;
                        }
                }
+       } else if(cb->immnd_coord == 0 && cb->mScAbsenceAllowed && 
knownVeteran) {
+               LOG_NO("First Veteran IMMND found (payload) at %x this IMMD at 
%x."
+                       " Apparent IMMD lapse, *not* 2PBE => designating that 
IMMND as coordinator",
+                       node_info->immnd_key, cb->node_id);
+               cb->immnd_coord = node_info->immnd_key;
+               cb->payload_coord_dest = node_info->immnd_dest;
+               node_info->isCoord = true;
        }
 
        if (node_info->isCoord) {
@@ -734,6 +771,15 @@ static void immd_accept_node(IMMD_CB *cb
        if (doReply) {
                /*If doReply is false then this was only an epoch refresh from 
an IMMND.
                  Send reply on intro (accept) message back to sending IMMND */
+
+               if(cb->mScAbsenceAllowed) {
+                       osafassert(accept_evt.info.immnd.info.ctrl.canBeCoord < 
2); /* Not 2PBE */
+                       accept_evt.info.immnd.info.ctrl.canBeCoord = 4; /* 
Allow all nodes including payloads to be coord */
+                       accept_evt.info.immnd.info.ctrl.ndExecPid = 
cb->mScAbsenceAllowed;
+                       /* ExecPid not really used by IMMND as receiver on reply 
to its intro request.
+                          Here we overload the use of the ndExecPid field to 
transport the ScAbsenceAllowed value.*/
+               }
+
                proc_rc = immd_mds_msg_send(cb, NCSMDS_SVC_ID_IMMND, 
node_info->immnd_dest, &accept_evt);
                if (proc_rc != NCSCC_RC_SUCCESS) {
                        LOG_ER("Failed to send accept message to IMMND %x", 
node_info->immnd_key);
@@ -786,6 +832,17 @@ static void immd_accept_node(IMMD_CB *cb
                                        goto done;
                                }
                        }
+
+                       if(cb->mScAbsenceAllowed && cb->payload_coord_dest) {
+                               /* SC absence allowed and coord is configured 
at payload. */
+                               TRACE("Payload intro sent to IMMND coord at 
payload (%x)", cb->immnd_coord);
+                               proc_rc = immd_mds_msg_send(cb, 
NCSMDS_SVC_ID_IMMND, cb->payload_coord_dest, &accept_evt);
+                               if (proc_rc != NCSCC_RC_SUCCESS) {
+                                       LOG_WA("Failed to send immnd-payload 
accept message to IMMND at "
+                                               "payload %" PRIu64 " error:%u", 
cb->payload_coord_dest, proc_rc);
+                                       goto done;
+                               }
+                       }
                }
        } else {                /* Not doReply => epoch refresh => probably a 
sync => reset sync request. */
                /*Reset any syncRequester to normal. */
@@ -1294,6 +1351,7 @@ static uint32_t immd_evt_proc_immnd_intr
        IMMD_IMMND_INFO_NODE *node_info = NULL;
        int oldPid, newPid;
        int oldEpoch, newEpoch;
+       bool veteranImmndNode = false;
 
        TRACE_ENTER();
 
@@ -1321,7 +1379,7 @@ static uint32_t immd_evt_proc_immnd_intr
                if (node_info->syncStarted) {
                        osafassert(oldPid == newPid);
                        osafassert(node_info->isCoord);
-                       osafassert(node_info->isOnController);
+                       osafassert(node_info->isOnController || 
cb->mScAbsenceAllowed);
                        if(node_info->epoch != cb->mRulingEpoch) {
                                LOG_ER("immd_evt_proc_immnd_intro: syncStarted 
true for node with "
                                        "strange epoch node_info->epoch(%u) != 
cb->mRulingEpoc(%u)",    
@@ -1345,8 +1403,11 @@ static uint32_t immd_evt_proc_immnd_intr
                node_info->immnd_key, node_info->immnd_execPid, 
node_info->epoch, node_info->syncRequested);
 
        if (evt->info.ctrl_msg.refresh) {
-               TRACE_5("ONLY A REFRESH OF epoch for %x, newE:%u RulngE:%u",
-                       node_info->immnd_key, node_info->epoch, 
cb->mRulingEpoch);
+               if(evt->info.ctrl_msg.refresh==1) {
+                       TRACE_5("ONLY A REFRESH OF epoch for %x, newE:%u 
RulngE:%u",
+                               node_info->immnd_key, node_info->epoch, 
cb->mRulingEpoch);
+               }
+
                if (cb->mRulingEpoch < node_info->epoch) {
                        cb->mRulingEpoch = node_info->epoch;
                        LOG_NO("Ruling epoch changed to:%u", cb->mRulingEpoch);
@@ -1363,8 +1424,71 @@ static uint32_t immd_evt_proc_immnd_intr
                        }
                }
 
-               immd_accept_node(cb, node_info, false);
-               goto done;
+               if(evt->info.ctrl_msg.refresh==2) {
+                       /* Refresh from up and running IMMND perspective but 
not from restarted IMMDs perspective.
+                          IMMNDs update IMMDs with current global counters. 
There is a potential race here to
+                          worry about. Probably need a timer mechanism to 
ensure that restarted IMMDs do not
+                          start responding to requests involving increment of 
these counters, before some
+                          lagard IMMND provides the latest counter values. The 
up-side here is that all "veteran"
+                          IMMNDs should have the same value on all counters.
+
+                          What we need to guard against is some IMMNDs that 
have also restarted during the IMMD
+                          outage. If these truly restarted IMMNDs introduce 
themselves before any veteran (non
+                          restarted) IMMND re-introduces itself (resets the 
counters), there could in theory be
+                          trouble. But we should be saved by the fact that 
truly restarted IMMNDs are waiting to
+                          be loaded or synced. They should not be generating 
fevs messages before that.
+                        */
+
+                       if(!(cb->mScAbsenceAllowed)) {
+                               LOG_WA("ABSENT_SC_ALLOWED is NOT configured yet 
IMMND reports absent IMMD - ignoring");
+                               goto done;
+                       }
+
+                       veteranImmndNode = true;
+
+                       if(cb->fevsSendCount < evt->info.ctrl_msg.fevs_count) {
+                               LOG_NO("Refresh of fevs count from %llu to %llu 
from %x.", cb->fevsSendCount,
+                                       evt->info.ctrl_msg.fevs_count, 
node_info->immnd_key);
+                               cb->fevsSendCount = 
evt->info.ctrl_msg.fevs_count;
+                       } else {
+                               LOG_IN("Ignoring refresh of fevs count from %x. 
Local:%llu >= Refresh:%llu.",
+                                       node_info->immnd_key, 
cb->fevsSendCount, evt->info.ctrl_msg.fevs_count);
+                       }
+
+                       if(cb->admo_id_count < 
evt->info.ctrl_msg.admo_id_count) {
+                               LOG_NO("Refresh of admoId count from %u to %u 
from %x.", cb->admo_id_count,
+                                       evt->info.ctrl_msg.admo_id_count, 
node_info->immnd_key);
+                               cb->admo_id_count = 
evt->info.ctrl_msg.admo_id_count;
+                       } else {
+                               LOG_IN("Ignoring refresh of admoId count from 
%x. Local:%u >= Refresh:%u.",
+                                       node_info->immnd_key, 
cb->admo_id_count, evt->info.ctrl_msg.admo_id_count);
+                       }
+
+                       if(cb->ccb_id_count < evt->info.ctrl_msg.ccb_id_count) {
+                               LOG_NO("Refresh of ccbId count from %u to %u 
from %x.", cb->ccb_id_count,
+                                       evt->info.ctrl_msg.ccb_id_count, 
node_info->immnd_key);
+                               cb->ccb_id_count = 
evt->info.ctrl_msg.ccb_id_count;
+                       } else {
+                               LOG_IN("Ignoring refresh of ccbId count from 
%x. Local:%u >= Refresh:%u.",
+                                       node_info->immnd_key, cb->ccb_id_count, 
evt->info.ctrl_msg.ccb_id_count);
+                       }
+
+                       if(cb->impl_count < evt->info.ctrl_msg.impl_count) {
+                               LOG_NO("Refresh of impl count from %u to %u 
from %x.", cb->impl_count,
+                                       evt->info.ctrl_msg.impl_count, 
node_info->immnd_key);
+                               cb->impl_count = evt->info.ctrl_msg.impl_count;
+                       } else {
+                               LOG_IN("Ignoring refresh of impl count from %x. 
Local:%u >= Refresh:%u.",
+                                       node_info->immnd_key, cb->impl_count, 
evt->info.ctrl_msg.impl_count);
+                       }
+
+                       /* Fall down into reception at new IMMD */
+               } else {
+                       /* Regular old refresh, basically just statistics and 
tracing. */
+                       immd_accept_node(cb, node_info, false, false);
+                       goto done;
+               }
+
        }
 
        /* Determine type of node. */
@@ -1386,6 +1510,10 @@ static uint32_t immd_evt_proc_immnd_intr
                LOG_IN("New IMMND process is on PAYLOAD at:%x", 
node_info->immnd_key);
        }
 
+       if(evt->info.ctrl_msg.refresh==2) {
+               goto accept_node;
+       }
+
        /* Check for consistent file/dir/pbe configuration. If problem is found
           then node is not accepted and no reply is sent for the intro request
           from that node. But first check if node to be introduced is of older
@@ -1407,7 +1535,7 @@ static uint32_t immd_evt_proc_immnd_intr
                if(evt->info.ctrl_msg.pbeFile.size > 1) {
                        node_info->pbeConfigured = true;
                }
-       } 
+       }
 
        if(!(node_info->pbeConfigured)) { /* New node does not have pbe 
configured. */
                if(cb->mIs2Pbe) {
@@ -1589,7 +1717,7 @@ static uint32_t immd_evt_proc_immnd_intr
 
  accept_node:
 
-       immd_accept_node(cb, node_info, true);
+       immd_accept_node(cb, node_info, true, veteranImmndNode);
 
  done:
 
@@ -2518,7 +2646,7 @@ static uint32_t immd_evt_proc_mds_evt(IM
                                                TRACE_5("Located STDBY IMMND =  
%x node_id:%x",
                                                        
immd_get_slot_and_subslot_id_from_node_id(mds_info->node_id),
                                                        mds_info->node_id);
-                                               immd_accept_node(cb, node_info, 
true);
+                                               immd_accept_node(cb, node_info, 
true, false); /* <==== Can not be sc-absence veteran if on sc. */
                                        }
                                        /* Break out of while-1. We found */
                                        break;
@@ -2539,6 +2667,7 @@ static uint32_t immd_evt_proc_mds_evt(IM
                if (mds_info->svc_id == NCSMDS_SVC_ID_IMMND) {
                        phy_slot_sub_slot = 
immd_get_slot_and_subslot_id_from_mds_dest(mds_info->dest);
                        immd_immnd_info_node_find_add(&cb->immnd_tree, 
&mds_info->dest, &node_info, &add_flag);
+                       LOG_IN("node with dest ADDED %" PRIu64, mds_info->dest);
 
                        if (m_IMMND_IS_ON_SCXB(cb->immd_self_id,
                                               
immd_get_slot_and_subslot_id_from_mds_dest(mds_info->dest))) {
diff --git a/osaf/services/saf/immsv/immd/immd_main.c 
b/osaf/services/saf/immsv/immd/immd_main.c
--- a/osaf/services/saf/immsv/immd/immd_main.c
+++ b/osaf/services/saf/immsv/immd/immd_main.c
@@ -217,21 +217,23 @@ int main(int argc, char *argv[])
        struct pollfd fds[4];
        const int peerMaxWaitMin = 5; /*5 sec*/
        const char * peerWaitStr = getenv("IMMSV_2PBE_PEER_SC_MAX_WAIT");
+       const char * absentScStr = getenv("IMMSV_SC_ABSENCE_ALLOWED");
        int32_t timeout = (-1);
        int32_t total_wait = (-1);
        int64_t start_time = 0LL;
        uint32_t print_at_secs = 1LL;
        int term_fd;
+       uint16_t scAbsenceAllowed = 0;
 
        daemonize(argc, argv);
 
-       if (immd_initialize() != NCSCC_RC_SUCCESS) {
-               TRACE("initialize_immd failed");
-               goto done;
+       if(absentScStr) {
+               scAbsenceAllowed = atoi(absentScStr);
+               if(!scAbsenceAllowed) {
+                       LOG_WA("SC_ABSENCE_ALLOWED malconfigured: '%s'", 
absentScStr);
+               }
        }
 
-       daemon_sigterm_install(&term_fd);
-
        if(peerWaitStr) {
                int32_t peerMaxWait = atoi(peerWaitStr);
                if(peerMaxWait < peerMaxWaitMin) {
@@ -247,9 +249,28 @@ int main(int argc, char *argv[])
                start_time = m_NCS_GET_TIME_MS;
 
                immd_cb->mIs2Pbe = true; /* Redundant PBE */
-               immd_cb->m2PbeCanLoad = false; /* Not ready to load yet */
+
+               if(scAbsenceAllowed) {
+                       LOG_ER("SC_ABSENCE_ALLOWED  is *incompatible* with 2PBE 
- 2PBE overrides");
+                       scAbsenceAllowed = 0;
+               }
        }
 
+       if(scAbsenceAllowed) {
+               LOG_NO("******* SC_ABSENCE_ALLOWED (Headless Hydra) is 
configured: %u ***********",
+                       scAbsenceAllowed);
+               LOG_NO("Waiting 3 seconds to allow IMMND MDS attachments to get 
processed.");
+               sleep(3);
+       }
+
+       immd_cb->mScAbsenceAllowed = scAbsenceAllowed;
+
+       if (immd_initialize() != NCSCC_RC_SUCCESS) {
+               TRACE("initialize_immd failed");
+               goto done;
+       }
+
+       daemon_sigterm_install(&term_fd);
 
        /* Get file descriptor for mailbox */
        mbx_fd = ncs_ipc_get_sel_obj(&immd_cb->mbx);
diff --git a/osaf/services/saf/immsv/immd/immd_mbcsv.c 
b/osaf/services/saf/immsv/immd/immd_mbcsv.c
--- a/osaf/services/saf/immsv/immd/immd_mbcsv.c
+++ b/osaf/services/saf/immsv/immd/immd_mbcsv.c
@@ -1115,6 +1115,9 @@ static uint32_t mbcsv_dec_sync_resp(IMMD
 
                if (node_info->isCoord) {
                        cb->immnd_coord = node_info->immnd_key;
+                       if(!node_info->isOnController && cb->mScAbsenceAllowed) 
{
+                               cb->payload_coord_dest = node_info->immnd_dest;
+                       }
                }
 
                ptr = ncs_dec_flatten_space(&arg->info.decode.i_uba, data, 
sizeof(uint8_t));
diff --git a/osaf/services/saf/immsv/immd/immd_proc.c 
b/osaf/services/saf/immsv/immd/immd_proc.c
--- a/osaf/services/saf/immsv/immd/immd_proc.c
+++ b/osaf/services/saf/immsv/immd/immd_proc.c
@@ -126,6 +126,7 @@ void immd_proc_immd_reset(IMMD_CB *cb, b
 
        cb->mRulingEpoch = 0;
        cb->immnd_coord = 0;
+       cb->payload_coord_dest = 0L;
        cb->fevsSendCount = 0LL;
 
        cb->locPbe.epoch = 0;
@@ -242,6 +243,11 @@ bool immd_proc_elect_coord(IMMD_CB *cb, 
                                } else {
                                        /* Re-elect local coord. See #578 */
                                        if(immnd_info_node->immnd_key != 
cb->node_id) {
+                                               if(cb->mScAbsenceAllowed) {
+                                                       
LOG_WA("ScAbsenceAllowed(%u), failover after SC-absence => coord at payload",
+                                                              
cb->mScAbsenceAllowed);
+                                                       break;
+                                               }
                                                LOG_ER("Changing IMMND coord 
while old coord is still up!");
                                                /* Could theoretically happen 
if remote IMMD is down, i.e. 
                                                   failover, but MDS has not 
yet provided IMMND DOWN for that
@@ -270,7 +276,8 @@ bool immd_proc_elect_coord(IMMD_CB *cb, 
                   one step higher than the coord epoch.
                 */
        } else {
-               /* Try to elect a new coord. */
+               /* Try to elect a new coord. ABT Do I need to reset 
payload_coord_dest earlier ? */
+               cb->payload_coord_dest = 0LL;
                memset(&key, 0, sizeof(MDS_DEST));
                immd_immnd_info_node_getnext(&cb->immnd_tree, &key, 
&immnd_info_node);
                while (immnd_info_node) {
@@ -284,8 +291,33 @@ bool immd_proc_elect_coord(IMMD_CB *cb, 
                        immd_immnd_info_node_getnext(&cb->immnd_tree, &key, 
&immnd_info_node);
                }
 
+               if (!immnd_info_node && cb->mScAbsenceAllowed) {
+                       /* If SC absence is allowed and no SC based IMMND is 
available
+                          then elect an IMMND coord at a payload. Note this 
means that
+                          an IMMND at a payload may be elected coord even if 
one or both
+                          SCs are available, but no synced IMMND is available 
at any SC.
+                       */
+                      memset(&key, 0, sizeof(MDS_DEST));
+                      immd_immnd_info_node_getnext(&cb->immnd_tree, &key, 
&immnd_info_node);
+                      while (immnd_info_node) {
+                              key = immnd_info_node->immnd_dest;
+                              if (immnd_info_node->epoch == cb->mRulingEpoch) {
+                                      /*We found a new candidate for 
coordinator */
+                                      immnd_info_node->isCoord = true;
+                                      cb->payload_coord_dest = 
immnd_info_node->immnd_dest;
+                                      LOG_NO("Coord elected at payload:%x", 
immnd_info_node->immnd_key);
+                                      break;
+                              } else {
+                                      LOG_IN("Payload %x rejected as coord, 
epoch(%u) != rulingEpoch(%u)",
+                                              immnd_info_node->immnd_key, 
immnd_info_node->epoch, cb->mRulingEpoch);
+                              }
+                              immd_immnd_info_node_getnext(&cb->immnd_tree, 
&key, &immnd_info_node);
+                      }
+               }
+
                if (!immnd_info_node) {
-                       LOG_ER("Failed to find candidate for new IMMND 
coordinator");
+                       LOG_ER("Failed to find candidate for new IMMND 
coordinator (ScAbsenceAllowed:%u RulingEpoch:%u",
+                               cb->mScAbsenceAllowed,  cb->mRulingEpoch);
 
                        TRACE_LEAVE();
                        immd_proc_immd_reset(cb, true);
@@ -320,7 +352,7 @@ bool immd_proc_elect_coord(IMMD_CB *cb, 
                send_evt.info.immnd.type = IMMND_EVT_D2ND_INTRO_RSP;
                send_evt.info.immnd.info.ctrl.nodeId = 
immnd_info_node->immnd_key;
                send_evt.info.immnd.info.ctrl.rulingEpoch = cb->mRulingEpoch;
-               send_evt.info.immnd.info.ctrl.canBeCoord = 
immnd_info_node->isOnController;
+               send_evt.info.immnd.info.ctrl.canBeCoord = 
(immnd_info_node->isOnController)?1:(cb->mScAbsenceAllowed)?4:0;
                send_evt.info.immnd.info.ctrl.ndExecPid = 
immnd_info_node->immnd_execPid;
                send_evt.info.immnd.info.ctrl.isCoord = true;
                send_evt.info.immnd.info.ctrl.fevsMsgStart = cb->fevsSendCount;
@@ -604,7 +636,9 @@ uint32_t immd_process_immnd_down(IMMD_CB
                        }
                        immnd_info->isCoord = 0;
                        immnd_info->isOnController = 0;
+                       immnd_info->epoch = 0; /* needed ? */
                        cb->immnd_coord = 0;
+                       cb->payload_coord_dest = 0L;
                        coord_exists = immd_proc_elect_coord(cb, false);
                }
        } else {
diff --git a/osaf/services/saf/immsv/immd/immd_sbevt.c 
b/osaf/services/saf/immsv/immd/immd_sbevt.c
--- a/osaf/services/saf/immsv/immd/immd_sbevt.c
+++ b/osaf/services/saf/immsv/immd/immd_sbevt.c
@@ -137,7 +137,7 @@ uint32_t immd_process_node_accept(IMMD_C
                ctrl->nodeId, ctrl->nodeEpoch, ctrl->canBeCoord, ctrl->isCoord, 
ctrl->syncStarted, 
                ctrl->rulingEpoch, ctrl->pbeEnabled);
 
-       if((ctrl->canBeCoord > 1) && !(immd_cb->mIs2Pbe)) {
+       if((ctrl->canBeCoord > 1) && (ctrl->canBeCoord < 4) && 
!(immd_cb->mIs2Pbe)) {
                LOG_ER("Active IMMD has 2PBE enabled, yet this standby is not 
enabled for 2PBE - exiting");
                exit(1);
        } else if((cb->immnd_coord == 0) && immd_cb->mIs2Pbe && 
(ctrl->canBeCoord == 1)) {
@@ -175,7 +175,7 @@ uint32_t immd_process_node_accept(IMMD_C
 
                        immnd_info_node->epoch = ctrl->nodeEpoch;
                }
-               if (!(immnd_info_node->isOnController) && ctrl->canBeCoord) {
+               if (!(immnd_info_node->isOnController) && ctrl->canBeCoord && 
(ctrl->canBeCoord < 4)) {
                        immnd_info_node->isOnController = true;
                        TRACE_5("Corrected isOnController status for immnd node 
info");
 
@@ -215,7 +215,7 @@ uint32_t immd_process_node_accept(IMMD_C
                        }
                }
 
-               if(!(ctrl->canBeCoord)) { /* payload node */
+               if(!(ctrl->canBeCoord) || (ctrl->canBeCoord== 4)) { /* payload 
node */
                        /* Remove the node-id from the list of detached 
payloads. */
                        IMMD_IMMND_DETACHED_NODE *detached_node = 
cb->detached_nodes;
                        IMMD_IMMND_DETACHED_NODE **prev = &(cb->detached_nodes);
@@ -246,11 +246,12 @@ uint32_t immd_process_node_accept(IMMD_C
                TRACE("Standby receiving FS params: %s %s %s", 
                        ctrl->dir.buf, ctrl->xmlFile.buf, ctrl->pbeFile.buf);
 
-               if(ctrl->dir.size && cb->mDir==NULL && ctrl->canBeCoord) {
+               if(ctrl->dir.size && cb->mDir==NULL && (ctrl->canBeCoord && 
(ctrl->canBeCoord < 4))) {
                        TRACE("cb->mDir set to %s in standby", ctrl->dir.buf);
                        cb->mDir = ctrl->dir.buf; /*steal*/
                } else if(ctrl->dir.size && cb->mDir) {
                        /* Should not get here since fs params sent only once.*/
+                       LOG_NO("ABT Should not get here since fs params sent 
only once");
                        if(strcmp(cb->mDir, ctrl->dir.buf)) {
                                LOG_WA("SBY: Discrepancy on IMM directory: %s 
!= %s",
                                        cb->mDir, ctrl->dir.buf);
@@ -261,7 +262,7 @@ uint32_t immd_process_node_accept(IMMD_C
                ctrl->dir.size=0;
 
 
-               if(ctrl->xmlFile.size && cb->mFile==NULL && ctrl->canBeCoord) {
+               if(ctrl->xmlFile.size && cb->mFile==NULL && (ctrl->canBeCoord 
&& (ctrl->canBeCoord < 4))) {
                        TRACE("cb->mFile set to %s in 
standby",ctrl->xmlFile.buf );
                        cb->mFile = ctrl->xmlFile.buf; /*steal*/
                } else if(ctrl->xmlFile.size && cb->mFile) {
@@ -276,7 +277,7 @@ uint32_t immd_process_node_accept(IMMD_C
                ctrl->xmlFile.size=0;
 
 
-               if(ctrl->pbeFile.size && cb->mPbeFile==NULL && 
ctrl->canBeCoord) {
+               if(ctrl->pbeFile.size && cb->mPbeFile==NULL && 
(ctrl->canBeCoord && (ctrl->canBeCoord < 4))) {
                        TRACE("cb->mPbeFile set to %s in standby", 
ctrl->pbeFile.buf);
                        cb->mPbeFile = ctrl->pbeFile.buf; /*steal*/
                } else if(ctrl->pbeFile.size && cb->mPbeFile) {

------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to