osaf/services/saf/immsv/immnd/immnd_evt.c | 37 ++++++++++++++++++++---------
osaf/services/saf/immsv/immnd/immnd_proc.c | 34 +++++++++++++++++++++++++--
2 files changed, 56 insertions(+), 15 deletions(-)
Set 'mIsCoord' to false when headless so that the coordinator does not have to restart.
Also handle the cases where headless occurs before or during a sync.
diff --git a/osaf/services/saf/immsv/immnd/immnd_evt.c
b/osaf/services/saf/immsv/immnd/immnd_evt.c
--- a/osaf/services/saf/immsv/immnd/immnd_evt.c
+++ b/osaf/services/saf/immsv/immnd/immnd_evt.c
@@ -10194,24 +10194,38 @@ static uint32_t immnd_evt_proc_mds_evt(I
}
exit(1);
} else { /* SC ABSENCE ALLOWED */
+ cb->mIntroduced = 2;
LOG_WA("SC Absence IS allowed:%u IMMD service is DOWN",
cb->mScAbsenceAllowed);
if(cb->mIsCoord) {
- /* Note that normally the coord will reside at
SCs so this branch will
- only be relevant if REPEATED toal scAbsence
occurs. After SC absence
- and subsequent return of SC, the coord will
be elected at a payload.
- That coord will be active untill restart of
that payload..
- unless we add functionality for the payload
coord to restart after
- a few minutes .. ?
- */
- LOG_WA("This IMMND coord has to exit allowing
restarted IMMD to select new coord");
- if(cb->mState < IMM_SERVER_SYNC_SERVER) {
- immnd_ackToNid(NCSCC_RC_FAILURE);
+ cb->mIsCoord = false;
+
+ if (cb->mSyncRequested) {
+ /* Just got sync requested from IMMD,
nothing happened yet */
+ cb->mSyncRequested = false;
+
+ } else if (cb->mState == IMM_SERVER_SYNC_SERVER
&& cb->mPendSync) {
+ /* Sent out sync-start msg but sync
didn't start yet, revert the state to IMM_SERVER_READY */
+ cb->mPendSync = false;
+ cb->mState = IMM_SERVER_READY;
+ LOG_NO("SERVER STATE:
IMM_SERVER_SYNC_SERVER --> IMM_SERVER_READY");
+
+ } else if (cb->mState == IMM_SERVER_SYNC_SERVER
&& (cb->syncPid > 0)) {
+ /* Sync started, kill sync process to
trigger sync abort in immnd_proc_server() */
+ osafassert(!cb->mPendSync);
+ kill(cb->syncPid, SIGTERM);
}
- exit(1);
+
} else if(cb->mState <= IMM_SERVER_LOADING_PENDING) {
/* Reset state in payloads that had not joined.
No need to restart. */
LOG_IN("Resetting IMMND state from %u to
IMM_SERVER_ANONYMOUS", cb->mState);
cb->mState = IMM_SERVER_ANONYMOUS;
+
+ } else if (cb->mState == IMM_SERVER_READY &&
immModel_immNotWritable(cb)) {
+ /* SC absence is allowed and IMMD went down while a sync was
+ in progress. Veteran nodes other than the syncing node have to
+ change the node state from NODE_R_AVAILABLE to NODE_FULLY_AVAILABLE. */
+ immnd_abortSync(cb);
+
} else if(cb->mState < IMM_SERVER_READY) {
LOG_WA("IMMND was being synced or loaded (%u),
has to restart", cb->mState);
if(cb->mState < IMM_SERVER_SYNC_SERVER) {
@@ -10220,7 +10234,6 @@ static uint32_t immnd_evt_proc_mds_evt(I
exit(1);
}
}
- cb->mIntroduced = 2;
LOG_NO("IMMD SERVICE IS DOWN, HYDRA IS CONFIGURED =>
UNREGISTERING IMMND form MDS");
immnd_mds_unregister(cb);
/* Discard local clients ... */
diff --git a/osaf/services/saf/immsv/immnd/immnd_proc.c
b/osaf/services/saf/immsv/immnd/immnd_proc.c
--- a/osaf/services/saf/immsv/immnd/immnd_proc.c
+++ b/osaf/services/saf/immsv/immnd/immnd_proc.c
@@ -872,7 +872,7 @@ void immnd_abortSync(IMMND_CB *cb)
memset(&send_evt, '\0', sizeof(IMMSV_EVT));
TRACE_ENTER();
TRACE("ME:%u RE:%u", cb->mMyEpoch, cb->mRulingEpoch);
- osafassert(cb->mIsCoord);
+ osafassert(cb->mIsCoord || (cb->mScAbsenceAllowed && cb->mIntroduced ==
2 ));
cb->mPendSync = 0;
if(cb->mSyncFinalizing) {
cb->mSyncFinalizing = 0x0;
@@ -898,6 +898,12 @@ void immnd_abortSync(IMMND_CB *cb)
LOG_ER("immnd_abortSync not clean on epoch: RE:%u ME:%u",
cb->mRulingEpoch, cb->mMyEpoch);
}
+ /* Skip broadcasting sync abort msg when SC are absent */
+ if (cb->mScAbsenceAllowed && cb->mIntroduced == 2) {
+ TRACE_LEAVE();
+ return;
+ }
+
while (!immnd_is_immd_up(cb) && (retryCount++ < 20)) {
LOG_WA("Coord blocked in sending ABORT_SYNC because IMMD is
DOWN %u", retryCount);
sleep(1);
@@ -1319,6 +1325,10 @@ void immnd_proc_global_abort_ccb(IMMND_C
static SaBoolT immnd_ccbsTerminated(IMMND_CB *cb, SaUint32T duration, SaBoolT*
pbeImmndDeadlock)
{
+ if (cb->mIntroduced == 2) {
+ /* Return true to enter phase 2 or phase 3 of SYNC_SERVER */
+ return SA_TRUE;
+ }
osafassert(cb->mIsCoord);
osafassert(pbeImmndDeadlock);
(*pbeImmndDeadlock) = SA_FALSE;
@@ -1999,9 +2009,14 @@ uint32_t immnd_proc_server(uint32_t *tim
/*Phase 2 */
if (cb->syncPid <= 0) {
/*Fork sync-agent */
- cb->syncPid = immnd_forkSync(cb);
+ /* When SCs are absent, skip the fork so that the no-sync-process path below runs (meant to trigger abortSync) */
+ if (cb->mIntroduced != 2) {
+ cb->syncPid = immnd_forkSync(cb);
+ }
if (cb->syncPid <= 0) {
- LOG_ER("Failed to fork sync process");
+ if (cb->mIntroduced != 2) {
+ LOG_ER("Failed to fork sync
process");
+ }
cb->syncPid = 0;
cb->mStep = 0;
cb->mJobStart = now;
@@ -2063,6 +2078,19 @@ uint32_t immnd_proc_server(uint32_t *tim
if(cb->mIntroduced == 2) {
immnd_introduceMe(cb);
+ if(cb->pbePid > 0) {
+     /* Reap the PBE child if it has already exited; otherwise SIGKILL it.
+      * Signal BEFORE clearing pbePid: kill(0, sig) targets our whole process group. */
+     int status = 0;
+     if (waitpid(cb->pbePid, &status, WNOHANG) > 0) {
+         cb->pbePid = 0;
+         LOG_NO("PBE has terminated due to SC absence");
+     } else {
+         LOG_WA("SC were absent and PBE appears hung, sending SIGKILL");
+         kill(cb->pbePid, SIGKILL);
+         cb->pbePid = 0; /* NOTE(review): killed child is not reaped here - confirm it is reaped elsewhere */
+     }
+ }
break;
}
------------------------------------------------------------------------------
Transform Data into Opportunity.
Accelerate data analysis in your applications with
Intel Data Analytics Acceleration Library.
Click to learn more.
http://makebettercode.com/inteldaal-eval
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel