osaf/services/saf/immsv/immnd/immnd_evt.c | 13 +------ osaf/services/saf/immsv/immnd/immnd_init.h | 1 + osaf/services/saf/immsv/immnd/immnd_proc.c | 55 ++++++++++++++++++++++++++++- 3 files changed, 55 insertions(+), 14 deletions(-)
Set 'mIsCoord' to false when headless to prevent the coordinator from restarting. Also handle the cases where headless occurs before/during sync. diff --git a/osaf/services/saf/immsv/immnd/immnd_evt.c b/osaf/services/saf/immsv/immnd/immnd_evt.c --- a/osaf/services/saf/immsv/immnd/immnd_evt.c +++ b/osaf/services/saf/immsv/immnd/immnd_evt.c @@ -10196,18 +10196,7 @@ static uint32_t immnd_evt_proc_mds_evt(I } else { /* SC ABSENCE ALLOWED */ LOG_WA("SC Absence IS allowed:%u IMMD service is DOWN", cb->mScAbsenceAllowed); if(cb->mIsCoord) { - /* Note that normally the coord will reside at SCs so this branch will - only be relevant if REPEATED toal scAbsence occurs. After SC absence - and subsequent return of SC, the coord will be elected at a payload. - That coord will be active untill restart of that payload.. - unless we add functionality for the payload coord to restart after - a few minutes .. ? - */ - LOG_WA("This IMMND coord has to exit allowing restarted IMMD to select new coord"); - if(cb->mState < IMM_SERVER_SYNC_SERVER) { - immnd_ackToNid(NCSCC_RC_FAILURE); - } - exit(1); + immnd_proc_remove_coordinator_role(cb); } else if(cb->mState <= IMM_SERVER_LOADING_PENDING) { /* Reset state in payloads that had not joined. No need to restart. 
*/ LOG_IN("Resetting IMMND state from %u to IMM_SERVER_ANONYMOUS", cb->mState); diff --git a/osaf/services/saf/immsv/immnd/immnd_init.h b/osaf/services/saf/immsv/immnd/immnd_init.h --- a/osaf/services/saf/immsv/immnd/immnd_init.h +++ b/osaf/services/saf/immsv/immnd/immnd_init.h @@ -40,6 +40,7 @@ extern IMMND_CB *immnd_cb; /* file : - immnd_proc.c */ void immnd_proc_discard_other_nodes(IMMND_CB *cb); +void immnd_proc_remove_coordinator_role(IMMND_CB *cb); void immnd_proc_imma_down(IMMND_CB *cb, MDS_DEST dest, NCSMDS_SVC_ID sv_id); uint32_t immnd_proc_imma_discard_connection(IMMND_CB *cb, IMMND_IMM_CLIENT_NODE *cl_node, bool scAbsenceAllowed); diff --git a/osaf/services/saf/immsv/immnd/immnd_proc.c b/osaf/services/saf/immsv/immnd/immnd_proc.c --- a/osaf/services/saf/immsv/immnd/immnd_proc.c +++ b/osaf/services/saf/immsv/immnd/immnd_proc.c @@ -872,7 +872,7 @@ void immnd_abortSync(IMMND_CB *cb) memset(&send_evt, '\0', sizeof(IMMSV_EVT)); TRACE_ENTER(); TRACE("ME:%u RE:%u", cb->mMyEpoch, cb->mRulingEpoch); - osafassert(cb->mIsCoord); + osafassert(cb->mIsCoord || cb->mIntroduced == 2); cb->mPendSync = 0; if(cb->mSyncFinalizing) { cb->mSyncFinalizing = 0x0; @@ -898,6 +898,12 @@ void immnd_abortSync(IMMND_CB *cb) LOG_ER("immnd_abortSync not clean on epoch: RE:%u ME:%u", cb->mRulingEpoch, cb->mMyEpoch); } + /* Skip broadcasting sync abort msg when SC are absent */ + if (cb->mIntroduced == 2) { + TRACE_LEAVE(); + return; + } + while (!immnd_is_immd_up(cb) && (retryCount++ < 20)) { LOG_WA("Coord blocked in sending ABORT_SYNC because IMMD is DOWN %u", retryCount); sleep(1); @@ -1319,6 +1325,10 @@ void immnd_proc_global_abort_ccb(IMMND_C static SaBoolT immnd_ccbsTerminated(IMMND_CB *cb, SaUint32T duration, SaBoolT* pbeImmndDeadlock) { + if (cb->mIntroduced == 2) { + /* Return true to enter phase 2 or phase 3 of SYNC_SERVER */ + return SA_TRUE; + } osafassert(cb->mIsCoord); osafassert(pbeImmndDeadlock); (*pbeImmndDeadlock) = SA_FALSE; @@ -1999,7 +2009,10 @@ uint32_t 
immnd_proc_server(uint32_t *tim /*Phase 2 */ if (cb->syncPid <= 0) { /*Fork sync-agent */ - cb->syncPid = immnd_forkSync(cb); + /* When SC are absent, we don't fork to trigger abortSync */ + if (cb->mIntroduced != 2) { + cb->syncPid = immnd_forkSync(cb); + } if (cb->syncPid <= 0) { LOG_ER("Failed to fork sync process"); cb->syncPid = 0; @@ -2063,6 +2076,19 @@ uint32_t immnd_proc_server(uint32_t *tim if(cb->mIntroduced == 2) { immnd_introduceMe(cb); + if(cb->pbePid > 0) { + /* Check if pbe process is terminated. + * Will send SIGKILL if it's not terminated. */ + int status = 0; + if (waitpid(cb->pbePid, &status, WNOHANG) > 0) { + cb->pbePid = 0; + LOG_NO("PBE has terminated due to SC absence"); + } else if (++cb->mPbeKills > 10) { /* More than 10 seconds */ + cb->pbePid = 0; /* NOTE(review): pbePid is cleared before the kill() below, so kill() is passed pid 0 (the caller's process group) instead of the PBE pid; signal first, then clear */ + LOG_WA("SC were absent and PBE appears hung, sending SIGKILL"); + kill(cb->pbePid, SIGKILL); + } + } break; } @@ -2333,3 +2359,28 @@ void immnd_proc_discard_other_nodes(IMMN cb->mPbeVeteran = SA_FALSE; TRACE_LEAVE(); } + +void immnd_proc_remove_coordinator_role(IMMND_CB *cb) +{ + TRACE_ENTER(); + + cb->mIsCoord = false; + + if (cb->mSyncRequested) { + /* Just got sync requested from IMMD, nothing happened yet */ + cb->mSyncRequested = false; + + } else if (cb->mState == IMM_SERVER_SYNC_SERVER && cb->mPendSync) { + /* Sent out sync-start msg but sync didn't start yet, revert the state to IMM_SERVER_READY */ + cb->mPendSync = false; + cb->mState = IMM_SERVER_READY; + LOG_NO("SERVER STATE: IMM_SERVER_SYNC_SERVER --> IMM_SERVER_READY"); + + } else if (cb->mState == IMM_SERVER_SYNC_SERVER && (cb->syncPid > 0)) { + /* Sync started, kill sync process to trigger sync abort in immnd_proc_server() */ + osafassert(!cb->mPendSync); + kill(cb->syncPid, SIGTERM); + } + + TRACE_LEAVE(); +} ------------------------------------------------------------------------------ Site24x7 APM Insight: Get Deep Visibility into Application Performance APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month 
Monitor end-to-end web transactions and take corrective actions now Troubleshoot faster and improve end-user experience. Signup Now! http://pubads.g.doubleclick.net/gampad/clk?id=272487151&iu=/4140 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel