Hi Hung, Reviewed and tested the patch. Following are the comments:
1. This patch has to go in a separate defect,After #1625 is pushed. 2. re-publish the patch with the comments when the defect is opened 3. Comments inline. Thanks, Neel. On Thursday 25 February 2016 05:29 PM, Hung Nguyen wrote: > osaf/services/saf/immsv/immnd/immnd_evt.c | 13 +------ > osaf/services/saf/immsv/immnd/immnd_init.h | 1 + > osaf/services/saf/immsv/immnd/immnd_proc.c | 55 > ++++++++++++++++++++++++++++- > 3 files changed, 55 insertions(+), 14 deletions(-) > > > Set 'mIsCoord' to false when headless to avoid coordinator from restarting. > Also handle the cases when headless occurs before/during sync. > > diff --git a/osaf/services/saf/immsv/immnd/immnd_evt.c > b/osaf/services/saf/immsv/immnd/immnd_evt.c > --- a/osaf/services/saf/immsv/immnd/immnd_evt.c > +++ b/osaf/services/saf/immsv/immnd/immnd_evt.c > @@ -10196,18 +10196,7 @@ static uint32_t immnd_evt_proc_mds_evt(I > } else { /* SC ABSENCE ALLOWED */ > LOG_WA("SC Absence IS allowed:%u IMMD service is DOWN", > cb->mScAbsenceAllowed); > if(cb->mIsCoord) { > - /* Note that normally the coord will reside at > SCs so this branch will > - only be relevant if REPEATED toal scAbsence > occurs. After SC absence > - and subsequent return of SC, the coord will > be elected at a payload. > - That coord will be active untill restart of > that payload.. > - unless we add functionality for the payload > coord to restart after > - a few minutes .. ? > - */ > - LOG_WA("This IMMND coord has to exit allowing > restarted IMMD to select new coord"); > - if(cb->mState < IMM_SERVER_SYNC_SERVER) { > - immnd_ackToNid(NCSCC_RC_FAILURE); > - } > - exit(1); > + immnd_proc_remove_coordinator_role(cb); No, need of new function, add the code here. > } else if(cb->mState <= IMM_SERVER_LOADING_PENDING) { > /* Reset state in payloads that had not joined. > No need to restart. */ > LOG_IN("Resetting IMMND state from %u to > IMM_SERVER_ANONYMOUS", cb->mState); > diff --git a/osaf/services/saf/immsv/immnd/immnd_init.h > b/osaf/services/saf/immsv/immnd/immnd_init.h > --- a/osaf/services/saf/immsv/immnd/immnd_init.h > +++ b/osaf/services/saf/immsv/immnd/immnd_init.h > @@ -40,6 +40,7 @@ extern IMMND_CB *immnd_cb; > /* file : - immnd_proc.c */ > > void immnd_proc_discard_other_nodes(IMMND_CB *cb); > +void immnd_proc_remove_coordinator_role(IMMND_CB *cb); > > void immnd_proc_imma_down(IMMND_CB *cb, MDS_DEST dest, NCSMDS_SVC_ID sv_id); > uint32_t immnd_proc_imma_discard_connection(IMMND_CB *cb, > IMMND_IMM_CLIENT_NODE *cl_node, bool scAbsenceAllowed); > diff --git a/osaf/services/saf/immsv/immnd/immnd_proc.c > b/osaf/services/saf/immsv/immnd/immnd_proc.c > --- a/osaf/services/saf/immsv/immnd/immnd_proc.c > +++ b/osaf/services/saf/immsv/immnd/immnd_proc.c > @@ -872,7 +872,7 @@ void immnd_abortSync(IMMND_CB *cb) > memset(&send_evt, '\0', sizeof(IMMSV_EVT)); > TRACE_ENTER(); > TRACE("ME:%u RE:%u", cb->mMyEpoch, cb->mRulingEpoch); > - osafassert(cb->mIsCoord); > + osafassert(cb->mIsCoord || cb->mIntroduced == 2); osafassert(cb->mIsCoord || (cb->mScAbsenceAllowed && cb->mIntroduced == 2 ); > cb->mPendSync = 0; > if(cb->mSyncFinalizing) { > cb->mSyncFinalizing = 0x0; > @@ -898,6 +898,12 @@ void immnd_abortSync(IMMND_CB *cb) > LOG_ER("immnd_abortSync not clean on epoch: RE:%u ME:%u", > cb->mRulingEpoch, cb->mMyEpoch); > } > > + /* Skip broadcasting sync abort msg when SC are absent */ > + if (cb->mIntroduced == 2) { (cb->mScAbsenceAllowed && cb->mIntroduced == 2 ) > + TRACE_LEAVE(); > + return; > + } > + > while (!immnd_is_immd_up(cb) && (retryCount++ < 20)) { > LOG_WA("Coord blocked in sending ABORT_SYNC because IMMD is > DOWN %u", retryCount); > sleep(1); > @@ -1319,6 +1325,10 @@ void immnd_proc_global_abort_ccb(IMMND_C > > static SaBoolT immnd_ccbsTerminated(IMMND_CB *cb, SaUint32T duration, > SaBoolT* pbeImmndDeadlock) > { > + if (cb->mIntroduced == 2) { > + /* Return true to enter phase 2 or phase 3 of SYNC_SERVER */ > + return SA_TRUE; > + } > osafassert(cb->mIsCoord); > osafassert(pbeImmndDeadlock); > (*pbeImmndDeadlock) = SA_FALSE; > @@ -1999,7 +2009,10 @@ uint32_t immnd_proc_server(uint32_t *tim > /*Phase 2 */ > if (cb->syncPid <= 0) { > /*Fork sync-agent */ > - cb->syncPid = immnd_forkSync(cb); > + /* When SC are absent, we don't fork to trigger > abortSync */ > + if (cb->mIntroduced != 2) { > + cb->syncPid = immnd_forkSync(cb); > + } > if (cb->syncPid <= 0) { For the ER logging also ad the if condition if(cb->mIntroduced == 2 ) > LOG_ER("Failed to fork sync process"); > cb->syncPid = 0; > @@ -2063,6 +2076,19 @@ uint32_t immnd_proc_server(uint32_t *tim > > if(cb->mIntroduced == 2) { > immnd_introduceMe(cb); > + if(cb->pbePid > 0) { > + /* Check if pbe process is terminated. > + * Will send SIGKILL if it's not terminated. */ > + int status = 0; > + if (waitpid(cb->pbePid, &status, WNOHANG) > 0) { > + cb->pbePid = 0; > + LOG_NO("PBE has terminated due to SC > absence"); > + } else if (++cb->mPbeKills > 10) { /* More than > 10 senconds */ > + cb->pbePid = 0; > + LOG_WA("SC were absent and PBE appears > hung, sending SIGKILL"); > + kill(cb->pbePid, SIGKILL); > + } > + } > break; > } The above else if is not required. When the IMMD is down PBE will automatically exit and present in defunct. if (waitpid(cb->pbePid, &status, WNOHANG) > 0) { cb->pbePid = 0; LOG_NO("PBE has terminated due to SC absence"); kill(cb->pbePid, SIGKILL); } > > @@ -2333,3 +2359,28 @@ void immnd_proc_discard_other_nodes(IMMN > cb->mPbeVeteran = SA_FALSE; > TRACE_LEAVE(); > } > + > +void immnd_proc_remove_coordinator_role(IMMND_CB *cb) > +{ > + TRACE_ENTER(); > + > + cb->mIsCoord = false; > + > + if (cb->mSyncRequested) { > + /* Just got sync requested from IMMD, nothing happened yet */ > + cb->mSyncRequested = false; > + > + } else if (cb->mState == IMM_SERVER_SYNC_SERVER && cb->mPendSync) { > + /* Sent out sync-start msg but sync didn't start yet, revert > the state to IMM_SERVER_READY */ > + cb->mPendSync = false; > + cb->mState = IMM_SERVER_READY; > + LOG_NO("SERVER STATE: IMM_SERVER_SYNC_SERVER --> > IMM_SERVER_READY"); The above else if is not required, as the above changes will be taken care at immnd_proc_server > + > + } else if (cb->mState == IMM_SERVER_SYNC_SERVER && (cb->syncPid > 0)) { > + /* Sync started, kill sync process to trigger sync abort in > immnd_proc_server() */ > + osafassert(!cb->mPendSync); > + kill(cb->syncPid, SIGTERM); > + } > + > + TRACE_LEAVE(); > +} when the sync is in progress, additional check is required, when the non-cordinator IMMND will be R_AVAILABLE. This has to be changed back to W_AVAILABLE. Add an if condition; else if(cb->mState == IMM_SERVER_READY && immModel_immNotWritable(cb)) { /* This SC absence allowed case, when IMMD is down and The sync is in progress. Veteran nodes Other than the syncing node, has to change the node state from NODE_R_AVAILABLE to NODE_FULLY_AVAILABLE*/ immnd_abortSync(cb); } The whole function can be added as inline, may not be as separate function. ------------------------------------------------------------------------------ Site24x7 APM Insight: Get Deep Visibility into Application Performance APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month Monitor end-to-end web transactions and take corrective actions now Troubleshoot faster and improve end-user experience. Signup Now! http://pubads.g.doubleclick.net/gampad/clk?id=272487151&iu=/4140 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel