Hi Hung,
Reviewed and tested the patch.
Ack.
/Neel.
On Tuesday 08 March 2016 10:53 AM, Hung Nguyen wrote:
> osaf/services/saf/immsv/immnd/immnd_evt.c | 37
> ++++++++++++++++++++---------
> osaf/services/saf/immsv/immnd/immnd_proc.c | 34 +++++++++++++++++++++++++--
> 2 files changed, 56 insertions(+), 15 deletions(-)
>
>
> Set 'mIsCoord' to false when headless to prevent the coordinator from restarting.
> Also handle the cases where headless occurs before or during sync.
>
> diff --git a/osaf/services/saf/immsv/immnd/immnd_evt.c
> b/osaf/services/saf/immsv/immnd/immnd_evt.c
> --- a/osaf/services/saf/immsv/immnd/immnd_evt.c
> +++ b/osaf/services/saf/immsv/immnd/immnd_evt.c
> @@ -10194,24 +10194,38 @@ static uint32_t immnd_evt_proc_mds_evt(I
> }
> exit(1);
> } else { /* SC ABSENCE ALLOWED */
> + cb->mIntroduced = 2;
> LOG_WA("SC Absence IS allowed:%u IMMD service is DOWN",
> cb->mScAbsenceAllowed);
> if(cb->mIsCoord) {
> - /* Note that normally the coord will reside at
> SCs so this branch will
> - only be relevant if REPEATED toal scAbsence
> occurs. After SC absence
> - and subsequent return of SC, the coord will
> be elected at a payload.
> - That coord will be active untill restart of
> that payload..
> - unless we add functionality for the payload
> coord to restart after
> - a few minutes .. ?
> - */
> - LOG_WA("This IMMND coord has to exit allowing
> restarted IMMD to select new coord");
> - if(cb->mState < IMM_SERVER_SYNC_SERVER) {
> - immnd_ackToNid(NCSCC_RC_FAILURE);
> + cb->mIsCoord = false;
> +
> + if (cb->mSyncRequested) {
> + /* Just got sync requested from IMMD,
> nothing happened yet */
> + cb->mSyncRequested = false;
> +
> + } else if (cb->mState == IMM_SERVER_SYNC_SERVER
> && cb->mPendSync) {
> + /* Sent out sync-start msg but sync
> didn't start yet, revert the state to IMM_SERVER_READY */
> + cb->mPendSync = false;
> + cb->mState = IMM_SERVER_READY;
> + LOG_NO("SERVER STATE:
> IMM_SERVER_SYNC_SERVER --> IMM_SERVER_READY");
> +
> + } else if (cb->mState == IMM_SERVER_SYNC_SERVER
> && (cb->syncPid > 0)) {
> + /* Sync started, kill sync process to
> trigger sync abort in immnd_proc_server() */
> + osafassert(!cb->mPendSync);
> + kill(cb->syncPid, SIGTERM);
> }
> - exit(1);
> +
> } else if(cb->mState <= IMM_SERVER_LOADING_PENDING) {
> /* Reset state in payloads that had not joined.
> No need to restart. */
> LOG_IN("Resetting IMMND state from %u to
> IMM_SERVER_ANONYMOUS", cb->mState);
> cb->mState = IMM_SERVER_ANONYMOUS;
> +
> + } else if (cb->mState == IMM_SERVER_READY &&
> immModel_immNotWritable(cb)) {
> + /* This is the SC-absence-allowed case where IMMD is
> down and
> + a sync is in progress. Veteran nodes other
> than the syncing node
> + have to change the node state from
> NODE_R_AVAILABLE to NODE_FULLY_AVAILABLE. */
> + immnd_abortSync(cb);
> +
> } else if(cb->mState < IMM_SERVER_READY) {
> LOG_WA("IMMND was being synced or loaded (%u),
> has to restart", cb->mState);
> if(cb->mState < IMM_SERVER_SYNC_SERVER) {
> @@ -10220,7 +10234,6 @@ static uint32_t immnd_evt_proc_mds_evt(I
> exit(1);
> }
> }
> - cb->mIntroduced = 2;
> LOG_NO("IMMD SERVICE IS DOWN, HYDRA IS CONFIGURED =>
> UNREGISTERING IMMND form MDS");
> immnd_mds_unregister(cb);
> /* Discard local clients ... */
> diff --git a/osaf/services/saf/immsv/immnd/immnd_proc.c
> b/osaf/services/saf/immsv/immnd/immnd_proc.c
> --- a/osaf/services/saf/immsv/immnd/immnd_proc.c
> +++ b/osaf/services/saf/immsv/immnd/immnd_proc.c
> @@ -872,7 +872,7 @@ void immnd_abortSync(IMMND_CB *cb)
> memset(&send_evt, '\0', sizeof(IMMSV_EVT));
> TRACE_ENTER();
> TRACE("ME:%u RE:%u", cb->mMyEpoch, cb->mRulingEpoch);
> - osafassert(cb->mIsCoord);
> + osafassert(cb->mIsCoord || (cb->mScAbsenceAllowed && cb->mIntroduced ==
> 2 ));
> cb->mPendSync = 0;
> if(cb->mSyncFinalizing) {
> cb->mSyncFinalizing = 0x0;
> @@ -898,6 +898,12 @@ void immnd_abortSync(IMMND_CB *cb)
> LOG_ER("immnd_abortSync not clean on epoch: RE:%u ME:%u",
> cb->mRulingEpoch, cb->mMyEpoch);
> }
>
> + /* Skip broadcasting sync abort msg when SC are absent */
> + if (cb->mScAbsenceAllowed && cb->mIntroduced == 2) {
> + TRACE_LEAVE();
> + return;
> + }
> +
> while (!immnd_is_immd_up(cb) && (retryCount++ < 20)) {
> LOG_WA("Coord blocked in sending ABORT_SYNC because IMMD is
> DOWN %u", retryCount);
> sleep(1);
> @@ -1319,6 +1325,10 @@ void immnd_proc_global_abort_ccb(IMMND_C
>
> static SaBoolT immnd_ccbsTerminated(IMMND_CB *cb, SaUint32T duration,
> SaBoolT* pbeImmndDeadlock)
> {
> + if (cb->mIntroduced == 2) {
> + /* Return true to enter phase 2 or phase 3 of SYNC_SERVER */
> + return SA_TRUE;
> + }
> osafassert(cb->mIsCoord);
> osafassert(pbeImmndDeadlock);
> (*pbeImmndDeadlock) = SA_FALSE;
> @@ -1999,9 +2009,14 @@ uint32_t immnd_proc_server(uint32_t *tim
> /*Phase 2 */
> if (cb->syncPid <= 0) {
> /*Fork sync-agent */
> - cb->syncPid = immnd_forkSync(cb);
> + /* When SC are absent, we don't fork to trigger
> abortSync */
> + if (cb->mIntroduced != 2) {
> + cb->syncPid = immnd_forkSync(cb);
> + }
> if (cb->syncPid <= 0) {
> - LOG_ER("Failed to fork sync process");
> + if (cb->mIntroduced != 2) {
> + LOG_ER("Failed to fork sync
> process");
> + }
> cb->syncPid = 0;
> cb->mStep = 0;
> cb->mJobStart = now;
> @@ -2063,6 +2078,19 @@ uint32_t immnd_proc_server(uint32_t *tim
>
> if(cb->mIntroduced == 2) {
> immnd_introduceMe(cb);
> + if(cb->pbePid > 0) {
> + /* Check if pbe process is terminated.
> + * Will send SIGKILL if it's not terminated. */
> + int status = 0;
> + if (waitpid(cb->pbePid, &status, WNOHANG) > 0) {
> + cb->pbePid = 0;
> + LOG_NO("PBE has terminated due to SC
> absence");
> + } else {
> + cb->pbePid = 0;
> + LOG_WA("SC were absent and PBE appears
> hung, sending SIGKILL");
> + kill(cb->pbePid, SIGKILL);
> + }
> + }
> break;
> }
>
------------------------------------------------------------------------------
Transform Data into Opportunity.
Accelerate data analysis in your applications with
Intel Data Analytics Acceleration Library.
Click to learn more.
http://pubads.g.doubleclick.net/gampad/clk?id=278785351&iu=/4140
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel