osaf/services/saf/immsv/immnd/immnd_evt.c  |  13 +------
 osaf/services/saf/immsv/immnd/immnd_init.h |   1 +
 osaf/services/saf/immsv/immnd/immnd_proc.c |  55 ++++++++++++++++++++++++++++-
 3 files changed, 55 insertions(+), 14 deletions(-)


Set 'mIsCoord' to false when headless to avoid coordinator from restarting.
Also handle the cases when headless occurs before/during sync.

diff --git a/osaf/services/saf/immsv/immnd/immnd_evt.c 
b/osaf/services/saf/immsv/immnd/immnd_evt.c
--- a/osaf/services/saf/immsv/immnd/immnd_evt.c
+++ b/osaf/services/saf/immsv/immnd/immnd_evt.c
@@ -10196,18 +10196,7 @@ static uint32_t immnd_evt_proc_mds_evt(I
                } else { /* SC ABSENCE ALLOWED */
                        LOG_WA("SC Absence IS allowed:%u IMMD service is DOWN", 
cb->mScAbsenceAllowed);
                        if(cb->mIsCoord) {
-                               /* Note that normally the coord will reside at 
SCs so this branch will
-                                  only be relevant if REPEATED toal scAbsence 
occurs. After SC absence
-                                  and subsequent return of SC, the coord will 
be elected at a payload.
-                                  That coord will be active untill restart of 
that payload..
-                                  unless we add functionality for the payload 
coord to restart after
-                                  a few minutes .. ?
-                               */
-                               LOG_WA("This IMMND coord has to exit allowing 
restarted IMMD to select new coord");
-                               if(cb->mState < IMM_SERVER_SYNC_SERVER) {
-                                       immnd_ackToNid(NCSCC_RC_FAILURE);
-                               }
-                               exit(1);
+                               immnd_proc_remove_coordinator_role(cb);
                        } else if(cb->mState <= IMM_SERVER_LOADING_PENDING) {
                                /* Reset state in payloads that had not joined. 
No need to restart. */
                                LOG_IN("Resetting IMMND state from %u to 
IMM_SERVER_ANONYMOUS", cb->mState);
diff --git a/osaf/services/saf/immsv/immnd/immnd_init.h 
b/osaf/services/saf/immsv/immnd/immnd_init.h
--- a/osaf/services/saf/immsv/immnd/immnd_init.h
+++ b/osaf/services/saf/immsv/immnd/immnd_init.h
@@ -40,6 +40,7 @@ extern IMMND_CB *immnd_cb;
 /* file : -  immnd_proc.c */
 
 void immnd_proc_discard_other_nodes(IMMND_CB *cb);
+void immnd_proc_remove_coordinator_role(IMMND_CB *cb);
 
 void immnd_proc_imma_down(IMMND_CB *cb, MDS_DEST dest, NCSMDS_SVC_ID sv_id);
 uint32_t immnd_proc_imma_discard_connection(IMMND_CB *cb, 
IMMND_IMM_CLIENT_NODE *cl_node, bool scAbsenceAllowed);
diff --git a/osaf/services/saf/immsv/immnd/immnd_proc.c 
b/osaf/services/saf/immsv/immnd/immnd_proc.c
--- a/osaf/services/saf/immsv/immnd/immnd_proc.c
+++ b/osaf/services/saf/immsv/immnd/immnd_proc.c
@@ -872,7 +872,7 @@ void immnd_abortSync(IMMND_CB *cb)
        memset(&send_evt, '\0', sizeof(IMMSV_EVT));
        TRACE_ENTER();
        TRACE("ME:%u RE:%u", cb->mMyEpoch, cb->mRulingEpoch);
-       osafassert(cb->mIsCoord);
+       osafassert(cb->mIsCoord || cb->mIntroduced == 2);
        cb->mPendSync = 0;
        if(cb->mSyncFinalizing) {
                cb->mSyncFinalizing = 0x0;
@@ -898,6 +898,12 @@ void immnd_abortSync(IMMND_CB *cb)
                LOG_ER("immnd_abortSync not clean on epoch: RE:%u ME:%u", 
cb->mRulingEpoch, cb->mMyEpoch);
        }
 
+       /* Skip broadcasting sync abort msg when SC are absent */
+       if (cb->mIntroduced == 2) {
+               TRACE_LEAVE();
+               return;
+       }
+
        while (!immnd_is_immd_up(cb) && (retryCount++ < 20)) {
                LOG_WA("Coord blocked in sending ABORT_SYNC because IMMD is 
DOWN %u", retryCount);
                sleep(1);
@@ -1319,6 +1325,10 @@ void immnd_proc_global_abort_ccb(IMMND_C
 
 static SaBoolT immnd_ccbsTerminated(IMMND_CB *cb, SaUint32T duration, SaBoolT* 
pbeImmndDeadlock)
 {
+       if (cb->mIntroduced == 2) {
+               /* Return true to enter phase 2 or phase 3 of SYNC_SERVER */
+               return SA_TRUE;
+       }
        osafassert(cb->mIsCoord);
        osafassert(pbeImmndDeadlock);
        (*pbeImmndDeadlock) = SA_FALSE;
@@ -1999,7 +2009,10 @@ uint32_t immnd_proc_server(uint32_t *tim
                        /*Phase 2 */
                        if (cb->syncPid <= 0) {
                                /*Fork sync-agent */
-                               cb->syncPid = immnd_forkSync(cb);
+                               /* When SC are absent, we don't fork to trigger 
abortSync */
+                               if (cb->mIntroduced != 2) {
+                                       cb->syncPid = immnd_forkSync(cb);
+                               }
                                if (cb->syncPid <= 0) {
                                        LOG_ER("Failed to fork sync process");
                                        cb->syncPid = 0;
@@ -2063,6 +2076,19 @@ uint32_t immnd_proc_server(uint32_t *tim
 
                if(cb->mIntroduced == 2) {
                        immnd_introduceMe(cb);
+                       if(cb->pbePid > 0) {
+                               /* Check if pbe process is terminated.
+                                * Will send SIGKILL if it's not terminated. */
+                               int status = 0;
+                               if (waitpid(cb->pbePid, &status, WNOHANG) > 0) {
+                                       cb->pbePid = 0;
+                                       LOG_NO("PBE has terminated due to SC 
absence");
+                               } else if (++cb->mPbeKills > 10) { /* More than 
10 senconds */
+                                       cb->pbePid = 0;
+                                       LOG_WA("SC were absent and PBE appears 
hung, sending SIGKILL");
+                                       kill(cb->pbePid, SIGKILL);
+                               }
+                       }
                        break;
                }
 
@@ -2333,3 +2359,28 @@ void immnd_proc_discard_other_nodes(IMMN
        cb->mPbeVeteran = SA_FALSE;
        TRACE_LEAVE();
 }
+
+void immnd_proc_remove_coordinator_role(IMMND_CB *cb)
+{
+       TRACE_ENTER();
+
+       cb->mIsCoord = false;
+
+       if (cb->mSyncRequested) {
+               /* Just got sync requested from IMMD, nothing happened yet */
+               cb->mSyncRequested = false;
+
+       } else if (cb->mState == IMM_SERVER_SYNC_SERVER && cb->mPendSync) {
+               /* Sent out sync-start msg but sync didn't start yet, revert 
the state to IMM_SERVER_READY */
+               cb->mPendSync = false;
+               cb->mState = IMM_SERVER_READY;
+               LOG_NO("SERVER STATE: IMM_SERVER_SYNC_SERVER --> 
IMM_SERVER_READY");
+
+       } else if (cb->mState == IMM_SERVER_SYNC_SERVER && (cb->syncPid > 0)) {
+               /* Sync started, kill sync process to trigger sync abort in 
immnd_proc_server() */
+               osafassert(!cb->mPendSync);
+               kill(cb->syncPid, SIGTERM);
+       }
+
+       TRACE_LEAVE();
+}

------------------------------------------------------------------------------
Site24x7 APM Insight: Get Deep Visibility into Application Performance
APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month
Monitor end-to-end web transactions and take corrective actions now
Troubleshoot faster and improve end-user experience. Signup Now!
http://pubads.g.doubleclick.net/gampad/clk?id=272487151&iu=/4140
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to