osaf/services/saf/immsv/immnd/ImmModel.cc |  120 ++++++++++++++++-------------
 osaf/services/saf/immsv/immnd/immnd_evt.c |    6 +-
 2 files changed, 72 insertions(+), 54 deletions(-)


This patch delays setting the node state and deleting dead admin-owners until
the sync-finalize message has been sent successfully.
If the sync server fails to send it, the sync is aborted (by broadcasting a
sync-abort message).

On the sync server and on veteran nodes, admin-owners that are in demise are
now also deleted when a sync is aborted.
This prevents IMM from being left with zombie admin-owners after an aborted sync.

diff --git a/osaf/services/saf/immsv/immnd/ImmModel.cc 
b/osaf/services/saf/immsv/immnd/ImmModel.cc
--- a/osaf/services/saf/immsv/immnd/ImmModel.cc
+++ b/osaf/services/saf/immsv/immnd/ImmModel.cc
@@ -2731,6 +2731,7 @@ ImmModel::pbePrtoPurgeMutations(unsigned
 void
 ImmModel::abortSync()
 {
+    AdminOwnerVector::iterator i;
     switch(sImmNodeState){ 
         
         case IMM_NODE_R_AVAILABLE:
@@ -2738,6 +2739,23 @@ ImmModel::abortSync()
             sNodesDeadDuringSync.clear(); 
             sImplsDeadDuringSync.clear(); 
 
+            /* Normally, admin-owners which are dead during sync
+             * will be cleared in finalizeSync().
+             * When sync is aborted, we don't enter finalizeSync().
+             * So we have to get rid of them here.
+             * This is done in sync server and veteran nodes. */
+            for (i = sOwnerVector.begin(); i != sOwnerVector.end();) {
+                if ((*i)->mDying) {
+                    LOG_WA("Removing admin owner %u %s which is in demise ",
+                           (*i)->mId, (*i)->mAdminOwnerName.c_str());
+                    osafassert(adminOwnerDelete((*i)->mId, true) == SA_AIS_OK);
+                    /* Above does a lookup of admin owner again. */
+                    i = sOwnerVector.begin(); /* Restart of iteration */
+                } else {
+                    ++i;
+                }
+            }
+
             LOG_NO("NODE STATE-> IMM_NODE_FULLY_AVAILABLE (%u)", 
                 __LINE__);
             break;
@@ -18066,48 +18084,58 @@ ImmModel::finalizeSync(ImmsvOmFinalizeSy
     
     if(isCoord) {//Produce the checkpoint 
         CcbVector::iterator ccbItr;
-
-        sImmNodeState = IMM_NODE_FULLY_AVAILABLE;
-        LOG_NO("NODE STATE-> IMM_NODE_FULLY_AVAILABLE %u", __LINE__);
-        /*WARNING the controller node here goes to writable state
-          based on a NON FEVS message, directly from the sync-process. 
-          Other nodes go to writable based on the reception of the 
-          finalizeSync message over FEVS. This means that the controller
-          actually opens up for writes before having received the 
-          finalize sync message. 
-          
-          This should still be safe based on the following reasoning:
-          Any writable operation now allowed at this node will be
-          sent over fevs AFTER the finalizeSync message that this node
-          just sent. This at least is the case for messages arriving at
-          this node. So we know that ALL nodes, including this coord node,
-          will receive any mutating messages AFTER ALL nodes, including this
-          coord node, have received the finalizeSync message. This at least
-          for messages arriving from clients at this node.
-          
-          At the other nodes. Any mutating message does a local check on 
writability.
-          If currently not writable, the request is rejected locally. 
-          Shift to writability is only by finalize sync or abortSync. 
-        */
+        AdminOwnerVector::iterator i;
+
+        /* Use 'req' to check if we enter here after sending sync-finalize 
message successfully. */
+        if (!req) {
+            /* Now we can change the node state to FULLY_AVAILABLE,
+             * and clear the admin-owners which are in demise */
+
+            sImmNodeState = IMM_NODE_FULLY_AVAILABLE;
+            LOG_NO("NODE STATE-> IMM_NODE_FULLY_AVAILABLE %u", __LINE__);
+            /*WARNING the controller node here goes to writable state
+              based on a NON FEVS message, directly from the sync-process.
+              Other nodes go to writable based on the reception of the
+              finalizeSync message over FEVS. This means that the controller
+              actually opens up for writes before having received the
+              finalize sync message.
+
+              This should still be safe based on the following reasoning:
+              Any writable operation now allowed at this node will be
+              sent over fevs AFTER the finalizeSync message that this node
+              just sent. This at least is the case for messages arriving at
+              this node. So we know that ALL nodes, including this coord node,
+              will receive any mutating messages AFTER ALL nodes, including 
this
+              coord node, have received the finalizeSync message. This at least
+              for messages arriving from clients at this node.
+
+              At the other nodes. Any mutating message does a local check on 
writability.
+              If currently not writable, the request is rejected locally.
+              Shift to writability is only by finalize sync or abortSync.
+            */
+
+            for (i = sOwnerVector.begin(); i != sOwnerVector.end();) {
+                if ((*i)->mDying) {
+                    LOG_WA("Removing admin owner %u %s which is in demise ",
+                           (*i)->mId, (*i)->mAdminOwnerName.c_str());
+                    osafassert(adminOwnerDelete((*i)->mId, true) == SA_AIS_OK);
+                    /* Above does a lookup of admin owner again. */
+                    i = sOwnerVector.begin(); /* Restart of iteration */
+                } else {
+                    ++i;
+                }
+            }
+
+            /* We are not generating sync-finalize message */
+            goto done;
+        }
         
         req->lastContinuationId = sLastContinuationId;
         req->adminOwners = NULL;
-        AdminOwnerVector::iterator i;
-        
-        for(i=sOwnerVector.begin(); i!=sOwnerVector.end();) {
-            if((*i)->mDying && !((*i)->mReleaseOnFinalize)) {
-                LOG_WA("Removing admin owner %u %s (ROF==FALSE) which is in 
demise, "
-                       "BEFORE generating finalize sync message", (*i)->mId,
-                    (*i)->mAdminOwnerName.c_str());
-                osafassert(adminOwnerDelete((*i)->mId, true) == SA_AIS_OK);
-                //Above does a lookup of admin owner again.
-                i=sOwnerVector.begin();//Restart of iteration.
-            } else {
-                ++i;
-            }
-        }
-        
-        for(i=sOwnerVector.begin(); i!=sOwnerVector.end(); ++i) {
+
+        for (i = sOwnerVector.begin();
+             i != sOwnerVector.end() && (!(*i)->mDying || 
((*i)->mReleaseOnFinalize)); /* Exclude mDying with ROF==FALSE */
+             ++i) {
             ImmsvAdmoList* ai = (ImmsvAdmoList *) 
                 calloc(1, sizeof(ImmsvAdmoList));
             ai->id = (*i)->mId;
@@ -18156,20 +18184,6 @@ ImmModel::finalizeSync(ImmsvOmFinalizeSy
         LOG_IN("finalizeSync message contains %u admin-owners", 
             (unsigned int) sOwnerVector.size());
 
-        for(i=sOwnerVector.begin(); i!=sOwnerVector.end();) {
-            if((*i)->mDying) {
-                osafassert((*i)->mReleaseOnFinalize);
-                LOG_WA("Removing admin owner %u %s (ROF==TRUE) which is in 
demise, "
-                       "AFTER generating finalize sync message", (*i)->mId,
-                    (*i)->mAdminOwnerName.c_str());
-                osafassert(adminOwnerDelete((*i)->mId, true) == SA_AIS_OK);
-                //Above does a lookup of admin owner again.
-                i=sOwnerVector.begin();//Restart of iteration.
-            } else {
-                ++i;
-            }
-        }
-
         /* Done with generate Admo */
 
         req->implementers = NULL;
diff --git a/osaf/services/saf/immsv/immnd/immnd_evt.c 
b/osaf/services/saf/immsv/immnd/immnd_evt.c
--- a/osaf/services/saf/immsv/immnd/immnd_evt.c
+++ b/osaf/services/saf/immsv/immnd/immnd_evt.c
@@ -5763,10 +5763,14 @@ static uint32_t immnd_evt_proc_sync_fina
                        if (proc_rc != NCSCC_RC_SUCCESS) {
                                TRACE_2("Failed send fevs message");    /*Error 
already logged in fevs_fo */
                                err = SA_AIS_ERR_NO_RESOURCES;
+                               /* Sync will be aborted in immnd_proc_server() 
*/
+                       } else {
+                               /* Change the node state to FULLY_AVAILABLE and 
clear the admin-owners which are in demise */
+                               immModel_finalizeSync(cb, NULL, SA_TRUE, 
SA_FALSE);
+                               cb->mSyncFinalizing = 0x1;
                        }
 
                        free(tmpData);
-                       cb->mSyncFinalizing = 0x1;
                }
        } else {
                LOG_ER("Will not allow sync messages from any process except 
sync process");

------------------------------------------------------------------------------
Find and fix application performance issues faster with Applications Manager
Applications Manager provides deep performance insights into multiple tiers of
your business applications. It resolves application problems quickly and
reduces your MTTR. Get your free trial!
https://ad.doubleclick.net/ddm/clk/302982198;130105516;z
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to