Ack from me. Only code review. Not tested. /AndersBj
-----Original Message----- From: [email protected] [mailto:[email protected]] Sent: den 5 augusti 2015 12:24 To: Anders Björnerstedt; [email protected]; Zoran Milinkovic Cc: [email protected] Subject: [PATCH 1 of 1] imm:Abort the non-critical CCBs when implememnter is disconnected [#1391] osaf/services/saf/immsv/immnd/ImmModel.cc | 98 +++++++++++++++++++++++++---- osaf/services/saf/immsv/immnd/ImmModel.hh | 6 +- osaf/services/saf/immsv/immnd/immnd_evt.c | 33 +++++++++- osaf/services/saf/immsv/immnd/immnd_init.h | 6 +- osaf/services/saf/immsv/immnd/immnd_proc.c | 2 +- 5 files changed, 124 insertions(+), 21 deletions(-) When an implementer is disconnected (the case where the OI application died or restarted (#1105)) and there are non-critical CCBs associated with this implementer, those CCBs may be aborted in IMM, because the final outcome of these CCBs will be that they are aborted anyway. diff --git a/osaf/services/saf/immsv/immnd/ImmModel.cc b/osaf/services/saf/immsv/immnd/ImmModel.cc --- a/osaf/services/saf/immsv/immnd/ImmModel.cc +++ b/osaf/services/saf/immsv/immnd/ImmModel.cc @@ -1204,12 +1204,14 @@ void immModel_discardNode(IMMND_CB *cb, SaUint32T nodeId, SaUint32T* arrSize, - SaUint32T** ccbIdArr) -{ - ConnVector cv; - ConnVector::iterator cvi; + SaUint32T** ccbIdArr, + SaUint32T* globArrSize, + SaUint32T** globccbIdArr) +{ + ConnVector cv, gv; + ConnVector::iterator cvi, gvi; unsigned int ix=0; - ImmModel::instance(&cb->immModel)->discardNode(nodeId, cv); + ImmModel::instance(&cb->immModel)->discardNode(nodeId, cv, gv, + cb->mIsCoord); *arrSize = (SaUint32T) cv.size(); if(*arrSize) { *ccbIdArr = (SaUint32T *) malloc((*arrSize)* sizeof(SaUint32T)); @@ -1218,6 +1220,16 @@ immModel_discardNode(IMMND_CB *cb, } } osafassert(ix==(*arrSize)); + + *globArrSize = (SaUint32T) gv.size(); + ix=0; + if(*globArrSize) { + *globccbIdArr = (SaUint32T *) malloc((*globArrSize)* sizeof(SaUint32T)); + for(gvi = gv.begin(); gvi!=gv.end(); ++gvi, ++ix) { + (*globccbIdArr)[ix] = (*gvi); + } + } + 
osafassert(ix==(*globArrSize)); } void @@ -1599,9 +1611,26 @@ immModel_getImplementerId(IMMND_CB* cb, void immModel_discardImplementer(IMMND_CB* cb, SaUint32T implId, - SaBoolT reallyDiscard) -{ - ImmModel::instance(&cb->immModel)->discardImplementer(implId, reallyDiscard); + SaBoolT reallyDiscard, SaUint32T* globArrSize, SaUint32T** +globccbIdArr) { + ConnVector gv; + ConnVector::iterator gvi; + unsigned int ix=0; + ImmModel::instance(&cb->immModel)->discardImplementer(implId, reallyDiscard, + gv, cb->mIsCoord); + + if (globArrSize && globccbIdArr) { + *globArrSize = (SaUint32T) gv.size(); + ix=0; + if(*globArrSize) { + *globccbIdArr = (SaUint32T *) malloc((*globArrSize)* sizeof(SaUint32T)); + for(gvi = gv.begin(); gvi!=gv.end(); ++gvi, ++ix) { + (*globccbIdArr)[ix] = (*gvi); + } + } + osafassert(ix==(*globArrSize)); + } + } SaAisErrorT @@ -11913,11 +11942,13 @@ ImmModel::getImplementerId(SaUint32T loc } void -ImmModel::discardNode(unsigned int deadNode, IdVector& cv) +ImmModel::discardNode(unsigned int deadNode, IdVector& cv, IdVector& +gv, bool isAtCoord ) { ImplementerVector::iterator i; AdminOwnerVector::iterator i2; CcbVector::iterator i3; + ConnVector implv; + ConnVector::iterator i4; TRACE_ENTER(); if(sImmNodeState == IMM_NODE_W_AVAILABLE) { @@ -11958,6 +11989,12 @@ ImmModel::discardNode(unsigned int deadN ci2->second.mTimeout = 1; /* one second is minimum timeout. */ } } + + if(isAtCoord){ + // pushing to implv to find the ccbIds with implemeter id which are going to be disconnected. + // Later abort the CcbId. This is done only at co-ordinator. + implv.push_back(info->mId); + } //discardImplementer(info->mId); //Doing it directly here for efficiency. //But watch out for changes in discardImplementer @@ -11986,6 +12023,23 @@ ImmModel::discardNode(unsigned int deadN //implementer. } } + /* + Fethes CcbIds which are non-critcal and Ccbs has implementer which are + going to be disconnected, because of node down. This is done only at co-ordinator. 
+ + */ + if(isAtCoord && (implv.size()>0)){ + CcbImplementerMap::iterator isi; + for(i4 = implv.begin(); i4!=implv.end(); ++i4) { + for(i3=sCcbVector.begin(); i3!=sCcbVector.end(); ++i3) { + isi = ((*i3)->mImplementers.find(*i4)); + if(isi != ((*i3)->mImplementers.end()) && ((*i3)->mState < IMM_CCB_CRITICAL)) { + gv.push_back((*i3)->mId); + } + } + } + } + //Discard AdminOwners i2 = sOwnerVector.begin(); @@ -12014,11 +12068,14 @@ ImmModel::discardNode(unsigned int deadN } void -ImmModel::discardImplementer(unsigned int implHandle, bool reallyDiscard) +ImmModel::discardImplementer(unsigned int implHandle, bool +reallyDiscard, IdVector& gv, bool isAtCoord) { //Note: If this function is altered, then you may need to make //changes in ImmModel::discardNode, since that function also deletes //implementers. + CcbVector::iterator i1; + SaUint32T mid; + TRACE_ENTER(); ImplementerInfo* info = findImplementer(implHandle); if(info) { @@ -12030,6 +12087,18 @@ ImmModel::discardImplementer(unsigned in //Note the time of death and id of the demised implementer. sImplDetachTime[info] = ContinuationInfo2(info->mId, DEFAULT_TIMEOUT_SEC); + if(isAtCoord){ + // Find the Non-critical ccbs with implemeter id which are going to be Disconnected. + // Later abort the CCBId. This is done only at co-ordinator. + mid = info->mId; + CcbImplementerMap::iterator isi; + for(i1=sCcbVector.begin(); i1!=sCcbVector.end(); ++i1) { + isi = (*i1)->mImplementers.find(mid); + if(isi != ((*i1)->mImplementers.end()) && ((*i1)->mState < IMM_CCB_CRITICAL)) { + gv.push_back((*i1)->mId); + } + } + } info->mId = 0; info->mConn = 0; @@ -13931,6 +14000,7 @@ ImmModel::implementerClear(const struct unsigned int nodeId) { SaAisErrorT err = SA_AIS_OK; + ConnVector gv; TRACE_ENTER(); ImplementerInfo* info = findImplementer(req->impl_id); @@ -13939,7 +14009,7 @@ ImmModel::implementerClear(const struct /* Sync is ongoing and we are a sync client. Remember the death of the implementer. 
*/ - discardImplementer(req->impl_id, true); + discardImplementer(req->impl_id, true, gv, false); goto done; } LOG_NO("ERR_BAD_HANDLE: Not a correct implementer handle? %llu id:%u", @@ -13952,7 +14022,7 @@ ImmModel::implementerClear(const struct conn, nodeId); err = SA_AIS_ERR_BAD_HANDLE; } else { - discardImplementer(req->impl_id, true); + discardImplementer(req->impl_id, true, gv, false); } } @@ -17301,9 +17371,9 @@ ImmModel::finalizeSync(ImmsvOmFinalizeSy if(!sNodesDeadDuringSync.empty()) { IdVector::iterator ivi = sNodesDeadDuringSync.begin(); for(;ivi != sNodesDeadDuringSync.end(); ++ivi) { - ConnVector cv; + ConnVector cv, gv; LOG_NO("Sync client re-executing discardNode for node %x", (*ivi)); - this->discardNode((*ivi), cv); + this->discardNode((*ivi), cv, gv, false); if(!(cv.empty())) { LOG_ER("Sync can not discard node with active ccbs"); err = SA_AIS_ERR_FAILED_OPERATION; diff --git a/osaf/services/saf/immsv/immnd/ImmModel.hh b/osaf/services/saf/immsv/immnd/ImmModel.hh --- a/osaf/services/saf/immsv/immnd/ImmModel.hh +++ b/osaf/services/saf/immsv/immnd/ImmModel.hh @@ -533,9 +533,11 @@ public: SaUint32T getImplementerId(SaUint32T localConn); void discardImplementer( unsigned int implHandle, - bool reallyDiscard); + bool reallyDiscard, + IdVector& gv, + bool isAtCoord); void discardContinuations(SaUint32T dead); - void discardNode(unsigned int nodeId, IdVector& cv); + void discardNode(unsigned int nodeId, IdVector& cv, IdVector& gv, bool isAtCoord); void getCcbIdsForOrigCon(SaUint32T dead, IdVector& cv); void getAdminOwnerIdsForCon(SaUint32T dead, IdVector& cv); bool ccbCommit(SaUint32T ccbId, ConnVector& connVector); diff --git a/osaf/services/saf/immsv/immnd/immnd_evt.c b/osaf/services/saf/immsv/immnd/immnd_evt.c --- a/osaf/services/saf/immsv/immnd/immnd_evt.c +++ b/osaf/services/saf/immsv/immnd/immnd_evt.c @@ -8781,10 +8781,24 @@ static void immnd_evt_proc_discard_impl( IMMND_EVT *evt, SaBoolT originatedAtThisNd, SaImmHandleT clnt_hdl, MDS_DEST 
reply_dest) { + SaUint32T *globIdArr = NULL; + SaUint32T globArrSize = 0; TRACE_ENTER(); osafassert(evt); TRACE_2("Global discard implementer for id:%u", evt->info.implSet.impl_id); - immModel_discardImplementer(cb, evt->info.implSet.impl_id, SA_TRUE); + immModel_discardImplementer(cb, evt->info.implSet.impl_id, SA_TRUE, &globArrSize, &globIdArr); + if(globArrSize) { + SaUint32T ix; + for (ix = 0; ix < globArrSize; ++ix) { + LOG_WA("Discard implementer for id %u, abort Non-critical ccbId %u which has " + "discarded implementer", evt->info.implSet.impl_id, globIdArr[ix]); + immnd_proc_global_abort_ccb(cb, globIdArr[ix]); + } + free(globIdArr); + globIdArr = NULL; + globArrSize = 0; + } + TRACE_LEAVE(); } @@ -8809,6 +8823,9 @@ static void immnd_evt_proc_discard_node( { SaUint32T *idArr = NULL; SaUint32T arrSize = 0; + SaUint32T *globIdArr = NULL; + SaUint32T globArrSize = 0; + TRACE_ENTER(); osafassert(evt); if(evt->info.ctrl.nodeId == cb->node_id) { @@ -8822,7 +8839,19 @@ static void immnd_evt_proc_discard_node( causing a newly reattached node being discarded. 
*/ cb->mLostNodes++; - immModel_discardNode(cb, evt->info.ctrl.nodeId, &arrSize, &idArr); + immModel_discardNode(cb, evt->info.ctrl.nodeId, &arrSize, &idArr, &globArrSize, &globIdArr); + if(globArrSize) { + SaUint32T ix; + for (ix = 0; ix < globArrSize; ++ix) { + LOG_WA("Detected crash at node %x, abort Non-critical ccbId %u which has implementer " + "on the crashed node ", evt->info.ctrl.nodeId, globIdArr[ix]); + immnd_proc_global_abort_ccb(cb, globIdArr[ix]); + } + free(globIdArr); + globIdArr = NULL; + globArrSize = 0; + } + if (arrSize) { SaAisErrorT err = SA_AIS_OK; SaUint32T ix; diff --git a/osaf/services/saf/immsv/immnd/immnd_init.h b/osaf/services/saf/immsv/immnd/immnd_init.h --- a/osaf/services/saf/immsv/immnd/immnd_init.h +++ b/osaf/services/saf/immsv/immnd/immnd_init.h @@ -150,7 +150,8 @@ extern "C" { void immModel_getCcbIdsForOrigCon(IMMND_CB *cb, SaUint32T origConn, SaUint32T *arrSize, SaUint32T **ccbIdArr); - void immModel_discardNode(IMMND_CB *cb, SaUint32T nodeId, SaUint32T *arrSize, SaUint32T **ccbIdArr); + void immModel_discardNode(IMMND_CB *cb, SaUint32T nodeId, SaUint32T *arrSize, SaUint32T **ccbIdArr, + SaUint32T* globArrSize, SaUint32T** globccbIdArr); SaAisErrorT immModel_ccbObjectDelete(IMMND_CB *cb, @@ -254,7 +255,8 @@ extern "C" { SaUint32T implConn, SaUint32T implNodeId); SaUint32T immModel_getImplementerId(IMMND_CB *cb, SaUint32T implConn); - void immModel_discardImplementer(IMMND_CB *cb, SaUint32T implId, SaBoolT reallyDiscard); + void immModel_discardImplementer(IMMND_CB *cb, SaUint32T implId, SaBoolT reallyDiscard, + SaUint32T* globArrSize, SaUint32T** globccbIdArr); void immModel_fetchAdmOpContinuations(IMMND_CB *cb, SaInvocationT inv, SaBoolT local, SaUint32T *implConn, diff --git a/osaf/services/saf/immsv/immnd/immnd_proc.c b/osaf/services/saf/immsv/immnd/immnd_proc.c --- a/osaf/services/saf/immsv/immnd/immnd_proc.c +++ b/osaf/services/saf/immsv/immnd/immnd_proc.c @@ -142,7 +142,7 @@ uint32_t immnd_proc_imma_discard_connect 
/*Discard the local implementer directly and redundantly to avoid race conditions using this implementer (ccb's causing abort upcalls). */ - immModel_discardImplementer(cb, implId, SA_FALSE); + immModel_discardImplementer(cb, implId, SA_FALSE, NULL, NULL); } if (cl_node->mIsStale) { ------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/opensaf-devel
