- Incorrect counting of lost nodes causes the new coordinator to postpone sync while waiting for a number of nodes bigger than the cluster size. - Count lost nodes correctly by tracking a set of lost node IDs, so that a node discarded more than once is counted only once. --- src/imm/immnd/ImmModel.cc | 14 ++++++++++++++ src/imm/immnd/immnd_evt.c | 4 ++-- src/imm/immnd/immnd_init.h | 4 ++++ 3 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/src/imm/immnd/ImmModel.cc b/src/imm/immnd/ImmModel.cc index 631597b8a..00d7f4794 100644 --- a/src/imm/immnd/ImmModel.cc +++ b/src/imm/immnd/ImmModel.cc @@ -524,6 +524,7 @@ typedef std::map<std::string, ObjectSet> MissingParentsMap; // Local variables +static std::set<SaUint32T> sDiscardNodeSet; static ClassMap sClassMap; static AdminOwnerVector sOwnerVector; static CcbVector sCcbVector; @@ -1364,12 +1365,25 @@ void immModel_getCcbIdsForOrigCon(IMMND_CB* cb, SaUint32T deadCon, osafassert(ix == (*arrSize)); } +void immModel_resetDiscardNodes(IMMND_CB* cb) { + cb->mLostNodes = 0; + sDiscardNodeSet.clear(); +} + +void immModel_eraseDiscardNode(SaUint32T nodeId) { + sDiscardNodeSet.erase(nodeId); +} + void immModel_discardNode(IMMND_CB* cb, SaUint32T nodeId, SaUint32T* arrSize, SaUint32T** ccbIdArr, SaUint32T* globArrSize, SaUint32T** globccbIdArr) { ConnVector cv, gv; ConnVector::iterator cvi, gvi; unsigned int ix = 0; + if (sDiscardNodeSet.find(nodeId) == sDiscardNodeSet.end()) { + sDiscardNodeSet.insert(nodeId); + cb->mLostNodes++; + } ImmModel::instance(&cb->immModel) ->discardNode(nodeId, cv, gv, cb->mIsCoord, false); *arrSize = (SaUint32T)cv.size(); diff --git a/src/imm/immnd/immnd_evt.c b/src/imm/immnd/immnd_evt.c index dfef6c0a5..af8f5876a 100644 --- a/src/imm/immnd/immnd_evt.c +++ b/src/imm/immnd/immnd_evt.c @@ -10321,7 +10321,7 @@ static uint32_t immnd_evt_proc_start_sync(IMMND_CB *cb, IMMND_EVT *evt, Nodes. This is mostly relevant for "standby" i.e. the non-coord immnd which is on an SC. 
*/ - cb->mLostNodes = 0; + immModel_resetDiscardNodes(cb); } } immModel_prepareForSync(cb, cb->mSync); @@ -10488,6 +10488,7 @@ static uint32_t immnd_evt_proc_sync_req(IMMND_CB *cb, IMMND_EVT *evt, cb->mSyncRequested = true; if (cb->mLostNodes > 0) { cb->mLostNodes--; + immModel_eraseDiscardNode(evt->info.ctrl.nodeId); } /*osafassert(cb->mRulingEpoch == evt->info.ctrl.rulingEpoch); */ TRACE_2("At COORD: My Ruling Epoch:%u Cenral Ruling Epoch:%u", @@ -10989,7 +10990,6 @@ static void immnd_evt_proc_discard_node(IMMND_CB *cb, IMMND_EVT *evt, /* We should remember the nodeId/pid pair to avoid a redundant message causing a newly reattached node being discarded. */ - cb->mLostNodes++; immModel_discardNode(cb, evt->info.ctrl.nodeId, &arrSize, &idArr, &globArrSize, &globIdArr); if (globArrSize) { diff --git a/src/imm/immnd/immnd_init.h b/src/imm/immnd/immnd_init.h index 9a3f70072..0732f43f0 100644 --- a/src/imm/immnd/immnd_init.h +++ b/src/imm/immnd/immnd_init.h @@ -154,6 +154,10 @@ bool immModel_ccbAbort(IMMND_CB *cb, SaUint32T ccbId, SaUint32T *arrSize, void immModel_getCcbIdsForOrigCon(IMMND_CB *cb, SaUint32T origConn, SaUint32T *arrSize, SaUint32T **ccbIdArr); +void immModel_resetDiscardNodes(IMMND_CB* cb); + +void immModel_eraseDiscardNode(SaUint32T nodeId); + void immModel_discardNode(IMMND_CB *cb, SaUint32T nodeId, SaUint32T *arrSize, SaUint32T **ccbIdArr, SaUint32T *globArrSize, SaUint32T **globccbIdArr); -- 2.25.1 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel