osaf/libs/common/immsv/include/immsv_api.h | 5 +-
osaf/services/saf/immsv/README | 33 +++++++++++++++++---
osaf/services/saf/immsv/immnd/ImmModel.cc | 47 +++++++++++++++++++++++------
osaf/services/saf/immsv/immnd/ImmModel.hh | 5 +-
4 files changed, 71 insertions(+), 19 deletions(-)
See the diff for osaf/services/saf/immsv/README for an explanation
of this enhancement.
diff --git a/osaf/libs/common/immsv/include/immsv_api.h
b/osaf/libs/common/immsv/include/immsv_api.h
--- a/osaf/libs/common/immsv/include/immsv_api.h
+++ b/osaf/libs/common/immsv/include/immsv_api.h
@@ -142,11 +142,12 @@ typedef enum {
typedef enum {
SA_IMM_ADMIN_EXPORT = 1, /* Defined in A.02.01 declared in A.03.01 */
- SA_IMM_ADMIN_INIT_FROM_FILE = 100 /* Non standard, force PBE disable. */
+ SA_IMM_ADMIN_INIT_FROM_FILE = 100, /* Non standard, force PBE disable. */
+ SA_IMM_ADMIN_ABORT_CCBS = 202 /* Non standard, abort non critical CCBs. */
} SaImmMngtAdminOperationT;
/*
- * Special flags only to be used by the imm-dummper, the imm-loader or
+ * Special flags only to be used by the imm-dumper, the imm-loader or
* new API functions.
*
* The first excludes non persistent runtime attributes from the dump.
diff --git a/osaf/services/saf/immsv/README b/osaf/services/saf/immsv/README
--- a/osaf/services/saf/immsv/README
+++ b/osaf/services/saf/immsv/README
@@ -2302,8 +2302,8 @@ the continuation times out in the server
receives an error reply when that om client has NOT also timed out.
-Improve error diagnostics when PBE is misconfigured.
-====================================================
+Improve error diagnostics when PBE is misconfigured (4.6)
+=========================================================
http://sourceforge.net/p/opensaf/tickets/1139
Configuration mistakes such as omitting to change immnd.conf to allow PBE
@@ -2329,15 +2329,15 @@ Error logging been improved and the imm
Ccb operation error cases. This should make troubleshooting this issue much
faster and easier.
-IMM API that replaces SaNameT with SaStringT and SA_IMM_ATTR_DN
-===============================================================
+IMM API that replaces SaNameT with SaStringT and SA_IMM_ATTR_DN (4.6)
+=====================================================================
http://sourceforge.net/p/opensaf/tickets/643
See: osaf/services/saf/immsv/README.SASTRINGT_API for details.
Notes on upgrading from OpenSAF 4.[1,2,3,4,5] to OpenSAF (4.6)
-==========================================================
+==============================================================
OpenSAF4.6 adds new message types that avoid using the SaNameT type (#969).
During a rolling upgrade from an earlier OpenSAF release to the 4.6 release
there
will be nodes executing the older release concurrently with nodes executing
OpenSAF 4.6.
@@ -2376,6 +2376,29 @@ Bit 5 controls OpenSAF4.5 protocols allo
Bit 6 controls OpenSAF4.6 protocols allowed or not (normally on/1).
+Provide an admin-operation for aborting all non-critical CCBs (4.7)
+===================================================================
+http://sourceforge.net/p/opensaf/tickets/1107
+
+There may arise situations where an open CCB that is not in critical state,
+i.e. has not entered the commit protocol yet, is blocking an involved
+service/OI from performing some other task that is more urgent and more
+important than completing that CCB. The best example is the AMF, where
+an si-swap will fail and cause the standby to reboot if it was involved
+in an open CCB when the si-swap order was issued (see ticket #1105).
+Ticket #1105 can be fixed by the AMF (active or standby) sending an
+admin-operation directed at the IMM service requesting it to abort non
+critical CCBs. The AMF can either use a synchronous admin-op or an
+asynchronous admin-op. After the admin-operation has been invoked the AMF
+should allow a few seconds for the CCB to get aborted and the AMF OI to
+get the abort callback for the CCB. That should then clear the path for
+the AMF standby to succeed with the si-swap.
+The admin-operation for aborting non-critical CCBs involves requesting the
+operation id '202' directed at the IMM SF service object:
+
+ immadm -o 202 safRdn=immManagement,safApp=safImmService
+
+
----------------------------------------
DEPENDENCIES
============
diff --git a/osaf/services/saf/immsv/immnd/ImmModel.cc
b/osaf/services/saf/immsv/immnd/ImmModel.cc
--- a/osaf/services/saf/immsv/immnd/ImmModel.cc
+++ b/osaf/services/saf/immsv/immnd/ImmModel.cc
@@ -453,6 +453,7 @@ static SaImmRepositoryInitModeT immInitM
static SaUint32T ccbIdLongDnGuard = 0; /* Disallow long DN additions if
longDnsAllowed is being changed in ccb*/
static bool sIsLongDnLoaded = false; /* track long DNs before
opensafImm=opensafImm,safApp=safImmService is created */
+static bool sAbortNonCriticalCcbs = false; /* Set to true at coord by the
special imm admin-op to abort ccbs #1107 */
struct AttrFlagIncludes
{
@@ -1252,7 +1253,7 @@ immModel_adminOperationInvoke(IMMND_CB *
{
return ImmModel::instance(&cb->immModel)->
adminOperationInvoke(req, reqConn, reply_dest, inv,
- implConn, implNodeId, pbeExpected, displayRes);
+ implConn, implNodeId, pbeExpected, displayRes, cb->mIsCoord);
}
SaUint32T /* Returns admo-id for object if object exists and active admo
exists, otherwise zero. */
@@ -3139,7 +3140,7 @@ ImmModel::classCreate(const ImmsvOmClass
if(attr->attrValueType != SA_IMM_ATTR_SANAMET
&& !((attr->attrFlags & SA_IMM_ATTR_DN) &&
(attr->attrValueType == SA_IMM_ATTR_SASTRINGT))) {
LOG_NO("ERR_INVALID_PARAM: Attribute '%s' must be of type
SaNameT, "
- "or of type SaStringT with DN flag", attNm);
+ "or of type SaStringT with DN flag", attNm);
illegal = 1;
}
@@ -10982,7 +10983,7 @@ SaAisErrorT ImmModel::adminOperationInvo
SaInvocationT& saInv,
SaUint32T* implConn,
unsigned int* implNodeId,
- bool pbeExpected, bool* displayRes)
+ bool pbeExpected, bool* displayRes,
bool isAtCoord)
{
TRACE_ENTER();
SaAisErrorT err = SA_AIS_OK;
@@ -11179,7 +11180,7 @@ SaAisErrorT ImmModel::adminOperationInvo
TRACE_7("Admin op on special object %s whith no implementer
ret:%u",
objectName.c_str(), err);
} else if(objectName == immManagementDn) {
- err = admoImmMngtObject(req);
+ err = admoImmMngtObject(req, isAtCoord);
TRACE_7("Admin op on special object %s whith no implementer
ret:%u",
objectName.c_str(), err);
} else {
@@ -11772,7 +11773,7 @@ ImmModel::resourceDisplay(const struct I
SaAisErrorT
-ImmModel::admoImmMngtObject(const ImmsvOmAdminOperationInvoke* req)
+ImmModel::admoImmMngtObject(const ImmsvOmAdminOperationInvoke* req, bool
isAtCoord)
{
SaAisErrorT err = SA_AIS_ERR_INTERRUPT;
/* Function for handling admin-ops directed at the immsv itself.
@@ -11810,6 +11811,13 @@ ImmModel::admoImmMngtObject(const ImmsvO
immInitMode = SA_IMM_INIT_FROM_FILE;
LOG_NO("SaImmRepositoryInitModeT FORCED to:
SA_IMM_INIT_FROM_FILE");
}
+ } else if (req->operationId == SA_IMM_ADMIN_ABORT_CCBS) { /* Non standard.
*/
+ LOG_NO("Received: immadm -o %u
safRdn=immManagement,safApp=safImmService",
+ SA_IMM_ADMIN_ABORT_CCBS);
+ if(isAtCoord) {
+ LOG_IN("sAbortNonCriticalCcbs = true;");
+ sAbortNonCriticalCcbs = true;
+ }
} else {
LOG_NO("Invalid operation ID %llu, for operation on %s", (SaUint64T)
req->operationId,
immManagementDn.c_str());
@@ -12476,7 +12484,7 @@ ImmModel::cleanTheBasement(InvocVector&
//AND ccbIds for ccbs in critical and marked with PbeRestartedId.
//Restarted PBE => try to recover outcome BEFORE timeout, making
//recovery transparent to user!
- //TODO the timeout should not be hardwired, but for now it is.
+ //Also handle the case of admin-op requesting abort of all
non-critical ccbs.
TRACE("Checking active ccb %u for deadlock or blocked implementer",
(*i3)->mId);
TRACE("state:%u waitsart:%u PberestartId:%u",(*i3)->mState,
@@ -12484,9 +12492,14 @@ ImmModel::cleanTheBasement(InvocVector&
CcbImplementerMap::iterator cim;
uint32_t max_oi_timeout = DEFAULT_TIMEOUT_SEC;
- for(cim = (*i3)->mImplementers.begin(); cim !=
(*i3)->mImplementers.end(); ++cim) {
- if(cim->second->mImplementer->mTimeout > max_oi_timeout) {
- max_oi_timeout = cim->second->mImplementer->mTimeout;
+ if(sAbortNonCriticalCcbs) {
+ LOG_IN("sAbortNonCriticalCcbs is true => set max_oi_timeout to
0");
+ max_oi_timeout = 0;
+ } else {
+ for(cim = (*i3)->mImplementers.begin(); cim !=
(*i3)->mImplementers.end(); ++cim) {
+ if(cim->second->mImplementer->mTimeout > max_oi_timeout) {
+ max_oi_timeout = cim->second->mImplementer->mTimeout;
+ }
}
}
@@ -12502,6 +12515,15 @@ ImmModel::cleanTheBasement(InvocVector&
oi_timeout = 0;
TRACE_5("CCB %u timeout while waiting on implementer
reply",
(*i3)->mId);
+ setCcbErrorString(*i3, "Resource Error: CCB timeout while "
+ "waiting on implementer reply");
+ }
+
+ if(sAbortNonCriticalCcbs) {
+ LOG_NO("CCB %u aborted by: immadm -o %u
safRdn=immManagement,safApp=safImmService",
+ (*i3)->mId, SA_IMM_ADMIN_ABORT_CCBS);
+ setCcbErrorString(*i3, "Resource Error: CCB aborted by
admin-operation"
+ " '202' on
safRdn=immManagement,safApp=safImmService");
}
if((*i3)->mState == IMM_CCB_CRITICAL) {
@@ -12528,6 +12550,11 @@ ImmModel::cleanTheBasement(InvocVector&
}
}
+ if(sAbortNonCriticalCcbs) {
+ LOG_IN("sAbortNonCriticalCcbs reset to false");
+ sAbortNonCriticalCcbs = false; /* Reset. */
+ }
+
while((i3 = ccbsToGc.begin()) != ccbsToGc.end()) {
CcbInfo* ccb = (*i3);
ccbsToGc.erase(i3);
@@ -12544,7 +12571,7 @@ ImmModel::cleanTheBasement(InvocVector&
//It needs to be long to allow reply on larger batch jobs such as a
//schema/class change with instance migration and slow file system.
//It can not be infinite as that could cause a memory leak.
- if(now - ci2->second.mCreateTime >= (DEFAULT_TIMEOUT_SEC * 20)) {
+ if(now - ci2->second.mCreateTime >= (DEFAULT_TIMEOUT_SEC * 20)) {
TRACE_5("Timeout on PbeRtReqContinuation %llu", ci2->first);
pbePrtoReqs.push_back(ci2->second.mConn);
sPbeRtReqContinuationMap.erase(ci2);
diff --git a/osaf/services/saf/immsv/immnd/ImmModel.hh
b/osaf/services/saf/immsv/immnd/ImmModel.hh
--- a/osaf/services/saf/immsv/immnd/ImmModel.hh
+++ b/osaf/services/saf/immsv/immnd/ImmModel.hh
@@ -361,7 +361,8 @@ public:
SaUint32T* implConn,
unsigned int* implNodeId,
bool pbeExpected,
- bool* displayRes);
+ bool* displayRes,
+ bool isAtCoord);
// Objects
@@ -653,7 +654,7 @@ private:
std::string newClassName,
bool remove=false);
SaAisErrorT updateImmObject2(const ImmsvOmAdminOperationInvoke*
req);
- SaAisErrorT admoImmMngtObject(const ImmsvOmAdminOperationInvoke*
req);
+ SaAisErrorT admoImmMngtObject(const ImmsvOmAdminOperationInvoke*
req, bool isAtCoord);
void addNoDanglingRefs(ObjectInfo *obj);
void removeNoDanglingRefs(
------------------------------------------------------------------------------
One dashboard for servers and applications across Physical-Virtual-Cloud
Widest out-of-the-box monitoring support with 50+ applications
Performance metrics, stats and reports that give you Actionable Insights
Deep dive visibility with transaction tracing using APM Insight.
http://ad.doubleclick.net/ddm/clk/290420510;117567292;y
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel