I have to NACK this patch because it is hiding the problem rather than truly solving the problem, which is a logical bug somewhere.
If you remember some year or two ago you found a couple of error cases with 2PBE where the handling of the PBE level sqlite spin lock was not correct. They where all of the pattern of some error case detected , which resulted in a short-circuited exit from the method where the problem was detected, but without process termination (which of course discards the heap based lock) and without correctly aborting the PBE level spinlock correctly. This 1PBE case is most probably another such "simple" case. By simple I mean that it likely to be a simple case of omitting to abort the transaction at the PBE level. The other possibility is some kind of heap corruption. /AndersBj -----Original Message----- From: [email protected] [mailto:[email protected]] Sent: den 8 oktober 2015 09:28 To: Anders Björnerstedt; Zoran Milinkovic Cc: [email protected] Subject: [PATCH 1 of 1] imm: Exit the 1PBE when pbeBeginTrans sees db as locked [#1526] osaf/libs/common/immsv/immpbe_dump.cc | 18 +++++++++- osaf/services/saf/immsv/immpbed/immpbe_daemon.cc | 41 ++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) For the 1PBE case, which is not multi threaded, if the sqlite db locked case is reached abort the PBE and let the PBE be re-generated(instead of blocking the PBE process). diff --git a/osaf/libs/common/immsv/immpbe_dump.cc b/osaf/libs/common/immsv/immpbe_dump.cc --- a/osaf/libs/common/immsv/immpbe_dump.cc +++ b/osaf/libs/common/immsv/immpbe_dump.cc @@ -2894,14 +2894,28 @@ SaAisErrorT pbeBeginTrans(void* db_handl ++try_count; } else { LOG_ER("Sqlite db appears blocked on other transaction"); - return SA_AIS_ERR_FAILED_OPERATION; + /* The SA_AIS_ERR_NOT_SUPPORTED is converted to SA_AIS_ERR_FAILED_OPERATION + if the case is not 1PBE. The 1PBE case is not multithreaded. so, the + sqliteTransLock should not be shared, the 1PBE must exit when + SA_AIS_ERR_NOT_SUPPORTED is received. The PBE/system is in bad state + like disk full(#1526). + */ + + return SA_AIS_ERR_NOT_SUPPORTED; } } ++sqliteTransLock; /* Lock is set. */ if(sqliteTransLock != 1) { /* i.e. not 2 or 3 */ LOG_ER("Failure in obtaining sqliteTransLock: %u", sqliteTransLock); - return SA_AIS_ERR_FAILED_OPERATION; + /* The SA_AIS_ERR_NOT_SUPPORTED is converted to SA_AIS_ERR_FAILED_OPERATION + if the case is not 1PBE. The 1PBE case is not multithreaded. so, the + sqliteTransLock should not be shared, the 1PBE must exit when + SA_AIS_ERR_NOT_SUPPORTED is received. The PBE/system is in bad state + like disk full(#1526). + */ + + return SA_AIS_ERR_NOT_SUPPORTED; } rc = sqlite3_exec(dbHandle, "BEGIN EXCLUSIVE TRANSACTION", NULL, NULL, &execErr); diff --git a/osaf/services/saf/immsv/immpbed/immpbe_daemon.cc b/osaf/services/saf/immsv/immpbed/immpbe_daemon.cc --- a/osaf/services/saf/immsv/immpbed/immpbe_daemon.cc +++ b/osaf/services/saf/immsv/immpbed/immpbe_daemon.cc @@ -486,6 +486,11 @@ static void saImmOiAdminOperationCallbac rc = pbeBeginTrans(sDbHandle); if(rc != SA_AIS_OK) { + if(!sPbe2 && !sPbe2B) {/* 1PBE case, is not multithreaded */ + exit(1); + } else if (rc == SA_AIS_ERR_NOT_SUPPORTED) { + rc = SA_AIS_ERR_FAILED_OPERATION; + } LOG_WA("PBE failed to start sqlite transaction for class create"); osafassert(rc != SA_AIS_ERR_TRY_AGAIN); goto fail; @@ -701,6 +706,12 @@ static void saImmOiAdminOperationCallbac rc = pbeBeginTrans(sDbHandle); if(rc != SA_AIS_OK) { + if(!sPbe2 && !sPbe2B) {/* 1PBE case, is not multithreaded */ + exit(1); + } else if (rc == SA_AIS_ERR_NOT_SUPPORTED) { + rc = SA_AIS_ERR_FAILED_OPERATION; + } + LOG_WA("PBE failed to start sqlite transaction for class delete"); osafassert(rc != SA_AIS_ERR_TRY_AGAIN); goto fail; @@ -828,6 +839,12 @@ static void saImmOiAdminOperationCallbac rc = pbeBeginTrans(sDbHandle); if(rc != SA_AIS_OK) { + if(!sPbe2 && !sPbe2B) {/* 1PBE case, is not multithreaded */ + exit(1); + } else if (rc == SA_AIS_ERR_NOT_SUPPORTED) { + rc = SA_AIS_ERR_FAILED_OPERATION; + } + LOG_WA("PBE failed to start sqlite transaction for update epoch"); osafassert(rc != SA_AIS_ERR_TRY_AGAIN); goto fail; @@ -1053,6 +1070,12 @@ static void saImmOiAdminOperationCallbac if(rc == SA_AIS_OK) { rc = pbeBeginTrans(sDbHandle); if(rc != SA_AIS_OK) { + if(!sPbe2 && !sPbe2B) {/* 1PBE case, is not multithreaded */ + exit(1); + } else if (rc == SA_AIS_ERR_NOT_SUPPORTED) { + rc = SA_AIS_ERR_FAILED_OPERATION; + } + LOG_WA("PBE failed to start sqlite transaction for CCB %llx/%llu", ccbId, ccbId); osafassert(rc != SA_AIS_ERR_TRY_AGAIN); goto fail; @@ -1250,6 +1273,12 @@ static SaAisErrorT saImmOiCcbObjectModif rc = pbeBeginTrans(sDbHandle); if(rc != SA_AIS_OK) { + if(!sPbe2 && !sPbe2B) {/* 1PBE case, is not multithreaded */ + exit(1); + } else if (rc == SA_AIS_ERR_NOT_SUPPORTED) { + rc = SA_AIS_ERR_FAILED_OPERATION; + } + LOG_WA("PBE failed to start sqlite transaction (ccbId:%llx) for PRT attr update", ccbId); rc = SA_AIS_ERR_NO_RESOURCES; goto done; @@ -1402,6 +1431,12 @@ static SaAisErrorT saImmOiCcbCompletedCa } if((rc = pbeBeginTrans(sDbHandle)) != SA_AIS_OK) { + if(!sPbe2 && !sPbe2B) {/* 1PBE case, is not multithreaded */ + exit(1); + } else if (rc == SA_AIS_ERR_NOT_SUPPORTED) { + rc = SA_AIS_ERR_FAILED_OPERATION; + } + LOG_WA("pbeBEginTrans returned error: %u", rc); goto done; } @@ -1691,6 +1726,12 @@ static SaAisErrorT saImmOiCcbObjectCreat rc = pbeBeginTrans(sDbHandle); if(rc != SA_AIS_OK) { + if(!sPbe2 && !sPbe2B) {/* 1PBE case, is not multithreaded */ + exit(1); + } else if (rc == SA_AIS_ERR_NOT_SUPPORTED) { + rc = SA_AIS_ERR_FAILED_OPERATION; + } + LOG_WA("PBE failed to start sqlite transaction (ccbId:%llx)for PRTO create", ccbId); rc = SA_AIS_ERR_NO_RESOURCES; goto done; ------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/opensaf-devel
