osaf/services/saf/smfsv/smfd/SmfStepState.cc | 13 ++++ osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.cc | 68 +++++++++++++++++--- osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.hh | 6 + 3 files changed, 76 insertions(+), 11 deletions(-)
Sep 27 00:34:14 q50-s1 osafsmfd[6667]: NO SA_AMF_ADMIN_SI_SWAP [rc=1] successfully initiated Sep 27 00:34:15 q50-s1 osafimmnd[6571]: NO ERR_BAD_OPERATION: Mismatch on administrative owner '' != 'SMFSERVICE' Sep 27 00:34:17 q50-s1 osafsmfd[6667]: NO Fail to invoke admin operation, rc=SA_AIS_ERR_BAD_OPERATION (20). dn=[safSi=SC-2N,safApp=OpenSAF], opId=[7] Sep 27 00:34:17 q50-s1 osafsmfd[6667]: NO Admin op SA_AMF_ADMIN_SI_SWAP fail [rc = 20] Sep 27 00:34:17 q50-s1 osafsmfd[6667]: NO CAMP: Procedure safSmfProc=RollingUpgrade returned FAILED Sep 27 00:36:14 q50-s1 osafsmfd[6667]: NO Campaign thread does not disappear within 120 seconds after SA_AMF_ADMIN_SI_SWAP, the operation was assumed failed. Sep 27 00:36:14 q50-s1 kernel: [14934029.531187] osafsmfd[32024]: segfault at 4 ip 00000000004425b6 sp 00007f67f7ffe1c0 error 4 in osafsmfd[400000+9a000] Sep 27 00:36:14 q50-s1 osafamfnd[6649]: NO 'safComp=SMF,safSu=SC-1,safSg=2N,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'nodeFailfast' Sep 27 00:36:14 q50-s1 osafamfnd[6649]: ER safComp=SMF,safSu=SC-1,safSg=2N,safApp=OpenSAF Faulted due to:avaDown Recovery is:nodeFailfast There are two problems here. The first is that the SmfSwapThread is left pointing to a deleted procedure when the original active controller is reassigned active. The second is that a new SmfSwapThread is created when the original active controller is reassigned active, so two swap threads are now running. The first thread then tries to use its proc pointer (which was deleted when the original active went quiesced), and this causes the segfault. The proposed solution is a little different from the one proposed in the ticket description. It uses the existence of a running SmfSwapThread as the test for whether a new swap thread must be created. When the original active controller is reassigned active because the si-swap failed, it will still remove the RestartIndicator as it does now. 
But, if the SmfSwapThread is still running, it will not create a new one, but update it with the recreated procedure pointer, and let it handle the si-swap timeout. Then it will report the error. I believe this solution is backwards compatible because no IMM changes are made like the ones proposed in the ticket. diff --git a/osaf/services/saf/smfsv/smfd/SmfStepState.cc b/osaf/services/saf/smfsv/smfd/SmfStepState.cc --- a/osaf/services/saf/smfsv/smfd/SmfStepState.cc +++ b/osaf/services/saf/smfsv/smfd/SmfStepState.cc @@ -424,6 +424,19 @@ SmfStepStateUndone::execute(SmfUpgradeSt { TRACE_ENTER(); + if (i_step->calculateStepType() != SA_AIS_OK) { + LOG_ER("SmfStepStateUndone: Failed to calculate step type"); + changeState(i_step, SmfStepStateFailed::instance()); + TRACE_LEAVE(); + return SMF_STEP_FAILED; + } + + if (i_step->getSwitchOver() == true) { + TRACE("Switch over is needed in this step"); + TRACE_LEAVE(); + return SMF_STEP_SWITCHOVER; + } + i_step->setRetryCount(0); /* Reset the retry counter */ changeState(i_step, SmfStepStateExecuting::instance()); diff --git a/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.cc b/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.cc --- a/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.cc +++ b/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.cc @@ -482,12 +482,17 @@ SmfUpgradeProcedure::switchOver() osafassert(0); } - TRACE("SmfUpgradeProcedure::switchOver: Create the restart indicator"); - SmfCampaignThread::instance()->campaign()->getUpgradeCampaign()->createSmfRestartIndicator(); - - SmfSwapThread *swapThread = new SmfSwapThread(this); - TRACE("SmfUpgradeProcedure::switchOver, Starting SI_SWAP thread"); - swapThread->start(); + if (!SmfSwapThread::running()) { + TRACE("SmfUpgradeProcedure::switchOver: Create the restart indicator"); + SmfCampaignThread::instance()->campaign()->getUpgradeCampaign()->createSmfRestartIndicator(); + + SmfSwapThread *swapThread = new SmfSwapThread(this); + 
TRACE("SmfUpgradeProcedure::switchOver, Starting SI_SWAP thread"); + swapThread->start(); + } else { + TRACE("SmfUpgradeProcedure::switchOver, SI_SWAP thread already running"); + SmfSwapThread::setProc(this); + } TRACE_LEAVE(); } @@ -4156,6 +4161,31 @@ SmfUpgradeProcedure::resetProcCounter() /* Static methods */ /*====================================================================*/ +SmfSwapThread *SmfSwapThread::me(0); +std::mutex SmfSwapThread::m_mutex; + +/** + * SmfSmfSwapThread::running + * Is the thread currently running? + */ +bool +SmfSwapThread::running(void) +{ + std::lock_guard<std::mutex> guard(m_mutex); + return me ? true : false; +} + +/** + * SmfSmfSwapThread::setProc + * Set the procedure pointer to the newly created procedure + */ +void +SmfSwapThread::setProc(SmfUpgradeProcedure *newProc) +{ + std::lock_guard<std::mutex> guard(m_mutex); + me->m_proc = newProc; +} + /** * SmfSmfSwapThread::main * static main for the thread @@ -4181,6 +4211,8 @@ SmfSwapThread::SmfSwapThread(SmfUpgradeP m_proc(i_proc) { sem_init(&m_semaphore, 0, 0); + std::lock_guard<std::mutex> guard(m_mutex); + me = this; } /** @@ -4188,6 +4220,8 @@ SmfSwapThread::SmfSwapThread(SmfUpgradeP */ SmfSwapThread::~SmfSwapThread() { + std::lock_guard<std::mutex> guard(m_mutex); + me = 0; } /** @@ -4309,13 +4343,25 @@ SmfSwapThread::main(void) exit_error: if (SmfCampaignThread::instance() != NULL) { - SmfProcStateExecFailed::instance()->changeState(m_proc, SmfProcStateExecFailed::instance()); - } - - if (SmfCampaignThread::instance() != NULL) { + std::lock_guard<std::mutex> guard(m_mutex); + + SmfProcStateExecuting::instance()->changeState(m_proc, SmfProcStateStepUndone::instance()); + + // find the failed upgrade step and set it to Undone + std::vector<SmfUpgradeStep *>& upgradeSteps(m_proc->getProcSteps()); + for (std::vector<SmfUpgradeStep *>::iterator it(upgradeSteps.begin()); it != upgradeSteps.end(); ++it) { + if ((*it)->getSwitchOver()) { + 
(*it)->changeState(SmfStepStateUndone::instance()); + break; + } + } + + std::string error("si-swap of middleware failed"); + SmfCampaignThread::instance()->campaign()->setError(error); + CAMPAIGN_EVT *evt = new CAMPAIGN_EVT(); evt->type = CAMPAIGN_EVT_PROCEDURE_RC; - evt->event.procResult.rc = SMF_PROC_FAILED; + evt->event.procResult.rc = SMF_PROC_STEPUNDONE; evt->event.procResult.procedure = m_proc; SmfCampaignThread::instance()->send(evt); } diff --git a/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.hh b/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.hh --- a/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.hh +++ b/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.hh @@ -29,6 +29,7 @@ #include <vector> #include <list> #include <map> +#include <mutex> #include <saSmf.h> #include <saImmOi.h> @@ -791,7 +792,12 @@ class SmfSwapThread { ~SmfSwapThread(); int start(void); + static bool running(void); + static void setProc(SmfUpgradeProcedure *); + private: + static SmfSwapThread *me; + static std::mutex m_mutex; void main(void); int init(void); ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel