ACK
On 10/10/2016 01:46 PM, Neelakanta Reddy wrote: > Hi Alex, > > Reviewed and tested the patch. > Ack. > > /Neel. > > On 2016/10/07 11:10 PM, Alex Jones wrote: >> osaf/services/saf/smfsv/smfd/SmfStepState.cc | 13 ++++ >> osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.cc | 68 >> +++++++++++++++++--- >> osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.hh | 6 + >> 3 files changed, 76 insertions(+), 11 deletions(-) >> >> >> Sep 27 00:34:14 q50-s1 osafsmfd[6667]: NO SA_AMF_ADMIN_SI_SWAP [rc=1] >> successfully initiated >> Sep 27 00:34:15 q50-s1 osafimmnd[6571]: NO ERR_BAD_OPERATION: >> Mismatch on administrative owner '' != 'SMFSERVICE' >> Sep 27 00:34:17 q50-s1 osafsmfd[6667]: NO Fail to invoke admin >> operation, rc=SA_AIS_ERR_BAD_OPERATION (20). >> dn=[safSi=SC-2N,safApp=OpenSAF], opId=[7] >> Sep 27 00:34:17 q50-s1 osafsmfd[6667]: NO Admin op >> SA_AMF_ADMIN_SI_SWAP fail [rc = 20] >> Sep 27 00:34:17 q50-s1 osafsmfd[6667]: NO CAMP: Procedure >> safSmfProc=RollingUpgrade returned FAILED >> Sep 27 00:36:14 q50-s1 osafsmfd[6667]: NO Campaign thread does not >> disappear within 120 seconds after SA_AMF_ADMIN_SI_SWAP, the >> operation was assumed failed. >> Sep 27 00:36:14 q50-s1 kernel: [14934029.531187] osafsmfd[32024]: >> segfault at 4 ip 00000000004425b6 sp 00007f67f7ffe1c0 error 4 in >> osafsmfd[400000+9a000] >> Sep 27 00:36:14 q50-s1 osafamfnd[6649]: NO >> 'safComp=SMF,safSu=SC-1,safSg=2N,safApp=OpenSAF' faulted due to >> 'avaDown' : Recovery is 'nodeFailfast' >> Sep 27 00:36:14 q50-s1 osafamfnd[6649]: ER >> safComp=SMF,safSu=SC-1,safSg=2N,safApp=OpenSAF Faulted due to:avaDown >> Recovery is:nodeFailfast >> >> There are a few problems here. One is that the SmfSwapThread is >> pointing to a >> deleted procedure when the original active controller is reassigned >> active. The >> second problem is that a new SmfSwapThread is created when the >> original active >> controller is reassigned active, so now there are two running. The >> first thread >> tries to use its proc pointer (which has been deleted when the >> original active >> goes to quiesced) and causes the segfault. >> >> The proposed solution is a little different from that proposed in the >> ticket >> description. This solution proposes to use the existence of the >> SmfSwapThread as >> a test. When the original active controller is reassigned active >> because the >> si-swap failed, it will still remove the RestartIndicator as it does >> now. But, >> if the SmfSwapThread is still running, it will not create a new one, >> but update >> it with the recreated procedure pointer, and let it handle the >> si-swap timeout. >> Then it will report the error. I believe this solution is backwards >> compatible >> because no IMM changes are made like the ones proposed in the ticket. >> >> diff --git a/osaf/services/saf/smfsv/smfd/SmfStepState.cc >> b/osaf/services/saf/smfsv/smfd/SmfStepState.cc >> --- a/osaf/services/saf/smfsv/smfd/SmfStepState.cc >> +++ b/osaf/services/saf/smfsv/smfd/SmfStepState.cc >> @@ -424,6 +424,19 @@ SmfStepStateUndone::execute(SmfUpgradeSt >> { >> TRACE_ENTER(); >> + if (i_step->calculateStepType() != SA_AIS_OK) { >> + LOG_ER("SmfStepStateUndone: Failed to calculate step >> type"); >> + changeState(i_step, SmfStepStateFailed::instance()); >> + TRACE_LEAVE(); >> + return SMF_STEP_FAILED; >> + } >> + >> + if (i_step->getSwitchOver() == true) { >> + TRACE("Switch over is needed in this step"); >> + TRACE_LEAVE(); >> + return SMF_STEP_SWITCHOVER; >> + } >> + >> i_step->setRetryCount(0); /* Reset the retry counter */ >> changeState(i_step, SmfStepStateExecuting::instance()); >> diff --git a/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.cc >> b/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.cc >> --- a/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.cc >> +++ b/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.cc >> @@ -482,12 +482,17 @@ SmfUpgradeProcedure::switchOver() >> osafassert(0); >> } >> - TRACE("SmfUpgradeProcedure::switchOver: Create the restart >> indicator"); >> - >> SmfCampaignThread::instance()->campaign()->getUpgradeCampaign()->createSmfRestartIndicator(); >> - >> - SmfSwapThread *swapThread = new SmfSwapThread(this); >> - TRACE("SmfUpgradeProcedure::switchOver, Starting SI_SWAP thread"); >> - swapThread->start(); >> + if (!SmfSwapThread::running()) { >> + TRACE("SmfUpgradeProcedure::switchOver: Create the restart >> indicator"); >> + >> SmfCampaignThread::instance()->campaign()->getUpgradeCampaign()->createSmfRestartIndicator(); >> + >> + SmfSwapThread *swapThread = new SmfSwapThread(this); >> + TRACE("SmfUpgradeProcedure::switchOver, Starting SI_SWAP >> thread"); >> + swapThread->start(); >> + } else { >> + TRACE("SmfUpgradeProcedure::switchOver, SI_SWAP thread >> already running"); >> + SmfSwapThread::setProc(this); >> + } >> TRACE_LEAVE(); >> } >> @@ -4156,6 +4161,31 @@ SmfUpgradeProcedure::resetProcCounter() >> /* Static >> methods */ >> /*====================================================================*/ >> +SmfSwapThread *SmfSwapThread::me(0); >> +std::mutex SmfSwapThread::m_mutex; >> + >> +/** >> + * SmfSmfSwapThread::running >> + * Is the thread currently running? >> + */ >> +bool >> +SmfSwapThread::running(void) >> +{ >> + std::lock_guard<std::mutex> guard(m_mutex); >> + return me ? true : false; >> +} >> + >> +/** >> + * SmfSmfSwapThread::setProc >> + * Set the procedure pointer to the newly created procedure >> + */ >> +void >> +SmfSwapThread::setProc(SmfUpgradeProcedure *newProc) >> +{ >> + std::lock_guard<std::mutex> guard(m_mutex); >> + me->m_proc = newProc; >> +} >> + >> /** >> * SmfSmfSwapThread::main >> * static main for the thread >> @@ -4181,6 +4211,8 @@ SmfSwapThread::SmfSwapThread(SmfUpgradeP >> m_proc(i_proc) >> { >> sem_init(&m_semaphore, 0, 0); >> + std::lock_guard<std::mutex> guard(m_mutex); >> + me = this; >> } >> /** >> @@ -4188,6 +4220,8 @@ SmfSwapThread::SmfSwapThread(SmfUpgradeP >> */ >> SmfSwapThread::~SmfSwapThread() >> { >> + std::lock_guard<std::mutex> guard(m_mutex); >> + me = 0; >> } >> /** >> @@ -4309,13 +4343,25 @@ SmfSwapThread::main(void) >> exit_error: >> if (SmfCampaignThread::instance() != NULL) { >> - SmfProcStateExecFailed::instance()->changeState(m_proc, >> SmfProcStateExecFailed::instance()); >> - } >> - >> - if (SmfCampaignThread::instance() != NULL) { >> + std::lock_guard<std::mutex> guard(m_mutex); >> + >> + SmfProcStateExecuting::instance()->changeState(m_proc, >> SmfProcStateStepUndone::instance()); >> + >> + // find the failed upgrade step and set it to Undone >> + std::vector<SmfUpgradeStep *>& >> upgradeSteps(m_proc->getProcSteps()); >> + for (std::vector<SmfUpgradeStep *>::iterator >> it(upgradeSteps.begin()); it != upgradeSteps.end(); ++it) { >> + if ((*it)->getSwitchOver()) { >> + (*it)->changeState(SmfStepStateUndone::instance()); >> + break; >> + } >> + } >> + >> + std::string error("si-swap of middleware failed"); >> + SmfCampaignThread::instance()->campaign()->setError(error); >> + >> CAMPAIGN_EVT *evt = new CAMPAIGN_EVT(); >> evt->type = CAMPAIGN_EVT_PROCEDURE_RC; >> - evt->event.procResult.rc = SMF_PROC_FAILED; >> + evt->event.procResult.rc = SMF_PROC_STEPUNDONE; >> evt->event.procResult.procedure = m_proc; >> SmfCampaignThread::instance()->send(evt); >> } >> diff --git a/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.hh >> b/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.hh >> --- a/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.hh >> +++ b/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.hh >> @@ -29,6 +29,7 @@ >> #include <vector> >> #include <list> >> #include <map> >> +#include <mutex> >> #include <saSmf.h> >> #include <saImmOi.h> >> @@ -791,7 +792,12 @@ class SmfSwapThread { >> ~SmfSwapThread(); >> int start(void); >> + static bool running(void); >> + static void setProc(SmfUpgradeProcedure *); >> + >> private: >> + static SmfSwapThread *me; >> + static std::mutex m_mutex; >> void main(void); >> int init(void); >> > ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel