osaf/services/saf/smfsv/smfd/SmfStepState.cc        |  13 ++++
 osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.cc |  68 +++++++++++++++++---
 osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.hh |   6 +
 3 files changed, 76 insertions(+), 11 deletions(-)


Sep 27 00:34:14 q50-s1 osafsmfd[6667]: NO SA_AMF_ADMIN_SI_SWAP [rc=1] 
successfully initiated
Sep 27 00:34:15 q50-s1 osafimmnd[6571]: NO ERR_BAD_OPERATION: Mismatch on 
administrative owner '' != 'SMFSERVICE'
Sep 27 00:34:17 q50-s1 osafsmfd[6667]: NO Fail to invoke admin operation, 
rc=SA_AIS_ERR_BAD_OPERATION (20). dn=[safSi=SC-2N,safApp=OpenSAF], opId=[7]
Sep 27 00:34:17 q50-s1 osafsmfd[6667]: NO Admin op SA_AMF_ADMIN_SI_SWAP fail 
[rc = 20]
Sep 27 00:34:17 q50-s1 osafsmfd[6667]: NO CAMP: Procedure 
safSmfProc=RollingUpgrade returned FAILED
Sep 27 00:36:14 q50-s1 osafsmfd[6667]: NO Campaign thread does not disappear 
within 120 seconds after SA_AMF_ADMIN_SI_SWAP, the operation was assumed failed.
Sep 27 00:36:14 q50-s1 kernel: [14934029.531187] osafsmfd[32024]: segfault at 4 
ip 00000000004425b6 sp 00007f67f7ffe1c0 error 4 in osafsmfd[400000+9a000]
Sep 27 00:36:14 q50-s1 osafamfnd[6649]: NO 
'safComp=SMF,safSu=SC-1,safSg=2N,safApp=OpenSAF' faulted due to 'avaDown' : 
Recovery is 'nodeFailfast'
Sep 27 00:36:14 q50-s1 osafamfnd[6649]: ER 
safComp=SMF,safSu=SC-1,safSg=2N,safApp=OpenSAF Faulted due to:avaDown Recovery 
is:nodeFailfast

There are a couple of problems here. The first is that the SmfSwapThread is left
pointing to a deleted procedure when the original active controller is
reassigned active. The second is that a new SmfSwapThread is created when the
original active controller is reassigned active, so there are now two threads
running. The first thread then tries to use its proc pointer (which was deleted
when the original active went to quiesced), and that causes the segfault.

The proposed solution is a little different from the one in the ticket
description. It uses the existence of the SmfSwapThread as a test. When the
original active controller is reassigned active because the si-swap failed, it
will still remove the RestartIndicator as it does now. But if the SmfSwapThread
is still running, it will not create a new one; instead it will update the
existing thread with the recreated procedure pointer and let that thread handle
the si-swap timeout and then report the error. I believe this solution is
backwards compatible because, unlike the changes proposed in the ticket, it
makes no IMM changes.

diff --git a/osaf/services/saf/smfsv/smfd/SmfStepState.cc 
b/osaf/services/saf/smfsv/smfd/SmfStepState.cc
--- a/osaf/services/saf/smfsv/smfd/SmfStepState.cc
+++ b/osaf/services/saf/smfsv/smfd/SmfStepState.cc
@@ -424,6 +424,19 @@ SmfStepStateUndone::execute(SmfUpgradeSt
 {
        TRACE_ENTER();
 
+        if (i_step->calculateStepType() != SA_AIS_OK) {
+                LOG_ER("SmfStepStateUndone: Failed to calculate step type");
+                changeState(i_step, SmfStepStateFailed::instance());
+                TRACE_LEAVE();
+                return SMF_STEP_FAILED;
+        }
+
+        if (i_step->getSwitchOver() == true) {
+                TRACE("Switch over is needed in this step");
+                TRACE_LEAVE();
+                return SMF_STEP_SWITCHOVER;
+        }
+
         i_step->setRetryCount(0); /* Reset the retry counter */
        changeState(i_step, SmfStepStateExecuting::instance());
 
diff --git a/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.cc 
b/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.cc
--- a/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.cc
+++ b/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.cc
@@ -482,12 +482,17 @@ SmfUpgradeProcedure::switchOver()
                 osafassert(0);
         }
 
-       TRACE("SmfUpgradeProcedure::switchOver: Create the restart indicator");
-       
SmfCampaignThread::instance()->campaign()->getUpgradeCampaign()->createSmfRestartIndicator();
-
-       SmfSwapThread *swapThread = new SmfSwapThread(this);
-       TRACE("SmfUpgradeProcedure::switchOver, Starting SI_SWAP thread");
-       swapThread->start();
+       if (!SmfSwapThread::running()) {
+               TRACE("SmfUpgradeProcedure::switchOver: Create the restart 
indicator");
+               
SmfCampaignThread::instance()->campaign()->getUpgradeCampaign()->createSmfRestartIndicator();
+
+               SmfSwapThread *swapThread = new SmfSwapThread(this);
+               TRACE("SmfUpgradeProcedure::switchOver, Starting SI_SWAP 
thread");
+               swapThread->start();
+       } else {
+               TRACE("SmfUpgradeProcedure::switchOver, SI_SWAP thread already 
running");
+               SmfSwapThread::setProc(this);
+       }
 
        TRACE_LEAVE();
 }
@@ -4156,6 +4161,31 @@ SmfUpgradeProcedure::resetProcCounter()
 /*  Static methods                                                    */
 /*====================================================================*/
 
+SmfSwapThread *SmfSwapThread::me(0);
+std::mutex SmfSwapThread::m_mutex;
+
+/**
+ * SmfSmfSwapThread::running
+ * Is the thread currently running?
+ */
+bool
+SmfSwapThread::running(void)
+{
+       std::lock_guard<std::mutex> guard(m_mutex);
+       return me ? true : false;
+}
+
+/**
+ * SmfSmfSwapThread::setProc
+ * Set the procedure pointer to the newly created procedure
+ */
+void
+SmfSwapThread::setProc(SmfUpgradeProcedure *newProc)
+{
+       std::lock_guard<std::mutex> guard(m_mutex);
+       me->m_proc = newProc;
+}
+
 /** 
  * SmfSmfSwapThread::main
  * static main for the thread
@@ -4181,6 +4211,8 @@ SmfSwapThread::SmfSwapThread(SmfUpgradeP
        m_proc(i_proc)
 {
        sem_init(&m_semaphore, 0, 0);
+       std::lock_guard<std::mutex> guard(m_mutex);
+       me = this;
 }
 
 /** 
@@ -4188,6 +4220,8 @@ SmfSwapThread::SmfSwapThread(SmfUpgradeP
  */
 SmfSwapThread::~SmfSwapThread()
 {
+       std::lock_guard<std::mutex> guard(m_mutex);
+       me = 0;
 }
 
 /**
@@ -4309,13 +4343,25 @@ SmfSwapThread::main(void)
 
 exit_error:
         if (SmfCampaignThread::instance() != NULL) {
-                SmfProcStateExecFailed::instance()->changeState(m_proc, 
SmfProcStateExecFailed::instance());
-        }
-
-        if (SmfCampaignThread::instance() != NULL) {
+                std::lock_guard<std::mutex> guard(m_mutex);
+
+                SmfProcStateExecuting::instance()->changeState(m_proc, 
SmfProcStateStepUndone::instance());
+
+                // find the failed upgrade step and set it to Undone
+                std::vector<SmfUpgradeStep *>& 
upgradeSteps(m_proc->getProcSteps());
+                for (std::vector<SmfUpgradeStep *>::iterator 
it(upgradeSteps.begin()); it != upgradeSteps.end(); ++it) {
+                        if ((*it)->getSwitchOver()) {
+                                
(*it)->changeState(SmfStepStateUndone::instance());
+                                break;
+                        }
+                }
+
+                std::string error("si-swap of middleware failed");
+                SmfCampaignThread::instance()->campaign()->setError(error);
+
                 CAMPAIGN_EVT *evt = new CAMPAIGN_EVT();
                 evt->type = CAMPAIGN_EVT_PROCEDURE_RC;
-                evt->event.procResult.rc = SMF_PROC_FAILED;
+                evt->event.procResult.rc = SMF_PROC_STEPUNDONE;
                 evt->event.procResult.procedure = m_proc;
                 SmfCampaignThread::instance()->send(evt);
         }
diff --git a/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.hh 
b/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.hh
--- a/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.hh
+++ b/osaf/services/saf/smfsv/smfd/SmfUpgradeProcedure.hh
@@ -29,6 +29,7 @@
 #include <vector>
 #include <list>
 #include <map>
+#include <mutex>
 
 #include <saSmf.h>
 #include <saImmOi.h>
@@ -791,7 +792,12 @@ class SmfSwapThread {
        ~SmfSwapThread();
        int start(void);
 
+       static bool running(void);
+       static void setProc(SmfUpgradeProcedure *);
+
  private:
+       static SmfSwapThread *me;
+       static std::mutex m_mutex;
 
        void main(void);
        int init(void);


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most 
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to