osaf/services/saf/amf/amfd/csi.cc       |  14 +++++++++++++-
 osaf/services/saf/amf/amfd/include/su.h |   2 +-
 osaf/services/saf/amf/amfd/sg.cc        |   6 +++++-
 osaf/services/saf/amf/amfd/su.cc        |   1 +
 4 files changed, 20 insertions(+), 3 deletions(-)


The problem happens if a CSI is deleted and the component delays its response
to the csi_remove_callback until after the SC comes back from headless state.
At the standby SU, this CSI has not been removed.

This is because the standby SU still sends assignment info as recovery data,
since the component in the active SU still has the csi_remove_callback pending.

Logically, amfnd should verify every CSI being sent to amfd as recovery data.
If a CSI has been deleted, amfnd would issue the remove callback and not send
the deleted CSI. However, verifying a CSI requires initializing an IMM handle,
which could cause amfnd to hang (if IMMND dies) and eventually cause a node
sync timeout.
The patch treats this scenario as a CSI inconsistency between amfd and amfnd;
thus the standby SU has its assignment removed (including the deleted CSI) and
is then re-assigned the standby assignment (excluding the deleted CSI).

diff --git a/osaf/services/saf/amf/amfd/csi.cc 
b/osaf/services/saf/amf/amfd/csi.cc
--- a/osaf/services/saf/amf/amfd/csi.cc
+++ b/osaf/services/saf/amf/amfd/csi.cc
@@ -1467,7 +1467,19 @@ SaAisErrorT avd_compcsi_recreate(AVSV_N2
 
        for (csicomp = info->csicomp_list; csicomp != nullptr; 
csicomp=csicomp->next) {
                csi = csi_db->find(Amf::to_string(&csicomp->safCSI));
-               osafassert(csi);
+               if (csi == nullptr) {
+                       // CSI may be not found in csi_db. csi is deleted that 
trigger
+                       // csi_remove_callback, but amf component hasn't 
responded to callback
+                       // and at the time cluster goes headless. Therefore, 
amfd gets csi
+                       // assignment for non-existed csi
+                       LOG_WA("CSI: %s not found in csi_db, it's currently 
assigned to comp: %s",
+                                       
Amf::to_string(&csicomp->safCSI).c_str(),
+                                       
Amf::to_string(&csicomp->safComp).c_str());
+                       comp = comp_db->find(Amf::to_string(&csicomp->safComp));
+                       osafassert(comp);
+                       comp->su->assignment_out_of_sync = true;
+                       continue;
+               }
 
                comp = comp_db->find(Amf::to_string(&csicomp->safComp));
                osafassert(comp);
diff --git a/osaf/services/saf/amf/amfd/include/su.h 
b/osaf/services/saf/amf/amfd/include/su.h
--- a/osaf/services/saf/amf/amfd/include/su.h
+++ b/osaf/services/saf/amf/amfd/include/su.h
@@ -94,7 +94,7 @@ class AVD_SU {
 
        AVD_SUTYPE *su_type;
        AVD_SU *su_list_su_type_next;
-
+       bool assignment_out_of_sync;
        void set_su_failover(bool value);
        void dec_curr_stdby_si();
        void inc_curr_stdby_si();
diff --git a/osaf/services/saf/amf/amfd/sg.cc b/osaf/services/saf/amf/amfd/sg.cc
--- a/osaf/services/saf/amf/amfd/sg.cc
+++ b/osaf/services/saf/amf/amfd/sg.cc
@@ -2116,7 +2116,11 @@ void AVD_SG::adjust_intermediate_sg(AVD_
                                su->su_on_node->saAmfNodeAdminState,
                                su->saAmfSUNumCurrActiveSIs,
                                su->saAmfSUNumCurrStandbySIs);
-
+               if (su->assignment_out_of_sync == true) {
+                       su_fault(cb, su);
+                       su->assignment_out_of_sync = false;
+                       continue;
+               }
                if (su->saAmfSUAdminState == SA_AMF_ADMIN_LOCKED ||
                        su->sg_of_su->saAmfSGAdminState == SA_AMF_ADMIN_LOCKED 
||
                        su->su_on_node->saAmfNodeAdminState == 
SA_AMF_ADMIN_LOCKED ||
diff --git a/osaf/services/saf/amf/amfd/su.cc b/osaf/services/saf/amf/amfd/su.cc
--- a/osaf/services/saf/amf/amfd/su.cc
+++ b/osaf/services/saf/amf/amfd/su.cc
@@ -64,6 +64,7 @@ void AVD_SU::initialize() {
        pend_cbk.invocation = 0;
        pend_cbk.admin_oper = (SaAmfAdminOperationIdT)0;
        surestart = false;
+       assignment_out_of_sync = false;
 }
 
 AVD_SU::AVD_SU() {

------------------------------------------------------------------------------
Site24x7 APM Insight: Get Deep Visibility into Application Performance
APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month
Monitor end-to-end web transactions and take corrective actions now
Troubleshoot faster and improve end-user experience. Signup Now!
http://pubads.g.doubleclick.net/gampad/clk?id=272487151&iu=/4140
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to