from:"praveen malviya"

Re: [devel] [PATCH 1/1] amfd: update SI assignment state when SU is added or removed [#2269]

2017-08-31 Thread praveen malviya


Ack, code review only.

Thanks
Praveen

On 01-Sep-17 9:24 AM, Gary Lee wrote:

---
  src/amf/amfd/sg.cc | 5 +
  src/amf/amfd/su.cc | 6 ++
  2 files changed, 11 insertions(+)

diff --git a/src/amf/amfd/sg.cc b/src/amf/amfd/sg.cc
index 8f3590190..986bb 100644
--- a/src/amf/amfd/sg.cc
+++ b/src/amf/amfd/sg.cc
@@ -1725,6 +1725,11 @@ void avd_sg_add_su(AVD_SU *su) {
  });
  
avd_verify_equal_ranked_su(su->sg_of_su);

+
+  // update any affected SI assignment state (if saAmfSGNumPrefAssignedSUs==0)
+  for (const auto &si : su->sg_of_su->list_of_si) {
+si->update_ass_state();
+  }
  }
  
  void avd_sg_constructor(void) {

diff --git a/src/amf/amfd/su.cc b/src/amf/amfd/su.cc
index b091a5bfb..3726a71fb 100644
--- a/src/amf/amfd/su.cc
+++ b/src/amf/amfd/su.cc
@@ -2103,6 +2103,12 @@ void su_ccb_apply_delete_hdlr(struct 
CcbUtilOperationData *opdata) {
} /*if (AVD_SG_FSM_STABLE == sg->sg_fsm_state) */
  
  done:

+
+  // update any affected SI assignment state (if saAmfSGNumPrefAssignedSUs==0)
+  for (const auto &si : sg->list_of_si) {
+si->update_ass_state();
+  }
+
TRACE_LEAVE();
  }
  



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] amfd: honor PrefAssignedSU in nway and nway active model during assignments [#2269]

2017-08-31 Thread praveen malviya


I got confused with previous release strategies.

Please push in release branch also.


Thanks
Praveen

On 31-Aug-17 11:48 AM, praveen malviya wrote:

I am not seeing any milestone for any maintenance/release branch.

There is only one milestone 5.17.10 which is for new release.





Thanks

Praveen



On 31-Aug-17 11:34 AM, Gary Lee wrote:


OK - thanks.







Should we push it to release as well?











On 31/08/17 15:30, praveen malviya wrote:



Hi Gary,







 ack for part2.



 I think I did not grep on saAmfSGNumPrefInserviceSUs in all files.



 I have pushed part1. Please push part2.







Thanks



Praveen







On 31-Aug-17 9:50 AM, Gary Lee wrote:



Hi Praveen







ack, but I think further changes are required (see attachment).







Do you think you could push it today?







Thanks



Gary







On 27/07/17 15:36, Praveen wrote:


SG attribute saAmfSGNumPrefAssignedSUs is applicable to N-Way and 



N-Way Active model.


AMF is assigning more than saAmfSGNumPrefAssignedSUs in both N-Way 



and N-Way Active model.







Patch fixes this problem.



---


  src/amf/amfd/sg.cc | 49 



--



  src/amf/amfd/sg.h  |  1 +


  src/amf/amfd/sg_nway_fsm.cc    | 39 
+



  src/amf/amfd/sg_nwayact_fsm.cc | 29 -



  4 files changed, 87 insertions(+), 31 deletions(-)







diff --git a/src/amf/amfd/sg.cc b/src/amf/amfd/sg.cc



index 7bdf52a..8f35901 100644



--- a/src/amf/amfd/sg.cc



+++ b/src/amf/amfd/sg.cc



@@ -98,7 +98,7 @@ AVD_SG::AVD_SG()



    saAmfSGAutoAdjust(SA_FALSE),



    saAmfSGNumPrefActiveSUs(0),



    saAmfSGNumPrefStandbySUs(0),



-  saAmfSGNumPrefInserviceSUs(~0),



+  saAmfSGNumPrefInserviceSUs(0),



    saAmfSGNumPrefAssignedSUs(0),



    saAmfSGMaxActiveSIsperSU(0),



    saAmfSGMaxStandbySIsperSU(0),


@@ -978,18 +978,18 @@ static void 



ccb_apply_modify_hdlr(CcbUtilOperationData_t *opdata) {



    sg->saAmfSGNumPrefStandbySUs);


    } else if (!strcmp(attribute->attrName, 



"saAmfSGNumPrefInserviceSUs")) {



  if (value_is_deleted)



-  sg->saAmfSGNumPrefInserviceSUs = ~0;


+  sg->saAmfSGNumPrefInserviceSUs = 0; //default value for 



internal use.



  else



    sg->saAmfSGNumPrefInserviceSUs = *((SaUint32T *)value);



  TRACE("Modified saAmfSGNumPrefInserviceSUs is '%u'",



-  sg->saAmfSGNumPrefInserviceSUs);



+  sg->pref_inservice_sus());


    } else if (!strcmp(attribute->attrName, 



"saAmfSGNumPrefAssignedSUs")) {



  if (value_is_deleted)


-  sg->saAmfSGNumPrefAssignedSUs = 



sg->saAmfSGNumPrefInserviceSUs;


+  sg->saAmfSGNumPrefAssignedSUs = 0; //default value for 



internal use.



  else



    sg->saAmfSGNumPrefAssignedSUs = *((SaUint32T *)value);



  TRACE("Modified saAmfSGNumPrefAssignedSUs is '%u'",



-  sg->saAmfSGNumPrefAssignedSUs);



+  sg->pref_assigned_sus());


    } else if (!strcmp(attribute->attrName, 



"saAmfSGMaxActiveSIsperSU")) {



  if (value_is_deleted)



    sg->saAmfSGMaxActiveSIsperSU = -1;


@@ -1091,11 +1091,11 @@ static void 



ccb_apply_modify_hdlr(CcbUtilOperationData_t *opdata) {


    if (!strcmp(attribute->attrName, 



"saAmfSGNumPrefInserviceSUs")) {



  if (value_is_deleted)



-  sg->saAmfSGNumPrefInserviceSUs = ~0;



+  sg->saAmfSGNumPrefInserviceSUs = 0;



  else



    sg->saAmfSGNumPrefInserviceSUs = *((SaUint32T *)value);



  TRACE("Modified saAmfSGNumPrefInserviceSUs is '%u'",



-  sg->saAmfSGNumPrefInserviceSUs);



+  sg->pref_inservice_sus());



  if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) {


    if (avd_sg_app_su_inst_func(avd_cb, sg) != 



NCSCC_RC_SUCCESS) {


@@ -1256,7 +1256,7 @@ static void 



sg_app_sg_admin_unlock_inst(AVD_CL_CB *cb, AVD_SG *sg) {


  (su->saAmfSUPresenceState == 



SA_AMF_PRESENCE_UNINSTANTIATED)) {



    if (su->saAmfSUPreInstantiable == true) {



  if (su->su_on_node->node_state == AVD_AVND_STATE_PRESENT) {


-  if (su->sg_of_su->saAmfSGNumPrefInserviceSUs > 



su_try_inst) {



+  if (su->sg_of_su->pref_inservice_sus() > su_try_inst) {


  if (avd_snd_presence_msg(cb, su, false) != 



NCSCC_RC_SUCCESS) {


    LOG_NO("%s: Failed to send Instantiation order of 



'%s' to %x",



   __FUNCTION__, su->name.c_str(),



@@ -1944,19 +1944,6 @@ void avd_sg_adjust_config(AVD_SG *sg) {



    }



  }



    }



-


-  /* adj

Re: [devel] [PATCH 1/1] amfd: honor PrefAssignedSU in nway and nway active model during assignments [#2269]

2017-08-31 Thread praveen malviya


I am not seeing any milestone for any maintenance/release branch.
There is only one milestone 5.17.10 which is for new release.


Thanks
Praveen

On 31-Aug-17 11:34 AM, Gary Lee wrote:

OK - thanks.

Should we push it to release as well?


On 31/08/17 15:30, praveen malviya wrote:

Hi Gary,

 ack for part2.
 I think I did not grep on saAmfSGNumPrefInserviceSUs in all files.
 I have pushed part1. Please push part2.

Thanks
Praveen

On 31-Aug-17 9:50 AM, Gary Lee wrote:

Hi Praveen

ack, but I think further changes are required (see attachment).

Do you think you could push it today?

Thanks
Gary

On 27/07/17 15:36, Praveen wrote:
SG attribute saAmfSGNumPrefAssignedSUs is applicable to N-Way and 
N-Way Active model.
AMF is assigning more than saAmfSGNumPrefAssignedSUs in both N-Way 
and N-Way Active model.


Patch fixes this problem.
---
  src/amf/amfd/sg.cc | 49 
--

  src/amf/amfd/sg.h  |  1 +
  src/amf/amfd/sg_nway_fsm.cc    | 39 +
  src/amf/amfd/sg_nwayact_fsm.cc | 29 -
  4 files changed, 87 insertions(+), 31 deletions(-)

diff --git a/src/amf/amfd/sg.cc b/src/amf/amfd/sg.cc
index 7bdf52a..8f35901 100644
--- a/src/amf/amfd/sg.cc
+++ b/src/amf/amfd/sg.cc
@@ -98,7 +98,7 @@ AVD_SG::AVD_SG()
    saAmfSGAutoAdjust(SA_FALSE),
    saAmfSGNumPrefActiveSUs(0),
    saAmfSGNumPrefStandbySUs(0),
-  saAmfSGNumPrefInserviceSUs(~0),
+  saAmfSGNumPrefInserviceSUs(0),
    saAmfSGNumPrefAssignedSUs(0),
    saAmfSGMaxActiveSIsperSU(0),
    saAmfSGMaxStandbySIsperSU(0),
@@ -978,18 +978,18 @@ static void 
ccb_apply_modify_hdlr(CcbUtilOperationData_t *opdata) {

    sg->saAmfSGNumPrefStandbySUs);
    } else if (!strcmp(attribute->attrName, 
"saAmfSGNumPrefInserviceSUs")) {

  if (value_is_deleted)
-  sg->saAmfSGNumPrefInserviceSUs = ~0;
+  sg->saAmfSGNumPrefInserviceSUs = 0; //default value for 
internal use.

  else
    sg->saAmfSGNumPrefInserviceSUs = *((SaUint32T *)value);
  TRACE("Modified saAmfSGNumPrefInserviceSUs is '%u'",
-  sg->saAmfSGNumPrefInserviceSUs);
+  sg->pref_inservice_sus());
    } else if (!strcmp(attribute->attrName, 
"saAmfSGNumPrefAssignedSUs")) {

  if (value_is_deleted)
-  sg->saAmfSGNumPrefAssignedSUs = 
sg->saAmfSGNumPrefInserviceSUs;
+  sg->saAmfSGNumPrefAssignedSUs = 0; //default value for 
internal use.

  else
    sg->saAmfSGNumPrefAssignedSUs = *((SaUint32T *)value);
  TRACE("Modified saAmfSGNumPrefAssignedSUs is '%u'",
-  sg->saAmfSGNumPrefAssignedSUs);
+  sg->pref_assigned_sus());
    } else if (!strcmp(attribute->attrName, 
"saAmfSGMaxActiveSIsperSU")) {

  if (value_is_deleted)
    sg->saAmfSGMaxActiveSIsperSU = -1;
@@ -1091,11 +1091,11 @@ static void 
ccb_apply_modify_hdlr(CcbUtilOperationData_t *opdata) {
    if (!strcmp(attribute->attrName, 
"saAmfSGNumPrefInserviceSUs")) {

  if (value_is_deleted)
-  sg->saAmfSGNumPrefInserviceSUs = ~0;
+  sg->saAmfSGNumPrefInserviceSUs = 0;
  else
    sg->saAmfSGNumPrefInserviceSUs = *((SaUint32T *)value);
  TRACE("Modified saAmfSGNumPrefInserviceSUs is '%u'",
-  sg->saAmfSGNumPrefInserviceSUs);
+  sg->pref_inservice_sus());
  if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) {
    if (avd_sg_app_su_inst_func(avd_cb, sg) != 
NCSCC_RC_SUCCESS) {
@@ -1256,7 +1256,7 @@ static void 
sg_app_sg_admin_unlock_inst(AVD_CL_CB *cb, AVD_SG *sg) {
  (su->saAmfSUPresenceState == 
SA_AMF_PRESENCE_UNINSTANTIATED)) {

    if (su->saAmfSUPreInstantiable == true) {
  if (su->su_on_node->node_state == AVD_AVND_STATE_PRESENT) {
-  if (su->sg_of_su->saAmfSGNumPrefInserviceSUs > 
su_try_inst) {

+  if (su->sg_of_su->pref_inservice_sus() > su_try_inst) {
  if (avd_snd_presence_msg(cb, su, false) != 
NCSCC_RC_SUCCESS) {
    LOG_NO("%s: Failed to send Instantiation order of 
'%s' to %x",

   __FUNCTION__, su->name.c_str(),
@@ -1944,19 +1944,6 @@ void avd_sg_adjust_config(AVD_SG *sg) {
    }
  }
    }
-
-  /* adjust saAmfSGNumPrefAssignedSUs if not configured, only 
applicable for

-   * the N-way and N-way active redundancy models
-   */
-  if ((sg->saAmfSGNumPrefAssignedSUs == 0) &&
-  ((sg->sg_type->saAmfSgtRedundancyModel ==
-    SA_AMF_N_WAY_REDUNDANCY_MODEL) ||
-   (sg->sg_type->saAmfSgtRedundancyModel ==
-    SA_AMF_N_WAY_ACTIVE_REDUNDANCY_MODEL))) {
-    sg->saAmfSGNumPrefAssignedSUs = sg->saAmfSGNumPrefInserviceSUs;
-    LOG_NO("'%s

Re: [devel] [PATCH 1/1] amfd: honor PrefAssignedSU in nway and nway active model during assignments [#2269]

2017-08-30 Thread praveen malviya


Hi Gary,

 ack for part2.
 I think I did not grep on saAmfSGNumPrefInserviceSUs in all files.
 I have pushed part1. Please push part2.

Thanks
Praveen

On 31-Aug-17 9:50 AM, Gary Lee wrote:

Hi Praveen

ack, but I think further changes are required (see attachment).

Do you think you could push it today?

Thanks
Gary

On 27/07/17 15:36, Praveen wrote:
SG attribute saAmfSGNumPrefAssignedSUs is applicable to N-Way and 
N-Way Active model.
AMF is assigning more than saAmfSGNumPrefAssignedSUs in both N-Way and 
N-Way Active model.


Patch fixes this problem.
---
  src/amf/amfd/sg.cc | 49 
--

  src/amf/amfd/sg.h  |  1 +
  src/amf/amfd/sg_nway_fsm.cc    | 39 +
  src/amf/amfd/sg_nwayact_fsm.cc | 29 -
  4 files changed, 87 insertions(+), 31 deletions(-)

diff --git a/src/amf/amfd/sg.cc b/src/amf/amfd/sg.cc
index 7bdf52a..8f35901 100644
--- a/src/amf/amfd/sg.cc
+++ b/src/amf/amfd/sg.cc
@@ -98,7 +98,7 @@ AVD_SG::AVD_SG()
    saAmfSGAutoAdjust(SA_FALSE),
    saAmfSGNumPrefActiveSUs(0),
    saAmfSGNumPrefStandbySUs(0),
-  saAmfSGNumPrefInserviceSUs(~0),
+  saAmfSGNumPrefInserviceSUs(0),
    saAmfSGNumPrefAssignedSUs(0),
    saAmfSGMaxActiveSIsperSU(0),
    saAmfSGMaxStandbySIsperSU(0),
@@ -978,18 +978,18 @@ static void 
ccb_apply_modify_hdlr(CcbUtilOperationData_t *opdata) {

    sg->saAmfSGNumPrefStandbySUs);
    } else if (!strcmp(attribute->attrName, 
"saAmfSGNumPrefInserviceSUs")) {

  if (value_is_deleted)
-  sg->saAmfSGNumPrefInserviceSUs = ~0;
+  sg->saAmfSGNumPrefInserviceSUs = 0; //default value for 
internal use.

  else
    sg->saAmfSGNumPrefInserviceSUs = *((SaUint32T *)value);
  TRACE("Modified saAmfSGNumPrefInserviceSUs is '%u'",
-  sg->saAmfSGNumPrefInserviceSUs);
+  sg->pref_inservice_sus());
    } else if (!strcmp(attribute->attrName, 
"saAmfSGNumPrefAssignedSUs")) {

  if (value_is_deleted)
-  sg->saAmfSGNumPrefAssignedSUs = 
sg->saAmfSGNumPrefInserviceSUs;
+  sg->saAmfSGNumPrefAssignedSUs = 0; //default value for 
internal use.

  else
    sg->saAmfSGNumPrefAssignedSUs = *((SaUint32T *)value);
  TRACE("Modified saAmfSGNumPrefAssignedSUs is '%u'",
-  sg->saAmfSGNumPrefAssignedSUs);
+  sg->pref_assigned_sus());
    } else if (!strcmp(attribute->attrName, 
"saAmfSGMaxActiveSIsperSU")) {

  if (value_is_deleted)
    sg->saAmfSGMaxActiveSIsperSU = -1;
@@ -1091,11 +1091,11 @@ static void 
ccb_apply_modify_hdlr(CcbUtilOperationData_t *opdata) {

    if (!strcmp(attribute->attrName, "saAmfSGNumPrefInserviceSUs")) {
  if (value_is_deleted)
-  sg->saAmfSGNumPrefInserviceSUs = ~0;
+  sg->saAmfSGNumPrefInserviceSUs = 0;
  else
    sg->saAmfSGNumPrefInserviceSUs = *((SaUint32T *)value);
  TRACE("Modified saAmfSGNumPrefInserviceSUs is '%u'",
-  sg->saAmfSGNumPrefInserviceSUs);
+  sg->pref_inservice_sus());
  if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) {
    if (avd_sg_app_su_inst_func(avd_cb, sg) != 
NCSCC_RC_SUCCESS) {
@@ -1256,7 +1256,7 @@ static void 
sg_app_sg_admin_unlock_inst(AVD_CL_CB *cb, AVD_SG *sg) {

  (su->saAmfSUPresenceState == SA_AMF_PRESENCE_UNINSTANTIATED)) {
    if (su->saAmfSUPreInstantiable == true) {
  if (su->su_on_node->node_state == AVD_AVND_STATE_PRESENT) {
-  if (su->sg_of_su->saAmfSGNumPrefInserviceSUs > su_try_inst) {
+  if (su->sg_of_su->pref_inservice_sus() > su_try_inst) {
  if (avd_snd_presence_msg(cb, su, false) != 
NCSCC_RC_SUCCESS) {
    LOG_NO("%s: Failed to send Instantiation order of '%s' 
to %x",

   __FUNCTION__, su->name.c_str(),
@@ -1944,19 +1944,6 @@ void avd_sg_adjust_config(AVD_SG *sg) {
    }
  }
    }
-
-  /* adjust saAmfSGNumPrefAssignedSUs if not configured, only 
applicable for

-   * the N-way and N-way active redundancy models
-   */
-  if ((sg->saAmfSGNumPrefAssignedSUs == 0) &&
-  ((sg->sg_type->saAmfSgtRedundancyModel ==
-    SA_AMF_N_WAY_REDUNDANCY_MODEL) ||
-   (sg->sg_type->saAmfSgtRedundancyModel ==
-    SA_AMF_N_WAY_ACTIVE_REDUNDANCY_MODEL))) {
-    sg->saAmfSGNumPrefAssignedSUs = sg->saAmfSGNumPrefInserviceSUs;
-    LOG_NO("'%s' saAmfSGNumPrefAssignedSUs adjusted to %u", 
sg->name.c_str(),

-   sg->saAmfSGNumPrefAssignedSUs);
-  }
  }
  /**
@@ -1972,7 +1959,7 @@ uint32_t sg_instantiated_su_count(const AVD_SG 
*sg) {

    for (const auto &su : sg->list_of_su) {
  TRACE_1("su'%s', pres state'%u', in_serv'%u', PrefIn'%u'", 
su->name.c_str(),

  su->saAmfSUPresenceState, su->saAmfSuReadinessState,
-    sg->saAmfSGNumPrefInserviceSUs);
+    sg->pref_inservice_sus());
  if

Re: [devel] [PATCH 1/1] amfd: postpone deletion of node from node_id_db [#2547]

2017-08-28 Thread praveen malviya


Ack, code review only.

Thanks,
Praveen


On 14-Aug-17 9:05 AM, Gary Lee wrote:

CLM and MDS callbacks are delivered to the main thread via different paths.
If a node is restarted quickly, sometimes CLM JOIN is processed before the
prior MDS down. This means the node will not be able to join the cluster
as it is not in node_id_db (deleted in MDS down processing).

This patch ensures addition to, and removal from node_id_db is only done
from CLM callbacks to avoid race conditions such as above.
---
  src/amf/amfd/clm.cc| 10 --
  src/amf/amfd/ndfsm.cc  |  1 +
  src/amf/amfd/ndproc.cc |  2 +-
  src/amf/amfd/node.cc   |  1 +
  src/amf/amfd/node.h|  1 +
  5 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/amf/amfd/clm.cc b/src/amf/amfd/clm.cc
index da951d223..b2133b57e 100644
--- a/src/amf/amfd/clm.cc
+++ b/src/amf/amfd/clm.cc
@@ -203,6 +203,7 @@ static void clm_node_exit_complete(SaClmNodeIdT nodeId) {
}
  
avd_node_failover(node);

+  avd_node_delete_nodeid(node);
m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, node, AVSV_CKPT_AVD_NODE_CONFIG);
node->clm_change_start_preceded = false;
  
@@ -246,7 +247,7 @@ static void clm_track_cb(

case SA_CLM_CHANGE_VALIDATE:
  if (notifItem->clusterChange == SA_CLM_NODE_LEFT) {
node = avd_node_find_nodeid(notifItem->clusterNode.nodeId);
-  if (node == nullptr) {
+  if (node == nullptr || node->node_up == false) {
  LOG_IN("%s: CLM node '%s' is not an AMF cluster member",
 __FUNCTION__, node_name.c_str());
  goto done;
@@ -262,7 +263,7 @@ static void clm_track_cb(
  
case SA_CLM_CHANGE_START:

  node = avd_node_find_nodeid(notifItem->clusterNode.nodeId);
-if (node == nullptr) {
+if (node == nullptr || node->node_up == false) {
LOG_IN("%s: CLM node '%s' is not an AMF cluster member", 
__FUNCTION__,
   node_name.c_str());
goto done;
@@ -293,6 +294,11 @@ static void clm_track_cb(
  LOG_IN("%s: CLM node '%s' is not an AMF cluster member",
 __FUNCTION__, node_name.c_str());
  goto done;
+  } else if (node->node_up == false) {
+LOG_IN("%s: CLM node '%s' is not an AMF cluster member; MDS down 
received",
+   __FUNCTION__, node_name.c_str());
+avd_node_delete_nodeid(node);
+goto done;
}
TRACE(" Node Left: rootCauseEntity %s for node %u",
  osaf_extended_name_borrow(rootCauseEntity),
diff --git a/src/amf/amfd/ndfsm.cc b/src/amf/amfd/ndfsm.cc
index ca2e3f698..223f57f20 100644
--- a/src/amf/amfd/ndfsm.cc
+++ b/src/amf/amfd/ndfsm.cc
@@ -247,6 +247,7 @@ void record_node_up_msg_info(AVD_AVND *avnd, const 
AVD_DND_MSG *n2d_msg) {
osafassert(avnd != nullptr);
  
avnd->adest = n2d_msg->msg_info.n2d_node_up.adest_address;

+  avnd->node_up = true;
  
if (n2d_msg->msg_info.n2d_node_up.msg_id >= avnd->rcv_msg_id) {

  LOG_NO("Received node_up from %x: msg_id %u",
diff --git a/src/amf/amfd/ndproc.cc b/src/amf/amfd/ndproc.cc
index e80a0b3b8..2edb9b16e 100644
--- a/src/amf/amfd/ndproc.cc
+++ b/src/amf/amfd/ndproc.cc
@@ -1221,6 +1221,6 @@ void avd_node_failover(AVD_AVND *node) {
avd_pg_node_csi_del_all(avd_cb, node);
avd_node_down_mw_susi_failover(avd_cb, node);
avd_node_down_appl_susi_failover(avd_cb, node);
-  avd_node_delete_nodeid(node);
+  node->node_up = false; // postpone deletion from node_id_db
TRACE_LEAVE();
  }
diff --git a/src/amf/amfd/node.cc b/src/amf/amfd/node.cc
index 37f6ee389..8390515b4 100644
--- a/src/amf/amfd/node.cc
+++ b/src/amf/amfd/node.cc
@@ -120,6 +120,7 @@ void AVD_AVND::initialize() {
clm_change_start_preceded = {};
recvr_fail_sw = {};
admin_ng = {};
+  node_up = false;
  }
  
  //

diff --git a/src/amf/amfd/node.h b/src/amf/amfd/node.h
index e64bf8c93..4cee956cc 100644
--- a/src/amf/amfd/node.h
+++ b/src/amf/amfd/node.h
@@ -148,6 +148,7 @@ class AVD_AVND {
bool is_campaign_set_for_all_sus() const;
// Member functions.
void node_sus_termstate_set(bool term_state) const;
+  bool node_up; // true if MDS is up, false if MDS is down
  
   private:

void initialize();



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] amfnd: convert dnd_list to a vector [#1945]

2017-08-17 Thread praveen malviya


Hi Gary,

Please find one comment inline with [Praveen].

Thanks
Praveen

On 05-Jul-17 2:15 PM, Gary Lee wrote:

---
  src/amf/amfnd/avnd_cb.h  |   3 +-
  src/amf/amfnd/avnd_di.h  |  36 --
  src/amf/amfnd/avnd_mds.h |  11 +-
  src/amf/amfnd/di.cc  | 321 +--
  src/amf/amfnd/proxy.cc   |  19 ++-
  src/amf/amfnd/verify.cc  |  14 +--
  6 files changed, 167 insertions(+), 237 deletions(-)

diff --git a/src/amf/amfnd/avnd_cb.h b/src/amf/amfnd/avnd_cb.h
index 2d642c752..ff21e3108 100644
--- a/src/amf/amfnd/avnd_cb.h
+++ b/src/amf/amfnd/avnd_cb.h
@@ -33,6 +33,7 @@
  #ifndef AMF_AMFND_AVND_CB_H_
  #define AMF_AMFND_AVND_CB_H_
  #include 
+#include <vector>
  
  typedef struct avnd_cb_tag {

SYSF_MBX mbx; /* mailbox on which AvND waits */
@@ -99,7 +100,7 @@ typedef struct avnd_cb_tag {
 * Messages are removed when acked with the ACK message.
 * At director failover the list is scanned handling the
 * VERIFY message from the director and possibly resent again */
-  AVND_DND_LIST dnd_list;
+  std::vector dnd_list;
  
AVND_TERM_STATE term_state;

AVND_LED_STATE led_state;
diff --git a/src/amf/amfnd/avnd_di.h b/src/amf/amfnd/avnd_di.h
index d7ccd68fd..9870ad774 100644
--- a/src/amf/amfnd/avnd_di.h
+++ b/src/amf/amfnd/avnd_di.h
@@ -33,42 +33,6 @@
  
  #include "amf/common/amf_si_assign.h"
  
-/* macro to find the matching record (based on the msg-id) */

-/*
- * Caution!!! It is assumed that the msg-id is the 1st element in the message
- * structure. Ensure it. Else move the msg id to the common portion of the
- * message structure (outside the msg type specific contents).
- */
-#define m_AVND_DIQ_REC_FIND(cb, mid, o_rec)   \
-  {   \
-AVND_DND_LIST *list = &((cb)->dnd_list);  \
-for ((o_rec) = list->head;\
- (o_rec) &&   \
- !(*((uint32_t *)(&((o_rec)->msg.info.avd->msg_info))) == (mid)); \
- (o_rec) = (o_rec)->next) \
-  ;   \
-  }
-
-/* macro to find & pop a given record */
-#define m_AVND_DIQ_REC_FIND_POP(cb, rec) \
-  {  \
-AVND_DND_LIST *list = &((cb)->dnd_list); \
-AVND_DND_MSG_LIST *prv = list->head, *curr;  \
-for (curr = list->head; curr && !(curr == (rec));\
- prv = curr, curr = curr->next)  \
-  ;  \
-if (curr) {  \
-  if (curr == list->head) {  \
-list->head = curr->next; \
-if (list->tail == curr) list->tail = list->head; \
-  } else {   \
-prv->next = curr->next;  \
-if (list->tail == curr) list->tail = prv;\
-  }  \
-  curr->next = 0;\
-}\
-  }
-
  struct avnd_cb_tag;
  
  uint32_t avnd_di_oper_send(struct avnd_cb_tag *, const AVND_SU *, uint32_t);

diff --git a/src/amf/amfnd/avnd_mds.h b/src/amf/amfnd/avnd_mds.h
index 70173acaa..8c81f7bb1 100644
--- a/src/amf/amfnd/avnd_mds.h
+++ b/src/amf/amfnd/avnd_mds.h
@@ -63,18 +63,13 @@ typedef struct avnd_msg {
} info;
  } AVND_MSG;
  
-typedef struct avnd_dnd_msg_list_tag {

+class AVND_DND_MSG_LIST {
+public:
AVND_MSG msg;
AVND_TMR resp_tmr;
uint32_t opq_hdl;
uint16_t no_retries;
-  struct avnd_dnd_msg_list_tag *next;
-} AVND_DND_MSG_LIST;
-
-typedef struct avnd_dnd_list_tag {
-  AVND_DND_MSG_LIST *head;
-  AVND_DND_MSG_LIST *tail;
-} AVND_DND_LIST;
+};
  
  /*

   Macros to fill the MDS message structure
diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
index 6f0a76cda..93350e62a 100644
--- a/src/amf/amfnd/di.cc
+++ b/src/amf/amfnd/di.cc
@@ -39,30 +39,7 @@
  
  #include "base/logtrace.h"

  #include "amf/amfnd/avnd.h"
-
-/* macro to push the AvD msg parameters (to the end of the list) */
-#define m_AVND_DIQ_REC_PUSH(cb, rec) \
-  {  \
-AVND_DND_LIST *list = &((cb)->dnd_list); \
-if (!(list->head))   \
-  list->head = (rec);\
-else \
-  list->tail->next = (rec);  \
-list->tail = (rec);  \
-  }
-
-/* macro to pop the record (from the beginning of the list) */
-#define m_AVND_DIQ_REC_POP(cb, o_rec)

Re: [devel] [PATCH 1/1] amfnd: convert dnd_list to a vector [#1945]

2017-08-16 Thread praveen malviya


Hi Gary,

I have started reviewing it.


Thanks
Praveen

On 15-Aug-17 7:02 AM, Gary Lee wrote:

Hi

I would like to push this in a week's time if no one has comments.

Thanks

Gary


On 05/07/17 18:45, Gary Lee wrote:

---
  src/amf/amfnd/avnd_cb.h  |   3 +-
  src/amf/amfnd/avnd_di.h  |  36 --
  src/amf/amfnd/avnd_mds.h |  11 +-
  src/amf/amfnd/di.cc  | 321 
+--

  src/amf/amfnd/proxy.cc   |  19 ++-
  src/amf/amfnd/verify.cc  |  14 +--
  6 files changed, 167 insertions(+), 237 deletions(-)

diff --git a/src/amf/amfnd/avnd_cb.h b/src/amf/amfnd/avnd_cb.h
index 2d642c752..ff21e3108 100644
--- a/src/amf/amfnd/avnd_cb.h
+++ b/src/amf/amfnd/avnd_cb.h
@@ -33,6 +33,7 @@
  #ifndef AMF_AMFND_AVND_CB_H_
  #define AMF_AMFND_AVND_CB_H_
  #include 
+#include <vector>
  typedef struct avnd_cb_tag {
SYSF_MBX mbx; /* mailbox on which AvND waits */
@@ -99,7 +100,7 @@ typedef struct avnd_cb_tag {
 * Messages are removed when acked with the ACK message.
 * At director failover the list is scanned handling the
 * VERIFY message from the director and possibly resent again */
-  AVND_DND_LIST dnd_list;
+  std::vector dnd_list;
AVND_TERM_STATE term_state;
AVND_LED_STATE led_state;
diff --git a/src/amf/amfnd/avnd_di.h b/src/amf/amfnd/avnd_di.h
index d7ccd68fd..9870ad774 100644
--- a/src/amf/amfnd/avnd_di.h
+++ b/src/amf/amfnd/avnd_di.h
@@ -33,42 +33,6 @@
  #include "amf/common/amf_si_assign.h"
-/* macro to find the matching record (based on the msg-id) */
-/*
- * Caution!!! It is assumed that the msg-id is the 1st element in the 
message
- * structure. Ensure it. Else move the msg id to the common portion 
of the

- * message structure (outside the msg type specific contents).
- */
-#define m_AVND_DIQ_REC_FIND(cb, mid, 
o_rec)   \
-  
{   \
-AVND_DND_LIST *list = 
&((cb)->dnd_list);  \
-for ((o_rec) = 
list->head;\
- (o_rec) 
&&   \
- !(*((uint32_t *)(&((o_rec)->msg.info.avd->msg_info))) == 
(mid)); \
- (o_rec) = 
(o_rec)->next) \
-  
;   \

-  }
-
-/* macro to find & pop a given record */
-#define m_AVND_DIQ_REC_FIND_POP(cb, rec) \
-  {  \
-AVND_DND_LIST *list = &((cb)->dnd_list); \
-AVND_DND_MSG_LIST *prv = list->head, *curr;  \
-for (curr = list->head; curr && !(curr == (rec));\
- prv = curr, curr = curr->next)  \
-  ;  \
-if (curr) {  \
-  if (curr == list->head) {  \
-list->head = curr->next; \
-if (list->tail == curr) list->tail = list->head; \
-  } else {   \
-prv->next = curr->next;  \
-if (list->tail == curr) list->tail = prv;\
-  }  \
-  curr->next = 0;\
-}\
-  }
-
  struct avnd_cb_tag;
  uint32_t avnd_di_oper_send(struct avnd_cb_tag *, const AVND_SU *, 
uint32_t);

diff --git a/src/amf/amfnd/avnd_mds.h b/src/amf/amfnd/avnd_mds.h
index 70173acaa..8c81f7bb1 100644
--- a/src/amf/amfnd/avnd_mds.h
+++ b/src/amf/amfnd/avnd_mds.h
@@ -63,18 +63,13 @@ typedef struct avnd_msg {
} info;
  } AVND_MSG;
-typedef struct avnd_dnd_msg_list_tag {
+class AVND_DND_MSG_LIST {
+public:
AVND_MSG msg;
AVND_TMR resp_tmr;
uint32_t opq_hdl;
uint16_t no_retries;
-  struct avnd_dnd_msg_list_tag *next;
-} AVND_DND_MSG_LIST;
-
-typedef struct avnd_dnd_list_tag {
-  AVND_DND_MSG_LIST *head;
-  AVND_DND_MSG_LIST *tail;
-} AVND_DND_LIST;
+};
  
/* 


   Macros to fill the MDS message structure
diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
index 6f0a76cda..93350e62a 100644
--- a/src/amf/amfnd/di.cc
+++ b/src/amf/amfnd/di.cc
@@ -39,30 +39,7 @@
  #include "base/logtrace.h"
  #include "amf/amfnd/avnd.h"
-
-/* macro to push the AvD msg parameters (to the end of the list) */
-#define m_AVND_DIQ_REC_PUSH(cb, rec) \
-  {  \
-AVND_DND_LIST *list = &((cb)->dnd_list); \
-if (!(list->head))   \
-  list->head = (rec);\
-else \
-  list->tail->next = (rec);  \
-list->tail = (rec);  \
-  }
-
-/* macro to pop

Re: [devel] Review Request for amf: update PR doc compliance report for saAmfComponentErrorClear_4() [#2540]

2017-08-16 Thread praveen malviya


Ack.

Thanks
Praveen

On 16-Aug-17 1:56 PM, Nguyen Luu wrote:
Summary: amf: update PR doc compliance report for 
saAmfComponentErrorClear_4() [#2540]

Review request for Trac Ticket(s): #2540
Peer Reviewer(s): AMF devs
Pull request to: AMF maintainers
Affected branch(es): default
Development branch: default


Impacted area   Impact y/n

   Docs                    y
   Build system            n
   RPM/packaging           n
   Configuration files     n
   Startup scripts         n
   SAF services            n
   OpenSAF services        n
   Core libraries          n
   Samples                 n
   Tests                   n
   Other                   n


Comments (indicate scope for each "y" above):
-
amf: update PR doc compliance report for saAmfComponentErrorClear_4() 
[#2540]



Conditions of Submission:
-
Ack from reviewers


Arch        Built   Started   Linux distro
-------------------------------------------
mips        n       n
mips64      n       n
x86         n       n
x86_64      n       n
powerpc     n       n
powerpc64   n       n


Reviewer Checklist:
---
[Submitters: make sure that your review doesn't trigger any checkmarks!]


Your checkin has not passed review because (see checked entries):

___ Your RR template is generally incomplete; it has too many blank entries
  that need proper data filled in.

___ You have failed to nominate the proper persons for review and push.

___ Your patches do not have proper short+long header

___ You have grammar/spelling in your header that is unacceptable.

___ You have exceeded a sensible line length in your headers/comments/text.

___ You have failed to put in a proper Trac Ticket # into your commits.

___ You have incorrectly put/left internal data in your comments/files
  (i.e. internal bug tracking tool IDs, product names etc)

___ You have not given any evidence of testing beyond basic build tests.
  Demonstrate some level of runtime or other sanity testing.

___ You have ^M present in some of your files. These have to be removed.

___ You have needlessly changed whitespace or added whitespace crimes
  like trailing spaces, or spaces before tabs.

___ You have mixed real technical changes with whitespace and other
  cosmetic code cleanup changes. These have to be separate commits.

___ You need to refactor your submission into logical chunks; there is
  too much content into a single commit.

___ You have extraneous garbage in your review (merge commits etc)

___ You have giant attachments which should never have been sent;
  Instead you should place your content in a public tree to be pulled.

___ You have too many commits attached to an e-mail; resend as threaded
  commits, or place in a public tree for a pull.

___ You have resent this content multiple times without a clear indication
  of what has changed between each re-send.

___ You have failed to adequately and individually address all of the
  comments and change requests that were proposed in the initial 
review.


___ You have a misconfigured ~/.hgrc file (i.e. username, email etc)

___ Your computer has a badly configured date and time, confusing the
  threaded patch review.

___ Your changes affect IPC mechanism, and you don't present any results
  for in-service upgradability test.

___ Your changes affect user manual and documentation, your patch series
  do not contain the patch that updates the Doxygen manual


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org!http://sdm.link/slashdot


___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] amfa: Fix saAmfComponentErrorClear_4 to return ERR_NOT_EXIST for non-exist comp [#2540]

2017-08-16 Thread praveen malviya


Ack, code review only.

Thanks
Praveen

On 16-Aug-17 1:44 PM, Nguyen Luu wrote:

When called with a non-existing component name, saAmfComponentErrorClear_4
should return SA_AIS_ERR_NOT_EXIST instead of SA_AIS_ERR_BAD_OPERATION
as previously done.
---
  src/amf/amfnd/err.cc | 17 +++--
  1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/amf/amfnd/err.cc b/src/amf/amfnd/err.cc
index e4cb9f0..a0529b9 100644
--- a/src/amf/amfnd/err.cc
+++ b/src/amf/amfnd/err.cc
@@ -2,6 +2,7 @@
   *
   * (C) Copyright 2008 The OpenSAF Foundation
   * Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (C) 2017, Ericsson AB. All rights reserved.
   *
   * This program is distributed in the hope that it will be useful, but
   * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -13,6 +14,7 @@
   * licensing terms.
   *
   * Author(s): Emerson Network Power
+ *Ericsson
   *
   */
  
@@ -164,11 +166,12 @@ uint32_t avnd_evt_ava_err_rep_evh(AVND_CB *cb, AVND_EVT *evt) {

  }
}
  
-  /* get the comp */

+  /* check if component exists on local AvND node */
comp = avnd_compdb_rec_get(cb->compdb, Amf::to_string(&err_rep->err_comp));
-  /* determine the error code, if any */
if (!comp) amf_rc = SA_AIS_ERR_NOT_EXIST;
  
+  /* determine other error codes, if any */

+
/* We need not entertain errors when comp is not in shape */
if (comp && (m_AVND_COMP_PRES_STATE_IS_UNINSTANTIATED(comp) ||
 m_AVND_COMP_PRES_STATE_IS_INSTANTIATIONFAILED(comp) ||
@@ -265,13 +268,15 @@ uint32_t avnd_evt_ava_err_clear_evh(AVND_CB *cb, AVND_EVT 
*evt) {
  }
}
  
-  /* get the comp */

+  /* check if component exists on local AvND node */
comp = avnd_compdb_rec_get(cb->compdb, 
Amf::to_string(_clear->comp_name));
+  if (!comp) amf_rc = SA_AIS_ERR_NOT_EXIST;
+
+  /* determine other error codes, if any */
  
-  /* determine the error code, if any */

-  if (!comp || !m_AVND_COMP_IS_REG(comp) ||
+  if ((comp) && (!m_AVND_COMP_IS_REG(comp) ||
(!m_AVND_COMP_TYPE_IS_PREINSTANTIABLE(comp) &&
-   !m_AVND_COMP_TYPE_IS_PROXIED(comp)))
+   !m_AVND_COMP_TYPE_IS_PROXIED(comp
  amf_rc = SA_AIS_ERR_BAD_OPERATION;
  
if ((comp) && m_AVND_COMP_OPER_STATE_IS_ENABLED(comp))




--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] amfa: Fix saAmfComponentErrorClear_4 to return ERR_NOT_EXIST for non-exist comp [#2540]

2017-08-13 Thread praveen malviya


Hi,

Both ErrorReport() and ErrorClear() APIs can be called for any component 
hosted anywhere.
Since amfnd only maintains local components, the component may not be found 
in its database. Either a message should be sent to AMFD, or AMFND should 
immediately read the IMM database to validate the component.


I am OK with this patch being pushed, provided this limitation is documented 
for both the ErrorReport() and ErrorClear() APIs.



Thanks
Praveen

On 02-Aug-17 12:51 PM, Nguyen Luu wrote:

When called with a non-existing component name, saAmfComponentErrorClear_4
should return SA_AIS_ERR_NOT_EXIST instead of SA_AIS_ERR_BAD_OPERATION
as previously done.
---
  src/amf/amfnd/err.cc | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/amf/amfnd/err.cc b/src/amf/amfnd/err.cc
index e4cb9f0..65c54f5 100644
--- a/src/amf/amfnd/err.cc
+++ b/src/amf/amfnd/err.cc
@@ -269,9 +269,11 @@ uint32_t avnd_evt_ava_err_clear_evh(AVND_CB *cb, AVND_EVT 
*evt) {
comp = avnd_compdb_rec_get(cb->compdb, 
Amf::to_string(_clear->comp_name));
  
/* determine the error code, if any */

-  if (!comp || !m_AVND_COMP_IS_REG(comp) ||
+  if (!comp) amf_rc = SA_AIS_ERR_NOT_EXIST;
+
+  if ((comp) && (!m_AVND_COMP_IS_REG(comp) ||
(!m_AVND_COMP_TYPE_IS_PREINSTANTIABLE(comp) &&
-   !m_AVND_COMP_TYPE_IS_PROXIED(comp)))
+   !m_AVND_COMP_TYPE_IS_PROXIED(comp
  amf_rc = SA_AIS_ERR_BAD_OPERATION;
  
if ((comp) && m_AVND_COMP_OPER_STATE_IS_ENABLED(comp))




--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] amfa: Fix saAmfPmStart_3 and saAmfResponse_4 to correctly return BAD_HANDLE [#2539]

2017-08-13 Thread praveen malviya


Ack, code review only.


Thanks
Praveen

On 02-Aug-17 9:31 AM, Nguyen Luu wrote:

When called with an uninitialized or already finalized handle,
saAmfPmStart_3 and saAmfResponse_4 should return SA_AIS_ERR_BAD_HANDLE
instead of SA_AIS_ERR_VERSION as previously done.
---
  src/amf/agent/amf_agent.cc | 14 ++
  1 file changed, 14 insertions(+)

diff --git a/src/amf/agent/amf_agent.cc b/src/amf/agent/amf_agent.cc
index 20528e9..b9191dd 100644
--- a/src/amf/agent/amf_agent.cc
+++ b/src/amf/agent/amf_agent.cc
@@ -2296,6 +2296,13 @@ SaAisErrorT AmfAgent::PmStart_3(SaAmfHandleT hdl, const 
SaNameT *comp_name,
SaAisErrorT rc = SA_AIS_OK;
TRACE_ENTER2("SaAmfHandleT passed is %llx", hdl);
  
+  /* Verifying the input Handle & global handle */

+  if (!gl_ava_hdl || hdl > AVSV_UNS32_HDL_MAX) {
+TRACE_2("Invalid SaAmfHandle passed by component: %llx", hdl);
+rc = SA_AIS_ERR_BAD_HANDLE;
+goto done;
+  }
+
/* Version is previously set in in initialize function */
if (!ava_B4_ver_used(0)) {
  TRACE_2(
@@ -2844,6 +2851,13 @@ SaAisErrorT AmfAgent::Response_4(SaAmfHandleT hdl, 
SaInvocationT inv,
SaAisErrorT rc = SA_AIS_OK;
TRACE_ENTER2("SaAmfHandleT passed is %llx", hdl);
  
+  /* Verifying the input Handle & global handle */

+  if (!gl_ava_hdl || hdl > AVSV_UNS32_HDL_MAX) {
+TRACE_2("Invalid SaAmfHandle passed by component: %llx", hdl);
+rc = SA_AIS_ERR_BAD_HANDLE;
+goto done;
+  }
+
/* Version is previously set in in initialize function */
if (!ava_B4_ver_used(0)) {
  TRACE_2(



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] clm: Provide the node address as a parameter to the scale-out script [#2538]

2017-08-07 Thread praveen malviya


Ack.


Thanks
Praveen

On 01-Aug-17 4:42 PM, Anders Widell wrote:

Provide the node address as a command-line parameter when calling the scale-out
script. This can be useful if the scale-out script needs to contact the node
(e.g. copy some files to it or update some configuration on the node's local
disk) as part of the scale-out operation.
---
  src/clm/clmd/clms_evt.c | 57 ++---
  1 file changed, 30 insertions(+), 27 deletions(-)

diff --git a/src/clm/clmd/clms_evt.c b/src/clm/clmd/clms_evt.c
index ace140db4..84e7b3c6d 100644
--- a/src/clm/clmd/clms_evt.c
+++ b/src/clm/clmd/clms_evt.c
@@ -488,9 +488,17 @@ static void scale_out_node(CLMS_CB *cb,
queue_the_node = false;
}
if (queue_the_node) {
+   char node_address[SA_CLM_MAX_ADDRESS_LENGTH + 1];
+   size_t addr_len = nodeup_info->address.length;
+   if (addr_len > SA_CLM_MAX_ADDRESS_LENGTH)
+   addr_len = SA_CLM_MAX_ADDRESS_LENGTH;
+   if (nodeup_info->no_of_addresses == 0)
+   addr_len = 0;
+   memcpy(node_address, nodeup_info->address.value, addr_len);
+   node_address[addr_len] = '\0';
char *strp;
-   if (asprintf(, "%" PRIu32 ",%s,", nodeup_info->node_id,
-node_name) != -1) {
+   if (asprintf(, "%" PRIu32 ",%s,%s,", nodeup_info->node_id,
+node_name, node_address) != -1) {
LOG_NO("Queuing request to scale out node 0x%" PRIx32
   " (%s)",
   nodeup_info->node_id, node_name);
@@ -525,13 +533,10 @@ uint32_t proc_node_up_msg(CLMS_CB *cb, CLMSV_CLMS_EVT 
*evt)
  {
clmsv_clms_node_up_info_t *nodeup_info =
&(evt->info.msg.info.api_info.param).nodeup_info;
-   CLMS_CLUSTER_NODE *node = NULL;
-   SaUint32T nodeid;
uint32_t rc = NCSCC_RC_SUCCESS;
SaNameT node_name = {0};
CLMSV_MSG clm_msg;
SaBoolT check_member;
-   IPLIST *ip = NULL;
  
  	TRACE_ENTER2("Node up mesg for nodename length %d %s",

 nodeup_info->node_name.length,
@@ -542,10 +547,21 @@ uint32_t proc_node_up_msg(CLMS_CB *cb, CLMSV_CLMS_EVT 
*evt)
(char *)node_name.value, sizeof(node_name.value), "safNode=%s,%s",
nodeup_info->node_name.value, osaf_cluster->name.value);
  
-	nodeid = evt->info.msg.info.api_info.param.nodeup_info.node_id;

+   SaUint32T nodeid = nodeup_info->node_id;
+
+   /* Retrieve IP information */
+   IPLIST *ip = (IPLIST *)ncs_patricia_tree_get(_cb->iplist,
+(uint8_t *));
+
+   if (ip != NULL && ip->addr.length != 0 &&
+   nodeup_info->no_of_addresses == 0) {
+   nodeup_info->no_of_addresses = 1;
+   memcpy(&(nodeup_info->address), &(ip->addr), sizeof(ip->addr));
+   }
  
-	node = clms_node_get_by_name(_name);

+   CLMS_CLUSTER_NODE *node = clms_node_get_by_name(_name);
clm_msg.info.api_resp_info.rc = SA_AIS_OK;
+
if (node == NULL) {
/* The /etc/opensaf/node_name is an user exposed configuration
 * file. The node_name file contains the RDN value of the CLM
@@ -573,8 +589,7 @@ uint32_t proc_node_up_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt)
  
  	if (node != NULL) {

/* Retrieve IP information */
-   if ((ip = (IPLIST *)ncs_patricia_tree_get(
-_cb->iplist, (uint8_t *))) == NULL) {
+   if (ip == NULL) {
clm_msg.info.api_resp_info.rc = SA_AIS_ERR_NOT_EXIST;
LOG_ER(
"IP information not found for: %s with node_id: %u",
@@ -653,8 +668,7 @@ uint32_t proc_node_up_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt)
/* Self Node needs to be added tp patricia tree before hand during init
 */
if (NULL == clms_node_get_by_id(nodeid)) {
-   node->node_id =
-   evt->info.msg.info.api_info.param.nodeup_info.node_id;
+   node->node_id = nodeup_info->node_id;
  
  		TRACE("node->node_id %u node->nodeup %d", node->node_id,

  node->nodeup);
@@ -665,29 +679,18 @@ uint32_t proc_node_up_msg(CLMS_CB *cb, CLMSV_CLMS_EVT 
*evt)
"/node_name configuration");
}
}
-   node->boot_time =
-   evt->info.msg.info.api_info.param.nodeup_info.boot_time;
+
+   node->boot_time = nodeup_info->boot_time;
  
  	/* Update the node with ipaddress information */

-   if (ip->addr.length) {
-   memset(>node_addr, 0, sizeof(SaClmNodeAddressT));
-   node->node_addr.family = ip->addr.family;
-   node->node_addr.length = ip->addr.length;
-   memcpy(node->node_addr.value, ip->addr.value,

Re: [devel] [PATCH 1/1] clm: Include boot time and node address in join request message [#2489]

2017-08-07 Thread praveen malviya


Ack.


Thanks,
Praveen

On 07-Aug-17 1:35 PM, Anders Widell wrote:
A node can have more than one single network address. If you run the 
ifconfig command, you get a list of network interfaces. Each one of 
these interfaces can have several addresses assigned to it: IPv4 
addresses, IPv6 addresses, and alias addresses. In addition, the node 
can have a TIPC address. So in the case of ticket [#2479], we might need 
to make both saClmNodeAddress and saClmNodeCurrAddress multi-value. 
However, I don't intend to implement [#2479] in the near future (or at 
all), since ticket [#2489] is probably enough for most real-world use 
cases. In most real-world use cases, it is enough for the application to 
get one single address for each node, but we need the flexibility to 
select which one of the addresses to present to the application.


saClmNodeAddressFamily and saClmNodeAddress are currently ignored by 
OpenSAF. I am not sure how saClmNodeAddressFamily and saClmNodeAddress 
are intended to be used, but my best guess is that saClmNodeAddress is 
intended for the case when you have statically assigned network 
addresses, and saClmNodeCurrAddress is intended for the case with 
dynamically assigned addresses, though there is no reason why we can't 
present a statically assigned address in saClmNodeCurrAddress as well. 
Since saClmNodeAddress is a configuration attribute, I am assuming here 
that you should actually be able to /set/ the node's address using the 
saClmNodeAddress configuration attribute! But in order for that to work, 
the node needs to read its IMM configuration immediately after booting, 
before it has configured its own network address. The only way this 
could work is if we are not actually talking about the network address 
used internally by OpenSAF, but the node's address on a separate network 
intended to be used by the application. Otherwise OpenSAF would not be 
able to communicate with IMM to read the node's own address. So 
according to this interpretation, each node has at least two addresses: 
one address used for internal OpenSAF communication, and another address 
used by the application. And it is the address used by the application 
which is configured using the saClmNodeAddress and presented in the 
saClmNodeCurrAddress attribute. The ticket doesn't favour any particular 
interpretation though, you are free to select the internal OpenSAF 
communication address or some other address to present in 
saClmNodeCurrAddress.


regards,

Anders Widell

On 08/07/2017 07:12 AM, praveen malviya wrote:

Hi Anders,

I have started reviewing this patch.

One initial query:
We have two sets of attributes for address in "SaClmNode":
set A) saClmNodeAddressFamily & saClmNodeAddress  and
set B )saClmNodeCurrAddressFamily & saClmNodeCurrAddress.

The description of ticket #2479 proposes making set B multi-valued. I 
think it is set A that should be made multi-valued, and set B should 
reflect the address currently in use. This would also avoid a backward 
compatibility issue, since set B remains single-valued.


Also, which address should set B reflect: the address used by OpenSAF 
or the one used by the application? Until now it has been the OpenSAF 
internal communication address.



Thanks,
Praveen

On 31-Jul-17 6:40 PM, Anders Widell wrote:

The node join request message now has two new fields: boot time and node
address. This allows us to provide more accurate and correct 
information in the

CLM node runtime attributes in the information model:

* The boot time field transmits the node's actual boot time to the 
CLM server.
   Previously, the node join time was used as an approximation of the 
node boot
   time, but this might be inaccurate or incorrect. For example, if 
OpenSAF was
   started much later than the node was booted (e.g. if OpenSAF was 
restarted
   without a node reboot), then the node join time will differ 
significantly from

   the node boot time.

* The node address field transmits the node address to be presented 
to the
   application through the information model. Previously, the IP 
address which
   was used by OpenSAF internal communication was presented as the 
one and only
   node address, and there was no way to select some other address in 
case the
   node has multiple network addresses. The application now has the 
possibility

   to select which network address to present in the information model.
---
  00-README.conf |  8 
  src/clm/clmd/clms.h|  1 -
  src/clm/clmd/clms_evt.c| 15 ++-
  src/clm/clmd/clms_main.c   | 22 ++
  src/clm/clmd/clms_mbcsv.c  | 11 ++---
  src/clm/clmd/clms_mbcsv.h  |  2 -
  src/clm/clmd/clms_mds.c| 92 
+-

  src/clm/clmd/clms_util.c   | 15 ---
  src/clm/clmnd/cb.h | 14 ---
  src/clm/clmnd/clmna.conf   | 13 ++
  src/clm/clmnd/main.c   | 89 


  src/clm/comm

Re: [devel] [PATCH 1/1] clm: Include boot time and node address in join request message [#2489]

2017-08-06 Thread praveen malviya


Hi Anders,

I have started reviewing this patch.

One initial query:
We have two sets of attributes for address in "SaClmNode":
set A) saClmNodeAddressFamily & saClmNodeAddress  and
set B )saClmNodeCurrAddressFamily & saClmNodeCurrAddress.

The description of ticket #2479 proposes making set B multi-valued. I 
think it is set A that should be made multi-valued, and set B should 
reflect the address currently in use. This would also avoid a backward 
compatibility issue, since set B remains single-valued.


Also, which address should set B reflect: the address used by OpenSAF or 
the one used by the application? Until now it has been the OpenSAF 
internal communication address.



Thanks,
Praveen

On 31-Jul-17 6:40 PM, Anders Widell wrote:

The node join request message now has two new fields: boot time and node
address. This allows us to provide more accurate and correct information in the
CLM node runtime attributes in the information model:

* The boot time field transmits the node's actual boot time to the CLM server.
   Previously, the node join time was used as an approximation of the node boot
   time, but this might be inaccurate or incorrect. For example, if OpenSAF was
   started much later than the node was booted (e.g. if OpenSAF was restarted
   without a node reboot), then the node join time will differ significantly 
from
   the node boot time.

* The node address field transmits the node address to be presented to the
   application through the information model. Previously, the IP address which
   was used by OpenSAF internal communication was presented as the one and only
   node address, and there was no way to select some other address in case the
   node has multiple network addresses. The application now has the possibility
   to select which network address to present in the information model.
---
  00-README.conf |  8 
  src/clm/clmd/clms.h|  1 -
  src/clm/clmd/clms_evt.c| 15 ++-
  src/clm/clmd/clms_main.c   | 22 ++
  src/clm/clmd/clms_mbcsv.c  | 11 ++---
  src/clm/clmd/clms_mbcsv.h  |  2 -
  src/clm/clmd/clms_mds.c| 92 +-
  src/clm/clmd/clms_util.c   | 15 ---
  src/clm/clmnd/cb.h | 14 ---
  src/clm/clmnd/clmna.conf   | 13 ++
  src/clm/clmnd/main.c   | 89 
  src/clm/common/clmsv_enc_dec.c | 34 +++-
  src/clm/common/clmsv_enc_dec.h |  2 +
  src/clm/common/clmsv_msg.h |  5 +++
  14 files changed, 232 insertions(+), 91 deletions(-)

diff --git a/00-README.conf b/00-README.conf
index 380cdc2f4..b9ef1 100644
--- a/00-README.conf
+++ b/00-README.conf
@@ -65,6 +65,14 @@ controller nodes, the default delay of 200 ms should be 
sufficient. For systems
  with a very large number of configured system controller nodes and/or with
  unreliable network connections, values larger than 5000 may be needed.
  
+CLMNA_ADDR_FAMILY and CLMNA_ADDR_VALUE let you specify the values shown in the

+saClmNodeCurrAddressFamily and saClmNodeCurrAddress runtime attributes of the
+node's SaClmNode IMM object. If these options are not set, CLM will try
+to figure out the node's address by itself. Since a node can have more than one
+network address, the address chosen by CLM may not be the address your
+application is interested in. By explicitly specifying the address using
+CLMNA_ADDR_FAMILY and CLMNA_ADDR_VALUE, you will be guaranteed that the correct
+address is presented.
  
  ***

  dtmd.conf
diff --git a/src/clm/clmd/clms.h b/src/clm/clmd/clms.h
index 664c0da29..2ac69eade 100644
--- a/src/clm/clmd/clms.h
+++ b/src/clm/clmd/clms.h
@@ -116,7 +116,6 @@ extern SaAisErrorT clms_plm_init(CLMS_CB *cb);
  #endif
  extern void clms_node_add_to_model(CLMS_CLUSTER_NODE *node);
  extern SaTimeT clms_get_SaTime(void);
-extern SaTimeT clms_get_BootTime(void);
  extern void clms_imm_impl_set(CLMS_CB *cb);
  extern uint32_t clms_rda_init(CLMS_CB *cb);
  extern void clms_adminop_pending(void);
diff --git a/src/clm/clmd/clms_evt.c b/src/clm/clmd/clms_evt.c
index d29925b77..ace140db4 100644
--- a/src/clm/clmd/clms_evt.c
+++ b/src/clm/clmd/clms_evt.c
@@ -1,6 +1,7 @@
  /*  -*- OpenSAF  -*-
   *
   * (C) Copyright 2010,2015 The OpenSAF Foundation
+ * Copyright Ericsson AB 2017 - All Rights Reserved.
   *
   * This program is distributed in the hope that it will be useful, but
   * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -664,11 +665,12 @@ uint32_t proc_node_up_msg(CLMS_CB *cb, CLMSV_CLMS_EVT 
*evt)
"/node_name configuration");
}
}
-   node->boot_time = clms_get_SaTime();
+   node->boot_time =
+   evt->info.msg.info.api_info.param.nodeup_info.boot_time;
  
  	/* Update the node with ipaddress information */

if (ip->addr.length) {
-

Re: [devel] [PATCH 0/1] Review Request for clm: Include boot time and node address in join request message [#2489]

2017-08-04 Thread praveen malviya


Hi Anders,

I will review both #2489 and #2538 by Monday.

Thanks,
Praveen


On 04-Aug-17 12:24 PM, Anders Widell wrote:

Hi!

Did you get a chance to look at this yet?

thanks,

Anders Widell


On 07/31/2017 03:10 PM, Anders Widell wrote:
Summary: clm: Include boot time and node address in join request 
message [#2489]

Review request for Ticket(s): 2489
Peer Reviewer(s): Praveen
Pull request to:
Affected branch(es): develop
Development branch: ticket-2489
Base revision: 10b4c9e2f952456c3ef7c4413e224c3365e4b18f
Personal repository: git://git.code.sf.net/u/anders-w/review


Impacted area   Impact y/n

  Docsn
  Build systemn
  RPM/packaging   n
  Configuration files n
  Startup scripts n
  SAF servicesy
  OpenSAF servicesn
  Core libraries  n
  Samples n
  Tests   n
  Other   n


Comments (indicate scope for each "y" above):
-

NOTE: This ticket depends on ticket [#2535] which is still out on review.

revision 56e7a62729974e03f3cd441a182121ea67937136
Author:Anders Widell 
Date:Mon, 31 Jul 2017 14:02:07 +0200

clm: Include boot time and node address in join request message [#2489]

The node join request message now has two new fields: boot time and node
address. This allows us to provide more accurate and correct 
information in the

CLM node runtime attributes in the information model:

* The boot time field transmits the node's actual boot time to the CLM 
server.
   Previously, the node join time was used as an approximation of the 
node boot
   time, but this might be inaccurate or incorrect. For example, if 
OpenSAF was
   started much later than the node was booted (e.g. if OpenSAF was 
restarted
   without a node reboot), then the node join time will differ 
significantly from

   the node boot time.

* The node address field transmits the node address to be presented to 
the
   application through the information model. Previously, the IP 
address which
   was used by OpenSAF internal communication was presented as the one 
and only
   node address, and there was no way to select some other address in 
case the
   node has multiple network addresses. The application now has the 
possibility

   to select which network address to present in the information model.



Complete diffstat:
--
  00-README.conf |  8 
  src/clm/clmd/clms.h|  1 -
  src/clm/clmd/clms_evt.c| 15 ++-
  src/clm/clmd/clms_main.c   | 22 ++
  src/clm/clmd/clms_mbcsv.c  | 11 ++---
  src/clm/clmd/clms_mbcsv.h  |  2 -
  src/clm/clmd/clms_mds.c| 92 
+-

  src/clm/clmd/clms_util.c   | 15 ---
  src/clm/clmnd/cb.h | 14 ---
  src/clm/clmnd/clmna.conf   | 13 ++
  src/clm/clmnd/main.c   | 89 


  src/clm/common/clmsv_enc_dec.c | 34 +++-
  src/clm/common/clmsv_enc_dec.h |  2 +
  src/clm/common/clmsv_msg.h |  5 +++
  14 files changed, 232 insertions(+), 91 deletions(-)


Testing Commands:
-

For boot time: Make a note of saClmNodeBootTimeStamp for a node in the
cluster. Run /etc/init.d/opensafd stop followed by 
/etc/init.d/opensafd start on
that node. Check saClmNodeBootTimeStamp of that node again. The time 
stamp shall

not be affected by restarting OpenSAF (without a node reboot).

For node address: Configure OpenSAF to use TIPC for internal 
communication. Set
CLMNA_ADDR_FAMILY and CLMNA_ADDR_VALUE in /etc/opensaf/clmna.conf for 
a node in

the cluster. Start the node. Check saClmNodeCurrAddressFamily and
saClmNodeCurrAddress of that node. The values shall match what you 
entered in

/etc/opensaf/clmna.conf.


Testing, Expected Results:
--

See above.


Conditions of Submission:
-

Ack from reviewer(s)


Arch  Built StartedLinux distro
---
mipsn  n
mips64  n  n
x86 n  n
x86_64  y  y
powerpc n  n
powerpc64   n  n


Reviewer Checklist:
---
[Submitters: make sure that your review doesn't trigger any checkmarks!]


Your checkin has not passed review because (see checked entries):

___ Your RR template is generally incomplete; it has too many blank 
entries

 that need proper data filled in.

___ You have failed to nominate the proper persons for review and push.

___ Your patches do not have proper short+long header

___ You have grammar/spelling in your header that is unacceptable.

___ You have exceeded a sensible line length in your 
headers/comments/text.


___ You have failed to put in a proper Trac Ticket # into your commits.

___ You have

Re: [devel] [PATCH 1/1] amfd: Do not create duplicated HA state absent SUSI [#2530]

2017-08-02 Thread praveen malviya


Ack, code review only.

Thanks
Praveen

On 02-Aug-17 5:03 PM, minh chau wrote:

Hi Praveen,

This ticket as well as the *if* block we are talking about, it is for 
absent SUSI which is read from IMM as a helper to failover after SG 
absence stage. The current SG 2N code can not failover an absent SUSI to 
another present SUSI that both have the same HA state, so we exclude 
this case as in #2477, #2530.


In the case you mention, SU1 and SU2 both have a present SUSI, so it 
won't run the code that creates an absent SUSI. It should also work 
as long as there is no loss of RTA, since after the SC absence stage the SG 
operation resumes from where it was before the loss of SCs.


Thanks,
Minh

On 02/08/17 21:18, praveen malviya wrote:

Hi Minh,

I wanted to highlight a valid case where the quiesced HA state can be present 
in two SUs in the 2N model. In a switchover situation, when SU1 has 
successfully quiesced, amfd sends the active assignment to standby SU2. 
While standby SU2 is becoming active, it faults with comp-failover 
recovery and AMFD sends it a quiesced HA state. Thus there can momentarily 
be two valid quiesced SUSIs in an SG.


Thanks,
Praveen

On 02-Aug-17 4:41 PM, minh chau wrote:

Hi Praveen,

Please find my reply inline.

Thanks,
Minh

On 02/08/17 20:17, praveen malviya wrote:

Hi Minh,

Please find one query inline with [Praveen].
Thanks,
Praveen
On 28-Jul-17 7:44 AM, Minh Chau wrote:

Symptom is similar to #2477; this patch fixes the case of
2 STANDBY assignments for the same SI
---
  src/amf/amfd/si.cc| 14 ++
  src/amf/amfd/si.h |  1 +
  src/amf/amfd/siass.cc |  6 ++
  3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc
index 3f76c1476..27245339c 100644
--- a/src/amf/amfd/si.cc
+++ b/src/amf/amfd/si.cc
@@ -1594,6 +1594,20 @@ const AVD_SIRANKEDSU *AVD_SI::get_si_ranked_su(
  return sirankedsu;
  }
+/*
+ * @brief Count number of SUSI assignment that are assigned to 
this SI

+ *with specified HA state
+ * @param [in] @ha: HA state
+ * @return: number of SUSI assignment
+ */
+uint32_t AVD_SI::count_sisu_with(SaAmfHAStateT ha) {
+  uint32_t count = 0;
+  for (AVD_SU_SI_REL *sisu = list_of_sisu; sisu != nullptr;
+  sisu = sisu->si_next) {
+if (sisu->state == ha) count++;
+  }
+  return count;
+}
/*
   * @brief Update alarm_sent by new value of @alarm_state,
diff --git a/src/amf/amfd/si.h b/src/amf/amfd/si.h
index 4f8dc5718..af14363b6 100644
--- a/src/amf/amfd/si.h
+++ b/src/amf/amfd/si.h
@@ -152,6 +152,7 @@ class AVD_SI {
const AVD_SIRANKEDSU *get_si_ranked_su(const std::string 
_name) const;

bool is_active() const;
SaAisErrorT si_swap_validate();
+  uint32_t count_sisu_with(SaAmfHAStateT ha);
 private:
bool is_assigned() const { return list_of_sisu ? true : false; }
diff --git a/src/amf/amfd/siass.cc b/src/amf/amfd/siass.cc
index d14d279dc..267c55c07 100644
--- a/src/amf/amfd/siass.cc
+++ b/src/amf/amfd/siass.cc
@@ -351,11 +351,9 @@ bool 
avd_susi_validate_absent_assignment(AVD_SU *su, AVD_SI *si,

goto done;
}
// No need to create absent SUSI assignment for the 2N SI that 
already has

-  // ACTIVE SUSI
+  // the same @imm_ha_state SUSI
if (su->sg_of_su->sg_redundancy_model == 
SA_AMF_2N_REDUNDANCY_MODEL) {

-if (si->list_of_sisu != nullptr &&
-si->list_of_sisu->state == SA_AMF_HA_ACTIVE &&
-imm_ha_state == SA_AMF_HA_ACTIVE)
+if (si->count_sisu_with(imm_ha_state) > 0)
[Praveen] This ticket is raised for the 2 standby case and the earlier fix 
was for the 2 active case. This if block now does not check the HA state. 
What will happen in the case of the quiesced state? We can have two quiesced 
states in case of faults: when an SU faults while it is becoming active 
in a switchover situation, amf sends the quiesced state to this faulted 
active SU.
[Minh]: This *if* block now is for all HA states. It means that we 
don't create an absent SUSI when amfd already had another SUSI with 
same HA state in 2N SG. I should have made it this way earlier in 
ticket #2477 so we would not have this ticket #2530.




goto done;
}











--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] amfd: Do not create duplicated HA state absent SUSI [#2530]

2017-08-02 Thread praveen malviya


Hi Minh,

I wanted to highlight a valid case where the quiesced HA state can be present 
in two SUs in the 2N model. In a switchover situation, when SU1 has 
successfully quiesced, amfd sends the active assignment to standby SU2. 
While standby SU2 is becoming active, it faults with comp-failover 
recovery and AMFD sends it a quiesced HA state. Thus there can momentarily 
be two valid quiesced SUSIs in an SG.


Thanks,
Praveen

On 02-Aug-17 4:41 PM, minh chau wrote:

Hi Praveen,

Please find my reply inline.

Thanks,
Minh

On 02/08/17 20:17, praveen malviya wrote:

Hi Minh,

Please find one query inline with [Praveen].
Thanks,
Praveen
On 28-Jul-17 7:44 AM, Minh Chau wrote:

Symptom is similar to #2477; this patch fixes the case of
2 STANDBY assignments for the same SI
---
  src/amf/amfd/si.cc| 14 ++
  src/amf/amfd/si.h |  1 +
  src/amf/amfd/siass.cc |  6 ++
  3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc
index 3f76c1476..27245339c 100644
--- a/src/amf/amfd/si.cc
+++ b/src/amf/amfd/si.cc
@@ -1594,6 +1594,20 @@ const AVD_SIRANKEDSU *AVD_SI::get_si_ranked_su(
  return sirankedsu;
  }
+/*
+ * @brief Count number of SUSI assignment that are assigned to this SI
+ *with specified HA state
+ * @param [in] @ha: HA state
+ * @return: number of SUSI assignment
+ */
+uint32_t AVD_SI::count_sisu_with(SaAmfHAStateT ha) {
+  uint32_t count = 0;
+  for (AVD_SU_SI_REL *sisu = list_of_sisu; sisu != nullptr;
+  sisu = sisu->si_next) {
+if (sisu->state == ha) count++;
+  }
+  return count;
+}
/*
   * @brief Update alarm_sent by new value of @alarm_state,
diff --git a/src/amf/amfd/si.h b/src/amf/amfd/si.h
index 4f8dc5718..af14363b6 100644
--- a/src/amf/amfd/si.h
+++ b/src/amf/amfd/si.h
@@ -152,6 +152,7 @@ class AVD_SI {
const AVD_SIRANKEDSU *get_si_ranked_su(const std::string 
_name) const;

bool is_active() const;
SaAisErrorT si_swap_validate();
+  uint32_t count_sisu_with(SaAmfHAStateT ha);
 private:
bool is_assigned() const { return list_of_sisu ? true : false; }
diff --git a/src/amf/amfd/siass.cc b/src/amf/amfd/siass.cc
index d14d279dc..267c55c07 100644
--- a/src/amf/amfd/siass.cc
+++ b/src/amf/amfd/siass.cc
@@ -351,11 +351,9 @@ bool avd_susi_validate_absent_assignment(AVD_SU 
*su, AVD_SI *si,

goto done;
}
// No need to create absent SUSI assignment for the 2N SI that 
already has

-  // ACTIVE SUSI
+  // the same @imm_ha_state SUSI
if (su->sg_of_su->sg_redundancy_model == 
SA_AMF_2N_REDUNDANCY_MODEL) {

-if (si->list_of_sisu != nullptr &&
-si->list_of_sisu->state == SA_AMF_HA_ACTIVE &&
-imm_ha_state == SA_AMF_HA_ACTIVE)
+if (si->count_sisu_with(imm_ha_state) > 0)
[Praveen] This ticket is raised for the 2 standby case and the earlier fix 
was for the 2 active case. This if block now does not check the HA state. 
What will happen in the case of the quiesced state? We can have two quiesced 
states in case of faults: when an SU faults while it is becoming active 
in a switchover situation, amf sends the quiesced state to this faulted 
active SU.
[Minh]: This *if* block now is for all HA states. It means that we don't 
create an absent SUSI when amfd already had another SUSI with same HA 
state in 2N SG. I should have made it this way earlier in ticket #2477 
so we would not have this ticket #2530.




goto done;
}







--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/4] amf: Log CLM initialization error only once on unconfigured nodes [#2509]

2017-07-31 Thread praveen malviya


Ack for all the patches.


Thanks,
Praveen

On 24-Jul-17 7:27 PM, Anders Widell wrote:

Avoid spamming the syslog with more than one log message in case CLM returns
SA_AIS_ERR_UNAVAILABLE (i.e. we are running on a currently unconfigured node).
---
  src/amf/amfd/clm.cc  | 6 --
  src/amf/amfnd/clm.cc | 6 --
  2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/amf/amfd/clm.cc b/src/amf/amfd/clm.cc
index 86c23ea46..da951d223 100644
--- a/src/amf/amfd/clm.cc
+++ b/src/amf/amfd/clm.cc
@@ -433,13 +433,15 @@ SaAisErrorT avd_clm_init(AVD_CL_CB *cb) {
 * BAD_HANDLE. Also, duplicated codes in initialization thread
 * will be moved to osaf dedicated thread
 */
+  bool has_logged_clm_error = false;
for (;;) {
  SaVersionT Version = {'B', 4, 1};
  error = saClmInitialize_4(_handle, _callbacks, );
  if (error == SA_AIS_ERR_TRY_AGAIN || error == SA_AIS_ERR_TIMEOUT ||
  error == SA_AIS_ERR_UNAVAILABLE) {
-  if (error != SA_AIS_ERR_TRY_AGAIN) {
-LOG_WA("saClmInitialize_4 returned %u", (unsigned)error);
+  if (error != SA_AIS_ERR_TRY_AGAIN && !has_logged_clm_error) {
+LOG_WA("saClmInitialize_4 returned %u", static_cast(error));
+has_logged_clm_error = true;
}
osaf_nanosleep();
continue;
diff --git a/src/amf/amfnd/clm.cc b/src/amf/amfnd/clm.cc
index 6985f3685..f1f65bcef 100644
--- a/src/amf/amfnd/clm.cc
+++ b/src/amf/amfnd/clm.cc
@@ -276,13 +276,15 @@ SaAisErrorT avnd_clm_init(AVND_CB *cb) {
  
cb->first_time_up = true;

cb->clmHandle = 0;
+  bool has_logged_clm_error = false;
for (;;) {
  SaVersionT Version = {'B', 4, 1};
  error = saClmInitialize_4(>clmHandle, , );
  if (error == SA_AIS_ERR_TRY_AGAIN || error == SA_AIS_ERR_TIMEOUT ||
  error == SA_AIS_ERR_UNAVAILABLE) {
-  if (error != SA_AIS_ERR_TRY_AGAIN) {
-LOG_WA("saClmInitialize_4 returned %u", (unsigned)error);
+  if (error != SA_AIS_ERR_TRY_AGAIN && !has_logged_clm_error) {
+LOG_WA("saClmInitialize_4 returned %u", static_cast(error));
+has_logged_clm_error = true;
}
osaf_nanosleep();
continue;



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 0/4] Review Request for clm: Make it possible for a node to scale out itself using autoscaling [#2509]

2017-07-30 Thread praveen malviya


I am reviewing the patches.

Thanks,
Praveen

On 24-Jul-17 7:27 PM, Anders Widell wrote:

Summary: clm: Make it possible for a node to scale out itself using autoscaling 
[#2509]
Review request for Ticket(s): 2509
Peer Reviewer(s): Praveen
Pull request to:
Affected branch(es): develop
Development branch: ticket-2509
Base revision: de977bacba9b452fe2a8abcae26a1188a7a15f31
Personal repository: git://git.code.sf.net/u/anders-w/review


Impacted area   Impact y/n

  Docsn
  Build systemn
  RPM/packaging   n
  Configuration files n
  Startup scripts n
  SAF servicesy
  OpenSAF servicesn
  Core libraries  n
  Samples n
  Tests   n
  Other   n


Comments (indicate scope for each "y" above):
-

revision 7782f5e526729b97a973e11feb52105c120fe2ce
Author: Anders Widell 
Date:   Mon, 24 Jul 2017 15:41:51 +0200

clm: Make it possible for a node to scale out itself using autoscaling [#2509]

Ticket [#1453] added support for autoscaling, which allows scale-out from an
initial cluster containing at least one node. This commit adds support for
scaling out from a cluster containing zero nodes, or alternatively, a cluster
where the active node is not a configured node. The use cases are as follows:

* Support loading a backup that was created on a different cluster where none of
   the new nodes have the same name as any of the nodes in the old cluster.
* Support cluster restart on a system where nodes don't have persistent local
   storage (or persistent host names / node names) - i.e. a system where a node
   reboot will always result in a scale-in followed by a scale-out
* Make scaling more robust, e.g. imagine a case when a one-node cluster is
   scaled out by adding a second node, but then the original node is removed
   before scale-out of the new node has completed.



revision 262d4c7a96e663dc00335278df28e0788c19d334
Author: Anders Widell 
Date:   Mon, 24 Jul 2017 15:41:15 +0200

ntf: Re-try initializing CLM on unconfigured nodes [#2509]

Re-try initializing the CLM API when it returns SA_AIS_ERR_UNAVAILABLE, so that
the NTF service properly waits for the node to become configured by the
autoscaling functionality.



revision 2e2f4dd43621a1113262caf274bac4989f2d9d7d
Author: Anders Widell 
Date:   Mon, 24 Jul 2017 15:34:47 +0200

log: Re-try initializing CLM on unconfigured nodes [#2509]

Re-try initializing the CLM API when it returns SA_AIS_ERR_UNAVAILABLE. This
error code is returned if the LOG service has been started on an unconfigured
node, which may happen for a while when the autoscaling feature is used.



revision 31305c94edb9eae1aecc66c6d13105324ffcfa1b
Author: Anders Widell 
Date:   Mon, 24 Jul 2017 15:32:24 +0200

amf: Log CLM initialization error only once on unconfigured nodes [#2509]

Avoid spamming the syslog with more than one log message in case CLM returns
SA_AIS_ERR_UNAVAILABLE (i.e. we are running on a currently unconfigured node).



Complete diffstat:
--
  src/amf/amfd/clm.cc  |  6 --
  src/amf/amfnd/clm.cc |  6 --
  src/clm/clmd/clms_main.c | 12 ++--
  src/log/logd/lgs_clm.cc  |  3 ++-
  src/ntf/ntfd/ntfs_clm.c  |  3 ++-
  5 files changed, 22 insertions(+), 8 deletions(-)


Testing Commands:
-

Enable autoscaling in clmd.conf and by removing the "exit" command from
opensaf_scale_out script. Start a cluster where none of the nodes are configured
in IMM.


Testing, Expected Results:
--

The nodes shall be scaled out.


Conditions of Submission:
-

Ack from reviewer(s)


Arch  Built StartedLinux distro
---
mipsn  n
mips64  n  n
x86 n  n
x86_64  y  y
powerpc n  n
powerpc64   n  n


Reviewer Checklist:
---
[Submitters: make sure that your review doesn't trigger any checkmarks!]


Your checkin has not passed review because (see checked entries):

___ Your RR template is generally incomplete; it has too many blank entries
 that need proper data filled in.

___ You have failed to nominate the proper persons for review and push.

___ Your patches do not have proper short+long header

___ You have grammar/spelling in your header that is unacceptable.

___ You have exceeded a sensible line length in your headers/comments/text.

___ You have failed to put in a proper Trac Ticket # into your commits.

___ You have incorrectly put/left internal data in your comments/files
 (i.e. internal bug tracking tool IDs, product names etc)

___ You have not given any evidence of testing beyond basic build tests.

Re: [devel] [PATCH 1/1] clm: add clm tool for tracking and for getting node info [#2429]

2017-07-28 Thread praveen malviya


Hi Anders,

Attached is the patch after incorporating the comments.
I will be pushing it on Monday.
Please go through it.

Thanks
Praveen

On 18-Jul-17 9:20 PM, Anders Widell wrote:

Ack with comments:

* Indentation seems to be according to Google C++ style guide, although 
the file is written in C and should be indented according to the Linux 
Kernel coding style. Either change the file extension from .c to .cc or 
change the formatting. It can also be a good idea to run the style 
checkers: "make cpplint" for C++ code or "make checkpatch" for C code.


* The program is installed in sbin, but shouldn't it be installed in 
bin? sbin is intended for system administration tools.


* The name of the program is not consistent with already existing tools 
like clm-adm etc. Maybe rename it to clm-app? Though "app" doesn't say 
much about what the program is doing. Better choices could be clm-list, 
clm-show, or clm-print.


* Since there is only one source file, it is probably a good idea to 
give it the same name as the executable, but with a .c or .cc extension 
and any hyphens replaced with underscores (e.g. clm_print.cc)


* It is probably better to remove the -f and -i flags, and replace them 
with optional arguments for the -n, -a, -m flags.


* Node id (-i flag) doesn't support hexadecimal numbers. Use e.g. 
strtoul() with base 0 instead of atoi().


* It is probably better to use comma (,) instead of vertical bar (|) to 
separate track flags, because vertical bar is treated specially by the 
shell and must thus always be quoted.


* Why not use the value supplied with the -t parameter also when using 
-n, instead of the hard-coded TIME_OUT value (if -t was not specified or 
negative then you can use TIME_OUT)?


* Shouldn't the program exit once it has received the asynchronous node 
get callback?


* osaf_extended_name_borrow() and osaf_extended_name_length() are mainly 
intended to be used in agent libraries. Please use saAisNameBorrow() and 
strlen(saAisNameBorrow()) instead.


* Inconsistent use of EXIT_FAILURE/EXIT_SUCCESS and 1/0 for exit() and 
return from main (sometimes name is used, sometimes number).


regards,

Anders Widell

On 07/14/2017 11:02 AM, Praveen wrote:

Add a utility/application which enables user to:
-perform tracking using saClmClusterTrack_4().
-get node info by calling saClmClusterNodeGet_4().
-get node info asynchronously by calling saClmClusterNodeGetAsync().
---
  opensaf.spec.in |   1 +
  src/clm/Makefile.am |  13 ++
  src/clm/tools/clm_api_app.c | 349 


  3 files changed, 363 insertions(+)
  create mode 100644 src/clm/tools/clm_api_app.c

diff --git a/opensaf.spec.in b/opensaf.spec.in
index 56e8d78..8c4b2c1 100644
--- a/opensaf.spec.in
+++ b/opensaf.spec.in
@@ -995,6 +995,7 @@ fi
  %defattr(-,root,root)
  %{_sbindir}/amfpm
  %{_sbindir}/amfclusterstatus
+%{_sbindir}/clmapp
  %if %is_ais_ckpt
diff --git a/src/clm/Makefile.am b/src/clm/Makefile.am
index be3a668..417dc63 100644
--- a/src/clm/Makefile.am
+++ b/src/clm/Makefile.am
@@ -85,6 +85,7 @@ noinst_HEADERS += \
  src/clm/common/clmsv_enc_dec.h \
  src/clm/common/clmsv_msg.h
+sbin_PROGRAMS += bin/clmapp
  osaf_execbin_PROGRAMS += bin/osafclmd bin/osafclmna
  nodist_pkgclccli_SCRIPTS += \
@@ -165,6 +166,18 @@ dist_bin_SCRIPTS += \
 src/clm/tools/clm-find \
 src/clm/tools/clm-state
+bin_clmapp_CPPFLAGS = \
+-DSA_EXTENDED_NAME_SOURCE \
+$(AM_CPPFLAGS)
+
+bin_clmapp_SOURCES = \
+src/clm/tools/clm_api_app.c
+
+bin_clmapp_LDADD = \
+lib/libSaClm.la \
+lib/libopensaf_core.la
+
+
  if ENABLE_TESTS
  bin_PROGRAMS += bin/clmtest
diff --git a/src/clm/tools/clm_api_app.c b/src/clm/tools/clm_api_app.c
new file mode 100644
index 000..cae27f2
--- /dev/null
+++ b/src/clm/tools/clm_api_app.c
@@ -0,0 +1,349 @@
+/*  -*- OpenSAF  -*-
+ *
+ * Copyright (C) 2017, Oracle and/or its affiliates. All rights 
reserved.

+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of 
MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are 
licensed
+ * under the GNU Lesser General Public License Version 2.1, February 
1999.

+ * The complete license can be accessed from the following location:
+ * 
https://urldefense.proofpoint.com/v2/url?u=http-3A__opensource.org_licenses_lgpl-2Dlicense.php=DwICaQ=RoP1YumCXCgaWHvlZYR8PQcxBKCX5YTpkKY057SbK10=Lehk1PZKwfDQtYJXNyUKbPAqrw5O--SlPRAF9DIEps4=WlWfNt4__h4REFw1hAKezXL8ZHVNlOzgMtMpNjhhWes=l2RAg-511WMwwdADnRrm1xBQSVnrb_z3zcohqea1vEA= 
+ * See the Copying file included with the OpenSAF distribution for full

+ * licensing terms.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#define SIZE_NOTIFICATIONS 100
+#define TIME_OUT  ((SaTimeT)15 *

Re: [devel] [PATCH 1/1] clm: handle ERR_BAD_HANDLE for saImmOmSearchInitialize [#2528]

2017-07-20 Thread praveen malviya


Ack, code review only.

Thanks,
Praveen

On 14-Jul-17 6:42 PM, Zoran Milinkovic wrote:

CLM handles ERR_BAD_HANDLE for saImmOmSearchInitialize in 
clms_cluster_config_get.
As part of this patch, handling of IMM version is improved in the same function.
---
  src/clm/clmd/clms_imm.c | 18 +-
  1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/clm/clmd/clms_imm.c b/src/clm/clmd/clms_imm.c
index 06a7df8..51429ec 100644
--- a/src/clm/clmd/clms_imm.c
+++ b/src/clm/clmd/clms_imm.c
@@ -432,10 +432,12 @@ SaAisErrorT clms_cluster_config_get(void)
SaNameT dn;
SaImmAttrValuesT_2 **attributes;
const char *className = "SaClmCluster";
+   SaVersionT version;
  
  	TRACE_ENTER();
  
-	(void)immutil_saImmOmInitialize(_om_hdl, NULL, );

+   version = immVersion;
+   (void)immutil_saImmOmInitialize(_om_hdl, NULL, );
  
  	searchParam.searchOneAttr.attrName = "SaImmAttrClassName";

searchParam.searchOneAttr.attrValueType = SA_IMM_ATTR_SASTRINGT;
@@ -446,6 +448,20 @@ SaAisErrorT clms_cluster_config_get(void)
SA_IMM_SEARCH_ONE_ATTR | SA_IMM_SEARCH_GET_ALL_ATTR, ,
NULL, _hdl);
  
+	if (rc == SA_AIS_ERR_BAD_HANDLE) {

+   // Repeat one more search on ERR_BAD_HANDLE
+
+   // Close the open OM handle, and initialize a new one
+   (void)immutil_saImmOmFinalize(imm_om_hdl);
+   version = immVersion;
+   (void)immutil_saImmOmInitialize(_om_hdl, NULL, );
+
+   rc = immutil_saImmOmSearchInitialize_2(
+   imm_om_hdl, _cluster->name, SA_IMM_SUBTREE,
+   SA_IMM_SEARCH_ONE_ATTR | SA_IMM_SEARCH_GET_ALL_ATTR,
+   , NULL, _hdl);
+   }
+
if (rc != SA_AIS_OK) {
LOG_ER("No Object of  SaClmCluster Class was found");
goto done1;



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] clm: make CLM tests independent of other CLM tests [#2520]

2017-07-10 Thread praveen malviya


Ack.

Thanks
Praveen

On 05-Jul-17 8:23 PM, Zoran Milinkovic wrote:

The patch removes dependencies between CLM tests. CLM tests can now be run 
multiple times.
Duplicated CLM tests are removed from clmtest.
---
  src/clm/apitest/clmtest.c   |  15 
  src/clm/apitest/tet_saClmClusterTrack.c | 130 +++-
  2 files changed, 94 insertions(+), 51 deletions(-)

diff --git a/src/clm/apitest/clmtest.c b/src/clm/apitest/clmtest.c
index 3e8d95e..683bfe4 100644
--- a/src/clm/apitest/clmtest.c
+++ b/src/clm/apitest/clmtest.c
@@ -38,6 +38,13 @@ SaNameT node_name;
  void clm_init(void)
  {
FILE *fp;
+   // Command list to execute before tests start
+   char *command[] = {
+   // Unlock PL-3
+   "immadm -o 1 safNode=PL-3,safCluster=myClmCluster 2> /dev/null"
+   };
+   int command_list_size = 1;
+   int i;
  
  	fp = fopen("/etc/opensaf/node_name", "r");

if (fp == NULL) {
@@ -49,6 +56,14 @@ void clm_init(void)
if (cnt == 1)
node_name.length = strlen((char *)node_name.value);
fclose(fp);
+
+   // Execute commands
+   for(i=0; i /dev/null", nodeName);
+   } else {
+   sprintf(command, "immadm -o 2 %s", nodeName);
+   }
+   return system(command);
+}
+
+static int clm_node_unlock(const char *nodeName, int ignoreOutput) {
+   char command[256];
+
+   if (ignoreOutput) {
+   sprintf(command, "immadm -o 1 %s 2> /dev/null", nodeName);
+   } else {
+   sprintf(command, "immadm -o 1 %s", nodeName);
+   }
+   return system(command);
+}
+
+static int clm_node_shutdown(const char *nodeName, int ignoreOutput) {
+   char command[256];
+
+   if (ignoreOutput) {
+   sprintf(command, "immadm -o 3 %s 2> /dev/null", nodeName);
+   } else {
+   sprintf(command, "immadm -o 3 %s", nodeName);
+   }
+   return system(command);
+}
+
+static void *admin_lock(void *dummy)
+{
+   assert(clm_node_lock(s_node_name, 0) != -1);
/*test_validate(WEXITSTATUS(rc), 0);*/
return NULL;
  }
  
  static void *admin_unlock(void *dummy)

  {
-   int rc;
-   char command[256];
-   char name[] = "safNode=PL-3,safCluster=myClmCluster";
-
-   sprintf(command, "immadm -o 1 %s", name);
-   assert((rc = system(command)) != -1);
+   assert(clm_node_unlock(s_node_name, 0) != -1);
/*test_validate(WEXITSTATUS(rc), 0);*/
return NULL;
  }
  
  static void *admin_shutdown(void *dummy)

  {
-   int rc;
-   char command[256];
-   char name[] = "safNode=PL-3,safCluster=myClmCluster";
-
-   sprintf(command, "immadm -o 3 %s", name);
-   assert((rc = system(command)) != -1);
+   assert(clm_node_shutdown(s_node_name, 0) != -1);
/*test_validate(WEXITSTATUS(rc), 0);*/
return NULL;
  }
@@ -68,9 +87,15 @@ static void saClmadmin_lock1(void)
char command[256];
char name[] = "safNode=PL-3,safCluster=myClmCluster";
  
+	// Lock node

+   clm_node_lock(name, 1);
+
sprintf(command, "immadm -o 2 %s", name);
assert((rc = system(command)) != -1);
test_validate(WEXITSTATUS(rc), 1);
+
+   // Reset CLM state
+   clm_node_unlock(name, 1);
  }
  
  static void saClmadmin_unlock1(void)

@@ -90,9 +115,15 @@ static void saClmadmin_shutdown1(void)
char command[256];
char name[] = "safNode=PL-3,safCluster=myClmCluster";
  
+	// Shutdown node

+   clm_node_shutdown(name, 1);
+
sprintf(command, "immadm -o 3 %s", name);
assert((rc = system(command)) != -1);
test_validate(WEXITSTATUS(rc), 1);
+
+   // Reset CLM state
+   clm_node_unlock(name, 1);
  }
  
  static void *plm_admin_trylock(void *dummy)

@@ -610,6 +641,9 @@

Re: [devel] [PATCH 1/1] amfd: increase msg priority for node ups [#2510]

2017-07-05 Thread praveen malviya


Hi Gary,

When SC joins back after SC Absence state, there will not be many 
messages in the queue to be processed, even in a large cluster.

Also there is no error like ipc send failure.
Is AMFD stuck somewhere and it could not take any message from queue?
I think raising priority will not solve problem in such a situation.

Thanks
Praveen

On 23-Jun-17 7:18 AM, Gary Lee wrote:

---
  src/amf/amfd/ndmsg.cc | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/amf/amfd/ndmsg.cc b/src/amf/amfd/ndmsg.cc
index 11bc8ac35..9bfab6fd8 100644
--- a/src/amf/amfd/ndmsg.cc
+++ b/src/amf/amfd/ndmsg.cc
@@ -371,7 +371,12 @@ uint32_t avd_n2d_msg_rcv(AVD_DND_MSG *rcv_msg, NODE_ID 
node_id,
  
evt->info.avnd_msg = rcv_msg;
  
-  if (m_NCS_IPC_SEND(>avd_mbx, evt, NCS_IPC_PRIORITY_HIGH) !=

+  NCS_IPC_PRIORITY priority = NCS_IPC_PRIORITY_HIGH;
+  if (evt->rcv_evt == AVD_EVT_NODE_UP_MSG) {
+priority = NCS_IPC_PRIORITY_VERY_HIGH;
+  }
+
+  if (m_NCS_IPC_SEND(>avd_mbx, evt, priority) !=
NCSCC_RC_SUCCESS) {
  LOG_ER("%s: ncs_ipc_send failed", __FUNCTION__);
  avsv_dnd_msg_free(rcv_msg);



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] ntfd: Do not stop ntfimcn if surveillance thread has not started [#2508]

2017-07-05 Thread praveen malviya


Ack.

Thanks,
Praveen

On 05-Jul-17 2:49 PM, Minh Chau wrote:

ntfd can sometimes receive SIGTERM while opensafd is stopping, even before
the surveillance thread and ntfimcnd have been started.
The patch prevents stopping the surveillance thread and ntfimcnd in that case.
---
  src/ntf/ntfd/ntfs_imcnutil.c | 13 ++---
  1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/ntf/ntfd/ntfs_imcnutil.c b/src/ntf/ntfd/ntfs_imcnutil.c
index cc257703b..00c2c0039 100644
--- a/src/ntf/ntfd/ntfs_imcnutil.c
+++ b/src/ntf/ntfd/ntfs_imcnutil.c
@@ -48,7 +48,7 @@ typedef struct {
bool ntfimcn_on;
  } init_params_t;
  
-static init_params_t ipar;

+static init_params_t ipar = {0, 0, 0, false};
  pthread_mutex_t ntfimcn_mutex;
  
  /**

@@ -357,13 +357,12 @@ int stop_ntfimcn(void)
int rc = 0;
TRACE_ENTER();
  
+	if (ipar.ntfimcn_on == false) goto done;

/* Kill ntfimcn */
osaf_mutex_lock_ordie(_mutex);
-   if (ipar.ntfimcn_on == true) {
-   ipar.ntfimcn_on = false;
-   TRACE("%s: Terminating osafntfimcnd process", __FUNCTION__);
-   timedwait_imcn_exit();
-   }
+   ipar.ntfimcn_on = false;
+   TRACE("%s: Terminating osafntfimcnd process", __FUNCTION__);
+   timedwait_imcn_exit();
osaf_mutex_unlock_ordie(_mutex);
  
  	/* Cancel the surveillance thread */

@@ -377,7 +376,7 @@ int stop_ntfimcn(void)
rc = pthread_mutex_destroy(_mutex);
if (rc != 0)
osaf_abort(rc);
-
+done:
TRACE_LEAVE();
return rc;
  }



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] ntfd: Ensure mutex is not taken after cnsurvail_thread is canceled V2 [#2508]

2017-07-03 Thread praveen malviya


Ack, code review only.


Thanks
Praveen

On 03-Jul-17 6:04 AM, minh chau wrote:

Hi Lennart, Praveen

If we don't have any comments, I would like to push the patch today

Thanks,
Minh

On 28/06/17 14:32, Minh Chau wrote:

In the scenario of shutting down SC while SC switchover is on going,
ntfd coredump is generated due to failure of pthread_mutex_destroy()
with errorcode:16(EBUSY). That means the mutex had been taken and
was not unlocked at the time pthread_mutex_destroy() was called.

This patch changes the way ntfd stops ntfimcn and cnsurvail_thread()
so that the cnsurvail_thread does not restart ntfimcn in the stop sequence.
Therefore, when cnsurvail_thread receives a cancellation request, this
thread does not do anything that may lead to a cancellation point with
a locked mutex.
---
  src/ntf/ntfd/ntfs_imcnutil.c | 58 
+---

  1 file changed, 39 insertions(+), 19 deletions(-)

diff --git a/src/ntf/ntfd/ntfs_imcnutil.c b/src/ntf/ntfd/ntfs_imcnutil.c
index dd27a255c..cc257703b 100644
--- a/src/ntf/ntfd/ntfs_imcnutil.c
+++ b/src/ntf/ntfd/ntfs_imcnutil.c
@@ -44,6 +44,8 @@ typedef struct {
  SaAmfHAStateT ha_state;
  pid_t pid;
  pthread_t thread;
+/* ntfimcn functionality: true(enabled), false(disabled)  */
+bool ntfimcn_on;
  } init_params_t;
  static init_params_t ipar;
@@ -240,8 +242,17 @@ static void *cnsurvail_thread(void *_init_params)
  while (1) {
  osaf_mutex_lock_ordie(_mutex);
-pid = create_imcnprocess(ipar->ha_state);
-ipar->pid = pid;
+/* Only start ntfimcn process if this functionality is
+ * enabled, this is to avoid restarting ntfimcn when ntfd
+ * receives SIGTERM (shutting down)
+ * NOTE: Do not add any code outside below *if @ntfimcn_on*
+ * block that may lead to a thread cancellation point while
+ * ntfimcn_mutex is being locked
+ */
+if (ipar->ntfimcn_on == true) {
+pid = create_imcnprocess(ipar->ha_state);
+ipar->pid = pid;
+}
  osaf_mutex_unlock_ordie(_mutex);
  /* Wait for child process to exit */
@@ -271,7 +282,8 @@ static void *cnsurvail_thread(void *_init_params)
  /**
   * Start the imcn process surveillance thread
- *
+ * When surveillance thread is running, this thread
+ * will start and monitor ntfimcn process in cnsurvail_thread()
   * @param ha_state[in]
   */
  static void start_cnprocess(SaAmfHAStateT ha_state)
@@ -285,7 +297,8 @@ static void start_cnprocess(SaAmfHAStateT ha_state)
  osaf_abort(rc);
  ipar.ha_state = ha_state;
-
+ipar.ntfimcn_on = true;
+ipar.pid = 0;
  rc =
  pthread_create(, NULL, cnsurvail_thread, (void 
*));

  if (rc != 0)
@@ -330,33 +343,40 @@ void handle_state_ntfimcn(SaAmfHAStateT ha_state)
  }
  /**
- * Cancel the surveillance trhead and kill the imcn process.
+ * This function stops functionality of ntfimcn by:
+ * First: Kill imcn process
+ * Second: Cancel the surveillance thread
+ * (in reversed order of start ntfimcn)
   * Use the pid and thread id saved when the process was started
   * This will terminate the process permanently.
   *
- * @return -1 if error
+ * @return 0 if success, abort() on any error
   */
  int stop_ntfimcn(void)
  {
-void *join_ret;
  int rc = 0;
  TRACE_ENTER();
-if (ipar.ha_state != 0) {
-TRACE("%s: Cancel the imcn surveillance thread", __FUNCTION__);
-rc = pthread_cancel(ipar.thread);
-if (rc != 0)
-osaf_abort(rc);
-rc = pthread_join(ipar.thread, _ret);
-if (rc != 0)
-osaf_abort(rc);
-rc = pthread_mutex_destroy(_mutex);
-if (rc != 0)
-osaf_abort(rc);
-
+/* Kill ntfimcn */
+osaf_mutex_lock_ordie(_mutex);
+if (ipar.ntfimcn_on == true) {
+ipar.ntfimcn_on = false;
  TRACE("%s: Terminating osafntfimcnd process", __FUNCTION__);
  timedwait_imcn_exit();
  }
+osaf_mutex_unlock_ordie(_mutex);
+
+/* Cancel the surveillance thread */
+TRACE("%s: Cancel the imcn surveillance thread", __FUNCTION__);
+rc = pthread_cancel(ipar.thread);
+if (rc != 0)
+osaf_abort(rc);
+rc = pthread_join(ipar.thread, NULL);
+if (rc != 0)
+osaf_abort(rc);
+rc = pthread_mutex_destroy(_mutex);
+if (rc != 0)
+osaf_abort(rc);
  TRACE_LEAVE();
  return rc;




--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 0/5] Review Request for ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506]

2017-06-28 Thread praveen malviya


Hi Lennart,

NTFIMCN initializes with IMM with SAF version A.02.12.
IMM integration with CLM is done in last release and for that SAF 
version is A.02.18 (src/imm/README mentions it).
Like any legacy application, NTFIMCN should not get ERR_UNAVAILABLE from 
IMM for any API call.


Thanks,
Praveen



On 28-Jun-17 8:48 PM, Lennart Lund wrote:

Summary: ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506]
Review request for Ticket(s): 2506
Peer Reviewer(s): praveen.malv...@oracle.com; minh.c...@dektech.com.au
Pull request to: *** LIST THE PERSON WITH PUSH ACCESS HERE ***
Affected branch(es): develop
Development branch: ticket-2506
Base revision: f089f030a322a43c79f3f259f07a4c42bb4d0da1
Personal repository: git://git.code.sf.net/u/elunlen/review


Impacted area   Impact y/n

  Docsn
  Build systemn
  RPM/packaging   n
  Configuration files n
  Startup scripts n
  SAF servicesy
  OpenSAF servicesn
  Core libraries  n
  Samples n
  Tests   n
  Other   n


Comments (indicate scope for each "y" above):
-
*** EXPLAIN/COMMENT THE PATCH SERIES HERE ***

revision beae5c3dacaa05fe68b50433251947d8c045cca7
Author: Lennart Lund 
Date:   Wed, 28 Jun 2017 17:09:23 +0200

ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506]

Changed according to comment from Minh.
OM Handle is now initialized only when imcn process start and
is reinitialized if SA_AIS_ERR_UNAVAILABLE
Also comment about resource handling in case of error in get_rdn_attr_name()



revision 7c3bc31dda6099becf7f246093d0bd7b0d652340
Author: Lennart Lund 
Date:   Wed, 28 Jun 2017 16:49:04 +0200

ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506]

Changed according to comment from Minh.
OM Handle is now initialized only when imcn process start and
is reinitialized if SA_AIS_ERR_UNAVAILABLE
Also comment about resource handling in case of error in get_rdn_attr_name()

long_description



revision caa854ce873f90208a5a14894b51c997e25a924c
Author: Lennart Lund 
Date:   Wed, 28 Jun 2017 16:49:04 +0200

ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506]

In ntfimcn the OM handle shall have a short lifespan. Change from creating a
handle once when ntfimcn process starts to create a handle each time it is
needed and finalize when no longer needed.



revision 0db3b249ffa8d00b05cc7aefecb1d422c30c8faf
Author: Lennart Lund 
Date:   Wed, 28 Jun 2017 16:49:04 +0200

ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506]

In ntfimcn the OM handle shall have a short lifespan. Change from creating a
handle once when ntfimcn process starts to create a handle each time it is
needed and finalize when no longer needed.

Change start handling of ntfimcn (in ntf process) so the ntfimcn process is
started on the active node only since the ntfimcn process is not doing
anything on the standby node. Refactor/simplify code accordingly.



revision 71763df94b6d58d6e553fa26cc41dbd7cb7d264a
Author: Lennart Lund 
Date:   Tue, 27 Jun 2017 16:05:44 +0200

ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506]

In ntfimcn the OM handle shall have a short lifespan. Change from creating a
handle once when ntfimcn process starts to create a handle each time it is
needed and finalize when no longer needed.



revision 8f43f5d28b282812926d5a5bad29e604c76e4697
Author: Lennart Lund 
Date:   Mon, 26 Jun 2017 14:03:38 +0200

ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506]

In ntfimcn the OM handle shall have a short lifespan. Change from creating a
handle once when ntfimcn process starts to create a handle each time it is
needed and finalize when no longer needed.

Change start handling of ntfimcn (in ntf process) so the ntfimcn process is
started on the active node only since the ntfimcn process is not doing
anything on the standby node. Refactor/simplify code accordingly.



Complete diffstat:
--
  src/ntf/ntfimcnd/ntfimcn_imm.c  | 205 
  src/ntf/ntfimcnd/ntfimcn_imm.h  |  11 ++-
  src/ntf/ntfimcnd/ntfimcn_main.h |   2 +-
  3 files changed, 158 insertions(+), 60 deletions(-)


Testing Commands:
-
*** LIST THE COMMAND LINE TOOLS/STEPS TO TEST YOUR CHANGES ***


Testing, Expected Results:
--
*** PASTE COMMAND OUTPUTS / TEST RESULTS ***


Conditions of Submission:
-
*** HOW MANY DAYS BEFORE PUSHING, CONSENSUS ETC ***


Arch  Built StartedLinux distro
---
mipsn  n
mips64  n  n
x86 n  n
x86_64  n  n
powerpc n  n
powerpc64   n  n


Reviewer

Re: [devel] [PATCH 1/1] amfd: Do not log warning when create (or delete) a existed(or nonexisted) SUSI [#2467]

2017-06-28 Thread praveen malviya


Ack, not tested.

Thanks,
Praveen

On 27-Jun-17 2:17 PM, minh chau wrote:

Hi Praveen,

I had the same thought as yours, but in case lock  then unlock , 
Create_sync (from unlock) gets ERR_EXIST because the previous 
Delete_sync(from lock) first fails and is still in queue, then we will 
not create the object. This patch doesn't mean to change any logic, it 
just doesn't make the log confusing


Thanks,
Minh

On 27/06/17 18:33, praveen malviya wrote:

Hi Minh,

One comment inline with [ Praveen].

Thanks
Praveen

On 25-May-17 12:53 PM, Minh Chau wrote:

---
  src/amf/amfd/imm.cc | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amf/amfd/imm.cc b/src/amf/amfd/imm.cc
index 7b1aa333e..26faffcb5 100644
--- a/src/amf/amfd/imm.cc
+++ b/src/amf/amfd/imm.cc
@@ -1893,7 +1893,7 @@ void avd_saImmOiRtObjectCreate_sync(const 
std::string ,

  rc = saImmOiRtObjectCreate_2(avd_cb->immOiHandle,
  const_cast(className.c_str()),
  parent_name, attrValues);
-if (rc != SA_AIS_OK) {
+if (rc != SA_AIS_OK && rc != SA_AIS_ERR_EXIST) {
LOG_WA("saImmOiRtObjectCreate_2 of className:'%s', 
parentName:'%s',"
" failed with %u", className.c_str(), parentName.c_str(), 
rc);

  }
[Praveen] if return code is ERR_EXIST, then it means RT object exists 
in IMM. In such a situation second if block in this function should 
not push it in job queue.
@@ -1946,7 +1946,7 @@ void avd_saImmOiRtObjectDelete_sync(const 
std::string ) {

  if (isImmReady == true) {
  rc = saImmOiRtObjectDelete_o3(avd_cb->immOiHandle, dn.c_str());
-if (rc != SA_AIS_OK) {
+if (rc != SA_AIS_OK  && rc != SA_AIS_ERR_NOT_EXIST) {
LOG_WA("saImmOiRtObjectDelete_o3 of '%s' failed with %u", 
dn.c_str(), rc);

  }
}







--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] clmtest: update non-member node_id for new test environment [#2512]

2017-06-27 Thread praveen malviya


Ack.

Thanks,
Praveen

On 27-Jun-17 3:09 PM, Hoang Vo wrote:

clmtest 7 7 check saClmClusterNodeGet with non-member node,
previously designed as 0x2060F.
change test node_id to 0x2990F to avoid future conflict
when test environment is upgraded.
---
  src/clm/apitest/tet_saClmClusterNodeGet.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/clm/apitest/tet_saClmClusterNodeGet.c 
b/src/clm/apitest/tet_saClmClusterNodeGet.c
index 51683f9..c26939d 100644
--- a/src/clm/apitest/tet_saClmClusterNodeGet.c
+++ b/src/clm/apitest/tet_saClmClusterNodeGet.c
@@ -126,7 +126,7 @@ void saClmClusterNodeGet_06(void)
  
  void saClmClusterNodeGet_07(void)

  {
-   nodeId = 132623; /*node is non member*/
+   nodeId = 170255; /*node is non member, 0x2990F*/
safassert(saClmInitialize(, _1, _1),
  SA_AIS_OK);
rc = saClmClusterNodeGet(clmHandle, nodeId, timeout, _1);
@@ -134,7 +134,7 @@ void saClmClusterNodeGet_07(void)
/*test_validate(rc, SA_AIS_ERR_UNAVAILABLE);*/
test_validate(rc, SA_AIS_ERR_NOT_EXIST);
  
-	nodeId = 132623;

+   nodeId = 170255;
safassert(saClmInitialize_4(, _4, _4),
  SA_AIS_OK);
rc = saClmClusterNodeGet_4(clmHandle, nodeId, timeout, _4);



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] amfd: Do not log warning when create (or delete) a existed(or nonexisted) SUSI [#2467]

2017-06-27 Thread praveen malviya


Hi Minh,

One comment inline with [ Praveen].

Thanks
Praveen

On 25-May-17 12:53 PM, Minh Chau wrote:

---
  src/amf/amfd/imm.cc | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amf/amfd/imm.cc b/src/amf/amfd/imm.cc
index 7b1aa333e..26faffcb5 100644
--- a/src/amf/amfd/imm.cc
+++ b/src/amf/amfd/imm.cc
@@ -1893,7 +1893,7 @@ void avd_saImmOiRtObjectCreate_sync(const std::string 
,
  rc = saImmOiRtObjectCreate_2(avd_cb->immOiHandle,
  const_cast(className.c_str()),
  parent_name, attrValues);
-if (rc != SA_AIS_OK) {
+if (rc != SA_AIS_OK && rc != SA_AIS_ERR_EXIST) {
LOG_WA("saImmOiRtObjectCreate_2 of className:'%s', parentName:'%s',"
" failed with %u", className.c_str(), parentName.c_str(), rc);
  }
[Praveen] If the return code is ERR_EXIST, it means the RT object already 
exists in IMM. In that situation, the second if block in this function 
should not push it onto the job queue.

@@ -1946,7 +1946,7 @@ void avd_saImmOiRtObjectDelete_sync(const std::string 
) {
  
if (isImmReady == true) {

  rc = saImmOiRtObjectDelete_o3(avd_cb->immOiHandle, dn.c_str());
-if (rc != SA_AIS_OK) {
+if (rc != SA_AIS_OK  && rc != SA_AIS_ERR_NOT_EXIST) {
LOG_WA("saImmOiRtObjectDelete_o3 of '%s' failed with %u", dn.c_str(), 
rc);
  }
}



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] ntf: Test cases fail on SC nodes [#2505]

2017-06-27 Thread praveen malviya

If rdegetrole does not exist (on a payload node), the else block will also be 
executed, and the else block is for payload nodes. So the present patch will always work.


Thanks,
Praveen

On 27-Jun-17 12:50 PM, Lennart Lund wrote:

Hi Praveen

I removed the check if rdegetrole exist on the node but I think it is a good 
idea to still have this check.
This means that the check if we are on a payload node should be to first check 
if rdegetrole exist and if it does then check if the return code is Fail.

Thanks
Lennart


-Original Message-
From: praveen malviya [mailto:praveen.malv...@oracle.com]
Sent: den 27 juni 2017 08:29
To: Lennart Lund <lennart.l...@ericsson.com>
Cc: opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1/1] ntf: Test cases fail on SC nodes [#2505]

Ack.

Thanks
Praveen

On 26-Jun-17 8:35 PM, Lennart Lund wrote:

Fix incorrect detection of node type and misleading information to the user

of ntftest

---
   src/ntf/apitest/tet_ntf_clm.c | 14 --
   1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/ntf/apitest/tet_ntf_clm.c b/src/ntf/apitest/tet_ntf_clm.c
index 5b1d8c6..0f2c7d0 100644
--- a/src/ntf/apitest/tet_ntf_clm.c
+++ b/src/ntf/apitest/tet_ntf_clm.c
@@ -444,13 +444,13 @@ __attribute__((constructor)) static void

ntf_clm_constructor(void)

// printf("lock_cmd:'%s'\n",lock_cmd);
// printf("unlock_cmd:'%s'\n",unlock_cmd);

-   // Add these test cases on other than active controller.
+   // The following tests are added only if not running on an Active
+   // controller node
int rc = 0;
char role[80];
-   rc = system("which rdegetrole");
+   rc = system("rdegetrole");
if (rc == 0) {
-   printf("This is a controller node\n");
-   // Command rdegetrole exists means a controller.
+   // Command rdegetrole returning OK means controller node.
memset(buffer, '\0', sizeof(buffer));
memset(role, '\0', sizeof(role));
strcpy(buffer, "rdegetrole");
@@ -459,14 +459,16 @@ __attribute__((constructor)) static void

ntf_clm_constructor(void)

if ((ptr = strchr(role, '\n')) != NULL)
*ptr = '\0';
if (!strcmp((char *)role, "ACTIVE")) {
-   // printf("Active controller node\n");
+   printf("Active controller node. "
+   "Do not run CLM tests\n");
pclose(fp);
return;
}
+   printf("Standby controller node. Run CLM tests\n");
}
pclose(fp);
} else {
-   printf("This is a payload node\n");
+   printf("Payload node. Run CLM tests\n");
}

test_suite_add(40, "Ntf CLM Integration test suite\n");



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] ntf: Test cases fail on SC nodes [#2505]

2017-06-27 Thread praveen malviya


Ack.

Thanks
Praveen

On 26-Jun-17 8:35 PM, Lennart Lund wrote:

Fix incorrect detection of node type and misleading information to the user of 
ntftest
---
  src/ntf/apitest/tet_ntf_clm.c | 14 --
  1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/ntf/apitest/tet_ntf_clm.c b/src/ntf/apitest/tet_ntf_clm.c
index 5b1d8c6..0f2c7d0 100644
--- a/src/ntf/apitest/tet_ntf_clm.c
+++ b/src/ntf/apitest/tet_ntf_clm.c
@@ -444,13 +444,13 @@ __attribute__((constructor)) static void 
ntf_clm_constructor(void)
// printf("lock_cmd:'%s'\n",lock_cmd);
// printf("unlock_cmd:'%s'\n",unlock_cmd);
  
-	// Add these test cases on other than active controller.

+   // The following tests are added only if not running on an Active
+   // controller node
int rc = 0;
char role[80];
-   rc = system("which rdegetrole");
+   rc = system("rdegetrole");
if (rc == 0) {
-   printf("This is a controller node\n");
-   // Command rdegetrole exists means a controller.
+   // Command rdegetrole returning OK means controller node.
memset(buffer, '\0', sizeof(buffer));
memset(role, '\0', sizeof(role));
strcpy(buffer, "rdegetrole");
@@ -459,14 +459,16 @@ __attribute__((constructor)) static void 
ntf_clm_constructor(void)
if ((ptr = strchr(role, '\n')) != NULL)
*ptr = '\0';
if (!strcmp((char *)role, "ACTIVE")) {
-   // printf("Active controller node\n");
+   printf("Active controller node. "
+   "Do not run CLM tests\n");
pclose(fp);
return;
}
+   printf("Standby controller node. Run CLM tests\n");
}
pclose(fp);
} else {
-   printf("This is a payload node\n");
+   printf("Payload node. Run CLM tests\n");
}
  
  	test_suite_add(40, "Ntf CLM Integration test suite\n");




--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] amfd: Accept ERR_NOT_EXIST on stopping track callback [#2469]

2017-06-27 Thread praveen malviya


Ack, code review only.

Thanks,
Praveen

On 26-Jun-17 9:25 AM, Minh Chau wrote:

During switchover, the standby amfd tries to stop clm tracking;
the first attempt got ERR_TIMEOUT and the second attempt got ERR_NOT_EXIST.

In CLM spec, ERR_TIMEOUT return means the stop clm tracking
may or may not be successful. If the first call doesn't succeed,
the second call will be OK. In the scope of this ticket, the
first already succeeded, therefore amfd got ERR_NOT_EXIST.

Note that ERR_NOT_EXIST doesn't mean that the CLM handle is
invalid (or BAD HANDLE), thus this error code can be ignored
since standby amfd doesn't need to track clm anymore.
---
  src/amf/amfd/clm.cc  | 4 
  src/amf/amfd/role.cc | 5 +++--
  2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/amf/amfd/clm.cc b/src/amf/amfd/clm.cc
index 4f69d4a58..1b451e9e7 100644
--- a/src/amf/amfd/clm.cc
+++ b/src/amf/amfd/clm.cc
@@ -495,6 +495,10 @@ SaAisErrorT avd_clm_track_stop(void) {
  if (error == SA_AIS_ERR_TRY_AGAIN || error == SA_AIS_ERR_TIMEOUT ||
  error == SA_AIS_ERR_UNAVAILABLE) {
LOG_WA("Failed to stop cluster tracking %u", error);
+} else if (error == SA_AIS_ERR_NOT_EXIST) {
+  /* track changes was not started or stopped successfully */
+  LOG_WA("Failed to stop cluster tracking %u", error);
+  avd_cb->is_clm_track_started = false;
  } else {
LOG_ER("Failed to stop cluster tracking %u", error);
  }
diff --git a/src/amf/amfd/role.cc b/src/amf/amfd/role.cc
index 85cde7fb7..ec13c3bd8 100644
--- a/src/amf/amfd/role.cc
+++ b/src/amf/amfd/role.cc
@@ -1105,7 +1105,7 @@ uint32_t amfd_switch_actv_qsd(AVD_CL_CB *cb) {
  
  uint32_t amfd_switch_qsd_stdby(AVD_CL_CB *cb) {

uint32_t status = NCSCC_RC_SUCCESS;
-
+  SaAisErrorT ais_rc;
TRACE_ENTER();
LOG_NO("Switching Quiesced --> StandBy");
  
@@ -1139,7 +1139,8 @@ uint32_t amfd_switch_qsd_stdby(AVD_CL_CB *cb) {

}
  
if (cb->is_clm_track_started == true) {

-if (avd_clm_track_stop() != SA_AIS_OK) {
+ais_rc = avd_clm_track_stop();
+if (ais_rc != SA_AIS_OK && ais_rc != SA_AIS_ERR_NOT_EXIST) {
LOG_ER("Failed to stop cluster tracking after switch over");
  }
}



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] amfnd: retry on ERR_NOT_EXIST [#2490]

2017-06-21 Thread praveen malviya


Hi Gary,

Is there any ticket in IMM for any related issue?
Both creation of SU and admin operation on it will go through IMM only.
I do not know how IMM handles it and how IMM spec talks about it.
Does the IMM spec allow an admin operation on an entity for which not all 
synced IMMNDs (excluding the non-synced IMMNDs on nodes that are joining the 
cluster) have been updated yet?


Thanks,
Praveen

On 15-Jun-17 1:11 PM, Gary Lee wrote:

On a congested network, sometimes a newly created IMM object can take some
time to be available on other nodes.

In our test, a new SU is created on SC-1 and unlocked. But sometimes
it fails on a remote node due to:

2017-05-19 13:55:19 SC-2 osafamfnd[258]: ER amf_saImmOmSearchInitialize_o2 
failed: 12

To get around this, we will retry on SA_AIS_ERR_NOT_EXIST a few times.
---
  src/amf/amfnd/util.cc | 15 +++
  1 file changed, 15 insertions(+)

diff --git a/src/amf/amfnd/util.cc b/src/amf/amfnd/util.cc
index ed0905ce2..bca642eac 100644
--- a/src/amf/amfnd/util.cc
+++ b/src/amf/amfnd/util.cc
@@ -38,6 +38,9 @@
  #include 
  #include "osaf/configmake.h"
  #include "amf/amfnd/avnd.h"
+#include "base/osaf_time.h"
+
+extern struct ImmutilWrapperProfile immutilWrapperProfile;
  
  const char *presence_state[] = {

  "OUT_OF_RANGE", "UNINSTANTIATED", "INSTANTIATING",
@@ -335,6 +338,18 @@ SaAisErrorT amf_saImmOmSearchInitialize_o2(
scope, searchOptions, 
searchParam,
attributeNames, );
  }
+  } else if (rc == SA_AIS_ERR_NOT_EXIST) {
+// it is possible for 'rootName' to be not yet available
+// at the local immnd. Retry a few times to allow CCB to be propagated.
+unsigned int nTries = 1;
+while (rc == SA_AIS_ERR_NOT_EXIST &&
+  nTries < immutilWrapperProfile.nTries) {
+  osaf_nanosleep();
+  rc = immutil_saImmOmSearchInitialize_o2(immHandle, rootName.c_str(),
+scope, searchOptions, searchParam,
+attributeNames, );
+  nTries++;
+}
}
return rc;
  }



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] clmtest: correct independent test cases [#2497]

2017-06-19 Thread praveen malviya


Ack.

Thanks
Praveen

On 15-Jun-17 2:01 PM, Hoang Vo wrote:

clmtest 10 3 and 10 4 do not initialize their own data struct but reuse the
global one, which leads to test failures when they are run alone.
---
  src/clm/apitest/tet_saClmClusterNotificationFree.c | 12 
  1 file changed, 12 insertions(+)

diff --git a/src/clm/apitest/tet_saClmClusterNotificationFree.c 
b/src/clm/apitest/tet_saClmClusterNotificationFree.c
index 5ce002a..22faf15 100644
--- a/src/clm/apitest/tet_saClmClusterNotificationFree.c
+++ b/src/clm/apitest/tet_saClmClusterNotificationFree.c
@@ -53,6 +53,12 @@ void saClmClusterNotificationFree_02(void)
  
  void saClmClusterNotificationFree_03(void)

  {
+   notificationBuffer_4.numberOfItems = 1;
+   notificationBuffer_4.notification =
+   (SaClmClusterNotificationT_4 *)malloc(
+   sizeof(SaClmClusterNotificationT_4) *
+   notificationBuffer_4.numberOfItems);
+
rc = saClmClusterNotificationFree_4(0,
notificationBuffer_4.notification);
test_validate(rc, SA_AIS_ERR_BAD_HANDLE);
@@ -62,6 +68,12 @@ void saClmClusterNotificationFree_03(void)
  
  void saClmClusterNotificationFree_04(void)

  {
+   notificationBuffer_4.numberOfItems = 1;
+   notificationBuffer_4.notification =
+   (SaClmClusterNotificationT_4 *)malloc(
+   sizeof(SaClmClusterNotificationT_4) *
+   notificationBuffer_4.numberOfItems);
+
rc = saClmClusterNotificationFree_4(-1,
notificationBuffer_4.notification);
test_validate(rc, SA_AIS_ERR_BAD_HANDLE);



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/3] amfnd: Refactor AVND_COMP for simpler environment variable handling [#1945]

2017-06-14 Thread praveen malviya


Ack for the series.


Thanks
Praveen

On 13-Jun-17 4:54 PM, Hans Nordeback wrote:

---
  src/amf/amfnd/avnd_comp.h | 134 ++
  src/amf/amfnd/avnd_err.h  |   4 +-
  src/amf/amfnd/avnd_hc.h   |   2 +-
  src/amf/amfnd/avnd_proc.h |  10 ++--
  src/amf/amfnd/avnd_util.h |   2 +-
  src/amf/amfnd/clc.cc  | 125 --
  src/amf/amfnd/comp.cc |  35 
  src/amf/amfnd/compdb.cc   |  36 +++--
  src/amf/amfnd/proxydb.cc  |   2 +-
  9 files changed, 138 insertions(+), 212 deletions(-)

diff --git a/src/amf/amfnd/avnd_comp.h b/src/amf/amfnd/avnd_comp.h
index 611e90e11..68de4cc8e 100644
--- a/src/amf/amfnd/avnd_comp.h
+++ b/src/amf/amfnd/avnd_comp.h
@@ -31,6 +31,8 @@
  #define AMF_AMFND_AVND_COMP_H_
  
  #include 

+#include 
+#include 
  
  struct avnd_cb_tag;

  struct avnd_su_si_rec;
@@ -57,7 +59,7 @@ struct avnd_srm_req_tag;
  
  /* clc event handler declaration */

  typedef uint32_t (*AVND_COMP_CLC_FSM_FN)(struct avnd_cb_tag *,
- struct avnd_comp_tag *);
+ AVND_COMP *);
  
  /* clc fsm events */

  typedef enum avnd_comp_clc_pres_fsm_ev {
@@ -141,7 +143,7 @@ typedef struct avnd_cbk_tag {
AVSV_AMF_CBK_INFO *cbk_info; /* callbk info */
  
/* link to other elements */

-  struct avnd_comp_tag *comp; /* bk ptr to the comp */
+  AVND_COMP *comp; /* bk ptr to the comp */
struct avnd_cbk_tag *next;
std::string comp_name; /* For checkpointing */
  } AVND_COMP_CBK;
@@ -182,7 +184,7 @@ typedef struct avnd_comp_csi_rec {
 wrt prv ha state */
  
/* links to other entities */

-  struct avnd_comp_tag *comp; /* bk ptr to the comp */
+  AVND_COMP *comp; /* bk ptr to the comp */
struct avnd_su_si_rec *si;  /* bk ptr to the si record */
std::string comp_name;  /* For Checkpointing */
std::string si_name;/* For Checkpointing */
@@ -256,7 +258,7 @@ typedef struct avnd_hc_rec_tag {
uint32_t opq_hdl; /* hdl returned by hdl-mngr (used during tmr expiry) */
AVND_COMP_HC_STATUS status; /* indicates status of hc rec */
  
-  struct avnd_comp_tag *comp; /* back ptr to the comp */

+  AVND_COMP *comp; /* back ptr to the comp */
struct avnd_hc_rec_tag *next;
std::string comp_name; /* For checkpoiting */
  } AVND_COMP_HC_REC;
@@ -278,7 +280,7 @@ typedef struct avnd_pm_rec {
} rec_rcvr;
  
/* links to other entities */

-  struct avnd_comp_tag *comp; /* back ptr to the comp */
+  AVND_COMP *comp; /* back ptr to the comp */
  } AVND_COMP_PM_REC;
  
  /*##

@@ -288,7 +290,7 @@ typedef struct avnd_pm_rec {
  /* proxied info */
  typedef struct avnd_pxied_rec {
NCS_DB_LINK_LIST_NODE comp_dll_node; /* node in the comp-pxied dll  */
-  struct avnd_comp_tag *pxied_comp;/* ptr to the proxied comp */
+  AVND_COMP *pxied_comp;/* ptr to the proxied comp */
  } AVND_COMP_PXIED_REC;
  
  #define AVND_COMP_TYPE_LOCAL_NODE 0x0001

@@ -319,100 +321,115 @@ enum UsedComptypeAttrs {
NumAttrs
  };
  
-typedef struct avnd_comp_tag {

-  NCS_DB_LINK_LIST_NODE su_dll_node; /* su dll node (key is inst-level) */
+class AVND_COMP {
+ public:
+  // TODO(uabhano) replace the NCS_DB_LINK_LIST_NODE with C++ STL. Now 
su_dll_node must be first in AVND_COMP
+  // as the macro m_AVND_COMP_SU_DLL_NODE_OFFSET depends on the offset. 
offsetof is to be avoided in classes.
+  NCS_DB_LINK_LIST_NODE su_dll_node {}; /* su dll node (key is inst-level) */
+  AVND_COMP() {}
+  ~AVND_COMP() {}
  
std::string name; /* comp name */

std::string saAmfCompType;
-  uint32_t numOfCompCmdEnv;   /* number of comp command environment variables 
*/
-  SaStringT *saAmfCompCmdEnv; /* comp command environment variables */
-  uint32_t inst_level;/* comp instantiation level */
  
-  uint32_t comp_hdl; /* hdl returned by hdl-mngr */

+  uint32_t inst_level {};/* comp instantiation level */
+
+  uint32_t comp_hdl {}; /* hdl returned by hdl-mngr */
  
/* component attributes */

-  uint32_t flag;  /* comp attributes */
-  bool is_restart_en; /* flag to indicate if comp-restart is allowed */
-  SaAmfCompCapabilityModelT cap; /* comp capability model */
-  bool is_am_en;
-  bool is_hc_cmd_configured;
+  uint32_t flag {};  /* comp attributes */
+  bool is_restart_en {}; /* flag to indicate if comp-restart is allowed */
+  SaAmfCompCapabilityModelT cap {}; /* comp capability model */
+  bool is_am_en {};
+  bool is_hc_cmd_configured {};
  
/* clc info */

-  AVND_COMP_CLC_INFO clc_info;
+  AVND_COMP_CLC_INFO clc_info {};
  
/* Update received flag, which will normally be false and will be

 * true if updates are received from the AVD on fail-over.*/
-  bool avd_updt_flag;
+  bool avd_updt_flag {};
  
/*

Re: [devel] [PATCH 3/3] amfnd: Refactor AVND_COMP for simpler cmd argument handling V2 [#1945]

2017-06-13 Thread praveen malviya


Hi Hans,

One comment on this patch inline with [Praveen].

Thanks,
Praveen

On 18-May-17 3:32 PM, Hans Nordeback wrote:

---
  src/amf/amfnd/avnd_comp.h |  71 ++---
  src/amf/amfnd/avnd_tmr.h  |   6 +--
  src/amf/amfnd/cam.cc  |   2 +-
  src/amf/amfnd/chc.cc  |   2 +-
  src/amf/amfnd/clc.cc  |  34 --
  src/amf/amfnd/comp.cc |  45 ++
  src/amf/amfnd/compdb.cc   | 113 +++---
  src/amf/amfnd/susm.cc |   2 +-
  8 files changed, 140 insertions(+), 135 deletions(-)

diff --git a/src/amf/amfnd/avnd_comp.h b/src/amf/amfnd/avnd_comp.h
index a2fc22691..52bf84e47 100644
--- a/src/amf/amfnd/avnd_comp.h
+++ b/src/amf/amfnd/avnd_comp.h
@@ -33,11 +33,14 @@
  #include 
  #include 
  #include 
+#include 
+#include "avnd_tmr.h"
  
  struct avnd_cb_tag;

  struct avnd_su_si_rec;
  class AVND_SU;
  struct avnd_srm_req_tag;
+class AVND_COMP;
  
  /***

   **  S T R U C T U R E / E N U M  D E F I N I T I O N S  ***
@@ -90,16 +93,37 @@ typedef enum avnd_comp_clc_cmd_type {
  } AVND_COMP_CLC_CMD_TYPE;
  
  /* clc command parameter definition */

-typedef struct avnd_comp_clc_param {
-  char cmd[SAAMF_CLC_LEN]; /* cmd ascii string */
-  SaTimeT timeout; /* cmd timeout value */
-  uint32_t len;/* cmd len */
-} AVND_COMP_CLC_CMD_PARAM;
+struct CompClcCmdParam {
+  void init_clc_cli_command(const char *clc_cmd, char **clc_cmd_argv,
+const SaImmAttrValuesT_2 **attributes,
+const char *attr_name);
+  std::string cmd;
+  std::vector cmd_argv;  /* cmd argv */
+  SaTimeT timeout;/* cmd timeout value */
+};
  
  /* clc info definition (top level wrapper structure) */

-typedef struct avnd_comp_clc_info {
-  /* clc commands (indexed by cmd type) */
-  AVND_COMP_CLC_CMD_PARAM cmds[AVND_COMP_CLC_CMD_TYPE_MAX - 1];
+struct CompClcInfo {
+  std::map cmds;
+  std::string get_cmd(AVND_COMP_CLC_CMD_TYPE cmd_type);
+
+  template
+  void create_argv(std::array , uint32_t , 
AVND_COMP_CLC_CMD_TYPE cmd_type) {
+argc = 0;
+
+std::string tmp = saAmfNodeSwBundlePathPrefix + cmds[cmd_type].cmd;
+argv[argc++] = strdup(tmp.data());
+
+for (auto str : cmds[cmd_type].cmd_argv) {
+   if (argc >= argv.size()) {
+LOG_WA("Too many arguments given, max %zu arguments are supported", 
argv.size());
+break;
+  }
+  argv[argc++] = strdup(str.data());
+}
+  }
+
+  std::string saAmfNodeSwBundlePathPrefix;
  
uint32_t inst_retry_max; /* configured no of instantiate retry attempts */

uint32_t inst_retry_cnt; /* curr no of instantiate retry attempts */
@@ -123,7 +147,7 @@ typedef struct avnd_comp_clc_info {
uint32_t inst_code_rcvd; /* Store the error value
received from the instantiate script */
  
-} AVND_COMP_CLC_INFO;

+};
  
  /*##

  COMPONENT CALLBACK DEFINITIONS
@@ -341,7 +365,7 @@ class AVND_COMP {
bool is_hc_cmd_configured {};
  
/* clc info */

-  AVND_COMP_CLC_INFO clc_info {};
+  CompClcInfo clc_info {};
  
/* Update received flag, which will normally be false and will be

 * true if updates are received from the AVD on fail-over.*/
@@ -684,33 +708,6 @@ class AVND_COMP {
  void m_AVND_COMP_OPER_STATE_AVD_SYNC(struct avnd_cb_tag *cb,
   const AVND_COMP *comp, uint32_t _rc);
  
-/* macro to parse the clc cmd string */

-#define m_AVND_COMP_CLC_STR_PARSE(st, sc, ac, av, tav)   \
-  {  \
-char str[SAAMF_CLC_LEN], *tok = nullptr; \
-/* copy the str as strtok modifies the original str */   \
-strcpy(str, st); \
-ac = 0;  \
-if (nullptr != (tok = strtok(str, " "))) {   \
-  strncpy(sc, tok, SAAMF_CLC_LEN - 1);   \
-  av[ac] = sc;   \
-}\
-ac++;\
-while ((nullptr != (tok = strtok(nullptr, " "))) &&  \
-   (ac < (AVND_COMP_CLC_PARAM_MAX + 1))) {   \
-  if (strlen(tok) > AVND_COMP_CLC_PARAM_SIZE_MAX) break; \
-  strcpy(tav[ac], tok);  \
-  av[ac] = tav[ac];  \
-  ac++;  \
-}\
-if (nullptr != tok) {\
-  sc[0] = (char)(long)nullptr;   \
-  av[0] =

Re: [devel] [PATCH 1/3] amfnd: Refactor AVND_COMP for simpler environment variable handling [#1945]

2017-06-13 Thread praveen malviya


Hi Hans,

One comment on this patch inline with [Praveen].

Thanks,
Praveen
On 18-May-17 3:32 PM, Hans Nordeback wrote:

---
  src/amf/amfnd/avnd_comp.h | 134 ++
  src/amf/amfnd/avnd_err.h  |   4 +-
  src/amf/amfnd/avnd_hc.h   |   2 +-
  src/amf/amfnd/avnd_proc.h |  10 ++--
  src/amf/amfnd/avnd_util.h |   2 +-
  src/amf/amfnd/clc.cc  | 125 --
  src/amf/amfnd/comp.cc |  35 
  src/amf/amfnd/compdb.cc   |  36 +++--
  src/amf/amfnd/proxydb.cc  |   2 +-
  9 files changed, 138 insertions(+), 212 deletions(-)

diff --git a/src/amf/amfnd/avnd_comp.h b/src/amf/amfnd/avnd_comp.h
index 611e90e11..68de4cc8e 100644
--- a/src/amf/amfnd/avnd_comp.h
+++ b/src/amf/amfnd/avnd_comp.h
@@ -31,6 +31,8 @@
  #define AMF_AMFND_AVND_COMP_H_
  
  #include 

+#include 
+#include 
  
  struct avnd_cb_tag;

  struct avnd_su_si_rec;
@@ -57,7 +59,7 @@ struct avnd_srm_req_tag;
  
  /* clc event handler declaration */

  typedef uint32_t (*AVND_COMP_CLC_FSM_FN)(struct avnd_cb_tag *,
- struct avnd_comp_tag *);
+ AVND_COMP *);
  
  /* clc fsm events */

  typedef enum avnd_comp_clc_pres_fsm_ev {
@@ -141,7 +143,7 @@ typedef struct avnd_cbk_tag {
AVSV_AMF_CBK_INFO *cbk_info; /* callbk info */
  
/* link to other elements */

-  struct avnd_comp_tag *comp; /* bk ptr to the comp */
+  AVND_COMP *comp; /* bk ptr to the comp */
struct avnd_cbk_tag *next;
std::string comp_name; /* For checkpointing */
  } AVND_COMP_CBK;
@@ -182,7 +184,7 @@ typedef struct avnd_comp_csi_rec {
 wrt prv ha state */
  
/* links to other entities */

-  struct avnd_comp_tag *comp; /* bk ptr to the comp */
+  AVND_COMP *comp; /* bk ptr to the comp */
struct avnd_su_si_rec *si;  /* bk ptr to the si record */
std::string comp_name;  /* For Checkpointing */
std::string si_name;/* For Checkpointing */
@@ -256,7 +258,7 @@ typedef struct avnd_hc_rec_tag {
uint32_t opq_hdl; /* hdl returned by hdl-mngr (used during tmr expiry) */
AVND_COMP_HC_STATUS status; /* indicates status of hc rec */
  
-  struct avnd_comp_tag *comp; /* back ptr to the comp */

+  AVND_COMP *comp; /* back ptr to the comp */
struct avnd_hc_rec_tag *next;
std::string comp_name; /* For checkpoiting */
  } AVND_COMP_HC_REC;
@@ -278,7 +280,7 @@ typedef struct avnd_pm_rec {
} rec_rcvr;
  
/* links to other entities */

-  struct avnd_comp_tag *comp; /* back ptr to the comp */
+  AVND_COMP *comp; /* back ptr to the comp */
  } AVND_COMP_PM_REC;
  
  /*##

@@ -288,7 +290,7 @@ typedef struct avnd_pm_rec {
  /* proxied info */
  typedef struct avnd_pxied_rec {
NCS_DB_LINK_LIST_NODE comp_dll_node; /* node in the comp-pxied dll  */
-  struct avnd_comp_tag *pxied_comp;/* ptr to the proxied comp */
+  AVND_COMP *pxied_comp;/* ptr to the proxied comp */
  } AVND_COMP_PXIED_REC;
  
  #define AVND_COMP_TYPE_LOCAL_NODE 0x0001

@@ -319,100 +321,115 @@ enum UsedComptypeAttrs {
NumAttrs
  };
  
-typedef struct avnd_comp_tag {

-  NCS_DB_LINK_LIST_NODE su_dll_node; /* su dll node (key is inst-level) */
+class AVND_COMP {
+ public:
+  // TODO(uabhano) replace the NCS_DB_LINK_LIST_NODE with C++ STL. Now 
su_dll_node must be first in AVND_COMP
+  // as the macro m_AVND_COMP_SU_DLL_NODE_OFFSET depends on the offset. 
offsetof is to be avoided in classes.
+  NCS_DB_LINK_LIST_NODE su_dll_node {}; /* su dll node (key is inst-level) */
+  AVND_COMP() {}
+  ~AVND_COMP() {}
  
std::string name; /* comp name */

std::string saAmfCompType;
-  uint32_t numOfCompCmdEnv;   /* number of comp command environment variables 
*/
-  SaStringT *saAmfCompCmdEnv; /* comp command environment variables */
-  uint32_t inst_level;/* comp instantiation level */
  
-  uint32_t comp_hdl; /* hdl returned by hdl-mngr */

+  uint32_t inst_level {};/* comp instantiation level */
+
+  uint32_t comp_hdl {}; /* hdl returned by hdl-mngr */
  
/* component attributes */

-  uint32_t flag;  /* comp attributes */
-  bool is_restart_en; /* flag to indicate if comp-restart is allowed */
-  SaAmfCompCapabilityModelT cap; /* comp capability model */
-  bool is_am_en;
-  bool is_hc_cmd_configured;
+  uint32_t flag {};  /* comp attributes */
+  bool is_restart_en {}; /* flag to indicate if comp-restart is allowed */
+  SaAmfCompCapabilityModelT cap {}; /* comp capability model */
+  bool is_am_en {};
+  bool is_hc_cmd_configured {};
  
/* clc info */

-  AVND_COMP_CLC_INFO clc_info;
+  AVND_COMP_CLC_INFO clc_info {};
  
/* Update received flag, which will normally be false and will be

 * true if updates are received from the AVD on fail-over.*/
-  bool avd_updt_flag;
+

Re: [devel] [PATCH 1/1] amfnd: Send pending susi response message after restarting component finishes [#2485]

2017-06-12 Thread praveen malviya


Ack.

I think this entire if block can be removed, and the Restarting macro can be 
OR-ed with the Assigning macro in the if-else block:


diff --git a/src/amf/amfnd/comp.cc b/src/amf/amfnd/comp.cc
index 74b33a3..2f8bb3a 100644
--- a/src/amf/amfnd/comp.cc
+++ b/src/amf/amfnd/comp.cc
@@ -1615,19 +1615,14 @@ uint32_t avnd_comp_csi_assign_done(AVND_CB *cb, 
AVND_COMP *comp,

   /* delete any pending cbk rec for csi assignment / removal */
   avnd_comp_cbq_csi_rec_del(cb, comp, (csi) ? csi->name : "");

-  /* while restarting, we wont use assign all, so csi will not be null */
-  if (csi && m_AVND_COMP_CSI_CURR_ASSIGN_STATE_IS_RESTARTING(csi)) {
-m_AVND_COMP_CSI_CURR_ASSIGN_STATE_SET(csi,
- 
AVND_COMP_CSI_ASSIGN_STATE_ASSIGNED);

-goto done;
-  }

   if (!csi && m_AVND_COMP_IS_ALL_CSI(comp)) {
 m_AVND_COMP_ALL_CSI_RESET(comp);
   }
   /* mark the csi(s) assigned */
   if (csi) {
-if (m_AVND_COMP_CSI_CURR_ASSIGN_STATE_IS_ASSIGNING(csi)) {
+if (m_AVND_COMP_CSI_CURR_ASSIGN_STATE_IS_ASSIGNING(csi) ||
+   m_AVND_COMP_CSI_CURR_ASSIGN_STATE_IS_RESTARTING(csi)) {
   m_AVND_COMP_CSI_CURR_ASSIGN_STATE_SET(
   csi, AVND_COMP_CSI_ASSIGN_STATE_ASSIGNED);
 }


Thanks
Praveen

On 13-Jun-17 8:29 AM, Minh Chau wrote:

---
  src/amf/amfnd/comp.cc | 1 -
  1 file changed, 1 deletion(-)

diff --git a/src/amf/amfnd/comp.cc b/src/amf/amfnd/comp.cc
index 9dfe87a12..4693df217 100644
--- a/src/amf/amfnd/comp.cc
+++ b/src/amf/amfnd/comp.cc
@@ -1621,7 +1621,6 @@ uint32_t avnd_comp_csi_assign_done(AVND_CB *cb, AVND_COMP 
*comp,
if (csi && m_AVND_COMP_CSI_CURR_ASSIGN_STATE_IS_RESTARTING(csi)) {
  m_AVND_COMP_CSI_CURR_ASSIGN_STATE_SET(csi,

AVND_COMP_CSI_ASSIGN_STATE_ASSIGNED);
-goto done;
}
  
if (!csi && m_AVND_COMP_IS_ALL_CSI(comp)) {




--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] clmd: update saClmNodeCurrAddress and saClmNodeCurrAddressFamily in IMM [#2331]

2017-06-06 Thread praveen malviya


I have published v2. immlist will show these attributes as empty for TIPC.

Thanks,
Praveen

On 01-Jun-17 7:56 PM, Anders Widell wrote:

Hi!

What is the expected behaviour when using TIPC transport? This is what I 
got when I tried:


Name   Type Value(s)

safNodeSA_STRING_T safNode=SC-1
saClmNodeLockCallbackTimeout   SA_TIME_T 500 
(0xba43b7400, Thu Jan  1 01:00:50 1970)

saClmNodeIsMember  SA_UINT32_T  1 (0x1)
saClmNodeInitialViewNumber SA_UINT64_T  4 (0x4)
saClmNodeIDSA_UINT32_T 131343 
(0x2010f)

saClmNodeEESA_NAME_T 
saClmNodeDisableReboot SA_UINT32_T  0 (0x0)
saClmNodeCurrAddressFamily SA_UINT32_T  1 (0x1)
saClmNodeCurrAddress   SA_STRING_T
saClmNodeBootTimeStamp SA_TIME_T 
1496326541720341032 (0x14c4050fd33aca28, Thu Jun  1 16:15:41 2017)

saClmNodeAdminStateSA_UINT32_T  1 (0x1)
saClmNodeAddressFamily SA_UINT32_T 
saClmNodeAddress   SA_STRING_T 
SaImmAttrImplementerName   SA_STRING_T 
safClmService

SaImmAttrClassName SA_STRING_T SaClmNode
SaImmAttrAdminOwnerNameSA_STRING_T IMMLOADER

The best would be if we could add a SA_CLM_AF_TIPC = 3, but if you don't 
have the time to do that now it is probably better to keep the 
attributes empty (as before) when using TIPC.


regards,

Anders Widell


On 05/26/2017 08:54 AM, Praveen wrote:

CLM gets ip address and address family from MDS in node up event.
When node will join CLM cluster, CLM will update saClmNodeCurrAddress and
saClmNodeCurrAddressFamily in IMM.

Also changed permission of CLM tool commands.
---
  src/clm/clmd/clms_imm.c | 21 +++--
  src/clm/clmd/clms_mds.c | 35 +--
  src/clm/tools/clm-adm   |  0
  src/clm/tools/clm-find  |  0
  src/clm/tools/clm-state |  0
  5 files changed, 44 insertions(+), 12 deletions(-)
  mode change 100644 => 100755 src/clm/tools/clm-adm
  mode change 100644 => 100755 src/clm/tools/clm-find
  mode change 100644 => 100755 src/clm/tools/clm-state

diff --git a/src/clm/clmd/clms_imm.c b/src/clm/clmd/clms_imm.c
index a363b50..9c7e018 100644
--- a/src/clm/clmd/clms_imm.c
+++ b/src/clm/clmd/clms_imm.c
@@ -695,15 +695,20 @@ void 
clms_admin_state_update_rattr(CLMS_CLUSTER_NODE *nd)

   */
  void clms_node_update_rattr(CLMS_CLUSTER_NODE *nd)
  {
-SaImmAttrModificationT_2 attr_Mod[4];
+SaImmAttrModificationT_2 attr_Mod[6];
  SaAisErrorT rc;
+SaImmAttrValueT address[1];
  SaImmAttrValueT attrUpdateValue[] = {>member};
  SaImmAttrValueT attrUpdateValue1[] = {>node_id};
  SaImmAttrValueT attrUpdateValue2[] = {>boot_time};
  SaImmAttrValueT attrUpdateValue3[] = {>init_view};
+SaImmAttrValueT attrUpdateValue4[] = {>node_addr.family};
+address[0] = >node_addr.value;
+SaImmAttrValueT attrUpdateValue5[] = {address};
  const SaImmAttrModificationT_2 *attrMods[] = {
-_Mod[0], _Mod[1], _Mod[2], _Mod[3], NULL};
+_Mod[0], _Mod[1], _Mod[2], _Mod[3],
+_Mod[4], _Mod[5], NULL};
  CLMS_CLUSTER_NODE *node = NULL;
@@ -743,6 +748,18 @@ void clms_node_update_rattr(CLMS_CLUSTER_NODE *nd)
  attr_Mod[3].modAttr.attrValueType = SA_IMM_ATTR_SAUINT64T;
  attr_Mod[3].modAttr.attrValues = attrUpdateValue3;
+attr_Mod[4].modType = SA_IMM_ATTR_VALUES_REPLACE;
+attr_Mod[4].modAttr.attrName = "saClmNodeCurrAddressFamily";
+attr_Mod[4].modAttr.attrValuesNumber = 1;
+attr_Mod[4].modAttr.attrValueType = SA_IMM_ATTR_SAUINT32T;
+attr_Mod[4].modAttr.attrValues = attrUpdateValue4;
+
+attr_Mod[5].modType = SA_IMM_ATTR_VALUES_REPLACE;
+attr_Mod[5].modAttr.attrName = "saClmNodeCurrAddress";
+attr_Mod[5].modAttr.attrValuesNumber = 1;
+attr_Mod[5].modAttr.attrValueType = SA_IMM_ATTR_SASTRINGT;
+attr_Mod[5].modAttr.attrValues = attrUpdateValue5;
+
  rc = saImmOiRtObjectUpdate_2(clms_cb->immOiHandle, >node_name,
   attrMods);
diff --git a/src/clm/clmd/clms_mds.c b/src/clm/clmd/clms_mds.c
index cffcfaf..69982a7 100644
--- a/src/clm/clmd/clms_mds.c
+++ b/src/clm/clmd/clms_mds.c
@@ -1013,6 +1013,8 @@ static uint32_t clms_mds_node_event(struct 
ncsmds_callback_info *mds_info)

   * AF_INET4 before sending it to the CLM clients.
   */
  TRACE("Adding ipinformation to the ip list: %u", node_id);
+TRACE("addr_family:%u", mds_info->info.node_evt.addr_family);
+TRACE("ip_addr:%s", mds_info->info.node_evt.ip_addr);
  node_id = mds_info->info.node_evt.node_id;

Re: [devel] [PATCH 1/1] clmtest: correct test case following AIS [#2478]

2017-06-06 Thread praveen malviya


Ack.


Thanks,
Praveen

On 02-Jun-17 1:01 PM, Hoang Vo wrote:

---
  src/clm/apitest/tet_saClmClusterNodeGet.c | 8 
  src/clm/apitest/tet_saClmClusterTrack.c   | 4 ++--
  src/clm/apitest/tet_saClmSelectionObjectGet.c | 2 +-
  3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/clm/apitest/tet_saClmClusterNodeGet.c 
b/src/clm/apitest/tet_saClmClusterNodeGet.c
index 923578e..51683f9 100644
--- a/src/clm/apitest/tet_saClmClusterNodeGet.c
+++ b/src/clm/apitest/tet_saClmClusterNodeGet.c
@@ -80,14 +80,14 @@ void saClmClusterNodeGet_04(void)
  SA_AIS_OK);
rc = saClmClusterNodeGet(clmHandle, nodeId, 0, _1);
safassert(saClmFinalize(clmHandle), SA_AIS_OK);
-   test_validate(rc, SA_AIS_ERR_TIMEOUT);
+   test_validate(rc, SA_AIS_OK);
  
  	nodeId = 131343;

safassert(saClmInitialize_4(, _4, _4),
  SA_AIS_OK);
rc = saClmClusterNodeGet_4(clmHandle, nodeId, 0, _4);
safassert(saClmFinalize(clmHandle), SA_AIS_OK);
-   test_validate(rc, SA_AIS_ERR_TIMEOUT);
+   test_validate(rc, SA_AIS_OK);
  }
  
  void saClmClusterNodeGet_05(void)

@@ -126,7 +126,7 @@ void saClmClusterNodeGet_06(void)
  
  void saClmClusterNodeGet_07(void)

  {
-   nodeId = 131855; /*node is non member*/
+   nodeId = 132623; /*node is non member*/
safassert(saClmInitialize(, _1, _1),
  SA_AIS_OK);
rc = saClmClusterNodeGet(clmHandle, nodeId, timeout, _1);
@@ -134,7 +134,7 @@ void saClmClusterNodeGet_07(void)
/*test_validate(rc, SA_AIS_ERR_UNAVAILABLE);*/
test_validate(rc, SA_AIS_ERR_NOT_EXIST);
  
-	nodeId = 131855;

+   nodeId = 132623;
safassert(saClmInitialize_4(, _4, _4),
  SA_AIS_OK);
rc = saClmClusterNodeGet_4(clmHandle, nodeId, timeout, _4);
diff --git a/src/clm/apitest/tet_saClmClusterTrack.c 
b/src/clm/apitest/tet_saClmClusterTrack.c
index e62ae24..00a52b4 100644
--- a/src/clm/apitest/tet_saClmClusterTrack.c
+++ b/src/clm/apitest/tet_saClmClusterTrack.c
@@ -376,7 +376,7 @@ void saClmClusterTrack_08(void)
  clmHandle, notificationBuffer_4.notification),
  SA_AIS_OK);
safassert(saClmFinalize(clmHandle), SA_AIS_OK);
-   test_validate(rc, SA_AIS_ERR_INVALID_PARAM);
+   test_validate(rc, SA_AIS_ERR_BAD_HANDLE);
  
  	trackFlags = (SA_TRACK_CURRENT | SA_TRACK_LOCAL);

notificationBuffer_4.numberOfItems = 1;
@@ -413,7 +413,7 @@ void saClmClusterTrack_09(void)
  clmHandle, notificationBuffer_4.notification),
  SA_AIS_OK);
safassert(saClmFinalize(clmHandle), SA_AIS_OK);
-   test_validate(rc, SA_AIS_ERR_INVALID_PARAM);
+   test_validate(rc, SA_AIS_ERR_BAD_FLAGS);
  }
  
  void saClmClusterTrack_10(void)

diff --git a/src/clm/apitest/tet_saClmSelectionObjectGet.c 
b/src/clm/apitest/tet_saClmSelectionObjectGet.c
index 6767feb..34961c1 100644
--- a/src/clm/apitest/tet_saClmSelectionObjectGet.c
+++ b/src/clm/apitest/tet_saClmSelectionObjectGet.c
@@ -41,7 +41,7 @@ void saClmSelectionObjectGet_02(void)
  SA_AIS_OK);
rc = saClmSelectionObjectGet(0, );
safassert(saClmFinalize(clmHandle), SA_AIS_OK);
-   test_validate(rc, SA_AIS_ERR_INVALID_PARAM);
+   test_validate(rc, SA_AIS_ERR_BAD_HANDLE);
  }
  
  void saClmSelectionObjectGet_03(void)




--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] mds: clear mds lib valgrind warning [#2474]

2017-06-05 Thread praveen malviya


ack code review only.

One minor comment:
Magic number 1024 should be replaced by some #define or constant.


Thanks
Praveen

On 01-Jun-17 2:32 PM, A V Mahesh wrote:

---
  src/mds/mds_c_api.c   |  4 +++-
  src/mds/mds_c_db.c| 12 ++--
  src/mds/mds_dt_tcp.c  |  1 +
  src/mds/mds_dt_tipc.c |  1 +
  src/mds/mds_log.cc|  3 +++
  5 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/src/mds/mds_c_api.c b/src/mds/mds_c_api.c
index a5986c2..f5ba318 100644
--- a/src/mds/mds_c_api.c
+++ b/src/mds/mds_c_api.c
@@ -1778,7 +1778,7 @@ uint32_t mds_mcm_svc_up(PW_ENV_ID pwe_id, MDS_SVC_ID 
svc_id, V_DEST_RL role,
uint32_t status = NCSCC_RC_SUCCESS;
NCSMDS_SCOPE_TYPE local_subtn_scope;
MDS_VIEW local_subtn_view;
-   MDS_DEST active_adest;
+   MDS_DEST active_adest = 0;
V_DEST_RL dest_role;
bool tmr_running;
NCSMDS_CALLBACK_INFO cbinfo;
@@ -1964,6 +1964,7 @@ uint32_t mds_mcm_svc_up(PW_ENV_ID pwe_id, MDS_SVC_ID 
svc_id, V_DEST_RL role,
   present */
char to_adest_details
[MDS_MAX_PROCESS_NAME_LEN];
+   memset(to_adest_details, 0, 
MDS_MAX_PROCESS_NAME_LEN);
get_subtn_adest_details(

m_MDS_GET_PWE_HDL_FROM_SVC_HDL(
local_svc_hdl),
@@ -4932,6 +4933,7 @@ uint32_t mds_mcm_init(void)
  
  	/* STEP 1: Initialize MCM-CB. */

gl_mds_mcm_cb = m_MMGR_ALLOC_MCM_CB;
+   memset(gl_mds_mcm_cb, 0, sizeof(MDS_MCM_CB));
  
  	/* VDEST TREE */

memset(_tree_params, 0, sizeof(NCS_PATRICIA_PARAMS));
diff --git a/src/mds/mds_c_db.c b/src/mds/mds_c_db.c
index 7d56ad9..46b8eb4 100644
--- a/src/mds/mds_c_db.c
+++ b/src/mds/mds_c_db.c
@@ -37,12 +37,16 @@ void get_adest_details(MDS_DEST adest, char *adest_details)
char *token, *saveptr;
struct stat s;
uint32_t process_id = 0;
-   SlotSubslotId slot_subslot_id;
+   SlotSubslotId slot_subslot_id = 0;
char pid_path[1024];
char *pid_name = NULL;
char process_name[MDS_MAX_PROCESS_NAME_LEN];
bool remote = false;
  
+	memset(adest_details, 0, MDS_MAX_PROCESS_NAME_LEN);

+   memset(process_name, 0, MDS_MAX_PROCESS_NAME_LEN);
+   memset(pid_path, 0, 1024);
+
slot_subslot_id =
GetSlotSubslotIdFromNodeId(m_NCS_NODE_ID_FROM_MDS_DEST(adest));
  
@@ -139,7 +143,7 @@ void get_subtn_adest_details(MDS_PWE_HDL pwe_hdl, MDS_SVC_ID svc_id,

 MDS_DEST adest, char *adest_details)
  {
uint32_t process_id = 0;
-   SlotSubslotId slot_subslot_id;
+   SlotSubslotId slot_subslot_id = 0;
char process_name[MDS_MAX_PROCESS_NAME_LEN];
bool remote = false;
MDS_SVC_INFO *svc_info = NULL;
@@ -148,6 +152,9 @@ void get_subtn_adest_details(MDS_PWE_HDL pwe_hdl, 
MDS_SVC_ID svc_id,
char *pid_name = NULL;
struct stat s;
  
+	memset(process_name, 0, MDS_MAX_PROCESS_NAME_LEN);

+   memset(pid_path, 0, 1024);
+
slot_subslot_id =
GetSlotSubslotIdFromNodeId(m_NCS_NODE_ID_FROM_MDS_DEST(adest));
process_id = m_MDS_GET_PROCESS_ID_FROM_ADEST(adest);
@@ -2404,6 +2411,7 @@ uint32_t mds_subtn_res_tbl_get(MDS_SVC_HDL svc_hdl, 
MDS_SVC_ID subscr_svc_id,
if (subtn_res_info == NULL) {
/* Subscription result entry doesn't exist for active result */
m_MDS_LOG_DBG("MDS:DB: Subscription Result not present");
+   *adest = 0;
m_MDS_LEAVE();
return NCSCC_RC_FAILURE;
} else {
diff --git a/src/mds/mds_dt_tcp.c b/src/mds/mds_dt_tcp.c
index 0b45c07..1407eb1 100644
--- a/src/mds/mds_dt_tcp.c
+++ b/src/mds/mds_dt_tcp.c
@@ -104,6 +104,7 @@ uint32_t mds_mdtm_init_tcp(NODE_ID nodeid, uint32_t 
*mds_tcp_ref)
mdtm_num_subscriptions = 0;
mdtm_handle = 0;
mdtm_global_frag_num_tcp = 0;
+   *mds_tcp_ref = 0;
  
  	memset(_addr_un, 0, sizeof(struct sockaddr_un));

memset(_addr_un, 0, sizeof(struct sockaddr_un));
diff --git a/src/mds/mds_dt_tipc.c b/src/mds/mds_dt_tipc.c
index 66f69cc..37745e7 100644
--- a/src/mds/mds_dt_tipc.c
+++ b/src/mds/mds_dt_tipc.c
@@ -182,6 +182,7 @@ uint32_t mdtm_tipc_init(NODE_ID nodeid, uint32_t 
*mds_tipc_ref)
socklen_t sz = sizeof(addr);
  
  	memset(_cb, 0, sizeof(tipc_cb));

+   *mds_tipc_ref = 0;
  
  	/* Added to assist the shutdown bug */

mdtm_ref_hdl_list_hdr = NULL;
diff --git a/src/mds/mds_log.cc b/src/mds/mds_log.cc
index 67f2f46..94c94e6 100644
--- a/src/mds/mds_log.cc
+++ b/src/mds/mds_log.cc
@@ -94,6 +94,9 @@ bool MdsLog::Init() {
char *token, *saveptr;
char *pid_name = nullptr;
  
+  memset(app_name, 0,

Re: [devel] [PATCH 1/1] amfnd: Only report OperState in SURestart recovery if su is under SMF maintenance campaign [#2476]

2017-06-05 Thread praveen malviya



As per 3.11.1.4.2 Restrictions to Auto-Repair, AMFD has to disable the 
SU only when sumaintenance campaign is set.


Ack from me, code review only.

Thanks
Praveen,

On 05-Jun-17 10:35 AM, praveen malviya wrote:


 From surestart perspective:
For SURestart recovery AMFD is not informed for disabled state because 
it would be spec deviation. While fixing spec deviation for surestart 
recovery, I had taken care of this by writing a new function 
su_send_suRestart_recovery_msg(). AMFND internally keeps su disabled to 
correctly execute the SU and comp FSM but does not inform AMFD.


I am going to analyse it further from suMaintenance perspective.


Thanks
Praveen

On 05-Jun-17 6:14 AM, minh chau wrote:

Hi Alex,

In legacy recovery, the SuRestart has not reported oper state to 
disabled and back to enabled when recovery completes. The state 
remains unchanged.
In AMF spec, 3.11.1.2, it seems the states should be enabled and 
in-service during restart.
For feature of su maintenance campaign, I think amfnd needs to report 
oper state as disabled for su restart, then the su will be manually 
repaired, but that oper state report is for the new feature only.


thanks,
Minh

On 03/06/17 04:56, Alex Jones wrote:

Hi Minh,

   In the legacy recovery, does the oper state change to disabled (and
then enabled) ever get reported to amfd? Really, the question is, do
both the NTF notifications that report disabled, and then enabled get
generated?

   I seem to remember that they didn't, which is why I put this there.

Alex

On 06/02/2017 08:08 AM, Minh Chau wrote:
 


NOTICE: This email was received from an EXTERNAL sender
 



Patch keeps legacy behavior of SURestart recovery before
saAmfSUMaintenanceCampaign feature
---
src/amf/amfnd/err.cc | 10 --
1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/amf/amfnd/err.cc b/src/amf/amfnd/err.cc
index 2abaf2007..e4cb9f08b 100644
--- a/src/amf/amfnd/err.cc
+++ b/src/amf/amfnd/err.cc
@@ -707,8 +707,14 @@ uint32_t avnd_err_rcvr_su_restart(AVND_CB *cb,
AVND_SU *su,
uint32_t rc = avnd_comp_oper_state_avd_sync(cb, failed_comp);
if (NCSCC_RC_SUCCESS != rc) goto done;

- avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSUOperState_ID, su->name,
- su->oper);
+ /* Keep SURestart recovery not to always report OperState to amfd
+ as legacy recovery. Only report OperState if SU is under SMF 
maintenance

+ campaign
+ */
+ if (!su->suMaintenanceCampaign.empty()) {
+ avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSUOperState_ID, su->name,
+ su->oper);
+ }

set_suRestart_flag(su);

--
2.11.0




-- 


Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! 
https://urldefense.proofpoint.com/v2/url?u=http-3A__sdm.link_slashdot=DwICAg=RoP1YumCXCgaWHvlZYR8PQcxBKCX5YTpkKY057SbK10=Lehk1PZKwfDQtYJXNyUKbPAqrw5O--SlPRAF9DIEps4=Q4kvIyr7bvemvKanM42H-wxdcMqTegXtaHjroZ116w8=Tda-67mquksw0rVQdwLNhr_iVG4mzi5bKP3Rv2Rt1dM= 
___

Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.sourceforge.net_lists_listinfo_opensaf-2Ddevel=DwICAg=RoP1YumCXCgaWHvlZYR8PQcxBKCX5YTpkKY057SbK10=Lehk1PZKwfDQtYJXNyUKbPAqrw5O--SlPRAF9DIEps4=Q4kvIyr7bvemvKanM42H-wxdcMqTegXtaHjroZ116w8=SkHfX55d6ZsjhrIkPpTOTfZzpmCgZOxconpjuP7gKMI= 



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] amfnd: Only report OperState in SURestart recovery if su is under SMF maintenance campaign [#2476]

2017-06-04 Thread praveen malviya



From surestart perspective:
For SURestart recovery AMFD is not informed for disabled state because 
it would be spec deviation. While fixing spec deviation for surestart 
recovery, I had taken care of this by writing a new function 
su_send_suRestart_recovery_msg(). AMFND internally keeps su disabled to 
correctly execute the SU and comp FSM but does not inform AMFD.


I am going to analyse it further from suMaintenance perspective.


Thanks
Praveen

On 05-Jun-17 6:14 AM, minh chau wrote:

Hi Alex,

In legacy recovery, the SuRestart has not reported oper state to 
disabled and back to enabled when recovery completes. The state remains 
unchanged.
In AMF spec, 3.11.1.2, it seems the states should be enabled and 
in-service during restart.
For feature of su maintenance campaign, I think amfnd needs to report 
oper state as disabled for su restart, then the su will be manually 
repaired, but that oper state report is for the new feature only.


thanks,
Minh

On 03/06/17 04:56, Alex Jones wrote:

Hi Minh,

   In the legacy recovery, does the oper state change to disabled (and
then enabled) ever get reported to amfd? Really, the question is, do
both the NTF notifications that report disabled, and then enabled get
generated?

   I seem to remember that they didn't, which is why I put this there.

Alex

On 06/02/2017 08:08 AM, Minh Chau wrote:


NOTICE: This email was received from an EXTERNAL sender


Patch keeps legacy behavior of SURestart recovery before
saAmfSUMaintenanceCampaign feature
---
src/amf/amfnd/err.cc | 10 --
1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/amf/amfnd/err.cc b/src/amf/amfnd/err.cc
index 2abaf2007..e4cb9f08b 100644
--- a/src/amf/amfnd/err.cc
+++ b/src/amf/amfnd/err.cc
@@ -707,8 +707,14 @@ uint32_t avnd_err_rcvr_su_restart(AVND_CB *cb,
AVND_SU *su,
uint32_t rc = avnd_comp_oper_state_avd_sync(cb, failed_comp);
if (NCSCC_RC_SUCCESS != rc) goto done;

- avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSUOperState_ID, su->name,
- su->oper);
+ /* Keep SURestart recovery not to always report OperState to amfd
+ as legacy recovery. Only report OperState if SU is under SMF 
maintenance

+ campaign
+ */
+ if (!su->suMaintenanceCampaign.empty()) {
+ avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSUOperState_ID, su->name,
+ su->oper);
+ }

set_suRestart_flag(su);

--
2.11.0




--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] amfd: do not assert unnecessarily [#2458]

2017-05-23 Thread praveen malviya

Ack, code review only.


Thanks,
Praveen

On 11-May-17 3:08 PM, Gary Lee wrote:
> IMM APIs can fail if immnd finishes shutting down before amfd.
> amfd should not unnecessarily assert and cause core dumps
> to be created.
> ---
>   src/amf/amfd/app.cc  | 7 +--
>   src/amf/amfd/comp.cc | 7 +--
>   src/amf/amfd/sg.cc   | 7 +--
>   src/amf/amfd/si.cc   | 7 +--
>   src/amf/amfd/su.cc   | 7 +--
>   5 files changed, 25 insertions(+), 10 deletions(-)
> 
> diff --git a/src/amf/amfd/app.cc b/src/amf/amfd/app.cc
> index 62ad34acb..424d82847 100644
> --- a/src/amf/amfd/app.cc
> +++ b/src/amf/amfd/app.cc
> @@ -470,8 +470,11 @@ SaAisErrorT avd_app_config_get(void) {
>   if (avd_si_config_get(app) != SA_AIS_OK) goto done2;
> }
>   
> -  osafassert(rc == SA_AIS_ERR_NOT_EXIST);
> -  error = SA_AIS_OK;
> +  if (rc == SA_AIS_ERR_NOT_EXIST) {
> +error = SA_AIS_OK;
> +  } else {
> +LOG_ER("avd_app_config_get FAILED %u", rc);
> +  }
>   done2:
> (void)immutil_saImmOmSearchFinalize(searchHandle);
>   done1:
> diff --git a/src/amf/amfd/comp.cc b/src/amf/amfd/comp.cc
> index 3e0dc5de1..e3f0f9051 100644
> --- a/src/amf/amfd/comp.cc
> +++ b/src/amf/amfd/comp.cc
> @@ -811,8 +811,11 @@ SaAisErrorT avd_comp_config_get(const std::string 
> _name, AVD_SU *su) {
>   goto done2;
> }
>   
> -  osafassert(rc == SA_AIS_ERR_NOT_EXIST);
> -  error = SA_AIS_OK;
> +  if (rc == SA_AIS_ERR_NOT_EXIST) {
> +error = SA_AIS_OK;
> +  } else {
> +LOG_ER("avd_comp_config_get FAILED %u", rc);
> +  }
>   
>   done2:
> (void)immutil_saImmOmSearchFinalize(searchHandle);
> diff --git a/src/amf/amfd/sg.cc b/src/amf/amfd/sg.cc
> index 9b04a423c..79e8f5fd7 100644
> --- a/src/amf/amfd/sg.cc
> +++ b/src/amf/amfd/sg.cc
> @@ -499,8 +499,11 @@ SaAisErrorT avd_sg_config_get(const std::string _dn, 
> AVD_APP *app) {
>   }
> }
>   
> -  osafassert(rc == SA_AIS_ERR_NOT_EXIST);
> -  error = SA_AIS_OK;
> +  if (rc == SA_AIS_ERR_NOT_EXIST) {
> +error = SA_AIS_OK;
> +  } else {
> +LOG_ER("avd_sg_config_get FAILED %u", rc);
> +  }
>   
>   done2:
> (void)immutil_saImmOmSearchFinalize(searchHandle);
> diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc
> index 298188a84..51dd9a662 100644
> --- a/src/amf/amfd/si.cc
> +++ b/src/amf/amfd/si.cc
> @@ -705,8 +705,11 @@ SaAisErrorT avd_si_config_get(AVD_APP *app) {
>   if (avd_csi_config_get(si_str, si) != SA_AIS_OK) goto done2;
> }
>   
> -  osafassert(rc == SA_AIS_ERR_NOT_EXIST);
> -  error = SA_AIS_OK;
> +  if (rc == SA_AIS_ERR_NOT_EXIST) {
> +error = SA_AIS_OK;
> +  } else {
> +LOG_ER("avd_si_config_get FAILED %u", rc);
> +  }
>   
>   done2:
> (void)immutil_saImmOmSearchFinalize(searchHandle);
> diff --git a/src/amf/amfd/su.cc b/src/amf/amfd/su.cc
> index fac1188b5..62c372822 100644
> --- a/src/amf/amfd/su.cc
> +++ b/src/amf/amfd/su.cc
> @@ -750,8 +750,11 @@ SaAisErrorT avd_su_config_get(const std::string 
> _name, AVD_SG *sg) {
>   }
> }
>   
> -  osafassert(rc == SA_AIS_ERR_NOT_EXIST);
> -  error = SA_AIS_OK;
> +  if (rc == SA_AIS_ERR_NOT_EXIST) {
> +error = SA_AIS_OK;
> +  } else {
> +LOG_ER("avd_su_config_get FAILED %u", rc);
> +  }
>   
>   done2:
> (void)immutil_saImmOmSearchFinalize(searchHandle);
> 

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] amfd: only increment su_cnt_admin_oper for non-opensaf SUs [#2466]

2017-05-23 Thread praveen malviya

Ack (not tested).

Thanks
Praveen

On 24-May-17 6:21 AM, Gary Lee wrote:
> Only increment su_cnt_admin_oper for non-opensaf SUs. Otherwise a nodegroup
> admin operation may fail to finish if it is started while a node is joining.
> 
> based on analysis from Minh / Praveen
> ---
>   src/amf/amfd/sgproc.cc | 1 +
>   1 file changed, 1 insertion(+)
> 
> diff --git a/src/amf/amfd/sgproc.cc b/src/amf/amfd/sgproc.cc
> index cd95fe82c..20549808b 100644
> --- a/src/amf/amfd/sgproc.cc
> +++ b/src/amf/amfd/sgproc.cc
> @@ -262,6 +262,7 @@ npisu_done:
> AVD_AVND *node = su->su_on_node;
> if ((node->admin_node_pend_cbk.invocation != 0) ||
> ((node->admin_ng != nullptr) &&
> +   (su->sg_of_su->sg_ncs_spec == false) &&
>  (node->admin_ng->admin_ng_pend_cbk.invocation != 0))) {
>   node->su_cnt_admin_oper++;
>   TRACE("node:'%s', su_cnt_admin_oper:%u", node->name.c_str(),
> 

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] amfnd: Buffered not-ack susi assignment response after both SC go down V2 [#2105]

2017-05-19 Thread praveen malviya

Ack.

Thanks
Praveen

On 19-May-17 12:48 PM, Minh Chau wrote:
> When amfnd-payload responds susi assignment response just before both SC
> go down, and that response message does not come to director. Therefore,
> the status of that assignment could be seen as "modifying" in IMM. When
> SC comes back, active amfd will be waiting for that response forever.
> 
> Patch checks if a susi assignment response is sent but not-ack just before
> both SC come down, amfnd-payload will buffer it in a way as a susi get
> assigned during SC absence
> ---
>   src/amf/amfnd/avnd_di.h |  2 +-
>   src/amf/amfnd/di.cc | 85 
> +++--
>   2 files changed, 70 insertions(+), 17 deletions(-)
> 
> diff --git a/src/amf/amfnd/avnd_di.h b/src/amf/amfnd/avnd_di.h
> index 07222eb67..d7ccd68fd 100644
> --- a/src/amf/amfnd/avnd_di.h
> +++ b/src/amf/amfnd/avnd_di.h
> @@ -79,7 +79,7 @@ uint32_t avnd_di_pg_act_send(struct avnd_cb_tag *, const 
> std::string &,
>AVSV_PG_TRACK_ACT, bool);
>   uint32_t avnd_di_msg_send(struct avnd_cb_tag *, AVND_MSG *);
>   void avnd_di_msg_ack_process(struct avnd_cb_tag *, uint32_t);
> -void avnd_diq_del(struct avnd_cb_tag *);
> +void avnd_diq_rec_check_buffered_msg(struct avnd_cb_tag *);
>   AVND_DND_MSG_LIST *avnd_diq_rec_add(struct avnd_cb_tag *cb, AVND_MSG *msg);
>   void avnd_diq_rec_del(struct avnd_cb_tag *cb, AVND_DND_MSG_LIST *rec);
>   void avnd_diq_rec_send_buffered_msg(struct avnd_cb_tag *cb);
> diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
> index e06b9260d..6f0a76cda 100644
> --- a/src/amf/amfnd/di.cc
> +++ b/src/amf/amfnd/di.cc
> @@ -698,8 +698,8 @@ uint32_t avnd_evt_mds_avd_dn_evh(AVND_CB *cb, AVND_EVT 
> *evt) {
> }
>   }
> } else {
> -TRACE("Delete all pending messages to be sent to AMFD");
> -avnd_diq_del(cb);
> +TRACE("Delete/Buffer pending messages to be sent to AMFD");
> +avnd_diq_rec_check_buffered_msg(cb);
> }
>   
> // check for pending messages FROM director
> @@ -1271,9 +1271,15 @@ void avnd_di_msg_ack_process(AVND_CB *cb, uint32_t 
> mid) {
>   }
>   
>   
> /
> -  Name  : avnd_diq_del
> +  Name  : avnd_diq_rec_check_buffered_msg
> +
> +  Description   : The routine buffers messages that are waiting for ack and 
> will
> +  resend to AMFD when AMFD is up.
> +  All messages are deleted, except following messages to be
> +  buffered:
> +  - AVSV_N2D_INFO_SU_SI_ASSIGN_MSG
> +  - AVSV_N2D_OPERATION_STATE_MSG
>   
> -  Description   : This routine clears the AvD msg list.
>   
> Arguments : cb - ptr to the AvND control block
>   
> @@ -1281,18 +1287,65 @@ void avnd_di_msg_ack_process(AVND_CB *cb, uint32_t 
> mid) {
>   
> Notes : None.
>   
> **/
> -void avnd_diq_del(AVND_CB *cb) {
> -  AVND_DND_MSG_LIST *rec = 0;
> -
> -  do {
> -/* pop the record */
> -m_AVND_DIQ_REC_POP(cb, rec);
> -if (!rec) break;
> -
> -/* delete the record */
> -avnd_diq_rec_del(cb, rec);
> -  } while (1);
> -
> +void avnd_diq_rec_check_buffered_msg(AVND_CB *cb) {
> +  if ((cb->dnd_list.head != nullptr)) {
> +AVND_DND_MSG_LIST *rec = 0;
> +bool found = true;
> +while (found) {
> +  found = false;
> +  for (rec = cb->dnd_list.head; rec != nullptr;) {
> +osafassert(rec->msg.type == AVND_MSG_AVD);
> +m_AVND_DIQ_REC_POP(cb, rec);
> +// Assignment response had been sent, but not ack because
> +// last controller go down, reset msg_id and will be resent later
> +if (rec->msg.info.avd->msg_type == AVSV_N2D_INFO_SU_SI_ASSIGN_MSG) {
> +  if (rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id != 0) {
> +rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id = 0;
> +found = true;
> +LOG_NO(
> +"Found not-ack su_si_assign msg for SU:'%s', "
> +"SI:'%s', ha_state:'%u', msg_act:'%u', single_csi:'%u', "
> +"error:'%u', msg_id:'%u'",
> +osaf_extended_name_borrow(>msg.info.avd->msg_info
> +   .n2d_su_si_assign.su_name),
> +osaf_extended_name_borrow(>msg.info.avd->msg_info
> +   .n2d_su_si_assign.si_name),
> +rec->msg.info.avd->msg_info.n2d_su_si_assign.ha_state,
> +rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_act,
> +rec->msg.info.avd->msg_info.n2d_su_si_assign
> +.single_csi,
> +rec->msg.info.avd->msg_info.n2d_su_si_assign.error,
> +rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id);
> +  }
> +  m_AVND_DIQ_REC_PUSH(cb, rec);
> +

Re: [devel] [PATCH 1/1] amfnd: Buffered not-ack susi assignment response after both SC go down [#2105]

2017-05-18 Thread praveen malviya

Hi Minh,

I had analysed the traces you attached.
Based on that I am able to test that. When MDS returns success patch 
works fine.
Minor correction is needed when MDS return failure.
I think susi message should be kept independent of no. of tries in 
avnd_diq_del().
Thanks
Praveen


On 18-May-17 12:41 PM, minh chau wrote:
> Hi Praveen,
> 
> Some comments in line with [Minh]
> 
> thanks,
> Minh
> 
> On 18/05/17 14:54, praveen malviya wrote:
>> Hi Minh,
>>
>> In the description of the ticket there is a log which is :
>> "
>> Oct 7 18:31:41 SYSTEST-PLD-1 osafamfnd[12467]: NO Assigned 
>> 'safSi=TestApp_SI4,safApp=TestApp_TwoN' ACTIVE to 
>> 'safSu=TestApp_SU1,safSg=TestApp_SG1,safApp=TestApp_TwoN'
>> Oct 7 18:31:41 SYSTEST-PLD-1 osafamfnd[12467]: NO 
>> avnd_di_susi_resp_send() deferred as AMF director is offline
>> "
>> Last line in above log means AMFND was sending the message when it knew 
>> about SC absence state. I think this issue is already fixed during 
>> #1725 and this published patch is not required. Why? After led set 
>> message amfnd will anyway send this message.
> [Minh] I have reproduced the problem and attached to ticket for your 
> reference.
> Some outlined logs:
> The step is stopping SC1, SC2.
> In SC2, amfd sent susi assignment req to amfnd-PL3
> May 18 16:32:03.633226 osafamfd [245:245:src/amf/amfd/sgproc.cc:2444] >> 
> avd_sg_su_si_mod_snd: 'safSu=SU3,safSg=AmfDemoTwon,safApp=AmfDemoTwon', 
> state 1
> 
> In PL3, amfnd completed this susi req, and sent susi resp successfully 
> but it did not reach to amfd-SC2
> May 18 16:32:03.641156 osafamfnd [186:186:src/amf/amfnd/su.cc:0373] >> 
> avnd_evt_avd_info_su_si_assign_evh: 
> 'safSu=SU3,safSg=AmfDemoTwon,safApp=AmfDemoTwon'
> May 18 16:32:03.641744 osafamfnd [186:186:src/amf/amfnd/di.cc:0866] >> 
> avnd_di_susi_resp_send: Sending Resp 
> su=safSu=SU3,safSg=AmfDemoTwon,safApp=AmfDemoTwon, 
> si=safSi=AmfDemoTwon,safApp=AmfDemoTwon, curr_state=1, prv_state=2
> 
> amfnd-PL3 is notified NCSMDS_DOWN, amfnd deleted all pending msg waiting 
> for ack
> May 18 16:32:05.568471 osafamfnd [186:186:src/amf/amfnd/di.cc:0629] >> 
> avnd_evt_mds_avd_dn_evh
> May 18 16:32:05.568492 osafamfnd [186:186:src/amf/amfnd/di.cc:0651] WA 
> AMF director unexpectedly crashed
> May 18 16:32:05.568495 osafamfnd [186:186:src/amf/amfnd/di.cc:0701] TR 
> Delete all pending messages to be sent to AMFD
> May 18 16:32:05.568498 osafamfnd [186:186:src/amf/amfnd/di.cc:1353] >> 
> avnd_diq_rec_del
> May 18 16:32:05.568503 osafamfnd [186:186:src/amf/amfnd/di.cc:1369] << 
> avnd_diq_rec_del
> 
> When SC restarts, amfd-SC1 thinks this assignment being in progress, so 
> it waits and waits forever
> May 18 16:32:28.954967 osafamfd [257:257:src/amf/amfd/su.cc:2588] >> 
> any_susi_fsm_in: SU:'safSu=SU3,safSg=AmfDemoTwon,safApp=AmfDemoTwon', 
> check_fsm:5
> May 18 16:32:28.954975 osafamfd [257:257:src/amf/amfd/su.cc:2593] TR 
> SUSI:'safSu=SU3,safSg=AmfDemoTwon,safApp=AmfDemoTwon,safSi=AmfDemoTwon,safApp=AmfDemoTwon',
>  
> fsm:'5'
> May 18 16:32:28.954982 osafamfd [257:257:src/amf/amfd/su.cc:2596] TR Found
> May 18 16:32:28.954989 osafamfd [257:257:src/amf/amfd/su.cc:2599] << 
> any_susi_fsm_in
> May 18 16:32:28.954996 osafamfd [257:257:src/amf/amfd/sg.cc:2340] << 
> any_assignment_in_progress
> 
> This problem is very close to the one you mentioned and fixed in #1725. 
> In #1725, amfnd surely knows amfd down, so amfnd buffers msg. In #2105, 
> amfnd sends msg out just before amfnd detects amfd being down.
>>
>> The logs that I have attached can be ignored. I was simulating the bug 
>> on different assumptions.
>>
>> One question regarding the patch:
>> If the goal is to fix the issue when the message is being sent and 
>> system has become SC-less. In this situation, then avnd_mds_send() 
>> will return, most probably,  a failure as MDS will not find the 
>> destination. In mds failure case,  rec->no_retries will not be 
>> incremented and will remain zero. Now AMFND will process down of SC 
>> and it will call avnd_diq_del(). In this function, since no_retries is 
>> zero for this message(first message),  the message will be deleted.
>>
> [Minh]: Thanks, it's good to handle failure code returned from MDS. I 
> will update the patch
>>
>> Thanks,
>> Praveen
>>
>>
>> On 18-May-17 9:14 AM, minh chau wrote:
>>> Hi Praveen,
>>>
>>> Do you have any idea why @is_avd_down was false that made amfnd to 
>>> send susi_resp at 12:37:20.453974?
>>> It should be true by the end of avnd_evt_mds_avd_dn_e

Re: [devel] [PATCH 1/1] amfnd: Buffered not-ack susi assignment response after both SC go down [#2105]

2017-05-17 Thread praveen malviya

Hi Minh,

In the description of the ticket there is a log which is :
"
Oct 7 18:31:41 SYSTEST-PLD-1 osafamfnd[12467]: NO Assigned 
'safSi=TestApp_SI4,safApp=TestApp_TwoN' ACTIVE to 
'safSu=TestApp_SU1,safSg=TestApp_SG1,safApp=TestApp_TwoN'
Oct 7 18:31:41 SYSTEST-PLD-1 osafamfnd[12467]: NO 
avnd_di_susi_resp_send() deferred as AMF director is offline
"
Last line in above log means AMFND was sending the message when it knew 
about SC absence state. I think this issue is already fixed during #1725 
and this published patch is not required. Why? After led set message 
amfnd will anyway send this message.

The logs that I have attached can be ignored. I was simulating the bug 
on different assumptions.

One question regarding the patch:
If the goal is to fix the issue when the message is being sent and 
system has become SC-less. In this situation, then avnd_mds_send() will 
return, most probably,  a failure as MDS will not find the destination. 
In mds failure case,  rec->no_retries will not be incremented and will 
remain zero. Now AMFND will process down of SC and it will call 
avnd_diq_del(). In this function, since no_retries is zero for this 
message(first message),  the message will be deleted.


Thanks,
Praveen


On 18-May-17 9:14 AM, minh chau wrote:
> Hi Praveen,
> 
> Do you have any idea why @is_avd_down was false that made amfnd to send 
> susi_resp at 12:37:20.453974?
> It should be true by the end of avnd_evt_mds_avd_dn_evh() at 
> 12:37:16.741518, is it right?
> 
> Thanks,
> Minh
> On 17/05/17 21:31, minh chau wrote:
>> Hi Praveen,
>>
>> Thanks for looking at the issue.
>> Here is what I am observing
>>
>> amfnd-PL3 received NCSMDS_DOWN indicating no active amfd
>>
>> May 17 12:37:16.741308 osafamfnd [8141:8141:src/amf/amfnd/di.cc:0629] 
>> >> avnd_evt_mds_avd_dn_evh
>> May 17 12:37:16.741342 osafamfnd [8141:8141:src/amf/amfnd/di.cc:0651] 
>> WA AMF director unexpectedly crashed
>> May 17 12:37:16.741354 osafamfnd [8141:8141:src/amf/amfnd/di.cc:0701] 
>> TR Delete all pending messages to be sent to AMFD
>> May 17 12:37:16.741379 osafamfnd [8141:8141:src/amf/amfnd/di.cc:0709] 
>> NO Checking 'safSu=PL-3,safSg=NoRed,safApp=OpenSAF' for pending messages
>> May 17 12:37:16.741405 osafamfnd [8141:8141:src/amf/amfnd/di.cc:0709] 
>> NO Checking 'safSu=SU1,safSg=AmfDemo,safApp=AmfDemo1' for pending 
>> messages
>> May 17 12:37:16.741430 osafamfnd [8141:8141:src/amf/amfnd/di.cc:0709] 
>> NO Checking 'safSu=SU2,safSg=AmfDemo,safApp=AmfDemo1' for pending 
>> messages
>> May 17 12:37:16.741505 osafamfnd [8141:8141:src/amf/amfnd/tmr.cc:0083] 
>> TR SC absence timer started
>> May 17 12:37:16.741518 osafamfnd [8141:8141:src/amf/amfnd/di.cc:0742] 
>> << avnd_evt_mds_avd_dn_evh
>>
>> But a bit later, susi got assigned, and amfnd-PL3 did send this susi 
>> response (it should not send out and buffer it, since the @is_avd_down 
>> should be true)
>>
>> May 17 12:37:20.453974 osafamfnd [8141:8141:src/amf/amfnd/di.cc:0866] 
>> >> avnd_di_susi_resp_send: Sending Resp 
>> su=safSu=SU1,safSg=AmfDemo,safApp=AmfDemo1, 
>> si=safSi=AmfDemo,safApp=AmfDemo1, curr_state=3, prv_state=1
>> ...
>> May 17 12:37:20.454083 osafamfnd [8141:8141:src/amf/amfnd/mds.cc:1482] 
>> >> avnd_mds_send: Msg type '1'
>> May 17 12:37:20.454244 osafamfnd [8141:8141:src/amf/amfnd/mds.cc:1537] 
>> ER ncsmds_api for 0 FAILED, dest=0
>>
>> When SC1 restarted, amfd received the very first messages from PL3 
>> starting with msg_id=1 (it should be starting from 0)
>>
>> May 17 12:37:28.398633 osafamfd 
>> [7686:7686:src/amf/amfd/ndproc.cc:0330] NO Receive message with event 
>> type:12, msg_type:31, from node:2030f, msg_id:1
>> May 17 12:37:28.413018 osafamfd [7686:7686:src/amf/amfd/ndfsm.cc:0334] 
>> NO Received node_up_msg from all nodes
>> May 17 12:37:28.413069 osafamfd [7686:7686:src/amf/amfd/ndfsm.cc:0254] 
>> NO Received node_up from 2030f: msg_id 2
>>
>> Looks to me something should not happen inside 
>> avnd_evt_mds_avd_dn_evh(). In this avnd_evt_mds_avd_dn_evh(), 
>> @is_avd_down should be true, the msg counter should be reset to 0, but 
>> I do see the SC absence timer started. I couldn't figure how it 
>> happened for now
>>
>> Thanks,
>> Minh
>>
>> On 17/05/17 20:03, praveen malviya wrote:
>>> What I see is avnd_diq_del() is called as soon as system becomes 
>>> headless. This will delete all pending messages. But when component 
>>> will respond during SCs absence a new message will be generated and 
>>> buffered.
>>> For node_up AMFD will ack the message, but amfnd ca

Re: [devel] [PATCH 1/1] amfnd: Buffered not-ack susi assignment response after both SC go down [#2105]

2017-05-17 Thread praveen malviya

What I see is that avnd_diq_del() is called as soon as the system becomes
headless. This deletes all pending messages. But when a component
responds during the SCs' absence, a new message is generated and buffered.
For node_up, AMFD will ack the message, but amfnd calls
avnd_diq_rec_del() (not avnd_diq_del()) in avnd_di_msg_ack_process().
We need to call avnd_diq_del() when processing the ack message so that
msg_id gets updated.
I am looking into it further.


Thanks.
Praveen



On 17-May-17 1:50 PM, praveen malviya wrote:
> Hi Minh,
> 
> While testing this, I am observing that amfd is dropping the assignment
> message because of message id mismatch:
> May 17 12:37:39.522117 osafamfd [7686:7686:src/amf/amfd/sgproc.cc:1171]
>   >> avd_su_si_assign_evh: id:1, node:2030f, act:5,
> 'safSu=SU1,safSg=AmfDemo,safApp=AmfDemo1', '', ha:3, err:1, single:0
> 
> 
> May 17 12:37:39.522404 osafamfd [7686:7686:src/amf/amfd/ndproc.cc:0075]
> WA avd_msg_sanity_chk: invalid msg id 1, msg type 5, from 2030f should be 3
> May 17 12:37:39.522418 osafamfd [7686:7686:src/amf/amfd/sgproc.cc:1777]
> << avd_su_si_assign_evh
> 
> I am also looking into this. For your reference I had attached amfd and
> amfnd traces from SC-1 and PL-3 respectively in the ticket.
> I am working with one controller and one payload.
> 
> 
> Thanks
> Praveen
> 
> On 15-May-17 1:06 PM, Minh Chau wrote:
>> When amfnd-payload responds susi assignment response just before both SC
>> go down, and that response message does not come to director. Therefore,
>> the status of that assignment could be seen as "modifying" in IMM. When
>> SC comes back, active amfd will be waiting for that response forever.
>>
>> Patch checks if a susi assignment response is sent but not-ack just before
>> both SC come down, amfnd-payload will buffer it in a way as a susi get
>> assigned during SC absence
>> ---
>>src/amf/amfnd/di.cc | 53 
>> +
>>1 file changed, 45 insertions(+), 8 deletions(-)
>>
>> diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
>> index e06b9260d..3776a09dc 100644
>> --- a/src/amf/amfnd/di.cc
>> +++ b/src/amf/amfnd/di.cc
>> @@ -1282,16 +1282,53 @@ void avnd_di_msg_ack_process(AVND_CB *cb, uint32_t 
>> mid) {
>>  Notes : None.
>>
>> **/
>>void avnd_diq_del(AVND_CB *cb) {
>> -  AVND_DND_MSG_LIST *rec = 0;
>>
>> -  do {
>> -/* pop the record */
>> -m_AVND_DIQ_REC_POP(cb, rec);
>> -if (!rec) break;
>> +  if ((cb->dnd_list.head != nullptr)) {
>> +AVND_DND_MSG_LIST *rec = 0;
>> +bool found = true;
>> +while (found) {
>> +  found = false;
>> +  for (rec = cb->dnd_list.head; rec != nullptr;
>> +   rec = rec->next) {
>> +osafassert(rec->msg.type == AVND_MSG_AVD);
>> +// delete all pending messages that haven't been sent out
>> +if (rec->no_retries == 0) {
>> +  m_AVND_DIQ_REC_POP(cb, rec);
>> +  avnd_diq_rec_del(cb, rec);
>> +  break;
>> +} else {
>> +  // Assignment response had been sent, but not ack because last
>> +  // controller go down, reset msg_id and will be resent later
>> +  if (rec->msg.info.avd->msg_type == 
>> AVSV_N2D_INFO_SU_SI_ASSIGN_MSG) {
>> +if (rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id != 0) {
>> +  rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id = 0;
>> +  found = true;
>> +  LOG_NO(
>> +  "Found not-ack su_si_assign msg for SU:'%s', "
>> +  "SI:'%s', ha_state:'%u', msg_act:'%u', single_csi:'%u', "
>> +  "error:'%u', msg_id:'%u'",
>> +  osaf_extended_name_borrow(>msg.info.avd->msg_info
>> + .n2d_su_si_assign.su_name),
>> +  osaf_extended_name_borrow(>msg.info.avd->msg_info
>> + .n2d_su_si_assign.si_name),
>> +  rec->msg.info.avd->msg_info.n2d_su_si_assign.ha_state,
>> +  rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_act,
>> +  rec->msg.info.avd->msg_info.n2d_su_si_assign
>> +  .single_csi,
>> +  rec->msg.info.avd->msg_info.n2d_su_si_assign.error,
>> +  rec->msg.info.avd->msg

Re: [devel] [PATCH 1/1] amfd: Check IMM service status before use IMM call [#2416]

2017-05-17 Thread praveen malviya

Ack.

Thanks
Praveen

On 17-May-17 3:21 AM, Minh Chau wrote:
> When Opensaf 2N switchover, amfd tries to update some attributes
> to IMM. But this time, IMM is not available since Opensaf 2N SI
> is in QUIESCED.
> 
> SC-1 osafamfnd[510]: NO Assigned 'safSi=SC-2N,safApp=OpenSAF' QUIESCED to 
> 'safSu=SC-1,safSg=2N,safApp=OpenSAF'
> SC-1 osafamfd[496]: WA saImmOiRtObjectUpdate of 
> 'safSISU=safSu=SC-1\,safSg=2N\,safApp=OpenSAF,safSi=SC-2N,safApp=OpenSAF' 
> osafAmfSISUFsmState failed with 5
> SC-1 osafimmnd[441]: WA ERR_BAD_HANDLE: Handle use is blocked by pending 
> reply on syncronous call
> 
> This patch corrects the problem by checking IMM service status
> before call avd_saImmOiRtXXX_sync. The problem does not appear
> previously because IMM update is queued and FiFo::execute() does
> check IMM status before executing a job. A check of @avail_state_avd
> against SA_AMF_HA_ACTIVE is not enough in avd_saImmOiRtXXX_sync.
> This patch does check IMM status in avd_saImmOiRtXXX_sync as similar
> as FiFo::execute().
> ---
>   src/amf/amfd/imm.cc | 35 ---
>   1 file changed, 24 insertions(+), 11 deletions(-)
> 
> diff --git a/src/amf/amfd/imm.cc b/src/amf/amfd/imm.cc
> index 65df73f73..7b1aa333e 100644
> --- a/src/amf/amfd/imm.cc
> +++ b/src/amf/amfd/imm.cc
> @@ -131,22 +131,32 @@ static char *StrDup(const char *s) {
>   //
>   Job::~Job() {}
>   
> -//
> -bool ImmJob::isRunnable(const AVD_CL_CB *cb) {
> +// TODO: Make isImmServiceReady as static to limit its scope
> +// This function should belong to AVD_CB class as a method
> +static bool isImmServiceReady(const AVD_CL_CB *cb) {
> TRACE_ENTER();
> bool rc = true;
> +
> +  if (avd_cb->active_services_exist == false) {
> +  TRACE("No active service");
> +  rc = false;
> +  }
> if ((!avd_cb->is_implementer) &&
> (avd_cb->avail_state_avd == SA_AMF_HA_STANDBY)) {
>   rc = false;
> }
> -
> if (avd_cb->avd_imm_status == AVD_IMM_INIT_ONGOING) {
>   TRACE("Already IMM init is going, try again after sometime");
>   rc = false;
> }
> -  TRACE_LEAVE();
> +  TRACE_LEAVE2("%u:", rc);
> return rc;
>   }
> +
> +//
> +bool ImmJob::isRunnable(const AVD_CL_CB *cb) {
> +  return isImmServiceReady(cb);
> +}
>   //
>   AvdJobDequeueResultT ImmObjCreate::exec(const AVD_CL_CB *cb) {
> SaAisErrorT rc;
> @@ -1707,10 +1717,11 @@ SaAisErrorT avd_saImmOiRtObjectUpdate_sync(
> SaImmAttrModificationT_2 attrMod;
> const SaImmAttrModificationT_2 *attrMods[] = {, nullptr};
> SaImmAttrValueT attrValues[] = {value};
> -
> const std::string attribute_name(attributeName);
> +  bool isImmReady = isImmServiceReady(avd_cb);
> +
> TRACE_ENTER2("'%s' %s", dn.c_str(), attributeName);
> -  if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) {
> +  if (isImmReady == true) {
>   attrMod.modType = modifyType;
>   attrMod.modAttr.attrName = attributeName;
>   attrMod.modAttr.attrValuesNumber = 1;
> @@ -1723,7 +1734,7 @@ SaAisErrorT avd_saImmOiRtObjectUpdate_sync(
>attributeName, rc);
> }
>   
> -  if (rc != SA_AIS_OK || avd_cb->avail_state_avd != SA_AMF_HA_ACTIVE) {
> +  if (rc != SA_AIS_OK || isImmReady == false) {
>   // Now it will be updated through job queue.
>   avd_saImmOiRtObjectUpdate(dn, attribute_name, attrValueType, value);
> }
> @@ -1875,8 +1886,9 @@ void avd_saImmOiRtObjectCreate_sync(const std::string 
> ,
> TRACE_ENTER2("%s %s", className.c_str(), parentName.c_str());
>   
> SaAisErrorT rc = SA_AIS_OK;
> +  bool isImmReady = isImmServiceReady(avd_cb);
>   
> -  if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) {
> +  if (isImmReady == true) {
>   const SaNameTWrapper parent_name(parentName);
>   rc = saImmOiRtObjectCreate_2(avd_cb->immOiHandle,
>   const_cast(className.c_str()),
> @@ -1887,7 +1899,7 @@ void avd_saImmOiRtObjectCreate_sync(const std::string 
> ,
>   }
> }
>   
> -  if (rc != SA_AIS_OK || avd_cb->avail_state_avd != SA_AMF_HA_ACTIVE) {
> +  if (rc != SA_AIS_OK || isImmReady == false) {
>   // Now it will be updated through job queue.
>   avd_saImmOiRtObjectCreate(className, parentName, attrValues);
> }
> @@ -1930,14 +1942,15 @@ void avd_saImmOiRtObjectCreate(const std::string 
> ,
>   void avd_saImmOiRtObjectDelete_sync(const std::string ) {
> TRACE_ENTER2("%s", dn.c_str());
> SaAisErrorT rc = SA_AIS_OK;
> +  bool isImmReady = isImmServiceReady(avd_cb);
>   
> -  if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) {
> +  if (isImmReady == true) {
>   rc = saImmOiRtObjectDelete_o3(avd_cb->immOiHandle, dn.c_str());
>   if (rc != SA_AIS_OK) {
> LOG_WA("saImmOiRtObjectDelete_o3 of '%s' failed with %u", dn.c_str(), 
> rc);
>   }
> }
> -  if (rc != SA_AIS_OK || avd_cb->avail_state_avd != SA_AMF_HA_ACTIVE) {
> +  if (rc != SA_AIS_OK || isImmReady == false) {
>   // Now it will be updated through job queue.
>   avd_saImmOiRtObjectDelete(dn);
> }
>

Re: [devel] [PATCH 1/1] amfnd: Buffered not-ack susi assignment response after both SC go down [#2105]

2017-05-17 Thread praveen malviya

Hi Minh,

While testing this, I am observing that amfd is dropping the assignment 
message because of message id mismatch:
May 17 12:37:39.522117 osafamfd [7686:7686:src/amf/amfd/sgproc.cc:1171] 
 >> avd_su_si_assign_evh: id:1, node:2030f, act:5, 
'safSu=SU1,safSg=AmfDemo,safApp=AmfDemo1', '', ha:3, err:1, single:0


May 17 12:37:39.522404 osafamfd [7686:7686:src/amf/amfd/ndproc.cc:0075] 
WA avd_msg_sanity_chk: invalid msg id 1, msg type 5, from 2030f should be 3
May 17 12:37:39.522418 osafamfd [7686:7686:src/amf/amfd/sgproc.cc:1777] 
<< avd_su_si_assign_evh

I am also looking into this. For your reference, I have attached the amfd and
amfnd traces from SC-1 and PL-3, respectively, to the ticket.
I am working with one controller and one payload.


Thanks
Praveen

On 15-May-17 1:06 PM, Minh Chau wrote:
> When amfnd-payload responds susi assignment response just before both SC
> go down, and that response message does not come to director. Therefore,
> the status of that assignment could be seen as "modifying" in IMM. When
> SC comes back, active amfd will be waiting for that response forever.
> 
> Patch checks if a susi assignment response is sent but not-ack just before
> both SC come down, amfnd-payload will buffer it in a way as a susi get
> assigned during SC absence
> ---
>   src/amf/amfnd/di.cc | 53 
> +
>   1 file changed, 45 insertions(+), 8 deletions(-)
> 
> diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
> index e06b9260d..3776a09dc 100644
> --- a/src/amf/amfnd/di.cc
> +++ b/src/amf/amfnd/di.cc
> @@ -1282,16 +1282,53 @@ void avnd_di_msg_ack_process(AVND_CB *cb, uint32_t 
> mid) {
> Notes : None.
>   
> **/
>   void avnd_diq_del(AVND_CB *cb) {
> -  AVND_DND_MSG_LIST *rec = 0;
>   
> -  do {
> -/* pop the record */
> -m_AVND_DIQ_REC_POP(cb, rec);
> -if (!rec) break;
> +  if ((cb->dnd_list.head != nullptr)) {
> +AVND_DND_MSG_LIST *rec = 0;
> +bool found = true;
> +while (found) {
> +  found = false;
> +  for (rec = cb->dnd_list.head; rec != nullptr;
> +   rec = rec->next) {
> +osafassert(rec->msg.type == AVND_MSG_AVD);
> +// delete all pending messages that haven't been sent out
> +if (rec->no_retries == 0) {
> +  m_AVND_DIQ_REC_POP(cb, rec);
> +  avnd_diq_rec_del(cb, rec);
> +  break;
> +} else {
> +  // Assignment response had been sent, but not ack because last
> +  // controller go down, reset msg_id and will be resent later
> +  if (rec->msg.info.avd->msg_type == AVSV_N2D_INFO_SU_SI_ASSIGN_MSG) 
> {
> +if (rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id != 0) {
> +  rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id = 0;
> +  found = true;
> +  LOG_NO(
> +  "Found not-ack su_si_assign msg for SU:'%s', "
> +  "SI:'%s', ha_state:'%u', msg_act:'%u', single_csi:'%u', "
> +  "error:'%u', msg_id:'%u'",
> +  osaf_extended_name_borrow(>msg.info.avd->msg_info
> + .n2d_su_si_assign.su_name),
> +  osaf_extended_name_borrow(>msg.info.avd->msg_info
> + .n2d_su_si_assign.si_name),
> +  rec->msg.info.avd->msg_info.n2d_su_si_assign.ha_state,
> +  rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_act,
> +  rec->msg.info.avd->msg_info.n2d_su_si_assign
> +  .single_csi,
> +  rec->msg.info.avd->msg_info.n2d_su_si_assign.error,
> +  rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id);
> +}
> +  } else {
> +// delete other messages for now
> +m_AVND_DIQ_REC_POP(cb, rec);
> +avnd_diq_rec_del(cb, rec);
> +break;
> +  }
> +}
>   
> -/* delete the record */
> -avnd_diq_rec_del(cb, rec);
> -  } while (1);
> +  }
> +}
> +  }
>   
> return;
>   }
> 

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] amfd: Check IMM service status before use IMM call [#2416]

2017-05-16 Thread praveen malviya

Hi Minh,

Is this reproducible all the time?
I performed 4 switchovers on the default branch but did not observe it.

Thanks
Praveen

On 17-May-17 3:21 AM, Minh Chau wrote:
> When Opensaf 2N switchover, amfd tries to update some attributes
> to IMM. But this time, IMM is not available since Opensaf 2N SI
> is in QUIESCED.
> 
> SC-1 osafamfnd[510]: NO Assigned 'safSi=SC-2N,safApp=OpenSAF' QUIESCED to 
> 'safSu=SC-1,safSg=2N,safApp=OpenSAF'
> SC-1 osafamfd[496]: WA saImmOiRtObjectUpdate of 
> 'safSISU=safSu=SC-1\,safSg=2N\,safApp=OpenSAF,safSi=SC-2N,safApp=OpenSAF' 
> osafAmfSISUFsmState failed with 5
> SC-1 osafimmnd[441]: WA ERR_BAD_HANDLE: Handle use is blocked by pending 
> reply on syncronous call
> 
> This patch corrects the problem by checking IMM service status
> before call avd_saImmOiRtXXX_sync. The problem does not appear
> previously because IMM update is queued and FiFo::execute() does
> check IMM status before executing a job. A check of @avail_state_avd
> against SA_AMF_HA_ACTIVE is not enough in avd_saImmOiRtXXX_sync.
> This patch does check IMM status in avd_saImmOiRtXXX_sync as similar
> as FiFo::execute().
> ---
>   src/amf/amfd/imm.cc | 35 ---
>   1 file changed, 24 insertions(+), 11 deletions(-)
> 
> diff --git a/src/amf/amfd/imm.cc b/src/amf/amfd/imm.cc
> index 65df73f73..7b1aa333e 100644
> --- a/src/amf/amfd/imm.cc
> +++ b/src/amf/amfd/imm.cc
> @@ -131,22 +131,32 @@ static char *StrDup(const char *s) {
>   //
>   Job::~Job() {}
>   
> -//
> -bool ImmJob::isRunnable(const AVD_CL_CB *cb) {
> +// TODO: Make isImmServiceReady as static to limit its scope
> +// This function should belong to AVD_CB class as a method
> +static bool isImmServiceReady(const AVD_CL_CB *cb) {
> TRACE_ENTER();
> bool rc = true;
> +
> +  if (avd_cb->active_services_exist == false) {
> +  TRACE("No active service");
> +  rc = false;
> +  }
> if ((!avd_cb->is_implementer) &&
> (avd_cb->avail_state_avd == SA_AMF_HA_STANDBY)) {
>   rc = false;
> }
> -
> if (avd_cb->avd_imm_status == AVD_IMM_INIT_ONGOING) {
>   TRACE("Already IMM init is going, try again after sometime");
>   rc = false;
> }
> -  TRACE_LEAVE();
> +  TRACE_LEAVE2("%u:", rc);
> return rc;
>   }
> +
> +//
> +bool ImmJob::isRunnable(const AVD_CL_CB *cb) {
> +  return isImmServiceReady(cb);
> +}
>   //
>   AvdJobDequeueResultT ImmObjCreate::exec(const AVD_CL_CB *cb) {
> SaAisErrorT rc;
> @@ -1707,10 +1717,11 @@ SaAisErrorT avd_saImmOiRtObjectUpdate_sync(
> SaImmAttrModificationT_2 attrMod;
> const SaImmAttrModificationT_2 *attrMods[] = {, nullptr};
> SaImmAttrValueT attrValues[] = {value};
> -
> const std::string attribute_name(attributeName);
> +  bool isImmReady = isImmServiceReady(avd_cb);
> +
> TRACE_ENTER2("'%s' %s", dn.c_str(), attributeName);
> -  if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) {
> +  if (isImmReady == true) {
>   attrMod.modType = modifyType;
>   attrMod.modAttr.attrName = attributeName;
>   attrMod.modAttr.attrValuesNumber = 1;
> @@ -1723,7 +1734,7 @@ SaAisErrorT avd_saImmOiRtObjectUpdate_sync(
>attributeName, rc);
> }
>   
> -  if (rc != SA_AIS_OK || avd_cb->avail_state_avd != SA_AMF_HA_ACTIVE) {
> +  if (rc != SA_AIS_OK || isImmReady == false) {
>   // Now it will be updated through job queue.
>   avd_saImmOiRtObjectUpdate(dn, attribute_name, attrValueType, value);
> }
> @@ -1875,8 +1886,9 @@ void avd_saImmOiRtObjectCreate_sync(const std::string 
> ,
> TRACE_ENTER2("%s %s", className.c_str(), parentName.c_str());
>   
> SaAisErrorT rc = SA_AIS_OK;
> +  bool isImmReady = isImmServiceReady(avd_cb);
>   
> -  if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) {
> +  if (isImmReady == true) {
>   const SaNameTWrapper parent_name(parentName);
>   rc = saImmOiRtObjectCreate_2(avd_cb->immOiHandle,
>   const_cast(className.c_str()),
> @@ -1887,7 +1899,7 @@ void avd_saImmOiRtObjectCreate_sync(const std::string 
> ,
>   }
> }
>   
> -  if (rc != SA_AIS_OK || avd_cb->avail_state_avd != SA_AMF_HA_ACTIVE) {
> +  if (rc != SA_AIS_OK || isImmReady == false) {
>   // Now it will be updated through job queue.
>   avd_saImmOiRtObjectCreate(className, parentName, attrValues);
> }
> @@ -1930,14 +1942,15 @@ void avd_saImmOiRtObjectCreate(const std::string 
> ,
>   void avd_saImmOiRtObjectDelete_sync(const std::string ) {
> TRACE_ENTER2("%s", dn.c_str());
> SaAisErrorT rc = SA_AIS_OK;
> +  bool isImmReady = isImmServiceReady(avd_cb);
>   
> -  if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) {
> +  if (isImmReady == true) {
>   rc = saImmOiRtObjectDelete_o3(avd_cb->immOiHandle, dn.c_str());
>   if (rc != SA_AIS_OK) {
> LOG_WA("saImmOiRtObjectDelete_o3 of '%s' failed with %u", dn.c_str(), 
> rc);
>   }
> }
> -  if (rc != SA_AIS_OK || avd_cb->avail_state_avd != SA_AMF_HA_ACTIVE) {
> +  if (rc != SA_AIS_OK || isImmReady == false) {

Re: [devel] [PATCH 1/1] amf: send oper_state when NCS SUs already instantiated [#2443]

2017-05-15 Thread praveen malviya


Hi Long,

This check is very generic.
In SU restart cases, a PI SU having NPI components will send
unnecessary enabled events to AMFD. When AMFD receives these events,
it will try to assign this SU, which can lead to assignments in
redundancy models other than 2N.
I think the check should be moved to the event handler of the presence state
message. Attached is a patch based on this idea.

What do you think?


Thanks
Praveen

On 28-Apr-17 9:42 AM, Long H Buu Nguyen wrote:

---
  src/amf/amfnd/susm.cc | 10 ++
  1 file changed, 10 insertions(+)

diff --git a/src/amf/amfnd/susm.cc b/src/amf/amfnd/susm.cc
index 52af63b83..04ced426d 100644
--- a/src/amf/amfnd/susm.cc
+++ b/src/amf/amfnd/susm.cc
@@ -1608,6 +1608,16 @@ uint32_t avnd_su_pres_fsm_run(AVND_CB *cb, AVND_SU *su, 
AVND_COMP *comp,
/* process state change */
if (prv_st != final_st)
  rc = avnd_su_pres_st_chng_prc(cb, su, prv_st, final_st);
+  else {
+// If SU has been already instantiated, inform amfd
+if (SA_AMF_PRESENCE_INSTANTIATED == final_st &&
+su_all_pi_comps_instantiated(su) == true) {
+  if (m_AVND_SU_OPER_STATE_IS_ENABLED(su)) {
+TRACE("SU oper state is enabled");
+rc = avnd_di_oper_send(cb, su, 0);
+  }
+}
+  }
  
  done:

TRACE_LEAVE2("%u", rc);

diff --git a/src/amf/amfnd/susm.cc b/src/amf/amfnd/susm.cc
index 04ced42..18b8fc8 100644
--- a/src/amf/amfnd/susm.cc
+++ b/src/amf/amfnd/susm.cc
@@ -1608,16 +1608,6 @@ uint32_t avnd_su_pres_fsm_run(AVND_CB *cb, AVND_SU *su, 
AVND_COMP *comp,
   /* process state change */
   if (prv_st != final_st)
 rc = avnd_su_pres_st_chng_prc(cb, su, prv_st, final_st);
-  else {
-// If SU has been already instantiated, inform amfd
-if (SA_AMF_PRESENCE_INSTANTIATED == final_st &&
-su_all_pi_comps_instantiated(su) == true) {
-  if (m_AVND_SU_OPER_STATE_IS_ENABLED(su)) {
-TRACE("SU oper state is enabled");
-rc = avnd_di_oper_send(cb, su, 0);
-  }
-}
-  }
 
 done:
   TRACE_LEAVE2("%u", rc);
@@ -4133,7 +4123,16 @@ uint32_t avnd_evt_ir_evh(struct avnd_cb_tag *cb, struct 
avnd_evt_tag *evt) {
 }
 TRACE("SU instantiation for PI SUs, running the SU presence state 
FSM:'%s'",
   su->name.c_str());
-rc = avnd_su_pres_fsm_run(cb, su, 0, AVND_SU_PRES_FSM_EV_INST);
+// If SU has been already instantiated, inform amfd
+if ((cb->led_state == AVND_LED_STATE_RED) &&
+(su->pres == SA_AMF_PRESENCE_INSTANTIATED) &&
+(su_all_pi_comps_instantiated(su) == true) &&
+(m_AVND_SU_OPER_STATE_IS_ENABLED(su))) {
+TRACE("SU oper state is enabled and pres state is instantiated.");
+rc = avnd_di_oper_send(cb, su, 0);
+} else {
+rc = avnd_su_pres_fsm_run(cb, su, 0, AVND_SU_PRES_FSM_EV_INST);
+}
   } else {
 if (m_AVND_SU_IS_REG_FAILED(su)) {
   /* The SU configuration is bad, we cannot do much other transition to
--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1/1] amfnd: Buffered not-ack susi assignment response after both SC go down [#2105]

2017-05-15 Thread praveen malviya

Hi Minh,

I am reviewing this patch.

Thanks,
Praveen

On 15-May-17 1:06 PM, Minh Chau wrote:
> When amfnd-payload responds susi assignment response just before both SC
> go down, and that response message does not come to director. Therefore,
> the status of that assignment could be seen as "modifying" in IMM. When
> SC comes back, active amfd will be waiting for that response forever.
> 
> Patch checks if a susi assignment response is sent but not-ack just before
> both SC come down, amfnd-payload will buffer it in a way as a susi get
> assigned during SC absence
> ---
>   src/amf/amfnd/di.cc | 53 
> +
>   1 file changed, 45 insertions(+), 8 deletions(-)
> 
> diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
> index e06b9260d..3776a09dc 100644
> --- a/src/amf/amfnd/di.cc
> +++ b/src/amf/amfnd/di.cc
> @@ -1282,16 +1282,53 @@ void avnd_di_msg_ack_process(AVND_CB *cb, uint32_t 
> mid) {
> Notes : None.
>   
> **/
>   void avnd_diq_del(AVND_CB *cb) {
> -  AVND_DND_MSG_LIST *rec = 0;
>   
> -  do {
> -/* pop the record */
> -m_AVND_DIQ_REC_POP(cb, rec);
> -if (!rec) break;
> +  if ((cb->dnd_list.head != nullptr)) {
> +AVND_DND_MSG_LIST *rec = 0;
> +bool found = true;
> +while (found) {
> +  found = false;
> +  for (rec = cb->dnd_list.head; rec != nullptr;
> +   rec = rec->next) {
> +osafassert(rec->msg.type == AVND_MSG_AVD);
> +// delete all pending messages that haven't been sent out
> +if (rec->no_retries == 0) {
> +  m_AVND_DIQ_REC_POP(cb, rec);
> +  avnd_diq_rec_del(cb, rec);
> +  break;
> +} else {
> +  // Assignment response had been sent, but not ack because last
> +  // controller go down, reset msg_id and will be resent later
> +  if (rec->msg.info.avd->msg_type == AVSV_N2D_INFO_SU_SI_ASSIGN_MSG) 
> {
> +if (rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id != 0) {
> +  rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id = 0;
> +  found = true;
> +  LOG_NO(
> +  "Found not-ack su_si_assign msg for SU:'%s', "
> +  "SI:'%s', ha_state:'%u', msg_act:'%u', single_csi:'%u', "
> +  "error:'%u', msg_id:'%u'",
> +  osaf_extended_name_borrow(>msg.info.avd->msg_info
> + .n2d_su_si_assign.su_name),
> +  osaf_extended_name_borrow(>msg.info.avd->msg_info
> + .n2d_su_si_assign.si_name),
> +  rec->msg.info.avd->msg_info.n2d_su_si_assign.ha_state,
> +  rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_act,
> +  rec->msg.info.avd->msg_info.n2d_su_si_assign
> +  .single_csi,
> +  rec->msg.info.avd->msg_info.n2d_su_si_assign.error,
> +  rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id);
> +}
> +  } else {
> +// delete other messages for now
> +m_AVND_DIQ_REC_POP(cb, rec);
> +avnd_diq_rec_del(cb, rec);
> +break;
> +  }
> +}
>   
> -/* delete the record */
> -avnd_diq_rec_del(cb, rec);
> -  } while (1);
> +  }
> +}
> +  }
>   
> return;
>   }
> 

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 0/1] Review Request for amf: send oper_state when NCS SUs already instantiated [#2443]

2017-05-14 Thread praveen malviya

Hi Long,

I will review it by tomorrow.

Thanks
Praveen

On 15-May-17 8:55 AM, Long Nguyen wrote:
> Dear maintainers,
> 
> Can you please help to review the patch?
> 
> Thanks so much,
> Long Nguyen.
> 
> On 5/9/2017 9:29 AM, Long Nguyen wrote:
>> Hi,
>>
>> Have you had time to look into the patch?
>>
>> Best regards,
>> Long Nguyen.
>>
>> On 4/28/2017 11:12 AM, Long H Buu Nguyen wrote:
>>> Summary: amf: send oper_state when NCS SUs already instantiated [#2443]
>>> Review request for Ticket(s): 2443
>>> Peer Reviewer(s): AMF devs
>>> Pull request to: AMF maintainers
>>> Affected branch(es): develop, release
>>> Development branch: ticket-2443
>>> Base revision: 94fe6f2ca5c34bafc86f001807ea08ce39f60a34
>>> Personal repository: git://git.code.sf.net/u/xlobung/review
>>>
>>> 
>>> Impacted area   Impact y/n
>>> 
>>>   Docsn
>>>   Build systemn
>>>   RPM/packaging   n
>>>   Configuration files n
>>>   Startup scripts n
>>>   SAF servicesn
>>>   OpenSAF servicesy
>>>   Core libraries  n
>>>   Samples n
>>>   Tests   n
>>>   Other   n
>>>
>>>
>>> Comments (indicate scope for each "y" above):
>>> -
>>>  Assume after headless, SC-1 becomes ACTIVE. Amfnd in SC-2 sends 
>>> a node_up
>>> message to amfd-SC-1. amfnd-SC-2 will instantiate NCS SUs in SC-2 
>>> as soon
>>> as amfd-SC-1 receives the node_up message. At the time NCS SUs in 
>>> SC-2
>>> are INSTANTIATED, if SC-1 is rebooted, amfnd-SC-2 receives 
>>> NEW_ACTIVE
>>> because amfd-SC-2 is set to ACTIVE by RDE. amfnd-SC-2 sends a 
>>> node_up
>>> message to amfd-SC-2. Later, amfnd-SC-2 continues to instantiate 
>>> NCS SUs
>>> in SC-2. However, the NCS SUs in SC-2 are already INSTANTIATED. 
>>> amfnd-SC-2
>>> does not send oper_state message to amfd-SC-2 because the NCS SU 
>>> presence
>>> states do not change. As a result, amf does not continue with the 
>>> normal
>>> startup process.
>>>
>>> revision 01dc86166f3ed1b9b46534092089d5bcfaf1ef57
>>> Author:Long H Buu Nguyen 
>>> Date:Thu, 27 Apr 2017 19:39:09 +0700
>>>
>>> amf: send oper_state when NCS SUs already instantiated [#2443]
>>>
>>>
>>>
>>> Complete diffstat:
>>> --
>>>   src/amf/amfnd/susm.cc | 10 ++
>>>   1 file changed, 10 insertions(+)
>>>
>>>
>>> Testing Commands:
>>> -
>>>   As described in the ticket.
>>>
>>>
>>> Testing, Expected Results:
>>> --
>>>   Opensaf starts successfully.
>>>
>>>
>>> Conditions of Submission:
>>> -
>>>   Ack'ed from reviewers.
>>>
>>>
>>> Arch  Built StartedLinux distro
>>> ---
>>> mipsn  n
>>> mips64  n  n
>>> x86 n  n
>>> x86_64  y  y
>>> powerpc n  n
>>> powerpc64   n  n
>>>
>>>
>>> Reviewer Checklist:
>>> ---
>>> [Submitters: make sure that your review doesn't trigger any checkmarks!]
>>>
>>>
>>> Your checkin has not passed review because (see checked entries):
>>>
>>> ___ Your RR template is generally incomplete; it has too many blank 
>>> entries
>>>  that need proper data filled in.
>>>
>>> ___ You have failed to nominate the proper persons for review and push.
>>>
>>> ___ Your patches do not have proper short+long header
>>>
>>> ___ You have grammar/spelling in your header that is unacceptable.
>>>
>>> ___ You have exceeded a sensible line length in your 
>>> headers/comments/text.
>>>
>>> ___ You have failed to put in a proper Trac Ticket # into your commits.
>>>
>>> ___ You have incorrectly put/left internal data in your comments/files
>>>  (i.e. internal bug tracking tool IDs, product names etc)
>>>
>>> ___ You have not given any evidence of testing beyond basic build tests.
>>>  Demonstrate some level of runtime or other sanity testing.
>>>
>>> ___ You have ^M present in some of your files. These have to be removed.
>>>
>>> ___ You have needlessly changed whitespace or added whitespace crimes
>>>  like trailing spaces, or spaces before tabs.
>>>
>>> ___ You have mixed real technical changes with whitespace and other
>>>  cosmetic code cleanup changes. These have to be separate commits.
>>>
>>> ___ You need to refactor your submission into logical chunks; there is
>>>  too much content into a single commit.
>>>
>>> ___ You have extraneous garbage in your review (merge commits etc)
>>>
>>> ___ You have giant attachments which should never have been sent;
>>>  Instead you should place your content in a public tree to be 
>>> pulled.
>>>
>>> ___ You have too many commits attached to an e-mail; resend as threaded
>>>  commits, or place in a public tree for a pull.
>>>
>>> ___ You have

Re: [devel] [PATCH 1/1] amfd: make auto repair restriction configurable [#2435]

2017-04-26 Thread praveen malviya



On 26-Apr-17 3:25 PM, Gary Lee wrote:
> Hi Praveen
> 
>  From talking with some SMF maintainers, some applications could be using 
> saAmfSUMaintenanceCampaign even though AMF does not.
How is it exposed to the application?
I guess an SMF application can register only for the SMF callback
SaSmfCampaignCallbackT, which has nothing to do with setting and
unsetting it in AMF. Also, AMF does not send the campaign name in
notifications before 5.2.
Thanks,
Praveen
> So in terms of backwards compatibility, it is better to put this 
> configuration in AMF instead.
> 
> Thanks
> Gary
> 
> -----Original Message-
> From: praveen malviya <praveen.malv...@oracle.com>
> Organization: Oracle Corporation
> Date: Wednesday, 26 April 2017 at 7:47 pm
> To: gary <gary@dektech.com.au>
> Cc: <opensaf-devel@lists.sourceforge.net>
> Subject: Re: [PATCH 1/1] amfd: make auto repair restriction configurable 
> [#2435]
> 
>  Hi Gary,
>  
>  If I understand, before 5.2, while running campaign SMF used to set
>  saAmfSUMaintenanceCampaign attribute in affected SUs using CCB
>  operations. Since AMF feature "Restrictions to auto repair" was not
>  implemented (implemented in #2144, 5.2), AMF was still taking actions if
>  components faults while campaign is running and also su maintenance
>  related notifications were not generated. With 2144 in 5.2 release, SMF
>  is still setting the saAmfSUMaintenanceCampaign. But if some faults
>  happens now, AMF will be taking action and also it sends su maintenance
>  related notification.
>  
>  I guess before 5.2 release SMF was just setting and unsetting
>  saAmfSUMaintenanceCampaign without any other use as #2144 was not
>  implemented before 5.2?  If it is so, it means no application and even
>  SMF itself does not track this attribute value before 5.2 other than
>  setting and unsetting? Based on this one solution could be:  if SMF
>  skips the step/command of setting saAmfSUMaintenanceCampaign based on a
>  new attribute in class SaSmfCampaign. One object of this class is
>  created for each campaign before starting the campaign.
>  
>  Note:All Non-spec configuration attributes are named as "osafAmf*" in 
> AMF.
>  
>  
>  Thanks
>  Praveen
>  
>  On 21-Apr-17 3:21 PM, Gary Lee wrote:
>  > This adds a configuration object for AMF at 
> amfConfig=1,safApp=safAmfService.
>  >
>  > A configuration attribute 'amfRestrictAutoRepairEnable' is added.
>  > This determines if 'suMaintenanceCampaign' should be ignored to 
> maintain
>  > legacy AMF behaviour. The default behaviour is not to support auto 
> repair
>  > restriction.
>  >
>  > To enable restriction:
>  > immcfg -a amfRestrictAutoRepairEnable=1 
> amfConfig=1,safApp=safAmfService
>  >
>  > To disable restriction:
>  > immcfg -a amfRestrictAutoRepairEnable=0 
> amfConfig=1,safApp=safAmfService
>  > ---
>  >   src/amf/Makefile.am|   3 +
>  >   src/amf/amfd/comp.cc   |   2 +-
>  >   src/amf/amfd/config.cc | 179 
> +
>  >   src/amf/amfd/config.h  |  21 +
>  >   src/amf/amfd/imm.cc|  35 ++--
>  >   src/amf/amfd/ndproc.cc |   4 +-
>  >   src/amf/amfd/node.cc   |   4 +-
>  >   src/amf/amfd/sgproc.cc |  14 ++--
>  >   src/amf/amfd/su.cc |  37 +++--
>  >   src/amf/amfd/su.h  |   3 +-
>  >   src/amf/common/amf_defs.h  |   3 +
>  >   src/amf/config/amf_classes.xml |  15 
>  >   src/amf/config/amf_objects.xml |   7 ++
>  >   13 files changed, 300 insertions(+), 27 deletions(-)
>  >   create mode 100644 src/amf/amfd/config.cc
>  >   create mode 100644 src/amf/amfd/config.h
>  >
>  > diff --git a/src/amf/Makefile.am b/src/amf/Makefile.am
>  > index 8c175c2..1d6ca60 100644
>  > --- a/src/amf/Makefile.am
>  > +++ b/src/amf/Makefile.am
>  > @@ -103,6 +103,7 @@ noinst_HEADERS += \
>  >src/amf/amfd/clm.h \
>  >src/amf/amfd/cluster.h \
>  >src/amf/amfd/comp.h \
>  > +  src/amf/amfd/config.h \
>  >src/amf/amfd/csi.h \
>  >src/amf/amfd/def.h \
>  >src/amf/amfd/evt.h \
>  > @@ -213,6 +214,7 @@ bin_testamfd_LDFLAGS = \
>  >src/amf/amfd/bin_osafamfd-ckpt_updt.o \
>  >src/amf/amfd

Re: [devel] [PATCH 1/1] amfd: make auto repair restriction configurable [#2435]

2017-04-26 Thread praveen malviya

Hi Gary,

If I understand correctly, before 5.2, while running a campaign, SMF used to set
the saAmfSUMaintenanceCampaign attribute in the affected SUs using CCB
operations. Since the AMF feature "Restrictions to auto repair" was not
implemented (it was implemented in #2144, 5.2), AMF was still taking actions if
components faulted while a campaign was running, and SU maintenance
related notifications were not generated. With #2144 in the 5.2 release, SMF
is still setting saAmfSUMaintenanceCampaign. But if some fault
happens now, AMF will be taking action and it also sends an SU maintenance
related notification.

I guess before 5.2 release SMF was just setting and unsetting 
saAmfSUMaintenanceCampaign without any other use as #2144 was not 
implemented before 5.2?  If it is so, it means no application and even 
SMF itself does not track this attribute value before 5.2 other than 
setting and unsetting? Based on this one solution could be:  if SMF 
skips the step/command of setting saAmfSUMaintenanceCampaign based on a 
new attribute in class SaSmfCampaign. One object of this class is 
created for each campaign before starting the campaign.

Note: All non-spec configuration attributes are named "osafAmf*" in AMF.


Thanks
Praveen

On 21-Apr-17 3:21 PM, Gary Lee wrote:
> This adds a configuration object for AMF at amfConfig=1,safApp=safAmfService.
> 
> A configuration attribute 'amfRestrictAutoRepairEnable' is added.
> This determines if 'suMaintenanceCampaign' should be ignored to maintain
> legacy AMF behaviour. The default behaviour is not to support auto repair
> restriction.
> 
> To enable restriction:
> immcfg -a amfRestrictAutoRepairEnable=1 amfConfig=1,safApp=safAmfService
> 
> To disable restriction:
> immcfg -a amfRestrictAutoRepairEnable=0 amfConfig=1,safApp=safAmfService
> ---
>   src/amf/Makefile.am|   3 +
>   src/amf/amfd/comp.cc   |   2 +-
>   src/amf/amfd/config.cc | 179 
> +
>   src/amf/amfd/config.h  |  21 +
>   src/amf/amfd/imm.cc|  35 ++--
>   src/amf/amfd/ndproc.cc |   4 +-
>   src/amf/amfd/node.cc   |   4 +-
>   src/amf/amfd/sgproc.cc |  14 ++--
>   src/amf/amfd/su.cc |  37 +++--
>   src/amf/amfd/su.h  |   3 +-
>   src/amf/common/amf_defs.h  |   3 +
>   src/amf/config/amf_classes.xml |  15 
>   src/amf/config/amf_objects.xml |   7 ++
>   13 files changed, 300 insertions(+), 27 deletions(-)
>   create mode 100644 src/amf/amfd/config.cc
>   create mode 100644 src/amf/amfd/config.h
> 
> diff --git a/src/amf/Makefile.am b/src/amf/Makefile.am
> index 8c175c2..1d6ca60 100644
> --- a/src/amf/Makefile.am
> +++ b/src/amf/Makefile.am
> @@ -103,6 +103,7 @@ noinst_HEADERS += \
>   src/amf/amfd/clm.h \
>   src/amf/amfd/cluster.h \
>   src/amf/amfd/comp.h \
> + src/amf/amfd/config.h \
>   src/amf/amfd/csi.h \
>   src/amf/amfd/def.h \
>   src/amf/amfd/evt.h \
> @@ -213,6 +214,7 @@ bin_testamfd_LDFLAGS = \
>   src/amf/amfd/bin_osafamfd-ckpt_updt.o \
>   src/amf/amfd/bin_osafamfd-clm.o \
>   src/amf/amfd/bin_osafamfd-cluster.o \
> + src/amf/amfd/bin_osafamfd-config.o \
>   src/amf/amfd/bin_osafamfd-comp.o \
>   src/amf/amfd/bin_osafamfd-compcstype.o \
>   src/amf/amfd/bin_osafamfd-comptype.o \
> @@ -300,6 +302,7 @@ bin_osafamfd_SOURCES = \
>   src/amf/amfd/comp.cc \
>   src/amf/amfd/compcstype.cc \
>   src/amf/amfd/comptype.cc \
> + src/amf/amfd/config.cc \
>   src/amf/amfd/csi.cc \
>   src/amf/amfd/csiattr.cc \
>   src/amf/amfd/cstype.cc \
> diff --git a/src/amf/amfd/comp.cc b/src/amf/amfd/comp.cc
> index d4b51a6..3e0dc5d 100644
> --- a/src/amf/amfd/comp.cc
> +++ b/src/amf/amfd/comp.cc
> @@ -152,7 +152,7 @@ void 
> AVD_COMP::avd_comp_pres_state_set(SaAmfPresenceStateT pres_state) {
>   (saAmfCompPresenceState == SA_AMF_PRESENCE_TERMINATION_FAILED)) ||
>  ((node->saAmfNodeFailfastOnInstantiationFailure == true) &&
>   (saAmfCompPresenceState == SA_AMF_PRESENCE_INSTANTIATION_FAILED))) 
> &&
> -  (su->saAmfSUMaintenanceCampaign.empty())) {
> +  (su->restrict_auto_repair() == false)) {
>   saflog(LOG_NOTICE, amfSvcUsrName, "%s PresenceState %s => %s",
>  osaf_extended_name_borrow(_info.name),
>  avd_pres_state_name[old_state], avd_pres_state_name[pres_state]);
> diff --git a/src/amf/amfd/config.cc b/src/amf/amfd/config.cc
> new file mode 100644
> index 000..bdb07d2
> --- /dev/null
> +++ b/src/amf/amfd/config.cc
> @@ -0,0 +1,179 @@
> +#include "amf/amfd/util.h"
> +#include "amf/common/amf_util.h"
> +#include "amf/amfd/imm.h"
> +#include "amf/amfd/node.h"
> +#include "amf/amfd/config.h"
> +
> +static Configuration _configuration;
> +Configuration *configuration = &_configuration;
> +
> +static void ccb_apply_modify_hdlr(struct CcbUtilOperationData *opdata) {
> +  TRACE_ENTER();
> +  const SaImmAttrModificationT_2 *attr_mod;
> +  int i

Re: [devel] [PATCH 1/1] samples: fix $piddir undefined in amf_demo_script [#2410]

2017-04-12 Thread praveen malviya

Ack, code review only.

Thanks,
praveen

On 12-Apr-17 6:06 PM, Nguyen Luu wrote:
> The $piddir variable (containing the path to the amf_demo comp's pid file)
> is not defined in amf_demo_script.
> 
> This could lead to the amf_demo process not getting truly killed
> in some cases when cleanup is called (e.g. when invoking
> saAmfComponentErrorReport()), leaving the process unmanaged by AMF.
> ---
>   samples/amf/sa_aware/amf_demo_script | 1 +
>   1 file changed, 1 insertion(+)
> 
> diff --git a/samples/amf/sa_aware/amf_demo_script 
> b/samples/amf/sa_aware/amf_demo_script
> index eb61ac0..a033f53 100755
> --- a/samples/amf/sa_aware/amf_demo_script
> +++ b/samples/amf/sa_aware/amf_demo_script
> @@ -34,6 +34,7 @@ fi
>   # Source LSB functions library
>   . /lib/lsb/init-functions
>   
> +piddir="/tmp"
>   compname=`echo $SA_AMF_COMPONENT_NAME | md5sum | awk '{print $1}'`
>   pidfile="$piddir/${compname}.pid"
>   
> 

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1 of 1] clms: return TIME_OUT for unlock op if CLMS update to CLM agent fails [#2381]

2017-04-12 Thread praveen malviya



On 11-Apr-17 9:03 PM, Anders Widell wrote:
> A question:
> 
> I noticed that clms_send_is_member_info() is called twice from 
> clms_imm_node_unlock(), but you only modified the first call (the one 
> without PLM). Shouldn't the PLM case also be updated? Is a similar 
> change also applicable in clms_imm_node_shutdown?
> 
Yes, it is needed at least for the PLM case in clms_imm_node_unlock() and
maybe in other places also. But in the reported problem, there is no clue
in the CLMD traces why CLMS is not able to send a message to this client. Also,
there is no MDS_DOWN for that client. This client was created by this
CLMS when it was in check-pointing.
I have reproduced the issue in one way, as given in the comment. But in this
case CLMS is getting the MDS_DOWN event. So this is not the original case
reported in the description. Due to this, I did not go for a generic fix.


Thanks
Praveen
> regards,
> 
> Anders Widell
> 
> 
> On 04/03/2017 10:09 AM, praveen.malv...@oracle.com wrote:
>>   src/clm/clmd/clms_evt.c   |   2 +-
>>   src/clm/clmd/clms_imm.c   |  11 +--
>>   src/clm/clmd/clms_mbcsv.c |   2 +-
>>   src/clm/clmd/clms_util.c  |  16 ++--
>>   4 files changed, 17 insertions(+), 14 deletions(-)
>>
>>
>> In this problem, first user performs CLM lock operation on payload and 
>> restarts it immediately.
>> When node was joining, user performs UNLOCK operation on it. Operation 
>> gets timed out.
>> After this, CLM rejects any admin operation on this payload with 
>> BAD_OP indicating
>> that an admin operation is already going on.
>>
>> For unlock operation when CLM tries to send membership status to 
>> clients on the node being
>> unlocked, MDS returns failure for a client. CLMS does not continue 
>> with remaining clients and it does
>> not reply to IMM client also. This the reason of unlocked opreration 
>> getting timed out. Also
>> CLM does not clear internal parameter related to admin operation. Due 
>> to this subsequent admin
>> operationa on this node are rejected with BAD_OP. There is no clue in 
>> traces why MDS returned failure.
>>
>> Generally this can happen when CLMS is trying to send message to a 
>> bunch of clients and
>> one of them goes down. Since CLMS has not processed this DOWN event, 
>> it will try to send
>> message to this cleint for which MDS will return failure. Currently 
>> issue is reproduced
>> on this basis. This patch is based on this. With the patch CLM will 
>> send membership status to remaining
>> clients even if MDS returns failure for a client. Also in such a 
>> situation, CLMS will return
>> TIMEOUT to the user and it will clear internal admin op params so that 
>> subsequent operation will
>> continue.
>>
>> diff --git a/src/clm/clmd/clms_evt.c b/src/clm/clmd/clms_evt.c
>> --- a/src/clm/clmd/clms_evt.c
>> +++ b/src/clm/clmd/clms_evt.c
>> @@ -137,7 +137,7 @@ CLMS_CLIENT_INFO *clms_client_get_by_id(
>>   rec = (CLMS_CLIENT_INFO *) 
>> ncs_patricia_tree_get(_cb->client_db, (uint8_t *)_id_net);
>>   if (NULL == rec)
>> -TRACE("client_id: %u lookup failed", client_id);
>> +TRACE("client_id: %u not found", client_id);
>>   return rec;
>>   }
>> diff --git a/src/clm/clmd/clms_imm.c b/src/clm/clmd/clms_imm.c
>> --- a/src/clm/clmd/clms_imm.c
>> +++ b/src/clm/clmd/clms_imm.c
>> @@ -2221,16 +2221,15 @@ uint32_t clms_imm_node_unlock(CLMS_CLUST
>>   {
>>   uint32_t rc = NCSCC_RC_SUCCESS;
>>   TRACE_ENTER2("Node name %s to unlock", nodeop->node_name.value);
>> -
>> +SaAisErrorT ais_rc = SA_AIS_OK;
>>   if (nodeop->admin_state == SA_CLM_ADMIN_UNLOCKED) {
>> -LOG_ER("Node is already in an unlocked state");
>> +LOG_NO("Node is already in an unlocked state");
>>   nodeop->admin_op = 0;
>>   
>> (void)immutil_saImmOiAdminOperationResult(clms_cb->immOiHandle, 
>> nodeop->curr_admin_inv,
>> SA_AIS_ERR_NO_OP);
>>   rc = NCSCC_RC_FAILURE;
>>   goto done;
>>   }
>> -
>>   if (((nodeop->admin_state == SA_CLM_ADMIN_LOCKED) || 
>> (nodeop->admin_state == SA_CLM_ADMIN_SHUTTING_DOWN))) {
>>   if (clms_cb->reg_with_plm == SA_FALSE) {
>> @@ -2259,9 +2258,9 @@ uint32_t clms_imm_node_unlock(CLMS_CLUST
>>   clms_node_join_ntf(clms_cb, nodeop);
>>   rc = clms_send_is_member_info(clms_cb, 
>> nodeop->node_id, nodeop->member, true);
>> -if(rc != NCSCC_RC_SUCCESS) {
>> +if (rc != NCSCC_RC_SUCCESS) {
>>   TRACE("clms_send_is_member_info failed %u", rc);
>> -goto done;
>> +ais_rc = SA_AIS_ERR_TIMEOUT;
>>   }
>>   nodeop->change = SA_CLM_NODE_NO_CHANGE;
>>   }
>> @@ -2322,7 +2321,7 @@ uint32_t clms_imm_node_unlock(CLMS_CLUST
>>   nodeop->admin_op = 0;
>>   /* Send node join notification */
>> -(void)immutil_saImmOiAdminOperationResult(clms_cb->immOiHandle, 
>>

[devel] [PATCH 0 of 1] Review Request for clm: add tool commands clm-adm, clm-state, clm-find [#2394]

2017-04-05 Thread praveen . malviya

Summary: clm: add tool commands clm-adm, clm-state, clm-find [#2394] 
Review request for Trac Ticket(s): #2394 
Peer Reviewer(s): Anders 
Pull request to: <>
Affected branch(es):Default 
Development branch: <>


Impacted area   Impact y/n

 Docsn
 Build systemn
 RPM/packaging   n
 Configuration files n
 Startup scripts n
 SAF servicesn
 OpenSAF servicesn
 Core libraries  n
 Samples n
 Tests   n
 Other   y


Comments (indicate scope for each "y" above):
-

changeset f4676a9743119edbd642debfe79f3ca43bbb8a47
Author: Praveen Malviya <praveen.malv...@oracle.com>
Date:   Wed, 05 Apr 2017 17:56:44 +0530

clm: add tool commands clm-adm, clm-state, clm-find [#2394]

clm-adm: for performing CLM admin operations on CLM node and cluster.
clm-find: for finding out object(s) configured for CLM classes SaClmCluster
and SaClmNode. Additional options like membership status and admin state can
also be given to find out objects which satisfy these criteria.
clm-state: for listing CLM node(s) and their important attributes.


Added Files:

 src/clm/tools/clm-adm
 src/clm/tools/clm-find
 src/clm/tools/clm-state


Complete diffstat:
--
 opensaf.spec.in |3 ++
 src/clm/Makefile.am |5 
 src/clm/tools/clm-adm   |   67 
+
 src/clm/tools/clm-find  |   83 
++
 src/clm/tools/clm-state |  107 
++
 5 files changed, 265 insertions(+), 0 deletions(-)


Testing Commands:
-
#clm-find node unlocked
safNode=PL-4,safCluster=myClmCluster
safNode=PL-5,safCluster=myClmCluster
safNode=PL-6,safCluster=myClmCluster
safNode=SC-1,safCluster=myClmCluster
safNode=SC-2,safCluster=myClmCluster
safNode=SC-3,safCluster=myClmCluster

#clm-state all adm
safNode=PL-4,safCluster=myClmCluster
   saClmNodeAdminState=UNLOCKED(1)
safNode=PL-5,safCluster=myClmCluster
   saClmNodeAdminState=UNLOCKED(1)
safNode=PL-6,safCluster=myClmCluster
   saClmNodeAdminState=UNLOCKED(1)
safNode=SC-1,safCluster=myClmCluster
   saClmNodeAdminState=UNLOCKED(1)
safNode=SC-2,safCluster=myClmCluster
   saClmNodeAdminState=UNLOCKED(1)
safNode=SC-3,safCluster=myClmCluster
   saClmNodeAdminState=UNLOCKED(1)


Testing, Expected Results:
--
Already pasted out above.

Conditions of Submission:
-
Ack from reviewer.

Arch  Built StartedLinux distro
---
mipsn  n
mips64  n  n
x86 n  n
x86_64  y  y
powerpc n  n
powerpc64   n  n


Reviewer Checklist:
---
[Submitters: make sure that your review doesn't trigger any checkmarks!]


Your checkin has not passed review because (see checked entries):

___ Your RR template is generally incomplete; it has too many blank entries
that need proper data filled in.

___ You have failed to nominate the proper persons for review and push.

___ Your patches do not have proper short+long header

___ You have grammar/spelling in your header that is unacceptable.

___ You have exceeded a sensible line length in your headers/comments/text.

___ You have failed to put in a proper Trac Ticket # into your commits.

___ You have incorrectly put/left internal data in your comments/files
(i.e. internal bug tracking tool IDs, product names etc)

___ You have not given any evidence of testing beyond basic build tests.
Demonstrate some level of runtime or other sanity testing.

___ You have ^M present in some of your files. These have to be removed.

___ You have needlessly changed whitespace or added whitespace crimes
like trailing spaces, or spaces before tabs.

___ You have mixed real technical changes with whitespace and other
cosmetic code cleanup changes. These have to be separate commits.

___ You need to refactor your submission into logical chunks; there is
too much content into a single commit.

___ You have extraneous garbage in your review (merge commits etc)

___ You have giant attachments which should never have been sent;
Instead you should place your content in a public tree to be pulled.

___ You have too many commits attached to an e-mail; resend as threaded
commits, or place in a public tree for a pull.

___ You have resent this content multiple times without a clear indication
of what has changed between each re-send.

___ You have failed to adequately and individually address all of the
comments and change requests that were pr

[devel] [PATCH 1 of 1] clm: add tool commands clm-adm, clm-state, clm-find [#2394]

2017-04-05 Thread praveen . malviya

 opensaf.spec.in |3 +
 src/clm/Makefile.am |5 ++
 src/clm/tools/clm-adm   |   67 ++
 src/clm/tools/clm-find  |   83 +
 src/clm/tools/clm-state |  107 
 5 files changed, 265 insertions(+), 0 deletions(-)


clm-adm:  for performing CLM admin operations on CLM node and cluster.
clm-find: for finding out object(s) configured for CLM classes SaClmCluster
  and SaClmNode. Additional options like membership status and admin state
  can also be given to find out objects which satisfy these criteria.
clm-state: for listing CLM node(s) and their important attributes.

diff --git a/opensaf.spec.in b/opensaf.spec.in
--- a/opensaf.spec.in
+++ b/opensaf.spec.in
@@ -1444,6 +1444,9 @@ fi
 %{_bindir}/ntfsubscribe
 %{_bindir}/ntfread
 %{_bindir}/saflogger
+%{_bindir}/clm-adm
+%{_bindir}/clm-find
+%{_bindir}/clm-state
 %if %is_ais_smf
 %{_bindir}/smf-adm
 %{_bindir}/smf-find
diff --git a/src/clm/Makefile.am b/src/clm/Makefile.am
--- a/src/clm/Makefile.am
+++ b/src/clm/Makefile.am
@@ -166,6 +166,11 @@ bin_osafclmna_LDADD = \
lib/libSaAmf.la \
lib/libopensaf_core.la
 
+dist_bin_SCRIPTS += \
+   src/clm/tools/clm-adm \
+   src/clm/tools/clm-find \
+   src/clm/tools/clm-state
+
 if ENABLE_TESTS
 
 bin_PROGRAMS += bin/clmtest
diff --git a/src/clm/tools/clm-adm b/src/clm/tools/clm-adm
new file mode 100644
--- /dev/null
+++ b/src/clm/tools/clm-adm
@@ -0,0 +1,67 @@
+#! /bin/sh
+#  -*- OpenSAF  -*-
+#
+# Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
+# under the GNU Lesser General Public License Version 2.1, February 1999.
+# The complete license can be accessed from the following location:
+# http://opensource.org/licenses/lgpl-license.php
+# See the Copying file included with the OpenSAF distribution for full
+# licensing terms.
+#
+
+usage()
+{
+  echo ""
+  echo "usage: $(basename $0) [-t  timeout]  
"
+  echo ""
+  echo "-t:   command timeout in seconds (default=60)"
+  echo "Note: specify options in same order as given in usage."
+  echo ""
+}
+
+if [ $# -ne 2 -a $# -ne 4 ]; then
+  usage
+  exit 1
+fi
+
+if [ $# -eq 4 ]; then
+  if [ "$1" = "-t"  ]; then
+CMD=$3
+args="$1 $2 $4"
+  else
+echo "Invalid option or command as not per usage"
+usage 
+exit 1
+  fi
+elif [ $# -eq 2 ]; then
+  CMD=$1
+  args=$2
+fi 
+
+UNLOCK=1
+LOCK=2
+SHUTDOWN=3
+RESET=4
+case $CMD in
+  "unlock")
+  immadm -o $UNLOCK "$args"
+  ;;
+  "lock")
+  immadm -o $LOCK "$args"
+  ;;
+  "shutdown")
+  immadm -o $SHUTDOWN "$args"
+  ;;
+  "reset")
+  immadm -o $RESET "$args"
+  ;;
+  *)
+  echo "invalid admin operation: $CMD"
+  exit 1
+esac
+exit $?
+
diff --git a/src/clm/tools/clm-find b/src/clm/tools/clm-find
new file mode 100644
--- /dev/null
+++ b/src/clm/tools/clm-find
@@ -0,0 +1,83 @@
+#! /bin/sh
+
+#  -*- OpenSAF  -*-
+#
+# Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
+# under the GNU Lesser General Public License Version 2.1, February 1999.
+# The complete license can be accessed from the following location:
+# http://opensource.org/licenses/lgpl-license.php
+# See the Copying file included with the OpenSAF distribution for full
+# licensing terms.
+#
+
+usage()
+{
+  echo ""
+  echo "usage: $(basename "$0")  
[]"
+  echo ""
+  echo "   cluster|node: CLM class names"
+  echo "   locked|unlocked|member|nonmember: list nodes with this status."
+  echo ""
+}
+
+if [ $# -ne 1 -a  $# -ne 2 ] || [ "$1" = "-h" -o "$1" = "--help" ]; then
+  usage
+  exit 1
+fi
+
+if [ $# -eq 2 -a "$1" = "cluster" ]; then
+  usage
+  exit 1
+fi
+
+if [ $# -eq 2 -a "$1" = "node" -a "$2" != "locked" -a "$2" != "unlocked" -a 
"$2" != "member" -a "$2" != "nonmember" ]
+then
+  usage
+  exit 1
+fi
+
+CLASS=""
+case "$1" in
+  cluster)
+CLASS="SaClmCluster"
+;;
+  node)
+CLASS="SaClmNode"
+;;
+  *)
+usage
+exit 1
+esac
+
+list_with_states ()
+{
+  for i in $(immfind -c "$1"); do
+if [ "$2" = "locked" -o  "$2" = "unlocked" ]; then
+  value=$(immlist -a "saClmNodeAdminState" "$i" | cut -d = -f2)
+  if [ "$2" = "locked" -a "$value" -eq 2 ] || [ "$2" = "unlocked" -a 
"$value" -eq 1 ]; then
+echo "$i"
+  fi
+else
+  value=$(immlist -a

Re: [devel] [PATCH 1 of 1] amfd: correct loop variable initialization [#2404]

2017-04-05 Thread praveen malviya

Ack, code review only.

Thanks
Praveen

On 05-Apr-17 11:31 AM, nagendr...@oracle.com wrote:
>   src/amf/amfd/su.cc |  3 ++-
>   1 files changed, 2 insertions(+), 1 deletions(-)
> 
> 
> diff --git a/src/amf/amfd/su.cc b/src/amf/amfd/su.cc
> --- a/src/amf/amfd/su.cc
> +++ b/src/amf/amfd/su.cc
> @@ -1423,9 +1423,10 @@ static SaAisErrorT su_rt_attr_cb(SaImmOi
>   }
>   rc = 
> avd_saImmOiRtObjectUpdate_multival_sync(obj_name, attributeName,
>   SA_IMM_ATTR_SANAMET, temp, 
> assigned_si);
> + j = 0;
>   for (AVD_SU_SI_REL *susi = su->list_of_susi; 
> susi != nullptr; susi = susi->su_next) {
> - j = 0;
>   osaf_extended_name_free(siName + j);
> + j = j + 1;
>   }
>   delete [] siName;
>   delete [] attrValues;
> 

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1 of 1] amfd: ignore node_up if node state is not absent [#2400]

2017-04-04 Thread praveen malviya

Ack, code review only.

Thanks
Praveen

On 03-Apr-17 6:53 AM, Gary Lee wrote:
>  src/amf/amfd/ndfsm.cc |  2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
>
>
> if a duplicate node_up is processed after cluster startup timer is finished,
> don't send a reboot order
>
> diff --git a/src/amf/amfd/ndfsm.cc b/src/amf/amfd/ndfsm.cc
> --- a/src/amf/amfd/ndfsm.cc
> +++ b/src/amf/amfd/ndfsm.cc
> @@ -428,7 +428,7 @@ void avd_node_up_evh(AVD_CL_CB *cb, AVD_
>   LOG_WA("Sending node reboot order to node:%s, due to 
> first node_up_msg after node sync window",
>   
> osaf_extended_name_borrow(_msg->msg_info.n2d_node_up.node_name));
>   avnd->reboot = true;
> - } else if (cb->init_state == AVD_APP_STATE) {
> + } else if (cb->init_state == AVD_APP_STATE && avnd->node_state 
> == AVD_AVND_STATE_ABSENT) {
>   LOG_WA("Sending node reboot order to node:%s, due to 
> late node_up_msg after cluster startup timeout",
>   
> osaf_extended_name_borrow(_msg->msg_info.n2d_node_up.node_name));
>   avnd->reboot = true;
>

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

[devel] [PATCH 1 of 1] clms: return TIME_OUT for unlock op if CLMS update to CLM agent fails [#2381]

2017-04-03 Thread praveen . malviya

 src/clm/clmd/clms_evt.c   |   2 +-
 src/clm/clmd/clms_imm.c   |  11 +--
 src/clm/clmd/clms_mbcsv.c |   2 +-
 src/clm/clmd/clms_util.c  |  16 ++--
 4 files changed, 17 insertions(+), 14 deletions(-)


In this problem, the user first performs a CLM lock operation on a payload and
restarts it immediately.
While the node is joining, the user performs an UNLOCK operation on it. The
operation times out.
After this, CLM rejects any admin operation on this payload with BAD_OP,
indicating that an admin operation is already going on.

For the unlock operation, when CLM tries to send the membership status to clients
on the node being unlocked, MDS returns failure for a client. CLMS does not
continue with the remaining clients and it also does not reply to the IMM client.
This is the reason for the unlock operation getting timed out. Also,
CLM does not clear the internal parameter related to the admin operation. Due to
this, subsequent admin operations on this node are rejected with BAD_OP. There is
no clue in the traces why MDS returned failure.

Generally this can happen when CLMS is trying to send a message to a bunch of
clients and one of them goes down. Since CLMS has not processed this DOWN event,
it will try to send a message to this client, for which MDS will return failure.
Currently the issue is reproduced on this basis, and this patch is based on it.
With the patch, CLM will send the membership status to the remaining clients even
if MDS returns failure for a client. Also, in such a situation, CLMS will return
TIMEOUT to the user and it will clear the internal admin op params so that
subsequent operations will continue.

diff --git a/src/clm/clmd/clms_evt.c b/src/clm/clmd/clms_evt.c
--- a/src/clm/clmd/clms_evt.c
+++ b/src/clm/clmd/clms_evt.c
@@ -137,7 +137,7 @@ CLMS_CLIENT_INFO *clms_client_get_by_id(
rec = (CLMS_CLIENT_INFO *) ncs_patricia_tree_get(_cb->client_db, 
(uint8_t *)_id_net);
 
if (NULL == rec)
-   TRACE("client_id: %u lookup failed", client_id);
+   TRACE("client_id: %u not found", client_id);
 
return rec;
 }
diff --git a/src/clm/clmd/clms_imm.c b/src/clm/clmd/clms_imm.c
--- a/src/clm/clmd/clms_imm.c
+++ b/src/clm/clmd/clms_imm.c
@@ -2221,16 +2221,15 @@ uint32_t clms_imm_node_unlock(CLMS_CLUST
 {
uint32_t rc = NCSCC_RC_SUCCESS;
TRACE_ENTER2("Node name %s to unlock", nodeop->node_name.value);
-
+   SaAisErrorT ais_rc = SA_AIS_OK;
if (nodeop->admin_state == SA_CLM_ADMIN_UNLOCKED) {
-   LOG_ER("Node is already in an unlocked state");
+   LOG_NO("Node is already in an unlocked state");
nodeop->admin_op = 0;
(void)immutil_saImmOiAdminOperationResult(clms_cb->immOiHandle, 
nodeop->curr_admin_inv,
  SA_AIS_ERR_NO_OP);
rc = NCSCC_RC_FAILURE;
goto done;
}
-
if (((nodeop->admin_state == SA_CLM_ADMIN_LOCKED) || 
(nodeop->admin_state == SA_CLM_ADMIN_SHUTTING_DOWN))) {
 
if (clms_cb->reg_with_plm == SA_FALSE) {
@@ -2259,9 +2258,9 @@ uint32_t clms_imm_node_unlock(CLMS_CLUST
clms_node_join_ntf(clms_cb, nodeop);
 
rc = clms_send_is_member_info(clms_cb, 
nodeop->node_id, nodeop->member, true);
-   if(rc != NCSCC_RC_SUCCESS) {
+   if (rc != NCSCC_RC_SUCCESS) {
TRACE("clms_send_is_member_info failed 
%u", rc);
-   goto done;
+   ais_rc = SA_AIS_ERR_TIMEOUT;
}
nodeop->change = SA_CLM_NODE_NO_CHANGE;
}
@@ -2322,7 +2321,7 @@ uint32_t clms_imm_node_unlock(CLMS_CLUST
nodeop->admin_op = 0;
 
/* Send node join notification */
-   (void)immutil_saImmOiAdminOperationResult(clms_cb->immOiHandle, 
nodeop->curr_admin_inv, SA_AIS_OK);
+   (void)immutil_saImmOiAdminOperationResult(clms_cb->immOiHandle, 
nodeop->curr_admin_inv, ais_rc);
clms_node_admin_state_change_ntf(clms_cb, nodeop, 
SA_CLM_ADMIN_UNLOCKED);
  done:
TRACE_LEAVE();
diff --git a/src/clm/clmd/clms_mbcsv.c b/src/clm/clmd/clms_mbcsv.c
--- a/src/clm/clmd/clms_mbcsv.c
+++ b/src/clm/clmd/clms_mbcsv.c
@@ -103,7 +103,7 @@ static CLMS_CKPT_HDLR ckpt_data_handler[
 static uint32_t ckpt_proc_cluster_rec(CLMS_CB * cb, CLMS_CKPT_REC * data)
 {
CLMSV_CKPT_CLUSTER_INFO *param = >param.cluster_rec;
-
+   TRACE_ENTER();
osaf_cluster->num_nodes = param->num_nodes;
osaf_cluster->init_time = param->init_time;
cb->cluster_view_num = param->cluster_view_num;
diff --git a/src/clm/clmd/clms_util.c b/src/clm/clmd/clms_util.c
--- a/src/clm/clmd/clms_util.c
+++ b/src/clm/clmd/clms_util.c
@@ -396,6 +396,7 @@ SaClmClusterNotificationT_4 *clms_notbuf
 
if (num == 0) {

[devel] [PATCH 0 of 1] Review Request for clms: return TIME_OUT for unlock op if CLMS update to CLM agent fails [#2381].

2017-04-03 Thread praveen . malviya

Summary:  clms: return TIME_OUT for unlock op if CLMS update to CLM agent fails 
[#2381].
Review request for Trac Ticket(s): #2381 
Peer Reviewer(s): Anders 
Pull request to: <>
Affected branch(es): ALL 
Development branch: <>


Impacted area   Impact y/n

 Docsn
 Build systemn
 RPM/packaging   n
 Configuration files n
 Startup scripts n
 SAF servicesy
 OpenSAF servicesn
 Core libraries  n
 Samples n
 Tests   n
 Other   n


Comments (indicate scope for each "y" above):
-

changeset 50f1f31babcc78c50b413497d91497266385b997
Author: Praveen Malviya <praveen.malv...@oracle.com>
Date:   Mon, 03 Apr 2017 12:47:47 +0530

clms: return TIME_OUT for unlock op if CLMS update to CLM agent fails
[#2381].

In this problem, first user performs CLM lock operation on payload and
restarts it immediately. When node was joining, user performs UNLOCK
operation on it. Operation gets timed out. After this, CLM rejects any 
admin
operation on this payload with BAD_OP indicating that an admin 
operation is
already going on.

For unlock operation when CLM tries to send membership status to 
clients on
the node being unlocked, MDS returns failure for a client. CLMS does not
continue with remaining clients and it does not reply to IMM client 
also.
This the reason of unlocked opreration getting timed out. Also CLM does 
not
clear internal parameter related to admin operation. Due to this 
subsequent
admin operationa on this node are rejected with BAD_OP. There is no 
clue in
traces why MDS returned failure.

Generally this can happen when CLMS is trying to send message to a 
bunch of
clients and one of them goes down. Since CLMS has not processed this 
DOWN
event, it will try to send message to this cleint for which MDS will 
return
failure. Currently issue is reproduced on this basis. This patch is 
based on
this. With the patch CLM will send membership status to remaining 
clients
even if MDS returns failure for a client. Also in such a situation, CLMS
will return TIMEOUT to the user and it will clear internal admin op 
params
so that subsequent operation will continue.


Complete diffstat:
--
 src/clm/clmd/clms_evt.c   |   2 +-
 src/clm/clmd/clms_imm.c   |  11 +--
 src/clm/clmd/clms_mbcsv.c |   2 +-
 src/clm/clmd/clms_util.c  |  16 ++--
 4 files changed, 17 insertions(+), 14 deletions(-)


Testing Commands:
-
1)Tested the case commented in the ticket.
2)LOCK, SHUTDOWN and UNLOCK operation.
Testing, Expected Results:
--
CLMS will accept admin operation on node after unlock.

Conditions of Submission:
-
Ack from reviewer.

Arch  Built StartedLinux distro
---
mipsn  n
mips64  n  n
x86 n  n
x86_64  y  y
powerpc n  n
powerpc64   n  n


Reviewer Checklist:
---
[Submitters: make sure that your review doesn't trigger any checkmarks!]


Your checkin has not passed review because (see checked entries):

___ Your RR template is generally incomplete; it has too many blank entries
that need proper data filled in.

___ You have failed to nominate the proper persons for review and push.

___ Your patches do not have proper short+long header

___ You have grammar/spelling in your header that is unacceptable.

___ You have exceeded a sensible line length in your headers/comments/text.

___ You have failed to put in a proper Trac Ticket # into your commits.

___ You have incorrectly put/left internal data in your comments/files
(i.e. internal bug tracking tool IDs, product names etc)

___ You have not given any evidence of testing beyond basic build tests.
Demonstrate some level of runtime or other sanity testing.

___ You have ^M present in some of your files. These have to be removed.

___ You have needlessly changed whitespace or added whitespace crimes
like trailing spaces, or spaces before tabs.

___ You have mixed real technical changes with whitespace and other
cosmetic code cleanup changes. These have to be separate commits.

___ You need to refactor your submission into logical chunks; there is
too much content into a single commit.

___ You have extraneous garbage in your review (merge commits etc)

___ You have giant attachments which should never have been sent;
Instead you should place your content in a public tree to be pulled.

___ You have too many commits attached to an e-mail; resend as threaded
commits, or place in a publ

[devel] [PATCH 1 of 1] amf: fix track callback when multiple CLM nodes leaves membership[#2372]

2017-03-27 Thread praveen . malviya

 src/amf/amfd/clm.cc  |  43 ++-
 src/amf/amfnd/clm.cc |   6 --
 2 files changed, 34 insertions(+), 15 deletions(-)


In the reported issue, two CLM nodes are locked simultaneously. For one of the
nodes, the CLM lock times out and the user gets REPAIR_PENDING as the return code.
The two payloads being locked host amf_demo with the 2N model.

When AMFD gets the CLM track callback for PL-3, it starts terminating amf_demo on
PL-3. While termination of amf_demo is still going on, the user CLM-locks PL-4 and
AMF gets another track callback with rootCauseEntity as PL-4. The callback also
contains information about PL-3, as this node is still in the pending change phase.
AMFD starts terminating amf_demo on PL-4, but at the same time it incorrectly
responds for PL-3 with the invocationId of the PL-4 callback. CLM assumes that
change_started has completed for PL-4 and sends the completion callback for PL-4.
In this callback, AMF clears the internal flags which monitor the graceful removal
of nodes.
Since AMF never responds to the PL-3 callback, the node lock timer expires in CLMD,
which sends the complete callback to AMF and responds to the user with
REPAIR_PENDING. AMF thinks this is a case of node failover and tries to fail over
PL-3.

The patch fixes this problem in both AMFD and AMFND:
-to act on the CHANGE_START step only once for a node (amfd).
-to act on the COMPLETE step only when rootCauseEntity matches and if it
 is a graceful removal of the node (amfd).
-to act only once in the track callback for the COMPLETE step (amfnd).

diff --git a/src/amf/amfd/clm.cc b/src/amf/amfd/clm.cc
--- a/src/amf/amfd/clm.cc
+++ b/src/amf/amfd/clm.cc
@@ -203,6 +203,7 @@ static void clm_node_exit_complete(SaClm
 
avd_node_failover(node);
m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, node, 
AVSV_CKPT_AVD_NODE_CONFIG);
+   node->clm_change_start_preceded = false;
 
 done:
TRACE_LEAVE();
@@ -218,7 +219,7 @@ static void clm_track_cb(const SaClmClus
AVD_AVND *node;
 
TRACE_ENTER2("'%llu' '%u' '%u'", invocation, step, error);
-
+   
if (error != SA_AIS_OK) {
LOG_ER("ClmTrackCallback received in error");
goto done;
@@ -232,10 +233,13 @@ static void clm_track_cb(const SaClmClus
** The CLM cluster can be larger than the AMF cluster thus it is not an
** error if the corresponding AMF node cannot be found.
*/
+   TRACE("numberOfMembers:'%u', numberOfItems:'%u'", numberOfMembers,
+   notificationBuffer->numberOfItems);
for (i = 0; i < notificationBuffer->numberOfItems; i++)
{
notifItem = >notification[i];
const std::string 
node_name(Amf::to_string(>clusterNode.nodeName));
+   TRACE("i=%u, node:'%s', clusterChange:%u",i, node_name.c_str(), 
notifItem->clusterChange);
switch(step) {
case SA_CLM_CHANGE_VALIDATE:
if(notifItem->clusterChange == SA_CLM_NODE_LEFT) {
@@ -264,6 +268,10 @@ static void clm_track_cb(const SaClmClus
}
if ( notifItem->clusterChange == SA_CLM_NODE_LEFT ||
 notifItem->clusterChange == SA_CLM_NODE_SHUTDOWN ) 
{
+   if (node->clm_change_start_preceded == true) {
+   TRACE_3("Already got callback for start 
of this change.");
+   continue;
+   }
/* invocation to be used by pending clm 
response */ 
node->clm_pend_inv = invocation;
clm_node_exit_start(node, 
notifItem->clusterChange);
@@ -298,25 +306,34 @@ static void clm_track_cb(const SaClmClus
}

clm_node_exit_complete(notifItem->clusterNode.nodeId);
} else if 
(strncmp(osaf_extended_name_borrow(rootCauseEntity), "safNode=", 8) == 0) {
+   const std::string 
rootCause_clm_node(Amf::to_string(rootCauseEntity));
/* This callback is because of 
operation on CLM.*/
-   if(true == 
node->clm_change_start_preceded) {
+   if (true == 
node->clm_change_start_preceded) {
/* We have got a completed 
callback with start cbk step before, so 
   already locking applications 
might have been done. So, no action
-  is needed.*/
-   node->clm_change_start_preceded 
= false; 
-   node->node_info.member = 
SA_FALSE;
-   
m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, node,

[devel] [PATCH 0 of 1] Review Request for amf: fix track callback when multiple CLM nodes leaves membership[#2372].

2017-03-27 Thread praveen . malviya

Summary: amf: fix track callback when multiple CLM nodes leaves 
membership[#2372]. 
Review request for Trac Ticket(s): #2372 
Peer Reviewer(s): AMF devs 
Pull request to: <>
Affected branch(es): ALL 
Development branch: <>


Impacted area   Impact y/n

 Docsn
 Build systemn
 RPM/packaging   n
 Configuration files n
 Startup scripts n
 SAF servicesy
 OpenSAF servicesn
 Core libraries  n
 Samples n
 Tests   n
 Other   n


Comments (indicate scope for each "y" above):
-

changeset 1ee79821742a117265da9a4d5ba60617ac86e2e4
Author: Praveen Malviya <praveen.malv...@oracle.com>
Date:   Mon, 27 Mar 2017 15:25:18 +0530

amf: fix track callback when multiple CLM nodes leaves 
membership[#2372].

In reported issue, two CLM nodes are locked simultaneously. For one of 
the
nodes, CLM lock gets timed out and user gets REPAIR_PENDING as return 
code.
The two payloads being locked hosts Amf_demo with 2N model.

When AMFD gets CLM track callback for PL-3 it starts terminating amf 
demo on
PL-3. When termination of amf_demo still going on, user clm locks PL-4 
and
AMF gets another track callback with rootCauseEntity as PL-4. Callback
contains information of PL-3 also as this node is still in pending 
change
phase. AMFD starts terminating amf_demo on PL-4 but at the same time it
incorrectly responds for PL-3 with invocationId of PL-4 callback. CLM
assumes that for PL-4 change_started completed and sends completion 
callback
for PL-4. In this callback, AMF clears internal flags which monitors the
graceful removal of nodes. Since AMF never responds for PL-3 callback, 
node
lock timer expires in CLMD and it sends complete callback to AMF and
responds user with REPAIR_PENDING. AMF thinks this is the case of
nodefailover and tries to failover PL-3.

Patch fixes this problem in both AMFD and AMFND:
-to act on CHANGE_START step only once for a node (amfd).
-to act on COMPLETE step only when rootCauseEntity matches and if it is
graceful removal of node(amfd).
-to act only once in track callback for COMPLETE step(amfnd).


Complete diffstat:
--
 src/amf/amfd/clm.cc  |  43 ++-
 src/amf/amfnd/clm.cc |   6 --
 2 files changed, 34 insertions(+), 15 deletions(-)


Testing Commands:
-
tested both the cases mentioned in the ticket.

Testing, Expected Results:
--
Both the cases passed.

Conditions of Submission:
-
Ack from any reviewer.

Arch  Built StartedLinux distro
---
mipsn  n
mips64  n  n
x86 n  n
x86_64  y  y
powerpc n  n
powerpc64   n  n


Reviewer Checklist:
---
[Submitters: make sure that your review doesn't trigger any checkmarks!]


Your checkin has not passed review because (see checked entries):

___ Your RR template is generally incomplete; it has too many blank entries
that need proper data filled in.

___ You have failed to nominate the proper persons for review and push.

___ Your patches do not have proper short+long header

___ You have grammar/spelling in your header that is unacceptable.

___ You have exceeded a sensible line length in your headers/comments/text.

___ You have failed to put in a proper Trac Ticket # into your commits.

___ You have incorrectly put/left internal data in your comments/files
(i.e. internal bug tracking tool IDs, product names etc)

___ You have not given any evidence of testing beyond basic build tests.
Demonstrate some level of runtime or other sanity testing.

___ You have ^M present in some of your files. These have to be removed.

___ You have needlessly changed whitespace or added whitespace crimes
like trailing spaces, or spaces before tabs.

___ You have mixed real technical changes with whitespace and other
cosmetic code cleanup changes. These have to be separate commits.

___ You need to refactor your submission into logical chunks; there is
too much content into a single commit.

___ You have extraneous garbage in your review (merge commits etc)

___ You have giant attachments which should never have been sent;
Instead you should place your content in a public tree to be pulled.

___ You have too many commits attached to an e-mail; resend as threaded
commits, or place in a public tree for a pull.

___ You have resent this content multiple times without a clear indication
of what has changed between each re-send.

___ You have failed to adequately

Re: [devel] [PATCH 0 of 3] Review Request for amf: Fix all Cppcheck 1.77 issues [#2341] V3

2017-03-22 Thread praveen malviya

Hi Mahesh,

5.2RC2 tag is planned on 2017-03-24. I will be reviewing after tagging 
on Monday.

Thanks,
Praveen

On 22-Mar-17 1:41 PM, A V Mahesh wrote:
> Hi  AMF dev,
>
> A gentle reminder for review.
>
> So far I have received  comments for Gary , If you guys doesn't have any
> other comments , I will  push by tomorrow EOD.
>
> -AVM
>
>
> On 3/20/2017 10:49 AM, A V Mahesh wrote:
>> Summary:amf: Fix all Cppcheck 1.77 issues [#2341] V3
>> Review request for Trac Ticket(s): #2341
>> Peer Reviewer(s): AMF dev
>> Pull request to: <>
>> Affected branch(es): default
>> Development branch: default
>>
>> 
>> Impacted area   Impact y/n
>> 
>>   Docsn
>>   Build systemn
>>   RPM/packaging   n
>>   Configuration files n
>>   Startup scripts n
>>   SAF servicesn
>>   OpenSAF servicesy
>>   Core libraries  n
>>   Samples n
>>   Tests   n
>>   Other   n
>>
>>
>> Comments (indicate scope for each "y" above):
>> -
>>
>> changeset efb3364a0779447b858cbd0cdae1b92f0a2d2716
>> Author:A V Mahesh 
>> Date:Mon, 20 Mar 2017 10:33:35 +0530
>>
>> amfd: Fix all Cppcheck 1.77 issues [#2341] V3
>>
>>  V3 fixed review comments.
>>
>> [src/amf/amfd/app.cc:285]: (style) The scope of the variable 'i'
>> can be
>> reduced. [src/amf/amfd/apptype.cc:137]: (style) Condition 'rc!=0'
>> is always
>> false [src/amf/amfd/apptype.cc:89] -> [src/amf/amfd/apptype.cc:84]:
>> (warning, inconclusive) Either the condition
>> '(attr=attributes[i++])!=nullptr' is redundant or there is
>> possible null
>> pointer dereference: attr. [src/amf/amfd/apptype.cc:129] ->
>> [src/amf/amfd/apptype.cc:124]: (warning, inconclusive) Either the
>> condition
>> '(attr=attributes[i++])!=nullptr' is redundant or there is
>> possible null
>> pointer dereference: attr. [src/amf/amfd/apptype.cc:69]: (style)
>> The scope
>> of the variable 'sg_type' can be reduced.
>> [src/amf/amfd/chkop.cc:1297] ->
>> [src/amf/amfd/chkop.cc:1302]: (style) Variable 'uba' is reassigned
>> a value
>> before the old one has been used. [src/amf/amfd/ckpt_dec.cc:374] ->
>> [src/amf/amfd/ckpt_dec.cc:382]: (style) Variable 'status' is
>> reassigned a
>> value before the old one has been used.
>> [src/amf/amfd/ckpt_dec.cc:573] ->
>> [src/amf/amfd/ckpt_dec.cc:577]: (style) Variable 'status' is
>> reassigned a
>> value before the old one has been used.
>> [src/amf/amfd/ckpt_dec.cc:1109]:
>> (performance) Prefer prefix ++/-- operators for non-primitive types.
>> [src/amf/amfd/ckpt_edu.cc:51] -> [src/amf/amfd/ckpt_edu.cc:56]:
>> (style)
>> Variable 'rc' is reassigned a value before the old one has been used.
>> [src/amf/amfd/ckpt_enc.cc:2281] ->
>> [src/amf/amfd/ckpt_enc.cc:2288]: (style)
>> Variable 'status' is reassigned a value before the old one has
>> been used.
>> [src/amf/amfd/ckpt_enc.cc:2314] ->
>> [src/amf/amfd/ckpt_enc.cc:2322]: (style)
>> Variable 'status' is reassigned a value before the old one has
>> been used.
>> [src/amf/amfd/ckpt_enc.cc:1951]: (performance) Prefer prefix ++/--
>> operators
>> for non-primitive types. [src/amf/amfd/ckpt_enc.cc:1982]:
>> (performance)
>> Prefer prefix ++/-- operators for non-primitive types.
>> [src/amf/amfd/ckpt_enc.cc:2015]: (performance) Prefer prefix ++/--
>> operators
>> for non-primitive types. [src/amf/amfd/ckpt_enc.cc:2044]:
>> (performance)
>> Prefer prefix ++/-- operators for non-primitive types.
>> [src/amf/amfd/ckpt_enc.cc:2076]: (performance) Prefer prefix ++/--
>> operators
>> for non-primitive types. [src/amf/amfd/ckpt_enc.cc:2111]:
>> (performance)
>> Prefer prefix ++/-- operators for non-primitive types.
>> [src/amf/amfd/ckpt_enc.cc:2151]: (performance) Prefer prefix ++/--
>> operators
>> for non-primitive types. [src/amf/amfd/ckpt_enc.cc:2176]:
>> (performance)
>> Prefer prefix ++/-- operators for non-primitive types.
>> [src/amf/amfd/ckpt_enc.cc:2216]: (performance) Prefer prefix ++/--
>> operators
>> for non-primitive types. [src/amf/amfd/ckpt_enc.cc:2252]:
>> (performance)
>> Prefer prefix ++/-- operators for non-primitive types.
>> [src/amf/amfd/ckpt_enc.cc:2470]: (performance) Prefer prefix ++/--
>> operators
>> for non-primitive types. [src/amf/amfd/clm.cc:452] ->
>> [src/amf/amfd/clm.cc:456]: (style, inconclusive) Variable 'error' is
>> reassigned a value before the old one has been used if variable is no
>> semaphore variable. [src/amf/amfd/clm.cc:473] ->
>> [src/amf/amfd/clm.cc:475]:
>> (style, inconclusive) Variable 'error' is reassigned a value
>> before the old
>> one has been used if variable is no semaphore variable.
>> [src/amf/amfd/clm.cc:344]: (performance)

[devel] [PATCH 0 of 1] Review Request for amfd: choose CLM unlocked spare controller for standby role in failover situation[#2387]

2017-03-21 Thread praveen . malviya

Summary: amfd: choose CLM unlocked spare controller for standby role in 
failover situation[#2387]
Review request for Trac Ticket(s): #2387 
Peer Reviewer(s): AMF devs 
Pull request to: <>
Affected branch(es): ALL 
Development branch: <>


Impacted area   Impact y/n

 Docsn
 Build systemn
 RPM/packaging   n
 Configuration files n
 Startup scripts n
 SAF servicesy
 OpenSAF servicesn
 Core libraries  n
 Samples n
 Tests   n
 Other   n


Comments (indicate scope for each "y" above):
-

changeset 9d28b2e0bba4e479bc65c0df6d55d6cc3f71ecd4
Author: Praveen Malviya <praveen.malv...@oracle.com>
Date:   Tue, 21 Mar 2017 15:06:48 +0530

amfd: choose CLM unlocked spare controller for standby role in failover
situation[#2387]

When spare controllers are configured in cluster, AMF is choosing CLM 
locked
controller for fresh standby controller during failover situation. 
Currently
fresh standby assignment on CLM locked controller in failover situation
fails because of issue in SMF #1791. Even if SMF issue is fixed, AMF may
choose a CLM locked controller for fresh assignment. This will prohibit 
a
user to use si-swap operation for controller swap.

If available, AMF must choose CLM unlocked spare controller for fresh
standby assignments.


Complete diffstat:
--
 src/amf/amfd/clm.cc   |   2 ++
 src/amf/amfd/sg_2n_fsm.cc |  12 +++-
 2 files changed, 13 insertions(+), 1 deletions(-)


Testing Commands:
-
Brought 5 controllers up in UML environment.
CLM lock of SC-3.
Stop opensaf on active controller.

Testing, Expected Results:
--
AMF does not choose CLM locked spare controller for fresh standby assignments.

Conditions of Submission:
-
Ack from reviewers.

Arch  Built StartedLinux distro
---
mipsn  n
mips64  n  n
x86 n  n
x86_64  y  y
powerpc n  n
powerpc64   n  n


Reviewer Checklist:
---
[Submitters: make sure that your review doesn't trigger any checkmarks!]


Your checkin has not passed review because (see checked entries):

___ Your RR template is generally incomplete; it has too many blank entries
that need proper data filled in.

___ You have failed to nominate the proper persons for review and push.

___ Your patches do not have proper short+long header

___ You have grammar/spelling in your header that is unacceptable.

___ You have exceeded a sensible line length in your headers/comments/text.

___ You have failed to put in a proper Trac Ticket # into your commits.

___ You have incorrectly put/left internal data in your comments/files
(i.e. internal bug tracking tool IDs, product names etc)

___ You have not given any evidence of testing beyond basic build tests.
Demonstrate some level of runtime or other sanity testing.

___ You have ^M present in some of your files. These have to be removed.

___ You have needlessly changed whitespace or added whitespace crimes
like trailing spaces, or spaces before tabs.

___ You have mixed real technical changes with whitespace and other
cosmetic code cleanup changes. These have to be separate commits.

___ You need to refactor your submission into logical chunks; there is
too much content into a single commit.

___ You have extraneous garbage in your review (merge commits etc)

___ You have giant attachments which should never have been sent;
Instead you should place your content in a public tree to be pulled.

___ You have too many commits attached to an e-mail; resend as threaded
commits, or place in a public tree for a pull.

___ You have resent this content multiple times without a clear indication
of what has changed between each re-send.

___ You have failed to adequately and individually address all of the
comments and change requests that were proposed in the initial review.

___ You have a misconfigured ~/.hgrc file (i.e. username, email etc)

___ Your computer has a badly configured date and time; confusing
the threaded patch review.

___ Your changes affect IPC mechanism, and you don't present any results
for in-service upgradability test.

___ Your changes affect user manual and documentation, your patch series
do not contain the patch that updates the Doxygen manual.


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-deve

[devel] [PATCH 1 of 1] amfd: choose CLM unlocked spare controller for standby role in failover situation[#2387]

2017-03-21 Thread praveen . malviya

 src/amf/amfd/clm.cc   |   2 ++
 src/amf/amfd/sg_2n_fsm.cc |  12 +++-
 2 files changed, 13 insertions(+), 1 deletions(-)


When spare controllers are configured in cluster, AMF is choosing CLM locked 
controller for
fresh standby controller during failover situation. Currently fresh standby 
assignment
on CLM locked controller in failover situation fails because of issue in SMF 
#1791.
Even if SMF issue is fixed, AMF may choose a CLM locked controller for fresh 
assignment.
This will prohibit a user to use si-swap operation for controller swap.

If available, AMF must choose CLM unlocked spare controller for fresh standby 
assignments.

diff --git a/src/amf/amfd/clm.cc b/src/amf/amfd/clm.cc
--- a/src/amf/amfd/clm.cc
+++ b/src/amf/amfd/clm.cc
@@ -202,6 +202,7 @@ static void clm_node_exit_complete(SaClm
 }
 
avd_node_failover(node);
+   m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, node, 
AVSV_CKPT_AVD_NODE_CONFIG);
 
 done:
TRACE_LEAVE();
@@ -304,6 +305,7 @@ static void clm_track_cb(const SaClmClus
   is needed.*/
node->clm_change_start_preceded 
= false; 
node->node_info.member = 
SA_FALSE;
+   
m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, node, AVSV_CKPT_AVD_NODE_CONFIG);
}
else
{
diff --git a/src/amf/amfd/sg_2n_fsm.cc b/src/amf/amfd/sg_2n_fsm.cc
--- a/src/amf/amfd/sg_2n_fsm.cc
+++ b/src/amf/amfd/sg_2n_fsm.cc
@@ -659,7 +659,17 @@ static AVD_SU *avd_sg_2n_su_chose_asgn(A
for (const auto& iter : sg->list_of_su) {
if (iter->saAmfSuReadinessState == 
SA_AMF_READINESS_IN_SERVICE &&
iter->list_of_susi == AVD_SU_SI_REL_NULL) {
-   s_su = iter;
+
+   /* Assign standby for MW SU on CLM enabled 
node. If not available
+  then choose based on rank(list_of_su is 
based on rank).*/ 
+   if (s_su == nullptr)
+   s_su = iter;
+   if (iter->sg_of_su->sg_ncs_spec == true) {
+   if (iter->su_on_node->node_info.member 
== SA_FALSE)
+   continue;
+   else 
+   s_su = iter;
+   }
break;
}
}

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1 of 1] amfd: remove assignments from lower ranked SU while adjusting SI assignments [#2268]

2017-03-21 Thread praveen malviya



On 21-Mar-17 12:57 PM, Gary Lee wrote:
> Hi Praveen
>
> I think it might be nicer to reduce the 2 else branches (ie. count > 
> si->saAmfSINumCurrActiveAssignments and count < 
> si->saAmfSINumCurrActiveAssignments) to a single else block, with a comment 
> saying adjustments are required.
>
The two else branches are required because in one case 
si->saAmfSINumCurrActiveAssignments is updated before actually adjusting 
the assignment and in other case after adjusting the assignments to 
avoid asserts in the code and also for fresh assignments.
> Thanks
> Gary
>
> -----Original Message-
> From: praveen malviya <praveen.malv...@oracle.com>
> Organization: Oracle Corporation
> Date: Tuesday, 21 March 2017 at 4:49 pm
> To: gary <gary@dektech.com.au>, <hans.nordeb...@ericsson.com>, 
> <nagendr...@oracle.com>, <minh.c...@dektech.com.au>
> Cc: <opensaf-devel@lists.sourceforge.net>
> Subject: Re: [PATCH 1 of 1] amfd: remove assignments from lower ranked SU 
> while adjusting SI assignments [#2268]
>
> Hi Gary,
>
> Thanks for the reviewing the patch.
> For readability I had kept separate handling for new definition in the
> if block.
>
> Now I will push patch with below refactoring :
>   /* Check if we need to readjust the SI assignments as
> PrefActiveAssignments
>  got changed */
>  uint32_t count = mod_pref_assignments;
>  if (mod_pref_assignments == 0) {
>  //Zero is set for using PrefAssignedSus
> as default arguments.
>  count = 
> si->sg_of_si->pref_assigned_sus();
>  }
>  if (count ==
> si->saAmfSINumCurrActiveAssignments ) {
>  TRACE("Assignments are equal updating
> the SI status ");
>  si->saAmfSIPrefActiveAssignments =
> mod_pref_assignments;
>  } else if (count >
> si->saAmfSINumCurrActiveAssignments) {
>  si->saAmfSIPrefActiveAssignments =
> mod_pref_assignments;
>  si->adjust_si_assignments(count);
>  } else if (count <
> si->saAmfSINumCurrActiveAssignments) {
>  si->adjust_si_assignments(count);
>  si->saAmfSIPrefActiveAssignments =
> mod_pref_assignments;
>  }
>  TRACE("Modified saAmfSIPrefActiveAssignments is
> '%u'", si->saAmfSIPrefActiveAssignments);
>  si->update_ass_state();
>
>
> Thanks,
> Praveen
>
> On 21-Mar-17 7:50 AM, Gary Lee wrote:
> > Hi Praveen
> >
> > Ack (review only + regression tests run) with minor comment below.
> >
> > Thanks
> > Gary
> >
> > -Original Message-
> > From: <praveen.malv...@oracle.com>
> > Date: Friday, 17 March 2017 at 8:22 pm
> > To: <hans.nordeb...@ericsson.com>, <nagendr...@oracle.com>, gary 
> <gary@dektech.com.au>, <minh.c...@dektech.com.au>
> > Cc: <opensaf-devel@lists.sourceforge.net>
> > Subject: [PATCH 1 of 1] amfd: remove assignments from lower ranked SU 
> while adjusting SI assignments [#2268]
> >
> >  src/amf/amfd/si.cc |  92 
> +++---
> >  1 files changed, 46 insertions(+), 46 deletions(-)
> >
> >
> > In N-Way Active mode, when saAmfSIPrefActiveAssignments is reduced,
> > AMFD removes assignments from higher ranked SU when siranked su is 
> not configured and lower
> > ranked SU have assignments.
> > Similar issue in N-Way model when SiPrefStandbyAssignment is 
> reduced. Also AMFD is not
> > checking HA state of susi and tries to delete active susi and 
> crashes.
> >
> > Patch fixes the problem by removing assignments from lower ranked 
> SU.
> >
> > diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc
> > --- a/src/amf/amfd/si.cc
> > +++ b/src/amf/amfd/si.cc
> > @@ -1052,8 +1052,7 @@ done:
> >   */
> >  void AVD_SI::adjust_si_assignments(const uint32_t 
> mod_pref_assignments)
> >  {
> &g

Re: [devel] [PATCH 1 of 1] amfd: remove assignments from lower ranked SU while adjusting SI assignments [#2268]

2017-03-20 Thread praveen malviya

Hi Gary,

Thanks for reviewing the patch.
For readability I had kept separate handling for new definition in the 
if block.

Now I will push patch with below refactoring :
  /* Check if we need to readjust the SI assignments as 
PrefActiveAssignments
 got changed */
 uint32_t count = mod_pref_assignments;
 if (mod_pref_assignments == 0) {
 //Zero is set for using PrefAssignedSus 
as default arguments.
 count = si->sg_of_si->pref_assigned_sus();
 }
 if (count == 
si->saAmfSINumCurrActiveAssignments ) {
 TRACE("Assignments are equal updating 
the SI status ");
 si->saAmfSIPrefActiveAssignments = 
mod_pref_assignments;
 } else if (count > 
si->saAmfSINumCurrActiveAssignments) {
 si->saAmfSIPrefActiveAssignments = 
mod_pref_assignments;
 si->adjust_si_assignments(count);
 } else if (count < 
si->saAmfSINumCurrActiveAssignments) {
 si->adjust_si_assignments(count);
 si->saAmfSIPrefActiveAssignments = 
mod_pref_assignments;
 }
 TRACE("Modified saAmfSIPrefActiveAssignments is 
'%u'", si->saAmfSIPrefActiveAssignments);
 si->update_ass_state();


Thanks,
Praveen

On 21-Mar-17 7:50 AM, Gary Lee wrote:
> Hi Praveen
>
> Ack (review only + regression tests run) with minor comment below.
>
> Thanks
> Gary
>
> -Original Message-
> From: 
> Date: Friday, 17 March 2017 at 8:22 pm
> To: , , gary 
> , 
> Cc: 
> Subject: [PATCH 1 of 1] amfd: remove assignments from lower ranked SU while 
> adjusting SI assignments [#2268]
>
>  src/amf/amfd/si.cc |  92 
> +++---
>  1 files changed, 46 insertions(+), 46 deletions(-)
>
>
> In N-Way Active mode, when saAmfSIPrefActiveAssignments is reduced,
> AMFD removes assignments from higher ranked SU when siranked su is not 
> configured and lower
> ranked SU have assignments.
> Similar issue in N-Way model when SiPrefStandbyAssignment is reduced. 
> Also AMFD is not
> checking HA state of susi and tries to delete active susi and crashes.
>
> Patch fixes the problem by removing assignments from lower ranked SU.
>
> diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc
> --- a/src/amf/amfd/si.cc
> +++ b/src/amf/amfd/si.cc
> @@ -1052,8 +1052,7 @@ done:
>   */
>  void AVD_SI::adjust_si_assignments(const uint32_t mod_pref_assignments)
>  {
> - AVD_SU_SI_REL *sisu, *tmp_sisu;
> - uint32_t no_of_sisus_to_delete;
> + AVD_SU_SI_REL *sisu;
>   uint32_t i = 0;
>
>   TRACE_ENTER2("for SI:%s ", name.c_str());
> @@ -1073,31 +1072,24 @@ void AVD_SI::adjust_si_assignments(const
>   TRACE("No New assignments are been done SI:%s", 
> name.c_str());
>   }
>   } else {
> - no_of_sisus_to_delete = saAmfSINumCurrActiveAssignments 
> -
> - mod_pref_assignments;
> -
> - /* Get the sisu pointer from the  si->list_of_sisu list 
> from which
> - no of sisus need to be deleted based on SI ranked SU */
> - sisu = tmp_sisu = list_of_sisu;
> - for( i = 0; i < no_of_sisus_to_delete && nullptr != 
> tmp_sisu; i++ ) {
> - tmp_sisu = tmp_sisu->si_next;
> + if (list_of_sisu == nullptr)
> +   return;
> + /*
> +avd_susi_create() keeps sisus in list_of_sisu in 
> order from highest
> +ranked to lowest ranked.
> +Keep mod_pref_assignments in list_of_sisu from 
> beginning and delete others.
> +  */
> + sisu = list_of_sisu;
> + for( i = 0; ((i < mod_pref_assignments) && (sisu != 
> nullptr)); i++ ) {
> + sisu = sisu->si_next;
>   }
> - while( tmp_sisu && (tmp_sisu->si_next != nullptr) ) {
> - sisu = sisu->si_next;
> - tmp_sisu = tmp_sisu->si_next;
> - }
> -
> - for( i = 0; i < no_of_sisus_to_delete && (nullptr != 
> sisu); i++ ) {
> - /* Send quiesced request for the sisu that 
> needs tobe deleted */
> +

Re: [devel] [PATCH 1 of 1] amfd: remove assignments from lower ranked SU while adjusting SI assignments [#2268]

2017-03-19 Thread praveen malviya

Hi All,

Please review this patch and provide your feedback.
I want to push it on RC2.


Thanks,
Praveen


On 17-Mar-17 2:52 PM, praveen.malv...@oracle.com wrote:
>  src/amf/amfd/si.cc |  92 
> +++---
>  1 files changed, 46 insertions(+), 46 deletions(-)
>
>
> In N-Way Active mode, when saAmfSIPrefActiveAssignments is reduced,
> AMFD removes assignments from higher ranked SU when siranked su is not 
> configured and lower
> ranked SU have assignments.
> Similar issue in N-Way model when SiPrefStandbyAssignment is reduced. Also 
> AMFD is not
> checking HA state of susi and tries to delete active susi and crashes.
>
> Patch fixes the problem by removing assignments from lower ranked SU.
>
> diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc
> --- a/src/amf/amfd/si.cc
> +++ b/src/amf/amfd/si.cc
> @@ -1052,8 +1052,7 @@ done:
>   */
>  void AVD_SI::adjust_si_assignments(const uint32_t mod_pref_assignments)
>  {
> - AVD_SU_SI_REL *sisu, *tmp_sisu;
> - uint32_t no_of_sisus_to_delete;
> + AVD_SU_SI_REL *sisu;
>   uint32_t i = 0;
>
>   TRACE_ENTER2("for SI:%s ", name.c_str());
> @@ -1073,31 +1072,24 @@ void AVD_SI::adjust_si_assignments(const
>   TRACE("No New assignments are been done SI:%s", 
> name.c_str());
>   }
>   } else {
> - no_of_sisus_to_delete = saAmfSINumCurrActiveAssignments 
> -
> - mod_pref_assignments;
> -
> - /* Get the sisu pointer from the  si->list_of_sisu list 
> from which
> - no of sisus need to be deleted based on SI ranked SU */
> - sisu = tmp_sisu = list_of_sisu;
> - for( i = 0; i < no_of_sisus_to_delete && nullptr != 
> tmp_sisu; i++ ) {
> - tmp_sisu = tmp_sisu->si_next;
> + if (list_of_sisu == nullptr)
> +   return;
> + /*
> +avd_susi_create() keeps sisus in list_of_sisu in 
> order from highest
> +ranked to lowest ranked.
> +Keep mod_pref_assignments in list_of_sisu from 
> beginning and delete others.
> +  */
> + sisu = list_of_sisu;
> + for( i = 0; ((i < mod_pref_assignments) && (sisu != 
> nullptr)); i++ ) {
> + sisu = sisu->si_next;
>   }
> - while( tmp_sisu && (tmp_sisu->si_next != nullptr) ) {
> - sisu = sisu->si_next;
> - tmp_sisu = tmp_sisu->si_next;
> - }
> -
> - for( i = 0; i < no_of_sisus_to_delete && (nullptr != 
> sisu); i++ ) {
> - /* Send quiesced request for the sisu that 
> needs tobe deleted */
> + for( ; sisu != nullptr; sisu = sisu->si_next) {
>   if (avd_susi_mod_send(sisu, SA_AMF_HA_QUIESCED) 
> == NCSCC_RC_SUCCESS) {
> - /* Add SU to su_opr_list */
>   avd_sg_su_oper_list_add(avd_cb, 
> sisu->su, false);
>   }
> - sisu = sisu->si_next;
>   }
> - /* Change the SG FSM to AVD_SG_FSM_SG_REALIGN if 
> assignment is sent.*/
> - if (i > 0)
> - sg_of_si->set_fsm_state(AVD_SG_FSM_SG_REALIGN);
> + /* Change the SG FSM to AVD_SG_FSM_SG_REALIGN as 
> assignment is sent.*/
> + sg_of_si->set_fsm_state(AVD_SG_FSM_SG_REALIGN);
>   }
>   }
>   if( sg_of_si->sg_redundancy_model == SA_AMF_N_WAY_REDUNDANCY_MODEL ) {
> @@ -1107,30 +1099,28 @@ void AVD_SI::adjust_si_assignments(const
>   LOG_ER("SI new assignmemts failed  SI:%s", 
> name.c_str());
>   }
>   } else {
> - no_of_sisus_to_delete = 0;
> - no_of_sisus_to_delete = 
> saAmfSINumCurrStandbyAssignments -
> - mod_pref_assignments;
> -
> - /* Get the sisu pointer from the  si->list_of_sisu list 
> from which
> - no of sisus need to be deleted based on SI ranked SU */
> - sisu = tmp_sisu = list_of_sisu;
> - for(i = 0; i < no_of_sisus_to_delete && (nullptr != 
> tmp_sisu); i++) {
> - tmp_sisu = tmp_sisu->si_next;
> + if (list_of_sisu == nullptr)
> + return;
> + /*
> +avd_susi_create() keeps sisus in list_of_sisu in 
> order from highest
> +ranked to lowest ranked.
> +

[devel] [PATCH 0 of 1] Review Request for amfd: remove assignments from lower ranked SU while adjusting SI assignments [#2268]

2017-03-17 Thread praveen . malviya

Summary: amfd: remove assignments from lower ranked SU while adjusting SI 
assignments [#2268]
Review request for Trac Ticket(s): #2268 
Peer Reviewer(s): AMF devs 
Pull request to: <>
Affected branch(es): ALL 
Development branch: <>


Impacted area   Impact y/n

 Docsn
 Build systemn
 RPM/packaging   n
 Configuration files n
 Startup scripts n
 SAF servicesy
 OpenSAF servicesn
 Core libraries  n
 Samples n
 Tests   n
 Other   n


Comments (indicate scope for each "y" above):
-

changeset 2d69e15801b87ac788b64a8287353616fd7e0c66
Author: Praveen Malviya <praveen.malv...@oracle.com>
Date:   Fri, 17 Mar 2017 14:48:12 +0530

amfd: remove assignments from lower ranked SU while adjusting SI 
assignments
[#2268]

In N-Way Active mode, when saAmfSIPrefActiveAssignments is reduced, AMFD
removes assignments from higher ranked SU when siranked su is not 
configured
and lower ranked SU have assignments. Similar issue in N-Way model when
SiPrefStandbyAssignment is reduced. Also AMFD is not checking HA state 
of
susi and tries to delete active susi and crashes.

Patch fixes the problem by removing assignments from lower ranked SU.


Complete diffstat:
--
 src/amf/amfd/si.cc |  92 
++--
 1 files changed, 46 insertions(+), 46 deletions(-)


Testing Commands:
-
Tested both N-Way Active model and N-Way model configurations 
by adjusting SI assignment params when SI has ranked SU configured and not 
configured.
Also tested configuration without siranked su having ranks in SU.

Testing, Expected Results:
--
AMF removes assignments from lower ranked SUs or sus having lower ranks.

Conditions of Submission:
-
Ack from any reviewer before RC2.

Arch  Built StartedLinux distro
---
mipsn  n
mips64  n  n
x86 n  n
x86_64  y  y
powerpc n  n
powerpc64   n  n


Reviewer Checklist:
---
[Submitters: make sure that your review doesn't trigger any checkmarks!]


Your checkin has not passed review because (see checked entries):

___ Your RR template is generally incomplete; it has too many blank entries
that need proper data filled in.

___ You have failed to nominate the proper persons for review and push.

___ Your patches do not have proper short+long header

___ You have grammar/spelling in your header that is unacceptable.

___ You have exceeded a sensible line length in your headers/comments/text.

___ You have failed to put in a proper Trac Ticket # into your commits.

___ You have incorrectly put/left internal data in your comments/files
(i.e. internal bug tracking tool IDs, product names etc)

___ You have not given any evidence of testing beyond basic build tests.
Demonstrate some level of runtime or other sanity testing.

___ You have ^M present in some of your files. These have to be removed.

___ You have needlessly changed whitespace or added whitespace crimes
like trailing spaces, or spaces before tabs.

___ You have mixed real technical changes with whitespace and other
cosmetic code cleanup changes. These have to be separate commits.

___ You need to refactor your submission into logical chunks; there is
too much content into a single commit.

___ You have extraneous garbage in your review (merge commits etc)

___ You have giant attachments which should never have been sent;
Instead you should place your content in a public tree to be pulled.

___ You have too many commits attached to an e-mail; resend as threaded
commits, or place in a public tree for a pull.

___ You have resent this content multiple times without a clear indication
of what has changed between each re-send.

___ You have failed to adequately and individually address all of the
comments and change requests that were proposed in the initial review.

___ You have a misconfigured ~/.hgrc file (i.e. username, email etc)

___ Your computer has a badly configured date and time; confusing
the threaded patch review.

___ Your changes affect IPC mechanism, and you don't present any results
for in-service upgradability test.

___ Your changes affect user manual and documentation, your patch series
do not contain the patch that updates the Doxygen manual.


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___

[devel] [PATCH 1 of 1] amfd: remove assignments from lower ranked SU while adjusting SI assignments [#2268]

2017-03-17 Thread praveen . malviya

 src/amf/amfd/si.cc |  92 +++---
 1 files changed, 46 insertions(+), 46 deletions(-)


In N-Way Active mode, when saAmfSIPrefActiveAssignments is reduced,
AMFD removes assignments from higher ranked SU when siranked su is not 
configured and lower
ranked SU have assignments.
Similar issue in N-Way model when SiPrefStandbyAssignment is reduced. Also AMFD 
is not
checking HA state of susi and tries to delete active susi and crashes.

Patch fixes the problem by removing assignments from lower ranked SU.

diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc
--- a/src/amf/amfd/si.cc
+++ b/src/amf/amfd/si.cc
@@ -1052,8 +1052,7 @@ done:
  */
 void AVD_SI::adjust_si_assignments(const uint32_t mod_pref_assignments)
 {
-   AVD_SU_SI_REL *sisu, *tmp_sisu;
-   uint32_t no_of_sisus_to_delete;
+   AVD_SU_SI_REL *sisu; 
uint32_t i = 0;
 
TRACE_ENTER2("for SI:%s ", name.c_str());
@@ -1073,31 +1072,24 @@ void AVD_SI::adjust_si_assignments(const
TRACE("No New assignments are been done SI:%s", 
name.c_str());
}
} else {
-   no_of_sisus_to_delete = saAmfSINumCurrActiveAssignments 
-
-   mod_pref_assignments;
-
-   /* Get the sisu pointer from the  si->list_of_sisu list 
from which 
-   no of sisus need to be deleted based on SI ranked SU */
-   sisu = tmp_sisu = list_of_sisu;
-   for( i = 0; i < no_of_sisus_to_delete && nullptr != 
tmp_sisu; i++ ) {
-   tmp_sisu = tmp_sisu->si_next;
+   if (list_of_sisu == nullptr)
+ return;
+   /*
+  avd_susi_create() keeps sisus in list_of_sisu in 
order from highest
+  ranked to lowest ranked.
+  Keep mod_pref_assignments in list_of_sisu from 
beginning and delete others.
+*/
+   sisu = list_of_sisu;
+   for( i = 0; ((i < mod_pref_assignments) && (sisu != 
nullptr)); i++ ) {
+   sisu = sisu->si_next;
}
-   while( tmp_sisu && (tmp_sisu->si_next != nullptr) ) {
-   sisu = sisu->si_next;
-   tmp_sisu = tmp_sisu->si_next;
-   }
-
-   for( i = 0; i < no_of_sisus_to_delete && (nullptr != 
sisu); i++ ) {
-   /* Send quiesced request for the sisu that 
needs tobe deleted */
+   for( ; sisu != nullptr; sisu = sisu->si_next) {
if (avd_susi_mod_send(sisu, SA_AMF_HA_QUIESCED) 
== NCSCC_RC_SUCCESS) {
-   /* Add SU to su_opr_list */
avd_sg_su_oper_list_add(avd_cb, 
sisu->su, false);
}
-   sisu = sisu->si_next;
}
-   /* Change the SG FSM to AVD_SG_FSM_SG_REALIGN if 
assignment is sent.*/
-   if (i > 0)
-   sg_of_si->set_fsm_state(AVD_SG_FSM_SG_REALIGN);
+   /* Change the SG FSM to AVD_SG_FSM_SG_REALIGN as 
assignment is sent.*/
+   sg_of_si->set_fsm_state(AVD_SG_FSM_SG_REALIGN);
}
} 
if( sg_of_si->sg_redundancy_model == SA_AMF_N_WAY_REDUNDANCY_MODEL ) {
@@ -1107,30 +1099,28 @@ void AVD_SI::adjust_si_assignments(const
LOG_ER("SI new assignmemts failed  SI:%s", 
name.c_str());
} 
} else {
-   no_of_sisus_to_delete = 0;
-   no_of_sisus_to_delete = 
saAmfSINumCurrStandbyAssignments -
-   mod_pref_assignments; 
-
-   /* Get the sisu pointer from the  si->list_of_sisu list 
from which 
-   no of sisus need to be deleted based on SI ranked SU */
-   sisu = tmp_sisu = list_of_sisu;
-   for(i = 0; i < no_of_sisus_to_delete && (nullptr != 
tmp_sisu); i++) {
-   tmp_sisu = tmp_sisu->si_next;
+   if (list_of_sisu == nullptr)
+   return;
+   /*
+  avd_susi_create() keeps sisus in list_of_sisu in 
order from highest
+  ranked to lowest ranked.
+  Keep mod_pref_assignments + active in list_of_sisu 
from beginning and delete others.
+*/
+   for (sisu = list_of_sisu; sisu != nullptr; sisu = 
sisu->si_next) {
+

Re: [devel] Review Request for amf: Update PR/README for SC absence feature [#2179]

2017-03-16 Thread praveen malviya

Hi Minh,

Ack.

I guess this is the same thing that was discussed during #1725, but in
the context of headless. For a normal cluster, issues related to missing
IMM updates of run-time attributes and objects have been observed and
reported, but not one in which assignment messages got missed.

But whenever it is implemented, this needs to be handled in the same manner
in both SC absence and a normal cluster. One solution could be that the active
AMFD runs a timer equal to the highest value of callbacktimeout in the
assignment sent to AMFND. When the active AMFD sends this assignment, it
checkpoints the assignment state. While decoding this state, the standby AMFD
can also run the same timer.
Now either AMFND will respond after collecting responses from components,
or it will send a recovery request to AMFD if some assignment fails. For a
comp restart, AMF will get the presence state as an update from that AMFND and
it can restart the timer. If nothing comes, then AMF can take action
after timer expiry. If a switchover/failover happens, then the standby is also
running the timer.


Thanks,
Praveen

On 16-Mar-17 11:38 AM, minh chau wrote:
> Hi Praveen,
>
> Thanks for review, I have commented inline.
>
>  * Escalation and Recovery during SC absence period:
> -Restarts will work as normal, but failover or switchover will result in
> Node
> -Failfast. The repair action will be initiated when a SC returns if
> -saAmfSGAutoRepair is enabled.
> +Component and su restarts will work as normal. Any fail-over or
> switch-over at
> +component, su, and node level will only cleanup faulty components.
> Recovery will
> +be delayed until a SC returns: the fail-over or switch-over of SI
> assignments
> +will be initiated if saAmfSGAutoRepair is enabled, the node will be
> reboot if
> +saAmfNodeAutoRepair, aAmfNodeFailfastOnTerminationFailure, or
> +saAmfNodeFailfastOnInstantiationFailure is enabled.
> [Praveen] I think there is no dependency of failover and switchover of
> assignments on saAmfSGAutoRepair.
> Should the sentence be like this?
>  " Recovery (failover or switchover of assignments) will be delayed
> until a SC returns.
> When the first SC comes up after the SC absence state, AMF will perform
> pending repairs:
>
> [Minh]: This part is about escalation and recovery which is initiated by
> su_oper message, it does depend on saAmfSgAutoRepair which is checked in
> su_try_repair(), so I am not going to change the text
>
> +* Possible loss of RTA updates and SI assignment messages
> +If both SCs go down abruptly (SCs are immediately powered-off for
> instance),
> +AMFD could fail to update RTA to IMM, the SI assignment messages sent from
> +AMFND could not reach to AMFD, recovery could be impossible.
> +
> [Praveen] Should we mention the case of loss of assignment response
> from AMFND to AMFD?
> Also I think we should mention the impact of this loss, something like:
> "In case of loss of RTA and SI assignments, AMF will not be able to
> fully recover assignments. Thus the application
> may go into an inconsistent state."
>
> [Minh]: I rewrites the text as: "If both SCs go down abruptly (SCs are
> immediately powered-off for instance), AMFD could fail to update RTA to
> IMM, the SI assignment request message sent from
> AMFD could not reach to AMFND, or the SI assignment response message
> sent from AMFND also could not reach to AMFD. In such cases, recovery
> could be impossible, application may have inappropriate assignment states"
>
> One query: It's known in ticket #2210 that loss of mbcsv checkpoint in
> sc failover in normal cluster can also happen as similar as loss of RTA
> when both SCs go headless. For the loss of SI assignment messages,
> although AMFD is using MDS in redundant view but the SI assignment is
> not synchronization, I wonder if someone abruptly power off active
> controller when active amfd is about receiving the assignment message,
> or when amfnd just sends out the assignment response message but does
> not reach to amfds?
>
>
>
> On 15/03/17 16:26, praveen malviya wrote:
>> +saAmfNodeFailfastOnInstantiationFailure is enabled.
>> [Praveen] I think there is no dependency of failover and switchover of
>> assignments on saAmfSGAutoRepair.
>> Should the sentence be like this?
>>   " Recovery (failover or switchover of assignments) will be delayed
>> until a SC returns.
>> When the first SC comes up after the SC absence state, AMF will perform
>> pending repairs:
>

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] Review Request for amf: Update PR/README for SC absence feature [#2179]

2017-03-14 Thread praveen malviya


Hi Minh,

Ack with two comments in attached read me.


Thanks,
Praveen

On 10-Mar-17 11:14 AM, minh chau wrote:

Hi all,

Please help to review documentation changes. Files are attached, they
also can be found at below links:
https://sourceforge.net/p/opensaf/tickets/_discuss/thread/342e9c61/8b9a/attachment/OpenSAF_AMF_PR_2179.odt

https://sourceforge.net/p/opensaf/tickets/_discuss/thread/342e9c61/a94c/attachment/2179_README.diff


Thanks,
Minh
diff --git a/src/amf/README_SC_ABSENCE b/src/amf/README_SC_ABSENCE
--- a/src/amf/README_SC_ABSENCE
+++ b/src/amf/README_SC_ABSENCE
@@ -44,9 +44,12 @@ amfnd will not reboot the node and enter
 scAbsenceAllowed is configured)
 
 * Escalation and Recovery during SC absence period:
-Restarts will work as normal, but failover or switchover will result in Node
-Failfast. The repair action will be initiated when a SC returns if 
-saAmfSGAutoRepair is enabled.
+Component and su restarts will work as normal. Any fail-over or switch-over at
+component, su, and node level will only cleanup faulty components. Recovery 
will
+be delayed until a SC returns: the fail-over or switch-over of SI assignments
+will be initiated if saAmfSGAutoRepair is enabled, the node will be rebooted if 
+saAmfNodeAutoRepair, saAmfNodeFailfastOnTerminationFailure, or 
+saAmfNodeFailfastOnInstantiationFailure is enabled.
[Praveen] I think there is no dependency of failover and switchover of 
assignments on saAmfSGAutoRepair.
Should the sentence be like this?
 " Recovery (failover or switchover of assignments) will be delayed until a SC 
returns. 
When the first SC comes up after the SC absence state, AMF will perform pending repairs:
-for sufailover recovery if saAmfSGAutoRepair is enabled.
-for node-switchover and node failover recoveries if saAmfNodeAutoRepair is 
enabled.
-for INST_FAILED and TERM_FAILED state if saAmfSGAutoRepair and 
saAmfNodeAutoRepair are enabled along with
respective node level attributes saAmfNodeFailfastOnInstantiationFailure or 
saAmfNodeFailfastOnTerminationFailure.
"
.
-for comp-failover recovery, amfnd will re-instantiate comp after assignments 
are switched over.
"
 * Amfnd detects return of SCs:
 NCSMDS_UP is the event that amfnd uses to detect the presence of an active 
amfd.
@@ -76,16 +79,19 @@ absence recovery. The new attributes are
 
 Only 2N SG is currently supported for admin operation continuation.
 
+* Node reboot during SC absence period:
+The event of node reboot initiated by user during SC absence period 
+may lead to a loss of SI assignments. When a SC returns, AMF Director
+will detect improper SI assignments and recover HA states of assignments. 
+
 LIMITATIONS
 ---
 
-* While both SCs are absent, any failover or switchover escalation will result 
-in node failfast. The events of node reboot, node power off, and node failfast
-will lead to a loss of SI assignments, which are not restored during the SC 
-absence period. The SI assignments may remain in improper states until a SC 
-comes back. Recovery of any lost SI assignments during SC absence period is 
-currently not supported.
-
+* Possible loss of RTA updates and SI assignment messages
+If both SCs go down abruptly (SCs are immediately powered-off for instance),
+AMFD could fail to update RTA to IMM, the SI assignment messages sent from
+AMFND could not reach to AMFD, recovery could be impossible. 
+  
[Praveen] Should we mention the case of loss of assignment response from AMFND 
to AMFD?
Also I think we should mention the impact of this loss, something like:
"In case of loss of RTA and SI assignments, AMF will not be able to fully 
recover assignments. Thus the application
may go into an inconsistent state."
 * SI dependency tolerance timer 
 After a SC comes back, if an unassigned sponsor SI is detected, all its 
 dependent SI(s) assignments are removed regardless of tolerance duration. The 
--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1 of 1] fm: changing the log level from ER to WA [#2363]

2017-03-14 Thread praveen malviya

Ack, code review only.

Thanks,
Praveen

On 14-Mar-17 11:54 AM, ramesh.bet...@oracle.com wrote:
>  src/fm/fmd/fm_main.c |  2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
>
>
> diff --git a/src/fm/fmd/fm_main.c b/src/fm/fmd/fm_main.c
> --- a/src/fm/fmd/fm_main.c
> +++ b/src/fm/fmd/fm_main.c
> @@ -608,7 +608,7 @@ static void fm_mbx_msg_handler(FM_CB *fm
>* (old-Active) is still in the progress of shutdown (i.e., 
> amfd/immd is still alive).
>*/
>   if ((fm_cb->role == PCS_RDA_ACTIVE) && (fm_cb->csi_assigned == 
> false)) {
> - LOG_ER("Two active controllers observed in a cluster, 
> newActive: %x and old-Active: %x", fm_cb->node_id, fm_cb->peer_node_id);
> + LOG_WA("Two active controllers observed in a cluster, 
> newActive: %x and old-Active: %x", fm_cb->node_id, fm_cb->peer_node_id);
>   opensaf_reboot(0, NULL,
>   "Received svc up from peer node (old-active is not 
> fully DOWN), hence rebooting the new Active");
>   }
>

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1 of 1] amf: print list of CLC CLI command env variables correctly [#2368]

2017-03-13 Thread praveen malviya

Ack, code review only.

Thanks,
Praveen

On 13-Mar-17 10:24 AM, Nguyen TK Luu wrote:
>  src/amf/amfnd/clc.cc |  2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
>
>
> Due to a coding error, only the first env variable in the list
> gets printed repeatedly for the length of the list. This ticket
> fixes the bug.
>
> diff --git a/src/amf/amfnd/clc.cc b/src/amf/amfnd/clc.cc
> --- a/src/amf/amfnd/clc.cc
> +++ b/src/amf/amfnd/clc.cc
> @@ -3104,7 +3104,7 @@ uint32_t avnd_comp_clc_cmd_execute(AVND_
>
>   for(count=0;countnum_args;count++)
>   TRACE_1("CLC CLI command env variable name = '%s': value ='%s'",
> - 
> cmd_info.i_set_env_args->env_arg->name,cmd_info.i_set_env_args->env_arg->value);
> + 
> cmd_info.i_set_env_args->env_arg[count].name,cmd_info.i_set_env_args->env_arg[count].value);
>
>   /* finally execute the command */
>   rc = ncs_os_process_execute_timed(_info);
>

--
Announcing the Oxford Dictionaries API! The API offers world-renowned
dictionary content that is easy and intuitive to access. Sign up for an
account today to start using our lexical data to power your apps and
projects. Get started today and enter our developer competition.
http://sdm.link/oxford
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

[devel] [PATCH 1 of 1] amfd: honor PrefAssignedSU in N-Way and N-Way Active model during assignments [#2269]

2017-03-10 Thread praveen . malviya

 src/amf/amfd/sg.cc |  46 +-
 src/amf/amfd/sg.h  |   1 +
 src/amf/amfd/sg_nway_fsm.cc|  38 +++---
 src/amf/amfd/sg_nwayact_fsm.cc |  26 ++-
 4 files changed, 83 insertions(+), 28 deletions(-)


SG attribute saAmfSGNumPrefAssignedSUs is applicable to N-Way and N-Way Active 
model.
AMF is assigning more than saAmfSGNumPrefAssignedSUs in both N-Way and N-Way 
Active model.

Patch fixes this problem.

diff --git a/src/amf/amfd/sg.cc b/src/amf/amfd/sg.cc
--- a/src/amf/amfd/sg.cc
+++ b/src/amf/amfd/sg.cc
@@ -105,7 +105,7 @@ AVD_SG::AVD_SG():
saAmfSGAutoAdjust(SA_FALSE),
saAmfSGNumPrefActiveSUs(0),
saAmfSGNumPrefStandbySUs(0),
-   saAmfSGNumPrefInserviceSUs(~0),
+   saAmfSGNumPrefInserviceSUs(0),
saAmfSGNumPrefAssignedSUs(0),
saAmfSGMaxActiveSIsperSU(0),
saAmfSGMaxStandbySIsperSU(0),
@@ -941,16 +941,16 @@ static void ccb_apply_modify_hdlr(CcbUti
TRACE("Modified saAmfSGNumPrefStandbySUs is 
'%u'", sg->saAmfSGNumPrefStandbySUs);
} else if (!strcmp(attribute->attrName, 
"saAmfSGNumPrefInserviceSUs")) {
if (value_is_deleted)
-   sg->saAmfSGNumPrefInserviceSUs = ~0;
+   sg->saAmfSGNumPrefInserviceSUs = 0; 
//default value for internal use.
else
sg->saAmfSGNumPrefInserviceSUs = 
*((SaUint32T *)value);
-   TRACE("Modified saAmfSGNumPrefInserviceSUs is 
'%u'", sg->saAmfSGNumPrefInserviceSUs);
+   TRACE("Modified saAmfSGNumPrefInserviceSUs is 
'%u'", sg->pref_inservice_sus());
} else if (!strcmp(attribute->attrName, 
"saAmfSGNumPrefAssignedSUs")) {
if (value_is_deleted)
-   sg->saAmfSGNumPrefAssignedSUs = 
sg->saAmfSGNumPrefInserviceSUs;
+   sg->saAmfSGNumPrefAssignedSUs = 0; 
//default value for internal use.
else
sg->saAmfSGNumPrefAssignedSUs = 
*((SaUint32T *)value);
-   TRACE("Modified saAmfSGNumPrefAssignedSUs is 
'%u'", sg->saAmfSGNumPrefAssignedSUs);
+   TRACE("Modified saAmfSGNumPrefAssignedSUs is 
'%u'", sg->pref_assigned_sus());
} else if (!strcmp(attribute->attrName, 
"saAmfSGMaxActiveSIsperSU")) {
if (value_is_deleted)
sg->saAmfSGMaxActiveSIsperSU = -1;
@@ -1043,10 +1043,10 @@ static void ccb_apply_modify_hdlr(CcbUti
 
if (!strcmp(attribute->attrName, 
"saAmfSGNumPrefInserviceSUs")) {
if (value_is_deleted)
-   sg->saAmfSGNumPrefInserviceSUs = ~0;
+   sg->saAmfSGNumPrefInserviceSUs = 0;
else
sg->saAmfSGNumPrefInserviceSUs = 
*((SaUint32T *)value);
-   TRACE("Modified saAmfSGNumPrefInserviceSUs is 
'%u'", sg->saAmfSGNumPrefInserviceSUs);
+   TRACE("Modified saAmfSGNumPrefInserviceSUs is 
'%u'", sg->pref_inservice_sus());
 
if (avd_cb->avail_state_avd == 
SA_AMF_HA_ACTIVE)  {
if (avd_sg_app_su_inst_func(avd_cb, sg) 
!= NCSCC_RC_SUCCESS) {
@@ -1209,7 +1209,7 @@ static void sg_app_sg_admin_unlock_inst(
 
if (su->saAmfSUPreInstantiable == true) {
if (su->su_on_node->node_state == 
AVD_AVND_STATE_PRESENT) {
-   if 
(su->sg_of_su->saAmfSGNumPrefInserviceSUs > su_try_inst) {
+   if (su->sg_of_su->pref_inservice_sus() 
> su_try_inst) {
if (avd_snd_presence_msg(cb, 
su, false) != NCSCC_RC_SUCCESS) {
LOG_NO("%s: Failed to 
send Instantiation order of '%s' to %x",

__FUNCTION__, su->name.c_str(),
@@ -1866,17 +1866,6 @@ void avd_sg_adjust_config(AVD_SG *sg)
}
}
}
-
-   /* adjust saAmfSGNumPrefAssignedSUs if not configured, only applicable 
for
-* the N-way and N-way active redundancy models
-*/
-   if ((sg->saAmfSGNumPrefAssignedSUs == 0) &&
-   ((sg->sg_type->saAmfSgtRedundancyModel == 
SA_AMF_N_WAY_REDUNDANCY_MODEL) ||
-

[devel] [PATCH 0 of 1] Review Request for amfd: honor PrefAssignedSU in N-Way and N-Way Active model during assignments [#2269].

2017-03-10 Thread praveen . malviya

Summary: amfd: honor PrefAssignedSU in N-Way and N-Way Active model during 
assignments [#2269]. 
Review request for Trac Ticket(s): #2269 
Peer Reviewer(s): AMF devs 
Pull request to: <>
Affected branch(es): ALL 
Development branch: <>


Impacted area   Impact y/n

 Docsn
 Build systemn
 RPM/packaging   n
 Configuration files n
 Startup scripts n
 SAF servicesy
 OpenSAF servicesn
 Core libraries  n
 Samples n
 Tests   n
 Other   n


Comments (indicate scope for each "y" above):
-

changeset 62b35316b2e40dff6098f4385e2073f2f1e5a11b
Author: Praveen Malviya <praveen.malv...@oracle.com>
Date:   Fri, 10 Mar 2017 16:09:20 +0530

amfd: honor PrefAssignedSU in N-Way and N-Way Active model during
assignments [#2269].

SG attribute saAmfSGNumPrefAssignedSUs is applicable to N-Way and N-Way
Active model. AMF is assigning more than saAmfSGNumPrefAssignedSUs in 
both
N-Way and N-Way Active model.

Patch fixes this problem.


Complete diffstat:
--
 src/amf/amfd/sg.cc |  46 
+++---
 src/amf/amfd/sg.h  |   1 +
 src/amf/amfd/sg_nway_fsm.cc|  38 ++
 src/amf/amfd/sg_nwayact_fsm.cc |  26 +-
 4 files changed, 83 insertions(+), 28 deletions(-)


Testing Commands:
-
Brought up N-Way and N-Way active models:
1)with siranked su configured.
2)with equal distribution enabled.


Testing, Expected Results:
--
PASS.
AMF assigns only PrefAssignedSus.

Conditions of Submission:
-
Ack from reviewers.

Arch  Built StartedLinux distro
---
mipsn  n
mips64  n  n
x86 n  n
x86_64  y  y
powerpc n  n
powerpc64   n  n


Reviewer Checklist:
---
[Submitters: make sure that your review doesn't trigger any checkmarks!]


Your checkin has not passed review because (see checked entries):

___ Your RR template is generally incomplete; it has too many blank entries
that need proper data filled in.

___ You have failed to nominate the proper persons for review and push.

___ Your patches do not have proper short+long header

___ You have grammar/spelling in your header that is unacceptable.

___ You have exceeded a sensible line length in your headers/comments/text.

___ You have failed to put in a proper Trac Ticket # into your commits.

___ You have incorrectly put/left internal data in your comments/files
(i.e. internal bug tracking tool IDs, product names etc)

___ You have not given any evidence of testing beyond basic build tests.
Demonstrate some level of runtime or other sanity testing.

___ You have ^M present in some of your files. These have to be removed.

___ You have needlessly changed whitespace or added whitespace crimes
like trailing spaces, or spaces before tabs.

___ You have mixed real technical changes with whitespace and other
cosmetic code cleanup changes. These have to be separate commits.

___ You need to refactor your submission into logical chunks; there is
too much content in a single commit.

___ You have extraneous garbage in your review (merge commits etc)

___ You have giant attachments which should never have been sent;
Instead you should place your content in a public tree to be pulled.

___ You have too many commits attached to an e-mail; resend as threaded
commits, or place in a public tree for a pull.

___ You have resent this content multiple times without a clear indication
of what has changed between each re-send.

___ You have failed to adequately and individually address all of the
comments and change requests that were proposed in the initial review.

___ You have a misconfigured ~/.hgrc file (i.e. username, email etc)

___ Your computer has a badly configured date and time, confusing
the threaded patch review.

___ Your changes affect IPC mechanism, and you don't present any results
for in-service upgradability test.

___ Your changes affect user manual and documentation, your patch series
do not contain the patch that updates the Doxygen manual.


--
Announcing the Oxford Dictionaries API! The API offers world-renowned
dictionary content that is easy and intuitive to access. Sign up for an
account today to start using our lexical data to power your apps and
projects. Get started today and enter our developer competition.
http://sdm.link/oxford
___
Opensaf-deve

Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational message synchronizes with component failover sequence [#2233]

2017-03-07 Thread praveen malviya


On 08-Mar-17 9:11 AM, minh chau wrote:
> Hi Praveen,
>
> I don't think we need both patches, one of those is enough to fix the
> problem of comp f/o in case unassigned component. When we have both
> patches, V2 patch will make reinstantiation of failed-unassigned comp
> after assignment's removal, so V1 is not needed anymore because su
> operational message (enabled) will always be sent after switchover.
> I am not 100% sure how is the impact of moving reinstantiation of
> component after SI assignment's removal, but basically this change of
> behavior is exposed to applications
[Praveen] I have now checked the comment in ticket #2233, which 
contains the problem description for the SC absence case. I think the V2 patch 
will not allow two su_oper messages, as recovery can be done only after the 
first controller comes up. So I prefer V2 as the solution. With V2, comp 
instantiation is done after completion of recovery for both 
assigned and unassigned components.

However, when comp-failover recovery is implemented in a spec-compliant 
way for the N-Way and N-Way Active models, then surely we need to instantiate 
the component as early as possible.

> One potential impact I can think of, in either headless or normal
> cluster, is that failed component will have less time for its
> instantiation before receiving csi assignment (since reinstantiation of
> failed component has been started regardless SI switchover), so it could
> be a timing issue for application due to application's specific
> dependencies in instantiation phase.
[Praveen] I did not fully understand this. But if an instantiation level is 
configured for the components in the SU, then instantiation of a failed component 
of any level will not lead to instantiation of components of other levels:

From the spec: "The instantiation level is, above all, a means to limit the 
load on the system during the instantiation process."

>
> Thanks,
> Minh
>
> On 07/03/17 16:34, praveen malviya wrote:
>> Hi Minh,
>>
>> Is there any harm if both the patches are merged? One patch adds
>> strict checks for message ordering in case of comp-failover recovery
>> of assigned or non-assigned component. Another patch ensures that if
>> an assigned or non-assigned comp faults with comp-failover recovery
>> then first AMF will switchover whole SU (current implementation
>> irrespective of red models) and after completion of switchover
>> re-instantiation of failed comp will be attempted.
>> Also, I think, from headless perspective, the strict check of patch V1
>> is important when comp-failover occurs in the absence of SCs.
>> So I have a minor query here: Is there any impact of late
>> instantiation of comp when comp-failover occurs in SCs Absence?
>>
>>
>> Also I think now an enhancement ticket should be raised for
>> implementation of comp-failover recovery as per spec for N-Way and
>> N-Way active model.
>>
>>
>> Thanks,
>> Praveen
>>
>>
>>
>> On 07-Mar-17 4:10 AM, minh chau wrote:
>>> Hi Praveen,
>>>
>>> Please see comments with [Minh5]
>>>
>>> Thanks,
>>> Minh
>>>
>>> On 06/03/17 17:52, praveen malviya wrote:
>>>> Hi Minh,
>>>>
>>>> Please see inline with [Praveen].
>>>>
>>>> Thanks,
>>>> Praveen
>>>>
>>>> On 03-Mar-17 5:39 PM, minh chau wrote:
>>>>> Hi Praveen,
>>>>>
>>>>> I have two comments with [Minh4].
>>>>>
>>>>> Thanks
>>>>> Minh
>>>>>
>>>>> On 02/03/17 20:49, praveen malviya wrote:
>>>>>> Hi Minh,
>>>>>> Please see response with [Praveen].
>>>>>>
>>>>>> Thanks,
>>>>>> Praveen
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 02-Mar-17 1:43 PM, minh chau wrote:
>>>>>>> Hi,
>>>>>>>
>>>>>>> Thanks Gary.
>>>>>>> @Nagu, Praveen: Have you had time to check the example in my
>>>>>>> previous
>>>>>>> email?
>>>>>>> The ticket #2179 is about to document that full escalation is
>>>>>>> supported
>>>>>>> for SC absence feature, it is waiting for fix of #2233.
>>>>>>> I think there's not big change in code for #2233, it's a matter of
>>>>>>> decision to make for re-instantiation of failed component.
>>>>>>>
>>>>>>> Thanks,
>>>>>>> Minh
>>>>>>>

Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational message synchronizes with component failover sequence [#2233]

2017-03-06 Thread praveen malviya

Hi Minh,

Is there any harm if both patches are merged? One patch adds strict 
checks for message ordering in case of comp-failover recovery of an 
assigned or non-assigned component. The other patch ensures that if an 
assigned or non-assigned comp faults with comp-failover recovery, then 
AMF will first switch over the whole SU (the current implementation, irrespective 
of redundancy models), and after completion of the switchover, re-instantiation 
of the failed comp will be attempted.
Also, I think, from a headless perspective, the strict check of patch V1 
is important when comp-failover occurs in the absence of SCs.
So I have a minor query here: Is there any impact of late instantiation 
of the comp when comp-failover occurs in SC absence?


Also, I think an enhancement ticket should now be raised for 
implementation of comp-failover recovery as per spec for the N-Way and N-Way 
Active models.


Thanks,
Praveen



On 07-Mar-17 4:10 AM, minh chau wrote:
> Hi Praveen,
>
> Please see comments with [Minh5]
>
> Thanks,
> Minh
>
> On 06/03/17 17:52, praveen malviya wrote:
>> Hi Minh,
>>
>> Please see inline with [Praveen].
>>
>> Thanks,
>> Praveen
>>
>> On 03-Mar-17 5:39 PM, minh chau wrote:
>>> Hi Praveen,
>>>
>>> I have two comments with [Minh4].
>>>
>>> Thanks
>>> Minh
>>>
>>> On 02/03/17 20:49, praveen malviya wrote:
>>>> Hi Minh,
>>>> Please see response with [Praveen].
>>>>
>>>> Thanks,
>>>> Praveen
>>>>
>>>>
>>>>
>>>> On 02-Mar-17 1:43 PM, minh chau wrote:
>>>>> Hi,
>>>>>
>>>>> Thanks Gary.
>>>>> @Nagu, Praveen: Have you had time to check the example in my previous
>>>>> email?
>>>>> The ticket #2179 is about to document that full escalation is
>>>>> supported
>>>>> for SC absence feature, it is waiting for fix of #2233.
>>>>> I think there's not big change in code for #2233, it's a matter of
>>>>> decision to make for re-instantiation of failed component.
>>>>>
>>>>> Thanks,
>>>>> Minh
>>>>>
>>>>> On 01/03/17 15:42, Gary Lee wrote:
>>>>>> Hi
>>>>>>
>>>>>> It seems the component should be re-instantiated if it has no CSI.
>>>>>> Whether or not there is an SI assigned should be irrelevant?
>>>>>>
>>>>>> Thanks
>>>>>> Gary
>>>>>>
>>>>>> -Original Message-
>>>>>> From: minh chau <minh.c...@dektech.com.au>
>>>>>> Date: Thursday, 23 February 2017 at 3:16 pm
>>>>>> To: Nagendra Kumar <nagendr...@oracle.com>, Praveen Malviya
>>>>>> <praveen.malv...@oracle.com>
>>>>>> Cc: <hans.nordeb...@ericsson.com>, gary <gary@dektech.com.au>,
>>>>>> <long.hb.ngu...@dektech.com.au>,
>>>>>> <opensaf-devel@lists.sourceforge.net>
>>>>>> Subject: Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational
>>>>>> message synchronizes with component failover sequence [#2233]
>>>>>>
>>>>>>  Hi Nagu, Praveen,
>>>>>>   Please find my comment in [Minh3]
>>>>>>   Thanks,
>>>>>>  Minh
>>>>>>   On 22/02/17 19:34, Nagendra Kumar wrote:
>>>>>>  >>> Since in spec there is no specific discussion for
>>>>>> comp-failover recovery for an unassigned comp, I will encourage other
>>>>>> maintainers also to provide inputs.
>>>>>>  > I do agree for not instantiating failed component before
>>>>>> recovery, this keeps the approach similar to SU failover also.
>>>>>>  [Minh3]: There's one example of component failover that I would
>>>>>> like us
>>>>>>  to have a look
>>>>>>  - 2N application, SU4/SU5 has active/standby assignment
>>>>>> respectively,
>>>>>>  each SU has 3 components
>>>>>>  - Add a sleep of 10 seconds in clc script start command of first
>>>>>>  component C41 of SU4
>>>>>>  Steps:
>>>>>>  1- Kill C41 to trigger component failover
>>>>>>  2- SU4 goes for quiesced assignment
>>>>>>  3- SU5 goes for active assignment
>>>>>&

Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational message synchronizes with component failover sequence [#2233]

2017-03-05 Thread praveen malviya

Hi Minh,

Please see inline with [Praveen].

Thanks,
Praveen

On 03-Mar-17 5:39 PM, minh chau wrote:
> Hi Praveen,
>
> I have two comments with [Minh4].
>
> Thanks
> Minh
>
> On 02/03/17 20:49, praveen malviya wrote:
>> Hi Minh,
>> Please see response with [Praveen].
>>
>> Thanks,
>> Praveen
>>
>>
>>
>> On 02-Mar-17 1:43 PM, minh chau wrote:
>>> Hi,
>>>
>>> Thanks Gary.
>>> @Nagu, Praveen: Have you had time to check the example in my previous
>>> email?
>>> The ticket #2179 is about to document that full escalation is supported
>>> for SC absence feature, it is waiting for fix of #2233.
>>> I think there's not big change in code for #2233, it's a matter of
>>> decision to make for re-instantiation of failed component.
>>>
>>> Thanks,
>>> Minh
>>>
>>> On 01/03/17 15:42, Gary Lee wrote:
>>>> Hi
>>>>
>>>> It seems the component should be re-instantiated if it has no CSI.
>>>> Whether or not there is an SI assigned should be irrelevant?
>>>>
>>>> Thanks
>>>> Gary
>>>>
>>>> -Original Message-
>>>> From: minh chau <minh.c...@dektech.com.au>
>>>> Date: Thursday, 23 February 2017 at 3:16 pm
>>>> To: Nagendra Kumar <nagendr...@oracle.com>, Praveen Malviya
>>>> <praveen.malv...@oracle.com>
>>>> Cc: <hans.nordeb...@ericsson.com>, gary <gary@dektech.com.au>,
>>>> <long.hb.ngu...@dektech.com.au>, <opensaf-devel@lists.sourceforge.net>
>>>> Subject: Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational
>>>> message synchronizes with component failover sequence [#2233]
>>>>
>>>>  Hi Nagu, Praveen,
>>>>   Please find my comment in [Minh3]
>>>>   Thanks,
>>>>  Minh
>>>>   On 22/02/17 19:34, Nagendra Kumar wrote:
>>>>  >>> Since in spec there is no specific discussion for
>>>> comp-failover recovery for an unassigned comp, I will encourage other
>>>> maintainers also to provide inputs.
>>>>  > I do agree for not instantiating failed component before
>>>> recovery, this keeps the approach similar to SU failover also.
>>>>  [Minh3]: There's one example of component failover that I would
>>>> like us
>>>>  to have a look
>>>>  - 2N application, SU4/SU5 has active/standby assignment
>>>> respectively,
>>>>  each SU has 3 components
>>>>  - Add a sleep of 10 seconds in clc script start command of first
>>>>  component C41 of SU4
>>>>  Steps:
>>>>  1- Kill C41 to trigger component failover
>>>>  2- SU4 goes for quiesced assignment
>>>>  3- SU5 goes for active assignment
>>>>  4- SU4 is removed its assignment
>>>>  5- Now there's a pause of 10 seconds due to clc script start, to
>>>> ensure
>>>>  that C41 is healthy
>>>>  6- Next SU4 has standby assignment.
>>>>From the above example, I think we can see some problems if
>>>> the
>>>>  re-instantiation of C41 is delayed:
>>>>  - Because C41 is faulty, it needs to be restarted ok because its
>>>> SU has
>>>>  assignment
>>>>  - Moving re-instantiation of C41 is further down that means the
>>>> recovery
>>>>  will take longer
>>>>  - What if re-instantiation of C41 leads to instantiation-failed
>> [Praveen] If AMFND re-instantiate C41 after removal of assignment and
>> it moves to instantiation-failed then:
>> -Node will be rebooted if nodefailfastonterminationfailure=true.
>> -if nodefailfastonterminationfailure=false then as per section 4.6 page
>> 212, SU will be marked INST_FAILED and AMF will have to terminate all
>> the components. Termination of other components will be easier if they
>> do not have assignments or pending assignments.
>>
>> If C41 is instantiated before removal of assignments and it moves to
>> INST_FAILED state, then AMFND will be terminating other comps of SU
>> when they are in the middle of quiesced or removal of assignment. So a
>> component will have different orders of quiesced/removal/terminate
>> callbacks in its mailbox. This will make things complex.
> [Minh4]: I am not sure if I understand the complex thing you mentioned
>

Re: [devel] [PATCH 1 of 1] amf: support restrictions to auto-repair [#2144]

2017-03-05 Thread praveen malviya

Hi,

It is my mistake. I missed the following two changes while combining the 
patches:
a)
546c544
< +  if (!comp->su->suMaintenanceCampaign.empty()) {
---
 > +  if (!comp->su->suMaintenanceCampaign.empty() && !comp->admin_oper) {

b)
275,276d274
< + continue;
< + }

I think the test is failing because of change a). I think case b) may not 
be hit in any campaign.

Thanks,
Praveen

On 03-Mar-17 7:52 PM, Alex Jones wrote:
> Hi Neel,
>
>   You are missing two patches. I've attached the final AMF patch, which
> incorporates my original and Praveen's improvements. This is what I will
> push if you are OK with it.
>
> Alex
>
> On 03/03/2017 05:19 AM, Neelakanta Reddy wrote:
>> 
>> NOTICE: This email was received from an EXTERNAL sender
>> 
>>
>> Attaching the patches used.
>>
>> Thanks,
>> Neel.
>> On 2017/03/03 03:15 PM, Neelakanta Reddy wrote:
>>> Hi Alex,
>>>
>>> The included patches are: latest #2144 patch provided by praveen with
>>> latest #2145 patch.
>>>
>>> The Rolling upgrade campaign to change the application version is failing.
>>> This is basic application upgrade test.
>>>
>>> # smf-state camp
>>> safSmfCampaign=Campaign1,safApp=safSmfService
>>> state=ERROR_DETECTED(7)
>>> error='safSu=dummy_2n_1,safSg=SG_dummy_2n,safApp=2nApp failed
>>> after upgrade'
>>>
>>> syslog:
>>> Mar 27 08:10:57 SLES1 osafsmfd[29847]: NO PROC: Procedure init actions
>>> completed
>>> Mar 27 08:10:57 SLES1 osafsmfd[29847]: NO PROC: Start executing the steps
>>> Mar 27 08:10:57 SLES1 osafsmfd[29847]: NO STEP: Executing AU restart
>>> step
>>>
>> safSmfStep=0001,safSmfProc=amfClusterProc-1,safSmfCampaign=Campaign1,safApp=safSmfService
>>> Mar 27 08:10:57 SLES1 osafsmfd[29847]: NO STEP: Online installation of
>>> new software
>>> Mar 27 08:10:57 SLES1 osafsmfnd[29845]: NO Successful start of command
>>> execution: /hostfs/online_install.sh bundle-new, timeout 8
>>> Mar 27 08:10:57 SLES1 osafsmfnd[29845]: NO Command execution OK
>>> Mar 27 08:10:57 SLES1 osafsmfd[29847]: NO STEP: Create new
>>> SaAmfNodeSwBundle objects
>>> Mar 27 08:10:57 SLES1 osafimmnd[29768]: NO Ccb 54 COMMITTED (SMFSERVICE)
>>> Mar 27 08:10:57 SLES1 osafsmfd[29847]: NO STEP: Modify information model
>>> and set maintenance status
>>> Mar 27 08:10:57 SLES1 osafimmnd[29768]: NO Ccb 55 COMMITTED (SMFSERVICE)
>>> Mar 27 08:10:57 SLES1 osafamfnd[29829]: NO saAmfCompType changed to
>>> 'safVersion=6.0.0,safCompType=Comp_2nApp_2n_1_1' for
>>> 'safComp=Norm1,safSu=dummy_2n_1,safSg=SG_dummy_2n,safApp=2nApp'
>>> Mar 27 08:10:57 SLES1 osafimmnd[29768]: NO Ccb 56 COMMITTED (SMFSERVICE)
>>> Mar 27 08:10:57 SLES1 osafsmfd[29847]: NO STEP: Restart activation units
>>> Mar 27 08:10:57 SLES1 osafamfnd[29829]: NO Admin restart requested for
>>> 'safComp=Norm1,safSu=dummy_2n_1,safSg=SG_dummy_2n,safApp=2nApp'
>>> Mar 27 08:10:57 SLES1 osafamfnd[29829]: NO not restarting comp because
>>> maintenance campaign is set: safSmfCampaign=Campaign1,safApp=safSmfService
>>> Mar 27 08:10:57 SLES1 osafsmfd[29847]: ER SU:
>>> safSu=dummy_2n_1,safSg=SG_dummy_2n,safApp=2nApp failed after upgrade in
>>> campaign
>>>
>>> Thanks,
>>> Neel.
>>>
>>>
>>>
>>> On 2017/03/03 02:55 AM, Alex Jones wrote:
>>>> Hi Praveen,
>>>>
>>>> Both patches look fine except for one issue in the first patch
>>>> (02_2144.patch). See the comment below.
>>>>
>>>> Neel, do you have any comments for the SMF patch?
>>>>
>>>> If both of you guys are OK, then I will push the AMF (my original and
>>>> Praveen's 2 later ones) and SMF patches tomorrow.
>>>>
>>>> Alex
>>>>
>>>> diff --git a/src/amf/amfd/sgproc.cc b/src/amf/amfd/sgproc.cc
>>>> --- a/src/amf/amfd/sgproc.cc
>>>> +++ b/src/amf/amfd/sgproc.cc
>>>> @@ -2092,13 +2092,17 @@ void avd_node_down_mw_susi_failover(AVD_
>>>> * one loop as more than one MW SU per SG in one node is not supported.
>>>> */
>>>> osafassert(avnd->list_of_ncs_su.empty() != true);
>>>> -
>>>> + bool campaign_set = avnd->is_campaign_set_for_all_sus();
>>>> for (const auto& i_su : avn

Re: [devel] [PATCH 1 of 1] osaf:fm on new-Active handling amfd up event of peer old-Active node which is going down[#2151] V2

2017-03-02 Thread praveen malviya

Ack, code review only.

Thanks,
Praveen.

On 02-Mar-17 3:39 PM, ramesh betham wrote:
> Hi,
>
> Correction to the patch.
>
> + /* Weird situation in a cluster, where the new-Active 
> controller node founds the peer node
> +  * (old-Active) is still in the progress of shutdown (i.e., 
> amfd/immd is still alive).
> +  */
> + if ((fm_cb->role == PCS_RDA_ACTIVE) && (fm_cb->csi_assigned == 
> false)) {
> + LOG_ER("Two active controllers observed in a cluster, 
> newActive: %x and old-Active: %x", fm_cb->node_id, fm_cb->peer_node_id);
> + opensaf_reboot(fm_cb->peer_node_id, NULL,
> correction: opensaf_reboot(0, NULL,
> + "Received svc up from peer node (old-active is not 
> fully DOWN), hence rebooting the new Active");
> + }
>
> Thanks,
> Ramesh.
>
> On 3/2/2017 2:02 PM, ramesh.bet...@oracle.com wrote:
>>   src/fm/fmd/fm_evt.h  |2 +-
>>   src/fm/fmd/fm_main.c |   78 ++---
>>   src/fm/fmd/fm_mds.c  |  181 
>> --
>>   3 files changed, 155 insertions(+), 106 deletions(-)
>>
>>
>> diff --git a/src/fm/fmd/fm_evt.h b/src/fm/fmd/fm_evt.h
>> --- a/src/fm/fmd/fm_evt.h
>> +++ b/src/fm/fmd/fm_evt.h
>> @@ -1,6 +1,7 @@
>>   /*  -*- OpenSAF  -*-
>>   *
>>   * (C) Copyright 2008 The OpenSAF Foundation
>> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
>>   *
>>   * This program is distributed in the hope that it will be useful, but
>>   * WITHOUT ANY WARRANTY; without even the implied warranty of 
>> MERCHANTABILITY
>> @@ -49,7 +50,6 @@ typedef enum {
>>  FM_EVT_NODE_DOWN,
>>  FM_EVT_PEER_UP,
>>  FM_EVT_RDA_ROLE,
>> -FM_EVT_SVC_DOWN,
>>  FM_FSM_EVT_MAX
>>   } FM_FSM_EVT_CODE;
>>
>> diff --git a/src/fm/fmd/fm_main.c b/src/fm/fmd/fm_main.c
>> --- a/src/fm/fmd/fm_main.c
>> +++ b/src/fm/fmd/fm_main.c
>> @@ -1,6 +1,7 @@
>>   /*  -*- OpenSAF  -*-
>>   *
>>   * (C) Copyright 2008 The OpenSAF Foundation
>> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
>>   *
>>   * This program is distributed in the hope that it will be useful, but
>>   * WITHOUT ANY WARRANTY; without even the implied warranty of 
>> MERCHANTABILITY
>> @@ -31,6 +32,7 @@ This file contains the main() routine fo
>>   #include "nid/agent/nid_api.h"
>>   #include "fm.h"
>>   #include "base/osaf_time.h"
>> +#include "base/osaf_poll.h"
>>
>>   #define FM_CLM_API_TIMEOUT 100LL
>>
>> @@ -71,7 +73,6 @@ void handle_mbx_event(void);
>>   extern uint32_t fm_amf_init(FM_AMF_CB *fm_amf_cb);
>>   uint32_t gl_fm_hdl;
>>   static NCS_SEL_OBJ usr1_sel_obj;
>> -void fm_proc_svc_down(FM_CB *cb, FM_EVT *fm_mbx_evt);
>>
>>   /**
>>* USR1 signal is used when AMF wants instantiate us as a
>> @@ -176,6 +177,11 @@ int main(int argc, char *argv[])
>>   */
>>  fm_cb->control_tipc = true; /* Default behaviour */
>>
>> +fm_cb->immd_down = true;
>> +fm_cb->immnd_down = true;
>> +fm_cb->amfnd_down = true;
>> +fm_cb->amfd_down = true;
>> +
>>  /* Create CB handle */
>>  gl_fm_hdl = ncshm_create_hdl(NCS_HM_POOL_ID_COMMON, NCS_SERVICE_ID_GFM, 
>> (NCSCONTEXT)fm_cb);
>>
>> @@ -194,7 +200,7 @@ int main(int argc, char *argv[])
>>  goto fm_init_failed;
>>  }
>>
>> -/* Attach MBX */
>> +/* Attach MBX */
>>  if (m_NCS_IPC_ATTACH(_cb->mbx) != NCSCC_RC_SUCCESS) {
>>  syslog(LOG_ERR, "m_NCS_IPC_ATTACH() failed.");
>>  goto fm_init_failed;
>> @@ -268,7 +274,7 @@ int main(int argc, char *argv[])
>>
>>  /* notify the NID */
>>  if (nid_started)
>> -fm_nid_notify(NCSCC_RC_SUCCESS);
>> +fm_nid_notify((uint32_t) NCSCC_RC_SUCCESS);
>>
>>  while (1) {
>>  ret = poll(fds, nfds, -1);
>> @@ -454,52 +460,6 @@ static uint32_t fm_get_args(FM_CB *fm_cb
>>  return NCSCC_RC_SUCCESS;
>>   }
>>
>> -void fm_proc_svc_down(FM_CB *cb, FM_EVT *fm_mbx_evt)
>> -{
>> -switch (fm_mbx_evt->svc_id) {
>> -case NCSMDS_SVC_ID_IMMND:
>> -cb->immnd_down = true;
>> -LOG_NO("IMMND down on: %x", cb->peer_node_id);
>> -break;
>> -case NCSMDS_SVC_ID_AVND:
>> -cb->amfnd_down = true;
>> -LOG_NO("AMFND down on: %x", cb->peer_node_id);
>> -break;
>> -case NCSMDS_SVC_ID_IMMD:
>> -cb->immd_down = true;
>> -LOG_NO("IMMD down on: %x", cb->peer_node_id);
>> -break;
>> -case NCSMDS_SVC_ID_AVD:
>> -cb->amfd_down = true;
>> -LOG_NO("AVD down on: %x", cb->peer_node_id);
>> -break;
>> -case NCSMDS_SVC_ID_GFM:
>> -cb->fm_down = true;
>> -LOG_NO("FM down on: %x", cb->peer_node_id);
>> -

Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational message synchronizes with component failover sequence [#2233]

2017-03-02 Thread praveen malviya

Hi Minh,
Please see response with [Praveen].

Thanks,
Praveen



On 02-Mar-17 1:43 PM, minh chau wrote:
> Hi,
>
> Thanks Gary.
> @Nagu, Praveen: Have you had time to check the example in my previous
> email?
> The ticket #2179 is about to document that full escalation is supported
> for SC absence feature, it is waiting for fix of #2233.
> I think there's not big change in code for #2233, it's a matter of
> decision to make for re-instantiation of failed component.
>
> Thanks,
> Minh
>
> On 01/03/17 15:42, Gary Lee wrote:
>> Hi
>>
>> It seems the component should be re-instantiated if it has no CSI.
>> Whether or not there is an SI assigned should be irrelevant?
>>
>> Thanks
>> Gary
>>
>> -Original Message-
>> From: minh chau <minh.c...@dektech.com.au>
>> Date: Thursday, 23 February 2017 at 3:16 pm
>> To: Nagendra Kumar <nagendr...@oracle.com>, Praveen Malviya
>> <praveen.malv...@oracle.com>
>> Cc: <hans.nordeb...@ericsson.com>, gary <gary@dektech.com.au>,
>> <long.hb.ngu...@dektech.com.au>, <opensaf-devel@lists.sourceforge.net>
>> Subject: Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational
>> message synchronizes with component failover sequence [#2233]
>>
>>  Hi Nagu, Praveen,
>>   Please find my comment in [Minh3]
>>   Thanks,
>>  Minh
>>   On 22/02/17 19:34, Nagendra Kumar wrote:
>>  >>> Since in spec there is no specific discussion for
>> comp-failover recovery for an unassigned comp, I will encourage other
>> maintainers also to provide inputs.
>>  > I do agree for not instantiating failed component before
>> recovery, this keeps the approach similar to SU failover also.
>>  [Minh3]: There's one example of component failover that I would
>> like us
>>  to have a look
>>  - 2N application, SU4/SU5 has active/standby assignment
>> respectively,
>>  each SU has 3 components
>>  - Add a sleep of 10 seconds in clc script start command of first
>>  component C41 of SU4
>>  Steps:
>>  1- Kill C41 to trigger component failover
>>  2- SU4 goes for quiesced assignment
>>  3- SU5 goes for active assignment
>>  4- SU4 is removed its assignment
>>  5- Now there's a pause of 10 seconds due to clc script start, to
>> ensure
>>  that C41 is healthy
>>  6- Next SU4 has standby assignment.
>>From the above example, I think we can see some problems if
>> the
>>  re-instantiation of C41 is delayed:
>>  - Because C41 is faulty, it needs to be restarted ok because its
>> SU has
>>  assignment
>>  - Moving re-instantiation of C41 is further down that means the
>> recovery
>>  will take longer
>>  - What if re-instantiation of C41 leads to instantiation-failed
[Praveen] If AMFND re-instantiate C41 after removal of assignment and it 
moves to instantiation-failed then:
-Node will be rebooted if nodefailfastonterminationfailure=true.
-if nodefailfastonterminationfailure=false then as per section 4.6 page 
212, SU will be marked INST_FAILED and AMF will have to terminate all 
the components. Termination of other components will be easier if they 
do not have assignments or pending assignments.

If C41 is instantiated before removal of assignments and it moves to 
INST_FAILED state, then AMFND will be terminating other comps of SU when 
they are in the middle of quiesced or removal of assignment. So a 
component will have different orders of quiesced/removal/terminate 
callbacks in its mailbox. This will make things complex.

>   Whether or not the C41 has assignment or is unassigned, the
>>  OperState/PresenceState result from re-instantiation of faulty C41
>>  affects to SU4's eligibility for assignment.
[Praveen] Here SU4 will get only fresh assignments after C41 gets 
enabled. For fresh assignments, AMF can choose any of the spare SUs 
available and Su4 will be chosen based on ranks.

At the same time, AMF spec encourages not to choose faulty SUs soon for 
assignments. It is highlighted in SG Auto adjust feature context in 
section 3.6.1.2 Initiation of the Auto-Adjust Procedure for a Service Group:
"
However, if the completion of a recovery/repair operation
has made the service group eligible for auto-adjustment (for example, if 
a node joins the cluster after the repair), it is not so wise to run the 
auto-adjust procedure for the service group involving the newly repaired 
service units immediately. Thus, the service
group-level configuration attribute auto-adjust probation period has 
been introduced (actually, the saAm

Re: [devel] [PATCH 1 of 1] amfd: support restrictions to auto-repair [#2144]

2017-02-28 Thread praveen malviya


Hi,

Attached  02_2144.patch is the patch that improves this patch. With 
02_2144.patch, AMF will not mark a node disabled when:

-its admin state is locked-in and
-sumaintenance attribute is set for all the sus hosted on this node 
including MW.


With this patch, SMF will not get any disabled state notification for 
SUs and node. SMF will have to lock-in the node and mark the sumaintenance 
attribute of all the SUs on that node before rebooting the node for upgrade.


This patch needs to be applied on top of #2144 main patch (attached 
01_2144.patch).

Thanks,
Praveen



On 28-Feb-17 2:36 AM, Alex Jones wrote:

 src/amf/amfd/ndproc.cc |  3 ++-
 src/amf/amfd/sgproc.cc |  6 --
 2 files changed, 6 insertions(+), 3 deletions(-)


This patch implements section 3.11.1.4.2 of AMF spec (Restrictions to
Auto-Repair).

diff --git a/src/amf/amfd/ndproc.cc b/src/amf/amfd/ndproc.cc
--- a/src/amf/amfd/ndproc.cc
+++ b/src/amf/amfd/ndproc.cc
@@ -1141,7 +1141,8 @@ void avd_node_failover(AVD_AVND *node)
avd_node_mark_absent(node);
avd_pg_node_csi_del_all(avd_cb, node);
avd_node_down_mw_susi_failover(avd_cb, node);
-   avd_node_down_appl_susi_failover(avd_cb, node);
+   if (node->saAmfNodeAdminState != SA_AMF_ADMIN_LOCKED_INSTANTIATION)
+   avd_node_down_appl_susi_failover(avd_cb, node);
avd_node_delete_nodeid(node);
TRACE_LEAVE();
 }
diff --git a/src/amf/amfd/sgproc.cc b/src/amf/amfd/sgproc.cc
--- a/src/amf/amfd/sgproc.cc
+++ b/src/amf/amfd/sgproc.cc
@@ -2046,11 +2046,13 @@ void avd_node_down_mw_susi_failover(AVD_
osafassert(avnd->list_of_ncs_su.empty() != true);

for (const auto& i_su : avnd->list_of_ncs_su) {
-   i_su->set_oper_state(SA_AMF_OPERATIONAL_DISABLED);
+   if (avnd->saAmfNodeAdminState != 
SA_AMF_ADMIN_LOCKED_INSTANTIATION) {
+   i_su->set_oper_state(SA_AMF_OPERATIONAL_DISABLED);
+   i_su->disable_comps(SA_AIS_ERR_TIMEOUT);
+   }
i_su->set_pres_state(SA_AMF_PRESENCE_UNINSTANTIATED);
i_su->set_readiness_state(SA_AMF_READINESS_OUT_OF_SERVICE);
i_su->complete_admin_op(SA_AIS_ERR_TIMEOUT);
-   i_su->disable_comps(SA_AIS_ERR_TIMEOUT);

/* Now analyze the service group for the new HA state
 * assignments and send the SU SI assign messages

diff --git a/src/amf/amfd/node.cc b/src/amf/amfd/node.cc
--- a/src/amf/amfd/node.cc
+++ b/src/amf/amfd/node.cc
@@ -1587,4 +1587,16 @@ void avd_node_constructor(void)
avd_class_impl_set("SaAmfNode", nullptr, node_admin_op_cb,
node_ccb_completed_cb, node_ccb_apply_cb);
 }
-
+bool AVD_AVND::is_campaign_set_for_all_sus() const {
+  if (std::all_of(list_of_ncs_su.begin(), list_of_ncs_su.end(),
+[&](AVD_SU *su) -> bool {return su->saAmfSUMaintenanceCampaign.empty() == 
false;})) {
+if (std::all_of(list_of_su.begin(), list_of_su.end(),
+  [&](AVD_SU *su) -> bool {return su->saAmfSUMaintenanceCampaign.empty() 
== false;})) {
+  return true;
+} else {
+  return false;
+}
+  } else {
+return false;
+  }
+}
diff --git a/src/amf/amfd/node.h b/src/amf/amfd/node.h
--- a/src/amf/amfd/node.h
+++ b/src/amf/amfd/node.h
@@ -143,7 +143,7 @@ class AVD_AVND {
   AVD_AMF_NG *admin_ng; /* points to the nodegroup on which admin operation is 
going on.*/
   uint16_t node_up_msg_count; /* to count of node_up msg that director had 
received from this node */
   bool reboot;
-
+  bool is_campaign_set_for_all_sus() const;
   //Member functions.
   void node_sus_termstate_set(bool term_state) const;
  private:
diff --git a/src/amf/amfd/sgproc.cc b/src/amf/amfd/sgproc.cc
--- a/src/amf/amfd/sgproc.cc
+++ b/src/amf/amfd/sgproc.cc
@@ -2092,13 +2092,17 @@ void avd_node_down_mw_susi_failover(AVD_
 * one loop as more than one MW SU per SG in one node is not supported.
 */
osafassert(avnd->list_of_ncs_su.empty() != true);
-
+   bool campaign_set = avnd->is_campaign_set_for_all_sus();
for (const auto& i_su : avnd->list_of_ncs_su) {
+   if ((avnd->saAmfNodeAdminState != 
SA_AMF_ADMIN_LOCKED_INSTANTIATION) || 
+   (campaign_set == false)) {
+   i_su->set_oper_state(SA_AMF_OPERATIONAL_DISABLED);
+   i_su->disable_comps(SA_AIS_ERR_TIMEOUT);
+   }
i_su->set_oper_state(SA_AMF_OPERATIONAL_DISABLED);
i_su->set_pres_state(SA_AMF_PRESENCE_UNINSTANTIATED);
i_su->set_readiness_state(SA_AMF_READINESS_OUT_OF_SERVICE);
i_su->complete_admin_op(SA_AIS_ERR_TIMEOUT);
-   i_su->disable_comps(SA_AIS_ERR_TIMEOUT);
 
/* Now analyze the service group for the new HA state
 * assignments and send the SU SI assign messages
@@ -2142,14 +2146,18 @@ void avd_node_down_appl_susi_failover(AV
 {

Re: [devel] [PATCH 1 of 1] amf: add support for restrictions to auto-repair [#2144]

2017-02-27 Thread praveen malviya

Hi Alex,

Please push acked patch of #2144 by tomorrow as it is in the list of 
pending tickets for FC tag.

This issue can be pushed post FC in a separate patch once the patch gets 
finalized.

Thanks,
Praveen


On 27-Feb-17 12:25 PM, praveen malviya wrote:
> Hi Alex,
>
> Please find some comments inline with [Praveen].
>
> Thanks,
> Praveen
>
> On 25-Feb-17 2:44 AM, Alex Jones wrote:
>>  src/amf/amfd/ndproc.cc |  6 --
>>  1 files changed, 4 insertions(+), 2 deletions(-)
>>
>>
>> This patch adds support for Section 3.11.1.4.2 of AMF B.04.01 spec: 
>> Restrictions
>> to Auto-Repair.
>>
>> diff --git a/src/amf/amfd/ndproc.cc b/src/amf/amfd/ndproc.cc
>> --- a/src/amf/amfd/ndproc.cc
>> +++ b/src/amf/amfd/ndproc.cc
>> @@ -1140,8 +1140,10 @@ void avd_node_failover(AVD_AVND *node)
>>  TRACE_ENTER2("'%s'", node->name.c_str());
>>  avd_node_mark_absent(node);
>>  avd_pg_node_csi_del_all(avd_cb, node);
>> -avd_node_down_mw_susi_failover(avd_cb, node);
>> -avd_node_down_appl_susi_failover(avd_cb, node);
>> +if (node->saAmfNodeAdminState != SA_AMF_ADMIN_LOCKED_INSTANTIATION) {
>> +avd_node_down_mw_susi_failover(avd_cb, node);
> [Praveen] Since the node is in LOCK_IN state there are no applications
> running on it, but failover of middleware components still needs to be
> done.
> One more thing, as per spec AMF has to mark a node disabled when it
> reboots. Since avd_node_failover() is called in both the cases a)when a
> node is rebooted in controlled way and b)when a node reboots because of
> failure of any director component. So this differentiation is still needed.
> Thanks,
> Praveen
>> +avd_node_down_appl_susi_failover(avd_cb, node);
>> +}
>>  avd_node_delete_nodeid(node);
>>  TRACE_LEAVE();
>>  }
>>
>
> --
> Check out the vibrant tech community on one of the world's most
> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> ___
> Opensaf-devel mailing list
> Opensaf-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/opensaf-devel
>

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1 of 1] amf: add support for restrictions to auto-repair [#2144]

2017-02-26 Thread praveen malviya

Hi Alex,

Please find some comments inline with [Praveen].

Thanks,
Praveen

On 25-Feb-17 2:44 AM, Alex Jones wrote:
>  src/amf/amfd/ndproc.cc |  6 --
>  1 files changed, 4 insertions(+), 2 deletions(-)
>
>
> This patch adds support for Section 3.11.1.4.2 of AMF B.04.01 spec: 
> Restrictions
> to Auto-Repair.
>
> diff --git a/src/amf/amfd/ndproc.cc b/src/amf/amfd/ndproc.cc
> --- a/src/amf/amfd/ndproc.cc
> +++ b/src/amf/amfd/ndproc.cc
> @@ -1140,8 +1140,10 @@ void avd_node_failover(AVD_AVND *node)
>   TRACE_ENTER2("'%s'", node->name.c_str());
>   avd_node_mark_absent(node);
>   avd_pg_node_csi_del_all(avd_cb, node);
> - avd_node_down_mw_susi_failover(avd_cb, node);
> - avd_node_down_appl_susi_failover(avd_cb, node);
> + if (node->saAmfNodeAdminState != SA_AMF_ADMIN_LOCKED_INSTANTIATION) {
> + avd_node_down_mw_susi_failover(avd_cb, node);
[Praveen] Since the node is in LOCK_IN state there are no applications 
running on it, but failover of middleware components still needs to be 
done.
One more thing, as per spec AMF has to mark a node disabled when it 
reboots. Since avd_node_failover() is called in both the cases a)when a 
node is rebooted in controlled way and b)when a node reboots because of 
failure of any director component. So this differentiation is still needed.
Thanks,
Praveen
> + avd_node_down_appl_susi_failover(avd_cb, node);
> + }
>   avd_node_delete_nodeid(node);
>   TRACE_LEAVE();
>  }
>

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1 of 1] osaf:fm on new-Active handling amfd up event of peer old-Active node which is going down[#2151]

2017-02-24 Thread praveen malviya



On 24-Feb-17 4:07 PM, ramesh betham wrote:
> Good catch. Hitting the case of fm_peer_down_wait() is very unlikely.
>
> But here fm_peer_down_wait() is called only before fm nid_notifies and
> considering for amfnd-up event too. A rare and race condition can hit
> where fm on upcoming new active receives fm-down event and amfnd is
> still alive.
>
But the if block where cb->amfnd_down is marked false assumes that 
cb->peer_node_id is already set in RED_UP events of IMMD or AVD. Is 
there any guarantee from MDS that RED_UP event will always come before 
normal MDS_UP event?

Thanks,
Praveen

> Thanks,
> Ramesh.
>
> On 2/24/2017 2:18 PM, praveen malviya wrote:
>> Hi Ramesh,
>>
>> One minor query:
>> In RED_UP of peer AVD, newly active SC will reboot itself if peer FM
>> on old active SC is not up. If this is true, then in which situations
>> will the newly active SC wait in fm_peer_down_wait()?
>>
>> Thanks,
>> Praveen
>>
>>
>> On 22-Feb-17 5:00 PM, ramesh.bet...@oracle.com wrote:
>>>  src/fm/fmd/fm_cb.h   |3 +
>>>  src/fm/fmd/fm_evt.h  |2 +-
>>>  src/fm/fmd/fm_main.c |  114 +---
>>>  src/fm/fmd/fm_mds.c  |  173
>>> +++---
>>>  4 files changed, 186 insertions(+), 106 deletions(-)
>>>
>>>
>>> This patch addresses the specific scenario where the new Active is
>>> coming up and has discovered the amfd process on the peer node (which
>>> is going down) is still alive. Here the peer amfd/amfnd is still in
>>> the process of going down i.e., progressing in termination of
>>> application components having big timeouts etc.
>>>
>>> diff --git a/src/fm/fmd/fm_cb.h b/src/fm/fmd/fm_cb.h
>>> --- a/src/fm/fmd/fm_cb.h
>>> +++ b/src/fm/fmd/fm_cb.h
>>> @@ -1,6 +1,7 @@
>>>  /*  -*- OpenSAF  -*-
>>>  *
>>>  * (C) Copyright 2008 The OpenSAF Foundation
>>> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights
>>> reserved.
>>>  *
>>>  * This program is distributed in the hope that it will be useful, but
>>>  * WITHOUT ANY WARRANTY; without even the implied warranty of
>>> MERCHANTABILITY
>>> @@ -107,6 +108,8 @@ typedef struct fm_cb {
>>>  bool use_remote_fencing;
>>>  SaNameT peer_clm_node_name;
>>>  bool peer_node_terminated;
>>> +NCS_SEL_OBJ peer_down_obj;
>>> +int peer_down_await;
>>>  } FM_CB;
>>>
>>>  extern char *role_string[];
>>> diff --git a/src/fm/fmd/fm_evt.h b/src/fm/fmd/fm_evt.h
>>> --- a/src/fm/fmd/fm_evt.h
>>> +++ b/src/fm/fmd/fm_evt.h
>>> @@ -1,6 +1,7 @@
>>>  /*  -*- OpenSAF  -*-
>>>  *
>>>  * (C) Copyright 2008 The OpenSAF Foundation
>>> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights
>>> reserved.
>>>  *
>>>  * This program is distributed in the hope that it will be useful, but
>>>  * WITHOUT ANY WARRANTY; without even the implied warranty of
>>> MERCHANTABILITY
>>> @@ -49,7 +50,6 @@ typedef enum {
>>>  FM_EVT_NODE_DOWN,
>>>  FM_EVT_PEER_UP,
>>>  FM_EVT_RDA_ROLE,
>>> -FM_EVT_SVC_DOWN,
>>>  FM_FSM_EVT_MAX
>>>  } FM_FSM_EVT_CODE;
>>>
>>> diff --git a/src/fm/fmd/fm_main.c b/src/fm/fmd/fm_main.c
>>> --- a/src/fm/fmd/fm_main.c
>>> +++ b/src/fm/fmd/fm_main.c
>>> @@ -1,6 +1,7 @@
>>>  /*  -*- OpenSAF  -*-
>>>  *
>>>  * (C) Copyright 2008 The OpenSAF Foundation
>>> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights
>>> reserved.
>>>  *
>>>  * This program is distributed in the hope that it will be useful, but
>>>  * WITHOUT ANY WARRANTY; without even the implied warranty of
>>> MERCHANTABILITY
>>> @@ -31,6 +32,7 @@ This file contains the main() routine fo
>>>  #include "nid/agent/nid_api.h"
>>>  #include "fm.h"
>>>  #include "base/osaf_time.h"
>>> +#include "base/osaf_poll.h"
>>>
>>>  #define FM_CLM_API_TIMEOUT 100LL
>>>
>>> @@ -71,7 +73,6 @@ void handle_mbx_event(void);
>>>  extern uint32_t fm_amf_init(FM_AMF_CB *fm_amf_cb);
>>>  uint32_t gl_fm_hdl;
>>>  static NCS_SEL_OBJ usr1_sel_obj;
>>> -void fm_proc_svc_down(FM_CB *cb, FM_EVT *fm_mbx_evt);
>>>
>>>  /**
>>>   * USR1 signal is used when AMF wants instantiate us as a
>>> @@ -119,6

Re: [devel] [PATCH 1 of 1] osaf:fm on new-Active handling amfd up event of peer old-Active node which is going down[#2151]

2017-02-24 Thread praveen malviya

Hi Ramesh,

One minor query:
In RED_UP of peer AVD, newly active SC will reboot itself if peer FM on 
old active SC is not up. If this is true, then in which situations will the 
newly active SC wait in fm_peer_down_wait()?

Thanks,
Praveen


On 22-Feb-17 5:00 PM, ramesh.bet...@oracle.com wrote:
>  src/fm/fmd/fm_cb.h   |3 +
>  src/fm/fmd/fm_evt.h  |2 +-
>  src/fm/fmd/fm_main.c |  114 +---
>  src/fm/fmd/fm_mds.c  |  173 
> +++---
>  4 files changed, 186 insertions(+), 106 deletions(-)
>
>
> This patch addresses the specific scenario where the new Active is coming up 
> and has discovered the amfd process on the peer node (which is going down) is 
> still alive. Here the peer amfd/amfnd is still in the process of going down 
> i.e., progressing in termination of application components having big 
> timeouts etc.
>
> diff --git a/src/fm/fmd/fm_cb.h b/src/fm/fmd/fm_cb.h
> --- a/src/fm/fmd/fm_cb.h
> +++ b/src/fm/fmd/fm_cb.h
> @@ -1,6 +1,7 @@
>  /*  -*- OpenSAF  -*-
>  *
>  * (C) Copyright 2008 The OpenSAF Foundation
> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
>  *
>  * This program is distributed in the hope that it will be useful, but
>  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> @@ -107,6 +108,8 @@ typedef struct fm_cb {
>   bool use_remote_fencing;
>   SaNameT peer_clm_node_name;
>   bool peer_node_terminated;
> + NCS_SEL_OBJ peer_down_obj;
> + int peer_down_await;
>  } FM_CB;
>
>  extern char *role_string[];
> diff --git a/src/fm/fmd/fm_evt.h b/src/fm/fmd/fm_evt.h
> --- a/src/fm/fmd/fm_evt.h
> +++ b/src/fm/fmd/fm_evt.h
> @@ -1,6 +1,7 @@
>  /*  -*- OpenSAF  -*-
>  *
>  * (C) Copyright 2008 The OpenSAF Foundation
> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
>  *
>  * This program is distributed in the hope that it will be useful, but
>  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> @@ -49,7 +50,6 @@ typedef enum {
>   FM_EVT_NODE_DOWN,
>   FM_EVT_PEER_UP,
>   FM_EVT_RDA_ROLE,
> - FM_EVT_SVC_DOWN,
>   FM_FSM_EVT_MAX
>  } FM_FSM_EVT_CODE;
>
> diff --git a/src/fm/fmd/fm_main.c b/src/fm/fmd/fm_main.c
> --- a/src/fm/fmd/fm_main.c
> +++ b/src/fm/fmd/fm_main.c
> @@ -1,6 +1,7 @@
>  /*  -*- OpenSAF  -*-
>  *
>  * (C) Copyright 2008 The OpenSAF Foundation
> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
>  *
>  * This program is distributed in the hope that it will be useful, but
>  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> @@ -31,6 +32,7 @@ This file contains the main() routine fo
>  #include "nid/agent/nid_api.h"
>  #include "fm.h"
>  #include "base/osaf_time.h"
> +#include "base/osaf_poll.h"
>
>  #define FM_CLM_API_TIMEOUT 100LL
>
> @@ -71,7 +73,6 @@ void handle_mbx_event(void);
>  extern uint32_t fm_amf_init(FM_AMF_CB *fm_amf_cb);
>  uint32_t gl_fm_hdl;
>  static NCS_SEL_OBJ usr1_sel_obj;
> -void fm_proc_svc_down(FM_CB *cb, FM_EVT *fm_mbx_evt);
>
>  /**
>   * USR1 signal is used when AMF wants instantiate us as a
> @@ -119,6 +120,40 @@ static void rda_cb(uint32_t cb_hdl, PCS_
>   TRACE_LEAVE();
>  }
>
> +/* This function primarily handles the weird situation in a cluster where 
> the controller
> + * node which is coming up identifies the peer node is in the midst of DOWN 
> process (i.e.,
> + * non-existance of peer FM and amfd/amfnd is still alive). In this case, 
> the controller
> + * node has to wait till the peer gracefully shutdowns. This function 
> returns FAILURE if
> + * peer controller node is not down in a timeout period of 
> OPENSAF_TERMTIMEOUT (or 60 secs default).
> + */
> +static uint32_t  fm_peer_down_wait(FM_CB *fm_cb)
> +{
> + char *envVar = NULL;
> + int peer_term_timeout = 60; /*default 60 secs */
> +
> + TRACE_ENTER();
> +
> + /* Hoping that "OPENSAF_TERMTIMEOUT" on both  the controllers shall be 
> the same */
> + if ((envVar = getenv("OPENSAF_TERMTIMEOUT")))
> + peer_term_timeout = atoi(envVar);
> +
> + m_NCS_SEL_OBJ_CREATE(_cb->peer_down_obj);
> + fm_cb->peer_down_await = 1;
> +
> +osaf_poll_one_fd(m_GET_FD_FROM_SEL_OBJ(fm_cb->peer_down_obj), 
> peer_term_timeout*1000);
> +
> + m_NCS_SEL_OBJ_DESTROY(_cb->peer_down_obj);
> + 
> + /* Return failure if peer node is not yet completely down */
> + if(fm_cb->peer_down_await) {
> + LOG_ER("Peer node is not fully DOWN, please check");
> + TRACE_LEAVE();
> + return NCSCC_RC_FAILURE;
> + }
> +
> + TRACE_LEAVE();
> + return NCSCC_RC_SUCCESS;
> +}
>
>  
> /*
>
> @@ -176,6 +211,11 @@ int main(int argc, char *argv[])
>*/
>   fm_cb->control_tipc = true; /* Default behaviour */
>
> + fm_cb->immd_down = true;
> +

Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational message synchronizes with component failover sequence [#2233]

2017-02-21 Thread praveen malviya

Hi Minh,

Please see response with [Praveen]

Thanks
Praveen

On 22-Feb-17 5:39 AM, minh chau wrote:
> Hi Praveen,
>
> Please find my response with [Minh2]
>
> Thanks,
> Minh
>
> On 21/02/17 16:45, praveen malviya wrote:
>> Hi Minh,
>>
>> Please find my response inline with [Praveen].
>>
>> Thanks,
>> Praveen
>>
>> On 20-Feb-17 6:58 AM, minh chau wrote:
>>> Hi Praveen,
>>>
>>> Thanks for your V2 patch, I have tested V2 in scenario of ticket #2233
>>> and #1902, it also can fix the problem.
>>> Here we have 2 solutions:
>>> - The one I sent for review is letting the failed component to be
>>> instantiated, I think it is current behavior. But one change is that
>>> amfnd will not report su operational message to amfd until amfnd
>>> finishes removing the assignment of (faulty) su which contains the
>>> failed component
>>> - The V2 patch postpones the instantiation of failed component. amfnd
>>> will instantiate the failed component (via avnd_err_su_repair) after
>>> amfnd finishes removing the assignment of faulty su.
>>>
>>> So basically the difference is the time that the failed component should
>>> be instantiated.
>>>
>>> Still in item 3.11.1.3.2:
>>> "In a 2N or N+M redundancy model, SI2 also needs to be switched over;
>>> other-wise, the number of active service units would be higher than what
>>> is allowed by the redundancy model. However, in an Nway redundancy
>>> model, SI2 could be left assigned to SU1 (if the saAmfSUFailover
>>> configuration attribute of the ser-vice unit is set to SA_FALSE), and a
>>> repair of C2 should be attempted by reinstantiating it. If the attempt
>>> to instantiate C2 fails, the service unit becomes disabled, and SI2 must
>>> be switched-over; however, if the attempt to instantiate C2 is
>>> successful, SI2 shall remain assigned to SU1, and based on other
>>> configuration parameters and N-way redundancy model semantics, even SI1
>>> might get reassigned to SU1."
>>>
>>> My comment on V2:
>>>
>>> The configuration in #2233 is different from the example in
>>> specification, but it sounds to me the attempt to instantiate failed
>>> component should be done as soon as possible.
>>> The check in V2 patch means the failed component won't be instantiated
>>> if its SU still has any assignment. It should be true to 2N and N+M, but
>>> it's not for other SG. (As the example in specification, S2 does not
>>> have any CSI assigned to failed component C2).
>> [Praveen]As of now we have documented in the PR doc (conformance table
>> section 3.11.1.3 Recovery) that if a component faults with
>> comp-failover recovery then AMFD switch-overs the whole SU for N-Way,
>> N-Way Active and N+M models also. This is just to highlight about
>> other red models. But this documentation is not clear for an
>> unassigned comp.
>> But from the beginning, comp-failover is working this way only.
>> At-least from clean up perspective we have fixed the problem of
>> parallelism in the past in the ticket #474.
>>
>> One more thing I have noted, proxy-proxied implementation is based on
>> B.01.01. As per B.01.01, proxy will register himself and its proxied
>> as soon as it gets instantiated. In a configuration containing both
>> proxy and proxied comp, if the proxy does not get any CSI and it
>> faults with comp-failover recovery then in instantiation phase it may
>> again register its proxy. I think proxy in other SU should register
>> its proxied. I guess, from deployment perspective such a configuration
>> in which a user configures proxy without any CSI may not exists and
>> only possibility is an application modeling a legacy code in NoRed
>> model. However, in the later version of spec B.01.02, proxied was
>> supposed to mention the name of proxy CSI and thus proxy should
>> register only when its get proxy CSI.
>>
>> One more point to be noted comp-failover can also be done as a part of
>> escalation also. If a component is instantiated before the completion
>> of comp-failover recovery and if faults again then it may escalate to
>> node-failover before completion of comp-failover recovery.
>>
>> Since in spec there is no specific discussion for comp-failover
>> recovery for an unassigned comp, I will encourage other maintainers
>> also to provide inputs.
> [Minh2] Yes this was my worry too, that I could break something that has
> been working for long time in this area. If you lo

Re: [devel] [PATCH 1 of 1] amfd: fix default value of saAmfSIPrefActiveAssignments [1190]

2017-02-21 Thread praveen malviya

Hi All,

Please provide your feedback on #1190 (V2), #2252 and #2259.


Thanks,
Praveen





On 20-Feb-17 11:58 AM, praveen malviya wrote:
> Hi Gary,
>
> I think value '0' can be used for saAmfSIPrefActiveAssignments to
> indicate AMFD to use default value as saAmfSGNumPrefAssignedSUs. In all
> other cases current behavior will continue and default value will remain
> 1. In this way there will not be any backward incompatibility issue for
> an old application.
> Since user will be setting saAmfSIPrefActiveAssignments=0 for default
> value saAmfSGNumPrefAssignedSUs , IMMND will also remember it and the
> change will survive cluster reset in a PBE enabled system.
>
> Attached is the patch (1190_v2.patch) on this idea.
>
> Thanks,
> Praveen
>
>
>
> On 01-Feb-17 2:59 PM, praveen malviya wrote:
>>
>>
>> On 01-Feb-17 2:30 PM, Gary Lee wrote:
>>> Hi Praveen
>>>
>>> What if the user reboots the cluster, after modifying
>>> saAmfSIPrefActiveAssignments?
>>>
>> I guess you are talking about the case that a user modifies to 1 in 5.2
>> release and cluster reboots. But I think cluster reboots are done mainly
>> for the cases of upgrades which are not frequently done. For upgrades,
>> CCB modification can be part of campaign itself.
>>
>> What I am getting is all the cases cannot be handled here in AMFD
>> because of schema issue. User will have to take some pain depending upon
>> the use case. Also I guess N-way active red model will not be used for
>> keeping SI assigned to one SU.
>>
>> What do you think?
>>
>> Thanks,
>> Praveen
>>> Thanks
>>>
>>> -Original Message-
>>> From: praveen malviya <praveen.malv...@oracle.com>
>>> Organization: Oracle Corporation
>>> Date: Wednesday, 1 February 2017 at 5:31 pm
>>> To: gary <gary@dektech.com.au>, <hans.nordeb...@ericsson.com>,
>>> <nagendr...@oracle.com>, <minh.c...@dektech.com.au>
>>> Cc: <opensaf-devel@lists.sourceforge.net>
>>> Subject: Re: [PATCH 1 of 1] amfd: fix default value of
>>> saAmfSIPrefActiveAssignments [1190]
>>>
>>>
>>>
>>> On 01-Feb-17 11:20 AM, Gary Lee wrote:
>>> > Hi Praveen
>>> >
>>> > Ack – I think we should make it clear in the PR doc that the
>>> default will be 1 if they leave out ‘saAmfSIPrefActiveAssignments‘
>>> when creating the SI. If they want it to default to
>>> saAmfSGNumPrefAssignedSUs, they must create the SI with
>>> ‘saAmfSIPrefActiveAssignments’ set to blank.
>>> >
>>> This ticket is to honour saAmfSGNumPrefAssignedSUs from now
>>> onwards in
>>> both the cases when it was not configured while creating the SI
>>> or when
>>> it is set to blank.
>>>
>>> I think, I will have to document in this way:
>>> 1) If saAmfSIPrefActiveAssignments is not configured while
>>> creating an
>>> SI, then AMF will use saAmfSGNumPrefAssignedSUs as default value.
>>> 2) If SI is already configured then saAmfSIPrefActiveAssignments
>>> can be
>>> adjusted to default value saAmfSGNumPrefAssignedSUs by setting
>>> saAmfSIPrefActiveAssignments to blank via CCB operation.
>>>
>>> In 1)  immlist will still show value as 1, but AMF will adhere to
>>> saAmfSGNumPrefAssignedSUs. This cannot be corrected because imm.xml
>>> contains a default based on old schema definition:
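>>>   <attr>
>>>  <name>saAmfSIPrefActiveAssignments</name>
>>>  <type>SA_UINT32_T</type>
>>>  <category>SA_CONFIG</category>
>>>  <flag>SA_WRITABLE</flag>
>>>  <default-value>1</default-value>
>>>   </attr>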
>>> So when AMFD reads it from IMM, it does not know whether user has
>>> configured it to 1 or it is IMM that is giving 1 as default value to
>>> AMFD. With this enhancement, on seeing 1 AMFD will consider that
>>> the user has
>>> not configured it and will adhere to default value
>>> saAmfSGNumPrefAssignedSUs.
>>> An old user can still make everything consistent for its old
>>> application
>>> by the provision of setting saAmfSIPrefActiveAssignments to 1
>>> using CCB
>>> Modify operation. If user performs this CCB change then in function
>>> pref_active_assignments() flag
>>> saAmfSIPrefActiveAssignments_configured
>>>

[devel] [PATCH 0 of 1] Review Request for amfd: support si-swap admin op for NplusM model [#2259]

2017-02-21 Thread praveen . malviya

Summary:amfd: support si-swap admin op for NplusM model [#2259]
Review request for Trac Ticket(s): #2259 
Peer Reviewer(s): AMF devs 
Pull request to: <>
Affected branch(es): ALL 
Development branch: <>


Impacted area   Impact y/n

 Docsn
 Build systemn
 RPM/packaging   n
 Configuration files n
 Startup scripts n
 SAF servicesy
 OpenSAF servicesn
 Core libraries  n
 Samples n
 Tests   n
 Other   n


Comments (indicate scope for each "y" above):
-

changeset 5fa5af5649bc85f45c9ba4e21b105002085a12e4
Author: Praveen Malviya <praveen.malv...@oracle.com>
Date:   Tue, 21 Feb 2017 16:21:19 +0530

amfd: support si-swap admin op for NplusM model [#2259]

This version works for the case when all SIs which are active in the
designated SU (where SI to be swapped is active) have their standbys on 
same
SU including the designated SI.

TODO: consider the case when active SIs of designated SU have their 
standbys
on different SUs.


Complete diffstat:
--
 src/amf/amfd/sg.cc |   12 ++-
 src/amf/amfd/sg.h  |4 ++
 src/amf/amfd/sg_npm_fsm.cc |  145 
++
 src/amf/amfd/su.cc |   11 ++
 src/amf/amfd/su.h  |2 +
 5 files changed, 144 insertions(+), 30 deletions(-)


Testing Commands:
-
Tested in configuration attached in the ticket (test_conf.xml):
1)si-swap without fault.

faults when si-swap is going on during HA state transition:
2)compfailover related su (timeout in callback):
-quiesced: PASS 
-active: PASSED.
-standby: sg unstable (This needs to be fixed).
3)compfailover non-related su (comp was killed):
-active faults: PASSED
-standby faults: PASSED
-quiesced faults : PASSED

4)sufailover in non related SU (kill comp):
-quiesced: PASS
-active:  PASS
-standby: PASS
5)sufailover in related su (kill comp):
-quiesced: PASS
-active:  PASSED. 
-standby: PASSED

Testing, Expected Results:
--
In case 2) minor fix is needed when standby comp faults with
comp-failover recovery in realign state. This could be a problem of FSM.
Need to check if some ticket already exists

All other cases passed.

Conditions of Submission:
-
Ack from reviewers.

Arch  Built StartedLinux distro
---
mipsn  n
mips64  n  n
x86 n  n
x86_64  y  y
powerpc n  n
powerpc64   n  n


Reviewer Checklist:
---
[Submitters: make sure that your review doesn't trigger any checkmarks!]


Your checkin has not passed review because (see checked entries):

___ Your RR template is generally incomplete; it has too many blank entries
that need proper data filled in.

___ You have failed to nominate the proper persons for review and push.

___ Your patches do not have proper short+long header

___ You have grammar/spelling in your header that is unacceptable.

___ You have exceeded a sensible line length in your headers/comments/text.

___ You have failed to put in a proper Trac Ticket # into your commits.

___ You have incorrectly put/left internal data in your comments/files
(i.e. internal bug tracking tool IDs, product names etc)

___ You have not given any evidence of testing beyond basic build tests.
Demonstrate some level of runtime or other sanity testing.

___ You have ^M present in some of your files. These have to be removed.

___ You have needlessly changed whitespace or added whitespace crimes
like trailing spaces, or spaces before tabs.

___ You have mixed real technical changes with whitespace and other
cosmetic code cleanup changes. These have to be separate commits.

___ You need to refactor your submission into logical chunks; there is
too much content into a single commit.

___ You have extraneous garbage in your review (merge commits etc)

___ You have giant attachments which should never have been sent;
Instead you should place your content in a public tree to be pulled.

___ You have too many commits attached to an e-mail; resend as threaded
commits, or place in a public tree for a pull.

___ You have resent this content multiple times without a clear indication
of what has changed between each re-send.

___ You have failed to adequately and individually address all of the
comments and change requests that were proposed in the initial review.

___ You have a misconfigured ~/.hgrc file (i.e. username, email etc)

___ Your computer have a badly configu

[devel] [PATCH 1 of 1] amfd: support si-swap admin op for NplusM model [#2259]

2017-02-21 Thread praveen . malviya

 src/amf/amfd/sg.cc |   12 +++-
 src/amf/amfd/sg.h  |4 +
 src/amf/amfd/sg_npm_fsm.cc |  145 -
 src/amf/amfd/su.cc |   11 +++
 src/amf/amfd/su.h  |2 +
 5 files changed, 144 insertions(+), 30 deletions(-)


This version works for the case when all SIs which are active in the
designated SU (where SI to be swapped is active) have their standbys on same SU
including the designated SI.

TODO: consider the case when active SIs of designated SU have their standbys on 
different SUs.

diff --git a/src/amf/amfd/sg.cc b/src/amf/amfd/sg.cc
--- a/src/amf/amfd/sg.cc
+++ b/src/amf/amfd/sg.cc
@@ -2,6 +2,7 @@
  *
  * (C) Copyright 2008 The OpenSAF Foundation
  * (C) Copyright 2017 Ericsson AB - All Rights Reserved.
+ * Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -2274,4 +2275,13 @@ bool AVD_SG::any_assignment_assigned() {
return pending;
 }
 
-
+/*
+ * @brief  Checks if si_equal_distribution is configured for the SG.
+ * @return true/false. 
+*/
+bool AVD_SG::is_equal() const {
+  return(((sg_redundancy_model == SA_AMF_NPM_REDUNDANCY_MODEL) ||
+ (sg_redundancy_model == SA_AMF_N_WAY_REDUNDANCY_MODEL) ||
+ (sg_redundancy_model == SA_AMF_N_WAY_ACTIVE_REDUNDANCY_MODEL)) &&
+ (equal_ranked_su == true) && (saAmfSGAutoAdjust == SA_TRUE));
+}
diff --git a/src/amf/amfd/sg.h b/src/amf/amfd/sg.h
--- a/src/amf/amfd/sg.h
+++ b/src/amf/amfd/sg.h
@@ -2,6 +2,7 @@
  *
  * (C) Copyright 2008 The OpenSAF Foundation
  * (C) Copyright 2017 Ericsson AB - All Rights Reserved.
+ * Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -429,6 +430,8 @@ public:
//Runtime calculates value of saAmfSGNumCurrNonInstantiatedSpareSUs;
uint32_t curr_non_instantiated_spare_sus() const;
bool is_middleware() const {return sg_ncs_spec ? true : false;}
+   //Checks if si_equal_distribution is configured for the SG.
+   bool is_equal() const;
 
 private:
// disallow copy and assign, TODO(hafe) add common macro for this
@@ -514,6 +517,7 @@ public:
struct avd_su_si_rel_tag *susi, AVSV_SUSI_ACT act, 
SaAmfHAStateT state);
 void node_fail_si_oper(AVD_CL_CB *cb, AVD_SU *su);
void ng_admin(AVD_SU *su, AVD_AMF_NG *ng);
+   SaAisErrorT si_swap(AVD_SI *si, SaInvocationT invocation);
 
 private:
 uint32_t su_fault_su_oper(AVD_CL_CB *cb, AVD_SU *su);
diff --git a/src/amf/amfd/sg_npm_fsm.cc b/src/amf/amfd/sg_npm_fsm.cc
--- a/src/amf/amfd/sg_npm_fsm.cc
+++ b/src/amf/amfd/sg_npm_fsm.cc
@@ -1,6 +1,7 @@
 /*  -*- OpenSAF  -*-
  *
  * (C) Copyright 2008 The OpenSAF Foundation
+ * Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -170,7 +171,10 @@ static uint32_t avd_sg_npm_su_chk_snd(AV
}
}
 
+   /*Do not send delete for a susi of s_su if other sisu of same si is in 
q_su.
+ In this case s_su can become active for this SI.*/
for (i_susi = s_su->list_of_susi; i_susi != AVD_SU_SI_REL_NULL; i_susi 
= i_susi->su_next) {
+   
if ((i_susi->si->list_of_sisu != i_susi) && 
(i_susi->si->list_of_sisu->su == q_su))
continue;
 
@@ -964,34 +968,6 @@ uint32_t SG_NPM::si_assign(AVD_CL_CB *cb
return NCSCC_RC_SUCCESS;
 }
 
-/*
- * Function: avd_sg_npm_siswitch_func
- *
- * Purpose:  This function is called when a operator does a SI switch on
- * a SI that belongs to N+M redundancy model SG. 
- * This will trigger a role change action as described in the SG FSM design.
- *
- * Input: cb - the AVD control block
- *si - The pointer to the SI that needs to be switched.
- *
- *
- * Returns: NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.
- *
- * NOTES: This is a N+M redundancy model specific function. The initial 
- * functionality of N+M SI switch is same as 2N switch so it just calls that
- * function.
- *
- * 
- **/
-
-uint32_t avd_sg_npm_siswitch_func(AVD_CL_CB *cb, AVD_SI *si)
-{
-   TRACE_ENTER2("%u", si->sg_of_si->sg_fsm_state);
-   osafassert(0);
-// return avd_sg_2n_siswitch_func(cb, si);
-   return 0;
-}
-
  /*
  * Function: su_fault_su_oper
  *
@@ -1847,6 +1823,7 @@ uint32_t SG_NPM::susi_sucss_sg_reln(AVD_

Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational message synchronizes with component failover sequence [#2233]

2017-02-20 Thread praveen malviya

Hi Minh,

Please find my response inline with [Praveen].

Thanks,
Praveen

On 20-Feb-17 6:58 AM, minh chau wrote:
> Hi Praveen,
>
> Thanks for your V2 patch, I have tested V2 in scenario of ticket #2233
> and #1902, it also can fix the problem.
> Here we have 2 solutions:
> - The one I sent for review is letting the failed component to be
> instantiated, I think it is current behavior. But one change is that
> amfnd will not report su operational message to amfd until amfnd
> finishes removing the assignment of (faulty) su which contains the
> failed component
> - The V2 patch postpones the instantiation of failed component. amfnd
> will instantiate the failed component (via avnd_err_su_repair) after
> amfnd finishes removing the assignment of faulty su.
>
> So basically the difference is the time that the failed component should
> be instantiated.
>
> Still in item 3.11.1.3.2:
> "In a 2N or N+M redundancy model, SI2 also needs to be switched over;
> other-wise, the number of active service units would be higher than what
> is allowed by the redundancy model. However, in an Nway redundancy
> model, SI2 could be left assigned to SU1 (if the saAmfSUFailover
> configuration attribute of the ser-vice unit is set to SA_FALSE), and a
> repair of C2 should be attempted by reinstantiating it. If the attempt
> to instantiate C2 fails, the service unit becomes disabled, and SI2 must
> be switched-over; however, if the attempt to instantiate C2 is
> successful, SI2 shall remain assigned to SU1, and based on other
> configuration parameters and N-way redundancy model semantics, even SI1
> might get reassigned to SU1."
>
> My comment on V2:
>
> The configuration in #2233 is different from the example in
> specification, but it sounds to me the attempt to instantiate failed
> component should be done as soon as possible.
> The check in V2 patch means the failed component won't be instantiated
> if its SU still has any assignment. It should be true to 2N and N+M, but
> it's not for other SG. (As the example in specification, S2 does not
> have any CSI assigned to failed component C2).
[Praveen] As of now we have documented in the PR doc (conformance table 
section 3.11.1.3 Recovery) that if a component faults with comp-failover 
recovery then AMFD switches over the whole SU for N-Way, N-Way Active and 
N+M models also. This is just to highlight the behavior for other 
redundancy models. But this documentation is not clear for an unassigned comp.
But from the beginning, comp-failover has worked this way only. At least 
from the clean-up perspective we have fixed the problem of parallelism in 
the past in ticket #474.

One more thing I have noted: the proxy-proxied implementation is based on 
B.01.01. As per B.01.01, a proxy will register itself and its proxied as 
soon as it gets instantiated. In a configuration containing both proxy 
and proxied comp, if the proxy does not get any CSI and it faults with 
comp-failover recovery then in the instantiation phase it may again register 
its proxy. I think the proxy in the other SU should register its proxied. I 
guess, from a deployment perspective, such a configuration in which a user 
configures a proxy without any CSI may not exist, and the only possibility is 
an application modeling legacy code in the NoRed model. However, in the 
later version of the spec, B.01.02, the proxied was supposed to mention the name 
of the proxy CSI and thus the proxy should register only when it gets the proxy CSI.

One more point to be noted: comp-failover can also be done as a part of 
escalation. If a component is instantiated before the completion of 
comp-failover recovery and it faults again, then it may escalate to 
node-failover before completion of the comp-failover recovery.

Since in spec there is no specific discussion for comp-failover recovery 
for an unassigned comp, I will encourage other maintainers also to 
provide inputs.


Thanks,
Praveen


  Moreover, in the clc.cc,
> amfnd does not check any of si_list.n_nodes, this probably is the logic
> that has being done so far.
>
> Thanks,
> Minh
>
> On 17/02/17 23:16, praveen malviya wrote:
>> Hi Minh,
>>
>> I think we should see this problem from fault management perspective
>> also. Here repair of failed component is performed before the
>> completion of recovery.In the problem, component faulted with
>> comp-failover recovery and it was successfully repaired(instantiated)
>> when SU switch-over was still pending.
>>
>> Now the question is: Why it was never observed earlier? The reason is
>> generally all components are assigned at least one CSI. In the present
>> configuration failed component was not assigned any CSI. When this
>> component was cleaned up and marked UNINSTANTIATED, AMFND sent
>> comp-failover recovery request to AMFD. But after sending recovery
>>

Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational message synchronizes with component failover sequence [#2233]

2017-02-17 Thread praveen malviya


Hi Minh,

I think we should see this problem from the fault management perspective 
also. Here the repair of the failed component is performed before the completion 
of recovery. In the problem, the component faulted with comp-failover 
recovery and it was successfully repaired (instantiated) while the SU 
switch-over was still pending.


Now the question is: why was it never observed earlier? The reason is that 
generally all components are assigned at least one CSI. In the present 
configuration the failed component was not assigned any CSI. When this 
component was cleaned up and marked UNINSTANTIATED, AMFND sent a 
comp-failover recovery request to AMFD. But after sending the recovery 
request, it instantiated the failed comp while the SU still had assignments to be 
switched over. The code related to this assumes that a comp will have 
at least one CSI assigned to it (clc.cc avnd_comp_clc_st_chng_prc(), 
TERMINATING to UNINSTANTIATED if block). For the normal sequence of 
comp-failover, the SU is repaired after removal of assignments in 
avnd_su_si_oper_done() by calling avnd_err_su_repair().


For 2N and N+M, the spec talks (3.11.1.3.2 Fail-Over Recovery Action, page 
195) about switching over all the SIs of the failed SU in case of 
comp-failover recovery, and not for other models. In the current OpenSAF 
implementation we are following this for all models.


I think as a fix we should prevent the failed comp from being instantiated before 
removal of assignments. For this the check in clc.cc can be hardened to 
consider non-assigned comp failures.

Attached is the patch (2233_v2.patch) based on this idea/approach.

Thanks,
Praveen


On 17-Feb-17 1:19 PM, Minh Hon CHAU wrote:

Hi Praveen,

Yes, you are right, I will update the description.

Thanks, Minh

Quoting praveen malviya <praveen.malv...@oracle.com>:


Hi Minh,

One quick question:
Ticket description says:
"Si deps safSi=AmfDemoTwon2 depends safSi=AmfDemoTwon1 depends
safSi=AmfDemoTwon"
But logs are related to without SIdep. Also in the configuration
app3_twon3su3si.xml, SI dep classes are commented.
I think ticket description needs correction as problem is without SI dep.
Please confirm.

Thanks,
Praveen


On 17-Feb-17 10:58 AM, praveen malviya wrote:

Hi Minh,

I have started reviewing this patch.

Thanks,
Praveen

On 15-Feb-17 9:22 AM, minh chau wrote:

Hi all,

Have you had time to review this patch?
It changes the component failover sequence, so I think we need more
time
to look at it.

Thanks,
Minh

On 23/01/17 12:28, Minh Hon Chau wrote:

 src/amf/amfnd/avnd_su.h |   1 +
 src/amf/amfnd/clc.cc|   3 ---
 src/amf/amfnd/di.cc |  12 +++-
 src/amf/amfnd/susm.cc   |  32 +---
 4 files changed, 41 insertions(+), 7 deletions(-)


In case component failover, faulty component will be terminated. When
the reinstantiation
is done, amfnd will send su_oper_message (enabled) to amfd which is
running along with
component failover. In the reported problem, if su_oper_message
(enabled) comes to amfd
before the quiesced assignment response (as part of component failover
sequence) comes to
amfd, then this quiesced assignment response is ignored, thus
component failover will not
finish.

The problem is in function susi_success_sg_realign with act=5,
state=3, amfd always assumes
su having faulty component is OUT_OF_SERVICE. This assumption is true
in most of the time
when su_oper_message (enabled) comes a little later than quiesced
assignment response. In fact
the su_oper_message (enabled) is not designed as part of component
failover sequence, thus it
can come any time during the failover. If amfd is getting a bit busier
with RTA update then
the faulty component has enough time to reinstantiate so that amfnd sends
su_oper_message (enabled)
before quiesced assignment response, the reported problem will be
seen.

This patch hardens the component failover sequence by ensuring the
su_oper_message (enabled) to
be sent after su completes to remove assignment. This approach comes
from the similarity in
su failover, where the su_oper_message (enabled) is sent in repair
phase.

diff --git a/src/amf/amfnd/avnd_su.h b/src/amf/amfnd/avnd_su.h
--- a/src/amf/amfnd/avnd_su.h
+++ b/src/amf/amfnd/avnd_su.h
@@ -393,6 +393,7 @@ extern struct avnd_su_si_rec *avnd_silis
 extern struct avnd_su_si_rec *avnd_silist_getprev(const struct
avnd_su_si_rec *);
 extern struct avnd_su_si_rec *avnd_silist_getlast(void);
 extern bool sufailover_in_progress(const AVND_SU *su);
+extern bool componentfailover_in_progress(const AVND_SU *su);
 extern bool sufailover_during_nodeswitchover(const AVND_SU *su);
 extern bool all_csis_in_removed_state(const AVND_SU *su);
 extern void su_reset_restart_count_in_comps(const struct avnd_cb_tag
*cb, const AVND_SU *su);
diff --git a/src/amf/amfnd/clc.cc b/src/amf/amfnd/clc.cc
--- a/src/amf/amfnd/clc.cc
+++ b/src/amf/amfnd/clc.cc
@@ -2381,9 +2381,6 @@ uint32_t avnd_comp_clc_terming_cleansucc
 (m_AVND_SU_IS_FAILOVER(su))) {
 /* yes, request director to orchestrate co

Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational message synchronizes with component failover sequence [#2233]

2017-02-16 Thread praveen malviya

Hi Minh,

One quick question:
Ticket description says:
"Si deps safSi=AmfDemoTwon2 depends safSi=AmfDemoTwon1 depends 
safSi=AmfDemoTwon"
But the logs are from a configuration without SI dependencies. Also, in the configuration
app3_twon3su3si.xml, the SI dependency classes are commented out.
I think the ticket description needs correction, as the problem occurs without SI dependencies.
Please confirm.

Thanks,
Praveen


On 17-Feb-17 10:58 AM, praveen malviya wrote:
> Hi Minh,
>
> I have started reviewing this patch.
>
> Thanks,
> Praveen
>
> On 15-Feb-17 9:22 AM, minh chau wrote:
>> Hi all,
>>
>> Have you had time to review this patch?
>> It changes the component failover sequence, so I think we need more time
>> to look at it.
>>
>> Thanks,
>> Minh
>>
>> On 23/01/17 12:28, Minh Hon Chau wrote:
>>>   src/amf/amfnd/avnd_su.h |   1 +
>>>   src/amf/amfnd/clc.cc|   3 ---
>>>   src/amf/amfnd/di.cc |  12 +++-
>>>   src/amf/amfnd/susm.cc   |  32 +---
>>>   4 files changed, 41 insertions(+), 7 deletions(-)
>>>
>>>
>>> In case component failover, faulty component will be terminated. When
>>> the reinstantiation
>>> is done, amfnd will send su_oper_message (enabled) to amfd which is
>>> running along with
>>> component failover. In the reported problem, if su_oper_message
>>> (enabled) comes to amfd
>>> before the quiesced assignment response (as part of component failover
>>> sequence) comes to
>>> amfd, then this quiesced assignment response is ignored, thus
>>> component failover will not
>>> finish.
>>>
>>> The problem is in function susi_success_sg_realign with act=5,
>>> state=3, amfd always assumes
>>> su having faulty component is OUT_OF_SERVICE. This assumption is true
>>> in most of the time
>>> when su_oper_message (enabled) comes a little later than quiesced
>>> assignment response. In fact
>>> the su_oper_message (enabled) is not designed as part of component
>>> failover sequence, thus it
>>> can come any time during the failover. If amfd is getting a bit busier
>>> with RTA update then
>>> the faulty component has enough time to reinstantiate so that amfnd sends
>>> su_oper_message (enabled)
>>> before quiesced assignment response, the reported problem will be seen.
>>>
>>> This patch hardens the component failover sequence by ensuring the
>>> su_oper_message (enabled) to
>>> be sent after su completes to remove assignment. This approach comes
>>> from the similarity in
>>> su failover, where the su_oper_message (enabled) is sent in repair phase.
>>>
>>> diff --git a/src/amf/amfnd/avnd_su.h b/src/amf/amfnd/avnd_su.h
>>> --- a/src/amf/amfnd/avnd_su.h
>>> +++ b/src/amf/amfnd/avnd_su.h
>>> @@ -393,6 +393,7 @@ extern struct avnd_su_si_rec *avnd_silis
>>>   extern struct avnd_su_si_rec *avnd_silist_getprev(const struct
>>> avnd_su_si_rec *);
>>>   extern struct avnd_su_si_rec *avnd_silist_getlast(void);
>>>   extern bool sufailover_in_progress(const AVND_SU *su);
>>> +extern bool componentfailover_in_progress(const AVND_SU *su);
>>>   extern bool sufailover_during_nodeswitchover(const AVND_SU *su);
>>>   extern bool all_csis_in_removed_state(const AVND_SU *su);
>>>   extern void su_reset_restart_count_in_comps(const struct avnd_cb_tag
>>> *cb, const AVND_SU *su);
>>> diff --git a/src/amf/amfnd/clc.cc b/src/amf/amfnd/clc.cc
>>> --- a/src/amf/amfnd/clc.cc
>>> +++ b/src/amf/amfnd/clc.cc
>>> @@ -2381,9 +2381,6 @@ uint32_t avnd_comp_clc_terming_cleansucc
>>>   (m_AVND_SU_IS_FAILOVER(su))) {
>>>   /* yes, request director to orchestrate component failover */
>>>   rc = avnd_di_oper_send(cb, su, SA_AMF_COMPONENT_FAILOVER);
>>> -
>>> -//Reset component-failover here. SU failover is reset as part
>>> of REPAIRED admin op.
>>> -m_AVND_SU_FAILOVER_RESET(su);
>>>   }
>>> /*
>>> diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
>>> --- a/src/amf/amfnd/di.cc
>>> +++ b/src/amf/amfnd/di.cc
>>> @@ -894,7 +894,17 @@ uint32_t avnd_di_susi_resp_send(AVND_CB
>>>   }
>>>   m_AVND_SU_ALL_SI_RESET(su);
>>>   }
>>> -
>>> +if (componentfailover_in_progress(su)) {
>>> +if (all_csis_in_removed_state(su) == true) {
>>> +bool is_en;
>>> +

Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational message synchronizes with component failover sequence [#2233]

2017-02-16 Thread praveen malviya

Hi Minh,

I have started reviewing this patch.

Thanks,
Praveen

On 15-Feb-17 9:22 AM, minh chau wrote:
> Hi all,
>
> Have you had time to review this patch?
> It changes the component failover sequence, so I think we need more time
> to look at it.
>
> Thanks,
> Minh
>
> On 23/01/17 12:28, Minh Hon Chau wrote:
>>   src/amf/amfnd/avnd_su.h |   1 +
>>   src/amf/amfnd/clc.cc|   3 ---
>>   src/amf/amfnd/di.cc |  12 +++-
>>   src/amf/amfnd/susm.cc   |  32 +---
>>   4 files changed, 41 insertions(+), 7 deletions(-)
>>
>>
>> In case component failover, faulty component will be terminated. When
>> the reinstantiation
>> is done, amfnd will send su_oper_message (enabled) to amfd which is
>> running along with
>> component failover. In the reported problem, if su_oper_message
>> (enabled) comes to amfd
>> before the quiesced assignment response (as part of component failover
>> sequence) comes to
>> amfd, then this quiesced assignment response is ignored, thus
>> component failover will not
>> finish.
>>
>> The problem is in function susi_success_sg_realign with act=5,
>> state=3, amfd always assumes
>> su having faulty component is OUT_OF_SERVICE. This assumption is true
>> in most of the time
>> when su_oper_message (enabled) comes a little later than quiesced
>> assignment response. In fact
>> the su_oper_message (enabled) is not designed as part of component
>> failover sequence, thus it
>> can come any time during the failover. If amfd is getting a bit busier
>> with RTA update then
>> the faulty component has enough time to reinstantiate so that amfnd sends
>> su_oper_message (enabled)
>> before quiesced assignment response, the reported problem will be seen.
>>
>> This patch hardens the component failover sequence by ensuring the
>> su_oper_message (enabled) to
>> be sent after su completes to remove assignment. This approach comes
>> from the similarity in
>> su failover, where the su_oper_message (enabled) is sent in repair phase.
>>
>> diff --git a/src/amf/amfnd/avnd_su.h b/src/amf/amfnd/avnd_su.h
>> --- a/src/amf/amfnd/avnd_su.h
>> +++ b/src/amf/amfnd/avnd_su.h
>> @@ -393,6 +393,7 @@ extern struct avnd_su_si_rec *avnd_silis
>>   extern struct avnd_su_si_rec *avnd_silist_getprev(const struct
>> avnd_su_si_rec *);
>>   extern struct avnd_su_si_rec *avnd_silist_getlast(void);
>>   extern bool sufailover_in_progress(const AVND_SU *su);
>> +extern bool componentfailover_in_progress(const AVND_SU *su);
>>   extern bool sufailover_during_nodeswitchover(const AVND_SU *su);
>>   extern bool all_csis_in_removed_state(const AVND_SU *su);
>>   extern void su_reset_restart_count_in_comps(const struct avnd_cb_tag
>> *cb, const AVND_SU *su);
>> diff --git a/src/amf/amfnd/clc.cc b/src/amf/amfnd/clc.cc
>> --- a/src/amf/amfnd/clc.cc
>> +++ b/src/amf/amfnd/clc.cc
>> @@ -2381,9 +2381,6 @@ uint32_t avnd_comp_clc_terming_cleansucc
>>   (m_AVND_SU_IS_FAILOVER(su))) {
>>   /* yes, request director to orchestrate component failover */
>>   rc = avnd_di_oper_send(cb, su, SA_AMF_COMPONENT_FAILOVER);
>> -
>> -//Reset component-failover here. SU failover is reset as part
>> of REPAIRED admin op.
>> -m_AVND_SU_FAILOVER_RESET(su);
>>   }
>> /*
>> diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
>> --- a/src/amf/amfnd/di.cc
>> +++ b/src/amf/amfnd/di.cc
>> @@ -894,7 +894,17 @@ uint32_t avnd_di_susi_resp_send(AVND_CB
>>   }
>>   m_AVND_SU_ALL_SI_RESET(su);
>>   }
>> -
>> +if (componentfailover_in_progress(su)) {
>> +if (all_csis_in_removed_state(su) == true) {
>> +bool is_en;
>> +m_AVND_SU_IS_ENABLED(su, is_en);
>> +if (is_en) {
>> +if (avnd_di_oper_send(cb, su, 0) ==
>> NCSCC_RC_SUCCESS) {
>> +m_AVND_SU_FAILOVER_RESET(su);
>> +}
>> +}
>> +}
>> +}
>>   /* free the contents of avnd message */
>>   avnd_msg_content_free(cb, );
>>   diff --git a/src/amf/amfnd/susm.cc b/src/amf/amfnd/susm.cc
>> --- a/src/amf/amfnd/susm.cc
>> +++ b/src/amf/amfnd/susm.cc
>> @@ -1633,10 +1633,22 @@ uint32_t avnd_su_pres_st_chng_prc(AVND_C
>>   m_AVND_SU_IS_ENABLED(su, is_en);
>>   if (true == is_en) {
>>   TRACE("SU oper state is enabled");
>> +// do not send su_oper state if component failover is
>> in progress
>>   m_AVND_SU_OPER_STATE_SET(su,
>> SA_AMF_OPERATIONAL_ENABLED);
>> -rc = avnd_di_oper_send(cb, su, 0);
>> -if (NCSCC_RC_SUCCESS != rc)
>> -goto done;
>> +if (componentfailover_in_progress(su) == true) {
>> +si = reinterpret_cast
>> +(m_NCS_DBLIST_FIND_FIRST(>si_list));
>> +if (si == nullptr ||
>>

1 2 3 4 5 6 7 8 9 >

1 - 100 of 853 matches

Mail list logo