src/amf/amfd/comp.cc     |   5 ++-
 src/amf/amfd/comptype.cc |   5 ++-
 src/amf/amfd/sgproc.cc   |  49 ++++++++++++++++++++++++++++++++++++
 src/amf/amfd/util.cc     |  40 ++++++++++++++++++++++++++++++
 src/amf/amfd/util.h      |   2 +
 src/amf/amfnd/cpm.cc     |   3 +-
 src/amf/amfnd/err.cc     |  64 +++++++++++++++++++++++++++++++++++------------
 7 files changed, 146 insertions(+), 22 deletions(-)


Add support for the cluster reset recovery action (SA_AMF_CLUSTER_RESET = 7)
described in B.04.01 section 3.11.1.3.4, "Cluster Reset Recovery Action".

It can be used in either of two ways:
-set saAmfCompRecoveryOnError or saAmfCtDefRecoveryOnError in the
 application configuration, or
-pass it as an argument to one of the APIs saAmfPmStart(),
 saAmfComponentErrorReport(), saAmfPmStart_3(), saAmfHealthcheckStart()
 and saAmfComponentErrorReport_4().

TODO: AMFD will have to raise an alarm for cluster reset.

diff --git a/src/amf/amfd/comp.cc b/src/amf/amfd/comp.cc
--- a/src/amf/amfd/comp.cc
+++ b/src/amf/amfd/comp.cc
@@ -2,6 +2,7 @@
  *
  * (C) Copyright 2008 The OpenSAF Foundation
  * (C) Copyright 2017 Ericsson AB - All Rights Reserved.
+ * Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -368,7 +369,7 @@ static int is_config_valid(const std::st
 
        rc = 
immutil_getAttr(const_cast<SaImmAttrNameT>("saAmfCompRecoveryOnError"), 
attributes, 0, &value);
        if (rc == SA_AIS_OK) {
-               if ((value < SA_AMF_NO_RECOMMENDATION) || (value > 
SA_AMF_NODE_FAILFAST)) {
+               if ((value < SA_AMF_NO_RECOMMENDATION) || (value > 
SA_AMF_CLUSTER_RESET)) {
                        report_ccb_validation_error(opdata, 
"Illegal/unsupported saAmfCompRecoveryOnError value %u for '%s'",
                                   value, dn.c_str());
                        return 0;
@@ -1186,7 +1187,7 @@ static SaAisErrorT ccb_completed_modify_
                        if (value_is_deleted == true)
                                continue;
                        uint32_t recovery = *((SaUint32T *)value);
-                       if ((recovery < SA_AMF_NO_RECOMMENDATION) || (recovery 
> SA_AMF_CONTAINER_RESTART )) {
+                       if ((recovery < SA_AMF_NO_RECOMMENDATION) || (recovery 
> SA_AMF_CLUSTER_RESET)) {
                                report_ccb_validation_error(opdata, 
"Modification of saAmfCompRecoveryOnError Fail,"
                                                " Invalid recovery 
=%d",recovery);
                                goto done;
diff --git a/src/amf/amfd/comptype.cc b/src/amf/amfd/comptype.cc
--- a/src/amf/amfd/comptype.cc
+++ b/src/amf/amfd/comptype.cc
@@ -1,6 +1,7 @@
 /*      -*- OpenSAF  -*-
  *
  * (C) Copyright 2008 The OpenSAF Foundation
+ * Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -335,7 +336,7 @@ static bool config_is_valid(const std::s
        rc = 
immutil_getAttr(const_cast<SaImmAttrNameT>("saAmfCtDefRecoveryOnError"), 
attributes, 0, &value);
        osafassert(rc == SA_AIS_OK);
 
-       if ((value < SA_AMF_NO_RECOMMENDATION) || (value > 
SA_AMF_NODE_FAILFAST)) {
+       if ((value < SA_AMF_NO_RECOMMENDATION) || (value > 
SA_AMF_CLUSTER_RESET)) {
                report_ccb_validation_error(opdata, "Illegal/unsupported 
saAmfCtDefRecoveryOnError value %u for '%s'",
                                value, dn.c_str());
                return false;
@@ -646,7 +647,7 @@ static SaAisErrorT ccb_completed_modify_
                                goto done;
                        }
                        uint32_t value = *((SaUint32T 
*)mod->modAttr.attrValues[0]);
-                       if ((value < SA_AMF_COMPONENT_RESTART) || (value > 
SA_AMF_NODE_FAILFAST)) {
+                       if ((value < SA_AMF_COMPONENT_RESTART) || (value > 
SA_AMF_CLUSTER_RESET)) {
                                report_ccb_validation_error(opdata,
                                        "Invalid saAmfCtDefRecoveryOnError for 
'%s'", dn);
                                rc = SA_AIS_ERR_BAD_OPERATION;
diff --git a/src/amf/amfd/sgproc.cc b/src/amf/amfd/sgproc.cc
--- a/src/amf/amfd/sgproc.cc
+++ b/src/amf/amfd/sgproc.cc
@@ -1,6 +1,7 @@
 /*      -*- OpenSAF  -*-
  *
  * (C) Copyright 2008 The OpenSAF Foundation
+ * Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -626,6 +627,46 @@ static void perform_nodeswitchover_recov
 done:
        TRACE_LEAVE();
 }
+
+/**
+ * @brief       Performs Cluster reset recovery: a reboot message is sent
+ *              directly (avd_send_reboot_msg_directly()) to the nodes in
+ *              node_id_db.
+ *
+ *              Payloads are handled first, then the standby controller (if
+ *              it is found) and finally the active controller itself. A
+ *              failed send is only traced; the remaining nodes are still
+ *              tried.
+ **/
+static void perform_cluster_reset_recovery() {
+  TRACE_ENTER();
+
+  // First reboot payloads; both controllers are skipped in this loop.
+  for (const auto &entry : *node_id_db) {
+    AVD_AVND *payload = entry.second;
+    if ((payload->node_info.nodeId == avd_cb->node_id_avd) ||
+        (payload->node_info.nodeId == avd_cb->node_id_avd_other))
+      continue;
+    TRACE_1("node:'%s', nodeId:%x", payload->name.c_str(),
+            payload->node_info.nodeId);
+    if (avd_send_reboot_msg_directly(payload) != NCSCC_RC_SUCCESS)
+      TRACE_1("Send failed for Reboot msg to payload.");
+  }
+
+  // Send for standby.
+  AVD_AVND *standby = avd_node_find_nodeid(avd_cb->node_id_avd_other);
+  if ((standby != nullptr) &&
+      (avd_send_reboot_msg_directly(standby) != NCSCC_RC_SUCCESS))
+    TRACE_1("Send failed for Reboot msg to standby.");
+
+  // Send for self, after the messages for all other nodes are handed over.
+  AVD_AVND *self = avd_node_find_nodeid(avd_cb->node_id_avd);
+  osafassert(self != nullptr);
+  if (avd_send_reboot_msg_directly(self) != NCSCC_RC_SUCCESS)
+    TRACE_1("Send failed for Reboot msg to active.");
+
+  TRACE_LEAVE();
+}
 /*****************************************************************************
  * Function: avd_su_oper_state_func
  *
@@ -692,6 +733,8 @@ void avd_su_oper_state_evh(AVD_CL_CB *cb
        } else if (n2d_msg->msg_info.n2d_opr_state.rec_rcvr.saf_amf == 
SA_AMF_NODE_FAILOVER) {
                saflog(LOG_NOTICE, amfSvcUsrName, "Node Fail-Over requested by 
'%s'",
                           node->name.c_str());
+       } else if (n2d_msg->msg_info.n2d_opr_state.rec_rcvr.saf_amf == 
SA_AMF_CLUSTER_RESET) {
+               saflog(LOG_NOTICE, amfSvcUsrName, "Cluster reset requested by 
'%s'", node->name.c_str());
        }
 
        /* Verify that the SU and node oper state is diabled and rcvr is 
failfast */
@@ -819,6 +862,12 @@ void avd_su_oper_state_evh(AVD_CL_CB *cb
                                        
perform_nodeswitchover_recovery(su->su_on_node);
                                        goto done;
                                        break;
+                               case SA_AMF_CLUSTER_RESET:
+                                       perform_cluster_reset_recovery();
+                                       LOG_WA("Wait for reboot");
+                                       for (;;) 
+                                               sleep(1);
+                                       break;
                                default :
                                        break;
                                }
diff --git a/src/amf/amfd/util.cc b/src/amf/amfd/util.cc
--- a/src/amf/amfd/util.cc
+++ b/src/amf/amfd/util.cc
@@ -1,6 +1,7 @@
 /*      -*- OpenSAF  -*-
  *
  * (C) Copyright 2008 The OpenSAF Foundation
+ * Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -2125,3 +2126,42 @@ void avd_association_namet_init(const st
   child.erase(std::remove(child.begin(), child.end(), '\\'), child.end());
 }
 
+/**
+ * @brief  Sends reboot msg to node directly without queueing it up in
+ *        AMFD message queue.
+ * @param  node  ptr to AVD_AVND of the node to reboot. Its adest must be
+ *        non-zero; its snd_msg_id is incremented for the message.
+ * @return NCSCC_RC_FAILURE if the node has no adest, otherwise the value
+ *        returned by ncsmds_api() (NCSCC_RC_SUCCESS when the send worked).
+ **/
+uint32_t avd_send_reboot_msg_directly(AVD_AVND *node) {
+  // Without a destination there is nothing to send, so fail early.
+  if (node->adest == 0) {
+    LOG_WA("Invalid adest for %x, msg type %u",
+      node->node_info.nodeId, AVSV_D2N_REBOOT_MSG);
+    return NCSCC_RC_FAILURE;
+  }
+
+  AVD_DND_MSG *reboot_msg = new AVD_DND_MSG();
+  reboot_msg->msg_type = AVSV_D2N_REBOOT_MSG;
+  reboot_msg->msg_info.d2n_reboot_info.node_id = node->node_info.nodeId;
+  reboot_msg->msg_info.d2n_reboot_info.msg_id = ++(node->snd_msg_id);
+
+  TRACE("Sending REBOOT MSG to %x", node->node_info.nodeId);
+
+  NCSMDS_INFO send_req = {0};
+  send_req.i_mds_hdl = avd_cb->adest_hdl;
+  send_req.i_svc_id = NCSMDS_SVC_ID_AVD;
+  send_req.i_op = MDS_SEND;
+  send_req.info.svc_send.i_msg = (NCSCONTEXT)reboot_msg;
+  send_req.info.svc_send.i_to_svc = NCSMDS_SVC_ID_AVND;
+  send_req.info.svc_send.i_priority = MDS_SEND_PRIORITY_HIGH;
+  send_req.info.svc_send.i_sendtype = MDS_SENDTYPE_SND;
+  send_req.info.svc_send.info.snd.i_to_dest = node->adest;
+
+  const uint32_t rc = ncsmds_api(&send_req);
+  if (rc != NCSCC_RC_SUCCESS)
+    LOG_ER("ncsmds_api failed %u", rc);
+  delete reboot_msg;
+  return rc;
+}
diff --git a/src/amf/amfd/util.h b/src/amf/amfd/util.h
--- a/src/amf/amfd/util.h
+++ b/src/amf/amfd/util.h
@@ -1,6 +1,7 @@
 /*      -*- OpenSAF  -*-
  *
  * (C) Copyright 2008 The OpenSAF Foundation
+ * Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -104,5 +105,6 @@ extern const char *admin_op_name(SaAmfAd
 int compare_sanamet(const std::string& lhs, const std::string& rhs);
 uint32_t avd_snd_compcsi_msg(AVD_COMP *comp, AVD_CSI *csi,
                avd_comp_csi_rel_tag *compcsi, AVSV_COMPCSI_ACT act);
+uint32_t avd_send_reboot_msg_directly(AVD_AVND *node);
 
 #endif  // AMF_AMFD_UTIL_H_
diff --git a/src/amf/amfnd/cpm.cc b/src/amf/amfnd/cpm.cc
--- a/src/amf/amfnd/cpm.cc
+++ b/src/amf/amfnd/cpm.cc
@@ -1,6 +1,7 @@
 /*      -*- OpenSAF  -*-
  *
  * (C) Copyright 2008 The OpenSAF Foundation
+ * Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -556,7 +557,7 @@ void avnd_comp_pm_param_val(AVND_CB *cb,
                                *o_amf_rc = SA_AIS_ERR_NOT_EXIST;
                                return;
                        }
-                       if ((pm_start->rec_rcvr.saf_amf >= 
SA_AMF_CLUSTER_RESET) && 
+                       if ((pm_start->rec_rcvr.saf_amf > SA_AMF_CLUSTER_RESET) 
&& 
                                        (pm_start->rec_rcvr.saf_amf <= 
SA_AMF_CONTAINER_RESTART)) {
                                *o_amf_rc = SA_AIS_ERR_NOT_SUPPORTED;
                                return;
diff --git a/src/amf/amfnd/err.cc b/src/amf/amfnd/err.cc
--- a/src/amf/amfnd/err.cc
+++ b/src/amf/amfnd/err.cc
@@ -1,6 +1,7 @@
 /*      -*- OpenSAF  -*-
  *
  * (C) Copyright 2008 The OpenSAF Foundation
+ * Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -189,11 +190,15 @@ uint32_t avnd_evt_ava_err_rep_evh(AVND_C
                        amf_rc = SA_AIS_ERR_INVALID_PARAM;
        }
 
-       if(comp && ((err_rep->rec_rcvr.saf_amf == SA_AMF_CLUSTER_RESET) || 
-                       (err_rep->rec_rcvr.saf_amf == 
SA_AMF_APPLICATION_RESTART)|| 
+       if(comp && ((err_rep->rec_rcvr.saf_amf == SA_AMF_APPLICATION_RESTART)|| 
                        (err_rep->rec_rcvr.saf_amf == 
SA_AMF_CONTAINER_RESTART)))
                amf_rc = SA_AIS_ERR_NOT_SUPPORTED;
 
+       if (comp && (comp->su->is_ncs == true) &&
+                       (err_rep->rec_rcvr.saf_amf == SA_AMF_CLUSTER_RESET)) {
+               LOG_NO("Cluster Reset recovery not supported for MW components 
'%s'", comp->name.c_str());
+               amf_rc = SA_AIS_ERR_NOT_SUPPORTED;
+       }
        /* send the response back to AvA */
        rc = avnd_amf_resp_send(cb, AVSV_AMF_ERR_REP, amf_rc, 0, 
&api_info->dest, &evt->mds_ctxt, comp, msg_from_avnd);
 
@@ -296,6 +301,45 @@ uint32_t avnd_evt_ava_err_clear_evh(AVND
        return rc;
 }
 
+/**
+ * @brief Performs cluster reset recovery action: flags the failed component
+ *        and its SU as failed and disabled, runs the component cleanup and
+ *        sends the SU oper state with SA_AMF_CLUSTER_RESET recovery.
+ *
+ * @param cb: ptr to AvND control block.
+ * @param failed_su: ptr to the SU which contains the failed component.
+ * @param failed_comp: ptr to failed component.
+ *
+ * @return NCSCC_RC_SUCCESS, or the return code of the first step that failed
+ *         (the remaining steps are then skipped).
+ */
+static uint32_t avnd_err_rcvr_cluster_reset(AVND_CB *cb, AVND_SU *failed_su,
+                                            AVND_COMP *failed_comp) {
+  TRACE_ENTER();
+  m_AVND_COMP_FAILED_SET(failed_comp);
+  m_AVND_SU_FAILED_SET(failed_su);
+  m_AVND_COMP_OPER_STATE_SET(failed_comp, SA_AMF_OPERATIONAL_DISABLED);
+
+  // Each step below runs only if everything before it succeeded.
+  uint32_t rc = avnd_comp_oper_state_avd_sync(cb, failed_comp);
+  if (rc == NCSCC_RC_SUCCESS)
+    rc = avnd_comp_curr_info_del(cb, failed_comp);
+
+  // AMFD will not send any assignments, so clean up PI/NPI comp.
+  if (rc == NCSCC_RC_SUCCESS)
+    rc = avnd_comp_clc_fsm_run(cb, failed_comp,
+                               AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
+
+  if (rc == NCSCC_RC_SUCCESS) {
+    cb->oper_state = SA_AMF_OPERATIONAL_DISABLED;
+    m_AVND_SU_OPER_STATE_SET(failed_su, SA_AMF_OPERATIONAL_DISABLED);
+    rc = avnd_di_oper_send(cb, failed_su, SA_AMF_CLUSTER_RESET);
+  }
+
+  TRACE_LEAVE2("%u", rc);
+  return rc;
+}
+
 /****************************************************************************
   Name          : avnd_err_process
  
@@ -532,20 +576,6 @@ uint32_t avnd_err_recover(AVND_CB *cb, A
                return rc;
        }
 
-       /* if we are already inst-failed,  do nothing */
-       if ((su->pres == SA_AMF_PRESENCE_INSTANTIATION_FAILED) &&
-           (comp->pres == SA_AMF_PRESENCE_TERMINATING) && (rcvr != 
SA_AMF_NODE_FAILOVER)
-           && (rcvr != SA_AMF_NODE_FAILFAST)) {
-               rc = avnd_comp_clc_fsm_run(cb, comp, 
AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
-               return rc;
-       }
-
-       /* if we are already terminating do nothing */
-       if ((comp->pres == SA_AMF_PRESENCE_TERMINATING) && (rcvr == 
SA_AMF_COMPONENT_RESTART)) {
-               rc = avnd_comp_clc_fsm_run(cb, comp, 
AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
-               return rc;
-       }
-
        /* When SU is in TERMINATING state, higher level recovery 
(SA_AMF_NODE_FAILOVER, 
           SA_AMF_NODE_FAILFAST and SA_AMF_NODE_SWITCHOVER) should be processed 
because higher 
           level recovery will terminate the component. If the faulted 
component has recovery 
@@ -595,7 +625,7 @@ uint32_t avnd_err_recover(AVND_CB *cb, A
                break;
 
        case SA_AMF_CLUSTER_RESET:
-               /* not supported */
+               rc = avnd_err_rcvr_cluster_reset(cb, su, comp);
                break;
 
        case AVSV_ERR_RCVR_SU_RESTART:

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to