Ack,
Not tested (because I don't have the testbed :-))
Mathi.

> -----Original Message-----
> From: Hans Nordeback [mailto:hans.nordeb...@ericsson.com]
> Sent: Wednesday, November 23, 2016 7:15 PM
> To: Ramesh Babu Betham; Mathivanan Naickan Palanivelu;
> anders.wid...@ericsson.com
> Cc: opensaf-devel@lists.sourceforge.net
> Subject: [PATCH 1 of 1] fm: Add support for differentiating a hung node
> versus a stopped node V3 [#2160]
> 
>  osaf/services/infrastructure/fm/fms/fm_cb.h   |   1 +
>  osaf/services/infrastructure/fm/fms/fm_evt.h  |   1 +
>  osaf/services/infrastructure/fm/fms/fm_main.c |  54
> +++++++++++++++++++++++---
> osaf/services/infrastructure/fm/fms/fm_mds.c  |  12 +++++-
>  4 files changed, 59 insertions(+), 9 deletions(-)
> 
> 
> diff --git a/osaf/services/infrastructure/fm/fms/fm_cb.h
> b/osaf/services/infrastructure/fm/fms/fm_cb.h
> --- a/osaf/services/infrastructure/fm/fms/fm_cb.h
> +++ b/osaf/services/infrastructure/fm/fms/fm_cb.h
> @@ -106,6 +106,7 @@ typedef struct fm_cb {
>       SaClmHandleT clm_hdl;
>       bool use_remote_fencing;
>       SaNameT peer_clm_node_name;
> +     bool peer_node_terminated;
>  } FM_CB;
> 
>  extern char *role_string[];
> diff --git a/osaf/services/infrastructure/fm/fms/fm_evt.h
> b/osaf/services/infrastructure/fm/fms/fm_evt.h
> --- a/osaf/services/infrastructure/fm/fms/fm_evt.h
> +++ b/osaf/services/infrastructure/fm/fms/fm_evt.h
> @@ -21,6 +21,7 @@
>  /* EVT from other GFM over MDS.*/
>  typedef enum {
>       GFM_GFM_EVT_NODE_INFO_EXCHANGE,
> +     GFM_GFM_EVT_PEER_IS_TERMINATING,
>       GFM_GFM_EVT_MAX
>  } GFM_GFM_MSG_TYPE;
> 
> diff --git a/osaf/services/infrastructure/fm/fms/fm_main.c
> b/osaf/services/infrastructure/fm/fms/fm_main.c
> --- a/osaf/services/infrastructure/fm/fms/fm_main.c
> +++ b/osaf/services/infrastructure/fm/fms/fm_main.c
> @@ -59,6 +59,7 @@ char *role_string[] = { "UNDEFINED", "AC  static uint32_t
> fm_agents_startup(void);  static uint32_t fm_get_args(FM_CB *);  static
> uint32_t fms_fms_exchange_node_info(FM_CB *);
> +static uint32_t fms_fms_inform_terminating(FM_CB *fm_cb);
>  static uint32_t fm_nid_notify(uint32_t);  static uint32_t
> fm_tmr_start(FM_TMR *, SaTimeT);  static SaAisErrorT
> get_peer_clm_node_name(NODE_ID); @@ -280,6 +281,7 @@ int main(int
> argc, char *argv[])
>               }
> 
>               if (fds[FD_TERM].revents & POLLIN) {
> +                     fms_fms_inform_terminating(fm_cb);
>                       daemon_exit();
>               }
> 
> @@ -622,8 +624,12 @@ static void fm_mbx_msg_handler(FM_CB *fm
>                                        * node_down event has been
> received.
>                                        */
>                               if (fm_cb->use_remote_fencing) {
> -                                     opensaf_reboot(fm_cb-
> >peer_node_id, (char *)fm_cb->peer_clm_node_name.value,
> -                                                     "Received Node
> Down for peer controller");
> +                                     if (fm_cb->peer_node_terminated
> == false) {
> +                                             opensaf_reboot(fm_cb-
> >peer_node_id, (char *)fm_cb->peer_clm_node_name.value,
> +                                                             "Received
> Node Down for peer controller");
> +                                     } else {
> +                                             LOG_NO("Peer node %s is
> terminated, fencing will not be performed", fm_cb-
> >peer_clm_node_name.value);
> +                                     }
>                               } else {
>                                       opensaf_reboot(fm_cb-
> >peer_node_id, (char *)fm_cb->peer_node_name.value,
>                                                       "Received Node
> Down for peer controller"); @@ -661,11 +667,12 @@ static void
> fm_mbx_msg_handler(FM_CB *fm
> 
>                       LOG_NO("Reseting peer controller node id: %x",
> fm_cb->peer_node_id);
>                       if (fm_cb->use_remote_fencing) {
> -                             LOG_NO("saClmClusterNodeGet succeeded
> node_id 0x%X, clm peer node name %s",
> -                                     fm_mbx_evt->node_id, fm_cb-
> >peer_clm_node_name.value);
> -
> -                             opensaf_reboot(fm_cb->peer_node_id,
> (char *)fm_cb->peer_clm_node_name.value,
> -                                             "Received Node Down for
> peer controller");
> +                             if (fm_cb->peer_node_terminated == false) {
> +                                     opensaf_reboot(fm_cb-
> >peer_node_id, (char *)fm_cb->peer_clm_node_name.value,
> +                                                     "Received Node
> Down for peer controller");
> +                             } else {
> +                                     LOG_NO("Peer node %s is
> terminated, fencing will not be performed", fm_cb-
> >peer_clm_node_name.value);
> +                             }
>                       } else {
>                               opensaf_reboot(fm_cb->peer_node_id,
> (char *)fm_cb->peer_node_name.value,
>                                              "Received Node Down for Active
> peer"); @@ -868,6 +875,39 @@ static uint32_t fms_fms_exchange_node_in
> }
> 
> 
> /**********************************************************
> ******************
> +* Name          : fms_fms_inform_terminating
> +*
> +* Description   : sends information to peer that terminating is undergoing.
> +*
> +* Arguments     : Pointer to Control Block.
> +*
> +* Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.
> +*
> +* Notes         : None.
> +*********************************************************
> **************
> +******/ static uint32_t fms_fms_inform_terminating(FM_CB *fm_cb) {
> +     GFM_GFM_MSG gfm_msg;
> +     TRACE_ENTER();
> +     if (fm_cb->peer_adest != 0) {
> +/* peer fms present */
> +             memset(&gfm_msg, 0, sizeof(GFM_GFM_MSG));
> +             gfm_msg.msg_type =
> GFM_GFM_EVT_PEER_IS_TERMINATING;
> +
> +             if (NCSCC_RC_SUCCESS != fm_mds_async_send(fm_cb,
> (NCSCONTEXT)&gfm_msg,
> +
> NCSMDS_SVC_ID_GFM, MDS_SEND_PRIORITY_VERY_HIGH,
> +                                                       0, fm_cb-
> >peer_adest, 0)) {
> +                     syslog(LOG_ERR, "Sending node-info message to
> peer fms failed");
> +                     return NCSCC_RC_FAILURE;
> +             }
> +
> +             return NCSCC_RC_SUCCESS;
> +     }
> +     TRACE_LEAVE();
> +     return NCSCC_RC_FAILURE;
> +}
> +
> +/*********************************************************
> *************
> +******
>  * Name          : fm_nid_notify
>  *
>  * Description   : Sends notification to NID
> diff --git a/osaf/services/infrastructure/fm/fms/fm_mds.c
> b/osaf/services/infrastructure/fm/fms/fm_mds.c
> --- a/osaf/services/infrastructure/fm/fms/fm_mds.c
> +++ b/osaf/services/infrastructure/fm/fms/fm_mds.c
> @@ -474,6 +474,7 @@ static uint32_t fm_mds_svc_evt(FM_CB *cb
>                               }
>                               cb->peer_adest = svc_evt->i_dest;
>                               cb->peer_node_id = svc_evt->i_node_id;
> +                             cb->peer_node_terminated = false;
>                               return_val =
> fm_fill_mds_evt_post_fm_mbx(cb, fm_evt, cb->peer_node_id,
> FM_EVT_PEER_UP);
> 
>                               if (NCSCC_RC_FAILURE == return_val) { @@ -
> 533,7 +534,9 @@ static uint32_t fm_mds_rcv_evt(FM_CB *cb
>                              cb->peer_node_name.length);
>                       LOG_IN("Peer Node_id  %u : EE_ID %s", cb-
> >peer_node_id, cb->peer_node_name.value);
>                       break;
> -
> +             case GFM_GFM_EVT_PEER_IS_TERMINATING:
> +                     fm_cb->peer_node_terminated = true;
> +                     break;
>               default:
>                       syslog(LOG_INFO, "Wrong MDS event from GFM.");
>                       return_val = NCSCC_RC_FAILURE;
> @@ -768,7 +771,9 @@ static uint32_t fm_fm_mds_enc(MDS_CALLBA
>               ncs_encode_n_octets_in_uba(uba, msg-
> >info.node_info.node_name.value,
>                                          (uint32_t)msg-
> >info.node_info.node_name.length);
>               break;
> -
> +     case GFM_GFM_EVT_PEER_IS_TERMINATING:
> +             fm_cb->peer_node_terminated = true;
> +             break;
>       default:
>               syslog(LOG_INFO, "fm_fm_mds_enc: Invalid msg type for
> encode.");
>               return m_LEAP_DBG_SINK(NCSCC_RC_FAILURE);
> @@ -830,6 +835,9 @@ static uint32_t fm_fm_mds_dec(MDS_CALLBA
>               ncs_decode_n_octets_from_uba(uba, msg-
> >info.node_info.node_name.value,
>                                            msg-
> >info.node_info.node_name.length);
>               break;
> +     case GFM_GFM_EVT_PEER_IS_TERMINATING:
> +             fm_cb->peer_node_terminated = true;
> +             break;
>       default:
>               syslog(LOG_INFO, "fm_fm_mds_dec: Invalid msg for
> decoding.");
>               return m_LEAP_DBG_SINK(NCSCC_RC_FAILURE);

------------------------------------------------------------------------------
Developer Access Program for Intel Xeon Phi Processors
Access to Intel Xeon Phi processor-based developer platforms.
With one year of Intel Parallel Studio XE.
Training and support from Colfax.
Order your platform today. http://sdm.link/xeonphi
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to