Ack with comments; see the inline remarks marked [AndersW].
Regards,
Anders Widell
On 11/23/2016 02:45 PM, Hans Nordeback wrote:
> osaf/services/infrastructure/fm/fms/fm_cb.h | 1 +
> osaf/services/infrastructure/fm/fms/fm_evt.h | 1 +
> osaf/services/infrastructure/fm/fms/fm_main.c | 54
> +++++++++++++++++++++++---
> osaf/services/infrastructure/fm/fms/fm_mds.c | 12 +++++-
> 4 files changed, 59 insertions(+), 9 deletions(-)
>
>
> diff --git a/osaf/services/infrastructure/fm/fms/fm_cb.h
> b/osaf/services/infrastructure/fm/fms/fm_cb.h
> --- a/osaf/services/infrastructure/fm/fms/fm_cb.h
> +++ b/osaf/services/infrastructure/fm/fms/fm_cb.h
> @@ -106,6 +106,7 @@ typedef struct fm_cb {
> SaClmHandleT clm_hdl;
> bool use_remote_fencing;
> SaNameT peer_clm_node_name;
> + bool peer_node_terminated;
> } FM_CB;
>
> extern char *role_string[];
> diff --git a/osaf/services/infrastructure/fm/fms/fm_evt.h
> b/osaf/services/infrastructure/fm/fms/fm_evt.h
> --- a/osaf/services/infrastructure/fm/fms/fm_evt.h
> +++ b/osaf/services/infrastructure/fm/fms/fm_evt.h
> @@ -21,6 +21,7 @@
> /* EVT from other GFM over MDS.*/
> typedef enum {
> GFM_GFM_EVT_NODE_INFO_EXCHANGE,
> + GFM_GFM_EVT_PEER_IS_TERMINATING,
> GFM_GFM_EVT_MAX
> } GFM_GFM_MSG_TYPE;
>
> diff --git a/osaf/services/infrastructure/fm/fms/fm_main.c
> b/osaf/services/infrastructure/fm/fms/fm_main.c
> --- a/osaf/services/infrastructure/fm/fms/fm_main.c
> +++ b/osaf/services/infrastructure/fm/fms/fm_main.c
> @@ -59,6 +59,7 @@ char *role_string[] = { "UNDEFINED", "AC
> static uint32_t fm_agents_startup(void);
> static uint32_t fm_get_args(FM_CB *);
> static uint32_t fms_fms_exchange_node_info(FM_CB *);
> +static uint32_t fms_fms_inform_terminating(FM_CB *fm_cb);
> static uint32_t fm_nid_notify(uint32_t);
> static uint32_t fm_tmr_start(FM_TMR *, SaTimeT);
> static SaAisErrorT get_peer_clm_node_name(NODE_ID);
> @@ -280,6 +281,7 @@ int main(int argc, char *argv[])
> }
>
> if (fds[FD_TERM].revents & POLLIN) {
> + fms_fms_inform_terminating(fm_cb);
> daemon_exit();
> }
>
> @@ -622,8 +624,12 @@ static void fm_mbx_msg_handler(FM_CB *fm
> * node_down event has been received.
> */
> if (fm_cb->use_remote_fencing) {
> - opensaf_reboot(fm_cb->peer_node_id,
> (char *)fm_cb->peer_clm_node_name.value,
> - "Received Node Down for
> peer controller");
> + if (fm_cb->peer_node_terminated ==
> false) {
> +
> opensaf_reboot(fm_cb->peer_node_id, (char *)fm_cb->peer_clm_node_name.value,
> + "Received Node
> Down for peer controller");
> + } else {
> + LOG_NO("Peer node %s is
> terminated, fencing will not be performed", fm_cb->peer_clm_node_name.value);
> + }
> } else {
> opensaf_reboot(fm_cb->peer_node_id,
> (char *)fm_cb->peer_node_name.value,
> "Received Node Down for
> peer controller");
> @@ -661,11 +667,12 @@ static void fm_mbx_msg_handler(FM_CB *fm
>
> LOG_NO("Reseting peer controller node id: %x",
> fm_cb->peer_node_id);
> if (fm_cb->use_remote_fencing) {
> - LOG_NO("saClmClusterNodeGet succeeded node_id
> 0x%X, clm peer node name %s",
> - fm_mbx_evt->node_id,
> fm_cb->peer_clm_node_name.value);
> -
> - opensaf_reboot(fm_cb->peer_node_id, (char
> *)fm_cb->peer_clm_node_name.value,
> - "Received Node Down for peer
> controller");
> + if (fm_cb->peer_node_terminated == false) {
> + opensaf_reboot(fm_cb->peer_node_id,
> (char *)fm_cb->peer_clm_node_name.value,
> + "Received Node Down for
> peer controller");
> + } else {
> + LOG_NO("Peer node %s is terminated,
> fencing will not be performed", fm_cb->peer_clm_node_name.value);
> + }
> } else {
> opensaf_reboot(fm_cb->peer_node_id, (char
> *)fm_cb->peer_node_name.value,
> "Received Node Down for Active
> peer");
> @@ -868,6 +875,39 @@ static uint32_t fms_fms_exchange_node_in
> }
>
>
> /****************************************************************************
> +* Name : fms_fms_inform_terminating
> +*
> +* Description : sends information to peer that terminating is undergoing.
> +*
> +* Arguments : Pointer to Control Block.
> +*
> +* Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.
> +*
> +* Notes : None.
> +*****************************************************************************/
> +static uint32_t fms_fms_inform_terminating(FM_CB *fm_cb)
> +{
> + GFM_GFM_MSG gfm_msg;
> + TRACE_ENTER();
> + if (fm_cb->peer_adest != 0) {
> +/* peer fms present */
> + memset(&gfm_msg, 0, sizeof(GFM_GFM_MSG));
> + gfm_msg.msg_type = GFM_GFM_EVT_PEER_IS_TERMINATING;
> +
> + if (NCSCC_RC_SUCCESS != fm_mds_async_send(fm_cb,
> (NCSCONTEXT)&gfm_msg,
> + NCSMDS_SVC_ID_GFM,
> MDS_SEND_PRIORITY_VERY_HIGH,
> + 0, fm_cb->peer_adest,
> 0)) {
> + syslog(LOG_ERR, "Sending node-info message to peer fms
> failed");
> + return NCSCC_RC_FAILURE;
> + }
> +
> + return NCSCC_RC_SUCCESS;
> + }
> + TRACE_LEAVE();
> + return NCSCC_RC_FAILURE;
> +}
> +
> +/****************************************************************************
> * Name : fm_nid_notify
> *
> * Description : Sends notification to NID
> diff --git a/osaf/services/infrastructure/fm/fms/fm_mds.c
> b/osaf/services/infrastructure/fm/fms/fm_mds.c
> --- a/osaf/services/infrastructure/fm/fms/fm_mds.c
> +++ b/osaf/services/infrastructure/fm/fms/fm_mds.c
> @@ -474,6 +474,7 @@ static uint32_t fm_mds_svc_evt(FM_CB *cb
> }
> cb->peer_adest = svc_evt->i_dest;
> cb->peer_node_id = svc_evt->i_node_id;
> + cb->peer_node_terminated = false;
> return_val = fm_fill_mds_evt_post_fm_mbx(cb,
> fm_evt, cb->peer_node_id, FM_EVT_PEER_UP);
>
> if (NCSCC_RC_FAILURE == return_val) {
> @@ -533,7 +534,9 @@ static uint32_t fm_mds_rcv_evt(FM_CB *cb
> cb->peer_node_name.length);
> LOG_IN("Peer Node_id %u : EE_ID %s", cb->peer_node_id,
> cb->peer_node_name.value);
> break;
> -
> + case GFM_GFM_EVT_PEER_IS_TERMINATING:
> + fm_cb->peer_node_terminated = true;
> + break;
> default:
> syslog(LOG_INFO, "Wrong MDS event from GFM.");
> return_val = NCSCC_RC_FAILURE;
> @@ -768,7 +771,9 @@ static uint32_t fm_fm_mds_enc(MDS_CALLBA
> ncs_encode_n_octets_in_uba(uba,
> msg->info.node_info.node_name.value,
>
> (uint32_t)msg->info.node_info.node_name.length);
> break;
> -
> + case GFM_GFM_EVT_PEER_IS_TERMINATING:
> + fm_cb->peer_node_terminated = true;
> + break;
[AndersW] The MDS encode function should not modify global state; please
remove this assignment to peer_node_terminated.
> default:
> syslog(LOG_INFO, "fm_fm_mds_enc: Invalid msg type for encode.");
> return m_LEAP_DBG_SINK(NCSCC_RC_FAILURE);
> @@ -830,6 +835,9 @@ static uint32_t fm_fm_mds_dec(MDS_CALLBA
> ncs_decode_n_octets_from_uba(uba,
> msg->info.node_info.node_name.value,
>
> msg->info.node_info.node_name.length);
> break;
> + case GFM_GFM_EVT_PEER_IS_TERMINATING:
> + fm_cb->peer_node_terminated = true;
> + break;
[AndersW] The MDS decode function should not modify global state; please
remove this assignment to peer_node_terminated.
> default:
> syslog(LOG_INFO, "fm_fm_mds_dec: Invalid msg for decoding.");
> return m_LEAP_DBG_SINK(NCSCC_RC_FAILURE);
------------------------------------------------------------------------------
Developer Access Program for Intel Xeon Phi Processors
Access to Intel Xeon Phi processor-based developer platforms.
With one year of Intel Parallel Studio XE.
Training and support from Colfax.
Order your platform today. http://sdm.link/xeonphi
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel