Hi Ramesh and Mathi,
Have you had time to review this patch?
/Regards HansN

-----Original Message-----
From: Hans Nordeback [mailto:hans.nordeb...@ericsson.com] 
Sent: den 4 november 2016 14:00
To: ramesh.bet...@oracle.com; mathi.naic...@oracle.com; Anders Widell 
<anders.wid...@ericsson.com>
Cc: opensaf-devel@lists.sourceforge.net
Subject: [devel] [PATCH 1 of 1] fm: Add support for differentiating a hung node 
versus a stopped node [#2160]

 osaf/services/infrastructure/fm/fms/Makefile.am |    6 +-
 osaf/services/infrastructure/fm/fms/fm_main.c   |  114 ++++++++++++++++++++---
 2 files changed, 104 insertions(+), 16 deletions(-)


diff --git a/osaf/services/infrastructure/fm/fms/Makefile.am 
b/osaf/services/infrastructure/fm/fms/Makefile.am
--- a/osaf/services/infrastructure/fm/fms/Makefile.am
+++ b/osaf/services/infrastructure/fm/fms/Makefile.am
@@ -33,7 +33,8 @@ noinst_HEADERS = \
 
 osaffmd_CPPFLAGS= \
        $(AM_CPPFLAGS) \
-       -I$(top_srcdir)/osaf/services/infrastructure/fm/include
+       -I$(top_srcdir)/osaf/services/infrastructure/fm/include \
+       -I$(top_srcdir)/osaf/libs/common/immsv/include
 
 osaffmd_CFLAGS = $(AM_CFLAGS)
 
@@ -44,6 +45,9 @@ osaffmd_SOURCES = \
        fm_amf.c
 
 osaffmd_LDADD = \
+       $(top_builddir)/osaf/tools/safimm/src/libimmutil.la \
+       $(top_builddir)/osaf/libs/saf/libSaImm/libSaImmOi.la \
+       $(top_builddir)/osaf/libs/saf/libSaImm/libSaImmOm.la \
        $(top_builddir)/osaf/libs/core/libopensaf_core.la \
        $(top_builddir)/osaf/libs/saf/libSaAmf/libSaAmf.la \
        $(top_builddir)/osaf/libs/agents/infrastructure/rda/librda.la \
diff --git a/osaf/services/infrastructure/fm/fms/fm_main.c b/osaf/services/infrastructure/fm/fms/fm_main.c
--- a/osaf/services/infrastructure/fm/fms/fm_main.c
+++ b/osaf/services/infrastructure/fm/fms/fm_main.c
@@ -31,6 +31,7 @@ This file contains the main() routine fo
 #include <nid_api.h>
 #include "fm.h"
 #include "osaf_time.h"
+#include "immutil.h"
 
 #define FM_CLM_API_TIMEOUT 10000000000LL
 
@@ -62,6 +63,8 @@ static uint32_t fms_fms_exchange_node_in
 static uint32_t fm_nid_notify(uint32_t);
 static uint32_t fm_tmr_start(FM_TMR *, SaTimeT);
 static SaAisErrorT get_peer_clm_node_name(NODE_ID);
+static const char* get_clm_node_name(const SaNameT* node_name);
+static bool is_node_clm_member(const SaNameT *clm_node_name);
 static SaAisErrorT fm_clm_init();
 static void fm_mbx_msg_handler(FM_CB *, FM_EVT *);
 static void fm_evt_proc_rda_callback(FM_CB*, FM_EVT*);
@@ -499,6 +502,30 @@ void fm_proc_svc_down(FM_CB *cb, FM_EVT
 }
 
 /****************************************************************************
+* Name          : get_clm_node_name
+*
+* Description   : Extract node name from DN clm node.
+*
+* Arguments     : DN clm node name.
+*
+* Return Values : Extracted node name.
+*
+* Notes         : None.
+*****************************************************************************/
+static const char* get_clm_node_name(const SaNameT* node_name){
+       SaNameT tmp_node_name = *node_name;
+       char *save_ptr;
+       // Extract peer clm node name, e.g SC-2 from 
"safNode=SC-2,safCluster=myClmCluster"
+       // The peer clm node name will be passed to opensaf_reboot script to 
support remote fencing.
+       // The peer clm node name should correspond to the name of the virtual 
machine for that node.
+       strtok_r((char*) tmp_node_name.value, "=", &save_ptr);
+       char *node = strtok_r(NULL, ",", &save_ptr);
+       char *tmp = strndup(node, strlen(node));
+       LOG_NO("Peer clm node name: %s", tmp);
+       return tmp;
+}
+
+/****************************************************************************
 * Name          : fm_clm_init
 *
 * Description   : Initialize CLM. 
@@ -521,16 +548,10 @@ static SaAisErrorT get_peer_clm_node_nam
        }
 
        if ((rc = saClmClusterNodeGet_4(fm_cb->clm_hdl, node_id, 
FM_CLM_API_TIMEOUT, &cluster_node)) == SA_AIS_OK) {
-               // Extract peer clm node name, e.g SC-2 from 
"safNode=SC-2,safCluster=myClmCluster"
-               // The peer clm node name will be passed to opensaf_reboot 
script to support remote fencing.
-               // The peer clm node name should correspond to the name of the 
virtual machine for that node.
-               char *node = NULL;
-               strtok((char*) cluster_node.nodeName.value, "=");
-               node = strtok(NULL, ",");
-               strncpy((char*) fm_cb->peer_clm_node_name.value, node, 
cluster_node.nodeName.length);
+               fm_cb->peer_clm_node_name = cluster_node.nodeName;
                LOG_NO("Peer clm node name: %s", 
fm_cb->peer_clm_node_name.value);
        } else {
-               LOG_WA("saClmClusterNodeGet_4 returned %u", (unsigned) rc);
+               LOG_WA("saClmClusterNodeGet_4 returned %d", rc);
        }
 
        if ((rc = saClmFinalize(fm_cb->clm_hdl)) != SA_AIS_OK) {
@@ -551,6 +572,58 @@ static SaAisErrorT get_peer_clm_node_nam
 * 
 * Notes         : None. 
 *****************************************************************************/
+static bool is_node_clm_member(const SaNameT *clm_node_name)
+{
+       SaAisErrorT rc = SA_AIS_OK;
+       SaUint32T clm_member = 0;
+       SaNameT node_name = *clm_node_name;
+
+       SaVersionT immVersion = { 'A', 2, 15 };
+       const SaImmAttrValuesT_2 **attributes;
+       SaImmAccessorHandleT accessor_handle;
+       SaImmHandleT om_handle;
+
+       if ((rc = immutil_saImmOmInitialize(&om_handle, NULL, &immVersion)) != 
SA_AIS_OK) {
+               LOG_ER("saImmOmInitialize FAILED: %u", rc);
+               goto done;
+       }
+
+       if ((rc = immutil_saImmOmAccessorInitialize(om_handle, 
&accessor_handle)) != SA_AIS_OK) {
+               LOG_ER("saImmOmAccessorInitialize FAILED: %u", rc);
+               goto om_finalize;
+       }
+
+       if ((rc = immutil_saImmOmAccessorGet_2(accessor_handle, &node_name, 
NULL, (SaImmAttrValuesT_2 ***) &attributes)) != SA_AIS_OK) {
+               LOG_ER("saImmOmAccessorGet_2 FAILED: %s %u ", node_name.value, 
rc);
+               goto accessor_finalize;
+       }
+
+       if ((rc = immutil_getAttr("saClmNodeIsMember", attributes, 0, 
&clm_member)) != SA_AIS_OK) {
+               LOG_ER("immutil_getAttr FAILED: %u", rc);
+       }
+accessor_finalize:
+       if ((rc = immutil_saImmOmAccessorFinalize(accessor_handle)) != 
SA_AIS_OK) {
+               LOG_NO("immutil_saImmOmAccessorFinalize FAILED: %u", rc);
+       }
+om_finalize:
+       if ((rc = immutil_saImmOmFinalize(om_handle)) != SA_AIS_OK) {
+               LOG_NO("immutil_saImmOmFinalize FAILED: %u", rc);
+       }
+done:
+       return (clm_member == 1) ? true : false;
+}
+
+/****************************************************************************
+* Name          : fm_clm_init
+*
+* Description   : Initialize CLM.
+*
+* Arguments     : None.
+*
+* Return Values : None.
+*
+* Notes         : None.
+*****************************************************************************/
 static SaAisErrorT fm_clm_init()
 {
        SaAisErrorT rc = SA_AIS_OK;
@@ -622,8 +695,15 @@ static void fm_mbx_msg_handler(FM_CB *fm
                                         * node_down event has been received.
                                         */
                                if (fm_cb->use_remote_fencing) {
-                                       opensaf_reboot(fm_cb->peer_node_id, 
(char *)fm_cb->peer_clm_node_name.value,
-                                                       "Received Node Down for 
peer controller");
+                                       const char* clm_node_name = 
get_clm_node_name(&fm_cb->peer_clm_node_name);
+                                       if 
(is_node_clm_member(&fm_cb->peer_clm_node_name)) {
+                                               
opensaf_reboot(fm_cb->peer_node_id, clm_node_name,
+                                                               "Received Node 
Down for peer controller");
+                                       } else {
+                                               LOG_NO("Peer node %s is not a 
member of the CLM cluster, fencing will not be performed",
+                                                               clm_node_name);
+                                       }
+                                       free((char*)clm_node_name);
                                } else {
                                        opensaf_reboot(fm_cb->peer_node_id, 
(char *)fm_cb->peer_node_name.value,
                                                        "Received Node Down for 
peer controller");
@@ -661,11 +741,15 @@ static void fm_mbx_msg_handler(FM_CB *fm
 
                        LOG_NO("Reseting peer controller node id: %x", 
fm_cb->peer_node_id);
                        if (fm_cb->use_remote_fencing) {
-                               LOG_NO("saClmClusterNodeGet succeeded node_id 
0x%X, clm peer node name %s",
-                                       fm_mbx_evt->node_id, 
fm_cb->peer_clm_node_name.value);
-
-                               opensaf_reboot(fm_cb->peer_node_id, (char 
*)fm_cb->peer_clm_node_name.value,
-                                               "Received Node Down for peer 
controller");
+                               const char* clm_node_name = 
get_clm_node_name(&fm_cb->peer_clm_node_name);
+                               if 
(is_node_clm_member(&fm_cb->peer_clm_node_name)) {
+                                       opensaf_reboot(fm_cb->peer_node_id, 
clm_node_name,
+                                                       "Received Node Down for 
peer controller");
+                               } else {
+                                       LOG_NO("Peer node %s is not a member of 
the CLM cluster, fencing will not be performed",
+                                                       clm_node_name);
+                               }
+                               free((char*)clm_node_name);
                        } else {
                                opensaf_reboot(fm_cb->peer_node_id, (char 
*)fm_cb->peer_node_name.value,
                                               "Received Node Down for Active 
peer");

------------------------------------------------------------------------------
Developer Access Program for Intel Xeon Phi Processors Access to Intel Xeon Phi 
processor-based developer platforms.
With one year of Intel Parallel Studio XE.
Training and support from Colfax.
Order your platform today. http://sdm.link/xeonphi 
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to