One idea to prevent fast nodes from rebooting and joining the old cluster before all old nodes have shut down, is to set a flag in the CLM server that prevents any node from joining during the time the cluster reset is in progress (i.e. until the active CLM server has shut down).
/ Anders Widell On 09/29/2016 03:47 PM, Hans Nordebäck wrote: > Hi Anders, thanks for the comments, I have updated the patch accordingly. > Mathi have you had any time to look at the patch? > /BR HansN > > -----Original Message----- > From: Anders Widell > Sent: den 29 september 2016 14:29 > To: Hans Nordebäck <hans.nordeb...@ericsson.com>; mathi.naic...@oracle.com > Cc: opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1 of 1] clm: add support for cluster reboot V3 [#2053] > > Ack with comments. > > One comment is that we ought to export this new admin op in the saClm.h file > (or actually a new file separate file, like we have done when extending the > SAF API in other services). I think we should also select a new number (i.e. > 4) for this, even though it is targeted for a different object. So add e.g. > the following line: > > SA_CLM_ADMIN_CLUSTER_RESET = 4 > > In the future, we should consider how this feature interact with the enhanced > cluster management feature that is on the roadmap. For now I think it is good > enough, but in the future I would like to have a two-phase shutdown to avoid > the possibility that some of the nodes reboot very quickly and potentially > could come up before some other slow node has shut down. > > See further code comments inline below, marked AndersW> > > regards, > > Anders Widell > > On 09/28/2016 01:55 PM, Hans Nordeback wrote: >> osaf/libs/common/clmsv/include/clmsv_msg.h | 6 +++ >> osaf/libs/core/common/include/osaf_utility.h | 5 +++ >> osaf/libs/core/common/osaf_utility.c | 22 +++++++++++++ >> osaf/services/saf/clmsv/clms/clms.h | 3 +- >> osaf/services/saf/clmsv/clms/clms_imm.c | 18 ++++++++++ >> osaf/services/saf/clmsv/clms/clms_mds.c | 46 >> +++++++++++++++++++++++++++- >> osaf/services/saf/clmsv/clms/clms_util.c | 12 +++++++ >> osaf/services/saf/clmsv/nodeagent/main.c | 12 +++++++ >> scripts/opensaf_reboot | 22 ++++++++++--- >> 9 files changed, 139 insertions(+), 7 deletions(-) >> >> >> Admin command to request cluster reboot: >> immadm -o 1 safCluster=myClmCluster >> >> diff --git a/osaf/libs/common/clmsv/include/clmsv_msg.h >> b/osaf/libs/common/clmsv/include/clmsv_msg.h >> --- a/osaf/libs/common/clmsv/include/clmsv_msg.h >> +++ b/osaf/libs/common/clmsv/include/clmsv_msg.h >> @@ -23,6 +23,7 @@ typedef enum clms_msg_type { >> CLMSV_CLMS_TO_CLMA_CBK_MSG, >> CLMSV_CLMS_TO_CLMA_API_RESP_MSG, >> CLMSV_CLMS_TO_CLMA_IS_MEMBER_MSG, >> + CLMSV_CLMS_TO_CLMNA_REBOOT_MSG, >> CLMSV_MSG_MAX >> } CLMSV_MSG_TYPE; >> >> @@ -174,6 +175,10 @@ typedef struct clmsv_is_member_info_t { >> SaUint32T client_id; >> }CLMSV_IS_MEMBER_INFO; >> >> +typedef struct clmsv_reboot_info_t { >> + SaClmNodeIdT node_id; >> +} CLMSV_REBOOT_INFO; >> + >> /* Top Level CLMSv MDS message structure for use between CLMS-> CLMA && >> CLMA -> CLMS */ >> typedef struct clmsv_msg_t { >> struct clmsv_msg_t *next; /* Mailbox processing */ >> @@ -183,6 +188,7 @@ typedef struct clmsv_msg_t { >> CLMSV_CBK_INFO cbk_info; /* Callback Messages from CLMS to CLA >> */ >> CLMSV_API_RESP_INFO api_resp_info; /* Response Messages from >> CLMS to CLA */ >> CLMSV_IS_MEMBER_INFO is_member_info; /*Is node member or not >> Message from CLMS to CLA*/ >> + CLMSV_REBOOT_INFO reboot_info; /* Reboot request from CLMS to CLMNA */ >> } info; >> } CLMSV_MSG; >> >> diff --git a/osaf/libs/core/common/include/osaf_utility.h >> b/osaf/libs/core/common/include/osaf_utility.h >> --- a/osaf/libs/core/common/include/osaf_utility.h >> +++ b/osaf/libs/core/common/include/osaf_utility.h >> @@ -24,6 +24,8 @@ >> #ifndef OPENSAF_CORE_OSAF_UTILITY_H_ >> #define OPENSAF_CORE_OSAF_UTILITY_H_ >> >> +#define USE_SAFE_REBOOT 1 > AndersW> Maybe instead use: > > enum { > kUseSafeReboot = 1 > }; > >> + >> #include <pthread.h> >> >> #ifdef __cplusplus >> @@ -68,6 +70,9 @@ extern void osaf_abort(long i_cause) >> #endif >> nothrow, noreturn)); >> >> +extern void osaf_safe_reboot() >> + __attribute__ ((nothrow)); >> + > AndersW> Since this is C code, a function taking no parameter should be > declared "void osaf_safe_reboot(void)" >> static inline void osaf_mutex_lock_ordie(pthread_mutex_t* io_mutex) { >> int result = pthread_mutex_lock(io_mutex); >> if (result != 0) osaf_abort(result); diff --git >> a/osaf/libs/core/common/osaf_utility.c >> b/osaf/libs/core/common/osaf_utility.c >> --- a/osaf/libs/core/common/osaf_utility.c >> +++ b/osaf/libs/core/common/osaf_utility.c >> @@ -16,9 +16,12 @@ >> */ >> >> #include "osaf_utility.h" >> +#include "ncssysf_def.h" >> +#include "configmake.h" > AndersW> Include the project's header files after the system header > files (except for "osaf_utility.h" of course, which should be at the top). >> #include <stdlib.h> >> #include <errno.h> >> #include <syslog.h> >> +#include <stdio.h> >> >> void osaf_abort(long i_cause) >> { >> @@ -26,3 +29,22 @@ void osaf_abort(long i_cause) >> i_cause, __builtin_return_address(0), errno); >> abort(); >> } >> + >> +void osaf_safe_reboot() >> +{ >> + char str[256]; >> + >> + snprintf(str, sizeof(str), PKGLIBDIR "/opensaf_reboot %u %s %u", 0, >> "not_used", USE_SAFE_REBOOT); >> + syslog(LOG_NOTICE, "Reboot ordered using command: %s", str); >> + >> + int rc = system(str); >> + if (rc < 0) { > AndersW> To strictly follow the man page for system(), we should check > "if (rc == -1)" here, instead of "if (rc < 0)" >> + syslog(LOG_CRIT, "Node reboot failure: exit code %d", >> +WEXITSTATUS(rc)); > AndersW> In case rc == -1, we shall not call WEXITSTATUS(rc). Instead we > shall log the value of errno. >> + } else { >> + if (WIFEXITED(rc) && WEXITSTATUS(rc) == 0) { >> + syslog(LOG_NOTICE, "Command: %s successfully executed", >> str); >> + } else { >> + syslog(LOG_CRIT, "Command: %s failed with exit code >> %d", str, >> +WEXITSTATUS(rc)); > AndersW> Not sure if it is a big deal, but WEXITSTATUS(rc) is only valid > if WEXITED(rc) != 0. If e.g. WSIGNALED(rc) != 0, we should log > WTERMSIG(rc) instead, etc. >> + } >> + } >> +} >> diff --git a/osaf/services/saf/clmsv/clms/clms.h >> b/osaf/services/saf/clmsv/clms/clms.h >> --- a/osaf/services/saf/clmsv/clms/clms.h >> +++ b/osaf/services/saf/clmsv/clms/clms.h >> @@ -99,6 +99,7 @@ extern uint32_t clms_mds_msg_send(CLMS_C >> MDS_DEST *dest, >> MDS_SYNC_SND_CTXT *mds_ctxt, >> MDS_SEND_PRIORITY_TYPE prio, NCSMDS_SVC_ID svc_id); >> >> +extern uint32_t clms_mds_msg_bcast(CLMS_CB *cb, CLMSV_MSG >> +*bcast_msg); >> extern SaAisErrorT clms_imm_activate(CLMS_CB * cb); >> extern uint32_t clms_node_trackresplist_empty(CLMS_CLUSTER_NODE * >> op_node); >> extern uint32_t clms_send_cbk_start_sub(CLMS_CB * cb, >> CLMS_CLUSTER_NODE * node); @@ -125,5 +126,5 @@ extern void >> clms_cb_dump(void); >> extern uint32_t clms_send_is_member_info(CLMS_CB * cb, SaClmNodeIdT >> node_id, SaBoolT member, SaBoolT is_configured); >> extern void clm_imm_reinit_bg(CLMS_CB * cb); >> extern void proc_downs_during_rolechange (void); >> - >> +extern void clms_cluster_reboot(); > AndersW> This is C code, so use (void) here. >> #endif /* ifndef CLMS_H */ >> diff --git a/osaf/services/saf/clmsv/clms/clms_imm.c >> b/osaf/services/saf/clmsv/clms/clms_imm.c >> --- a/osaf/services/saf/clmsv/clms/clms_imm.c >> +++ b/osaf/services/saf/clmsv/clms/clms_imm.c >> @@ -19,6 +19,7 @@ >> >> #include "clms.h" >> #include "osaf_extended_name.h" >> +#include "osaf_utility.h" >> >> extern struct ImmutilWrapperProfile immutilWrapperProfile; >> >> @@ -886,6 +887,23 @@ static void clms_imm_admin_op_callback(S >> >> TRACE_ENTER2("Admin callback for nodename:%s, opId:%llu", >> objectName->value, opId); >> >> + // E.g. immadm -o 1 safCluster=myClmCluster >> + if (strncmp(osaf_extended_name_borrow(objectName), >> + osaf_extended_name_borrow(&osaf_cluster->name), >> + osaf_extended_name_length(objectName)) == 0) { >> + if (opId == 1) { >> + LOG_WA("Cluster reboot requested. Ordering cluster >> reboot"); >> + // MDS broadcast/multi cast call is synchronous >> + clms_cluster_reboot(); >> + sleep(1); > AndersW> Use osaf_nanosleep(&kOneSecond); >> + osaf_safe_reboot(); >> + } else { >> + LOG_ER("Admin Operation not supported for %s", >> osaf_extended_name_borrow(objectName)); >> + immutil_saImmOiAdminOperationResult(immOiHandle, >> invocation, SA_AIS_ERR_INVALID_PARAM); >> + } >> + goto done; >> + } >> + >> /*Lookup by the node_name and get the cluster node for CLM Admin oper */ >> nodeop = clms_node_get_by_name(objectName); >> if (nodeop == NULL) { >> diff --git a/osaf/services/saf/clmsv/clms/clms_mds.c >> b/osaf/services/saf/clmsv/clms/clms_mds.c >> --- a/osaf/services/saf/clmsv/clms/clms_mds.c >> +++ b/osaf/services/saf/clmsv/clms/clms_mds.c >> @@ -659,7 +659,17 @@ uint32_t clms_mds_enc(struct ncsmds_call >> ncs_enc_claim_space(uba, 4); >> total_bytes += 4; >> >> - if (CLMSV_CLMS_TO_CLMA_API_RESP_MSG == msg->evt_type) { >> + if (CLMSV_CLMS_TO_CLMNA_REBOOT_MSG == msg->evt_type) { >> + /* encode the reboot msg **/ >> + p8 = ncs_enc_reserve_space(uba, 4); >> + if (!p8) { >> + TRACE("ncs_enc_reserve_space failed"); >> + goto err; >> + } >> + ncs_encode_32bit(&p8, msg->info.reboot_info.node_id); >> + ncs_enc_claim_space(uba, 4); >> + total_bytes += 4; >> + } else if (CLMSV_CLMS_TO_CLMA_API_RESP_MSG == msg->evt_type) { >> /** encode the API RSP msg subtype **/ >> p8 = ncs_enc_reserve_space(uba, 4); >> if (!p8) { >> @@ -1517,3 +1527,37 @@ uint32_t clms_mds_msg_send(CLMS_CB * cb, >> TRACE_LEAVE(); >> return rc; >> } >> + >> +/**************************************************************************** >> + Name : clms_mds_msg_bcast >> + >> + Description : This routine sends a broadcast message to CLMNA. >> + >> + Arguments : cb - ptr to the CLMA CB >> + bcast_msg - ptr to the CLMSv broadcast message >> + >> + Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE >> + >> + Notes : None. >> +********************************************************************* >> +*********/ uint32_t clms_mds_msg_bcast(CLMS_CB *cb, CLMSV_MSG >> +*bcast_msg) { >> + NCSMDS_INFO snd_mds = {0}; >> + uint32_t rc; >> + >> + snd_mds.i_mds_hdl = cb->mds_hdl; >> + snd_mds.i_svc_id = NCSMDS_SVC_ID_CLMS; >> + snd_mds.i_op = MDS_SEND; >> + snd_mds.info.svc_send.i_msg = (NCSCONTEXT)bcast_msg; >> + snd_mds.info.svc_send.i_to_svc = NCSMDS_SVC_ID_CLMNA; >> + snd_mds.info.svc_send.i_priority = MDS_SEND_PRIORITY_HIGH; >> + snd_mds.info.svc_send.i_sendtype = MDS_SENDTYPE_BCAST; >> + snd_mds.info.svc_send.info.bcast.i_bcast_scope = NCSMDS_SCOPE_NONE; >> + >> + if ((rc = ncsmds_api(&snd_mds)) != NCSCC_RC_SUCCESS) { >> + LOG_ER("%s: ncsmds_api MDS_SEND failed %u", __FUNCTION__ ,rc); >> + return rc; >> + } >> + >> + return NCSCC_RC_SUCCESS; >> +} >> \ No newline at end of file >> diff --git a/osaf/services/saf/clmsv/clms/clms_util.c >> b/osaf/services/saf/clmsv/clms/clms_util.c >> --- a/osaf/services/saf/clmsv/clms/clms_util.c >> +++ b/osaf/services/saf/clmsv/clms/clms_util.c >> @@ -1200,3 +1200,15 @@ bool ip_matched(uint16_t family1, uint8_ >> return true; >> } >> >> +// >> +void clms_cluster_reboot() >> +{ >> + CLMSV_MSG bcast_msg; >> + bcast_msg.evt_type = CLMSV_CLMS_TO_CLMNA_REBOOT_MSG; >> + bcast_msg.info.reboot_info.node_id = clms_cb->node_id; >> + if (clms_mds_msg_bcast(clms_cb, &bcast_msg) == NCSCC_RC_SUCCESS) { >> + LOG_NO("Sending cluster reboot broadcast message succeeded"); >> + } else { >> + LOG_ER("Sending cluster reboot broadcast message failed"); >> + } >> +} >> diff --git a/osaf/services/saf/clmsv/nodeagent/main.c >> b/osaf/services/saf/clmsv/nodeagent/main.c >> --- a/osaf/services/saf/clmsv/nodeagent/main.c >> +++ b/osaf/services/saf/clmsv/nodeagent/main.c >> @@ -114,6 +114,18 @@ static uint32_t clmna_mds_dec(struct ncs >> total_bytes += 4; >> >> switch (msg->evt_type) { >> + case CLMSV_CLMS_TO_CLMNA_REBOOT_MSG: > AndersW> There is a tab character after "case". Replace with space. >> + { >> + p8 = ncs_dec_flatten_space(uba, local_data, 4); >> + msg->info.reboot_info.node_id = ncs_decode_32bit(&p8); >> + ncs_dec_skip_space(uba, 4); >> + total_bytes += 4; >> + // Reboot will be performed by CLMS for this node. >> + if (clmna_cb->node_info.node_id != >> msg->info.reboot_info.node_id) { >> + osaf_safe_reboot(); >> + } >> + break; >> + } >> case CLMSV_CLMS_TO_CLMA_API_RESP_MSG: >> { >> p8 = ncs_dec_flatten_space(uba, local_data, 8); diff >> --git >> a/scripts/opensaf_reboot b/scripts/opensaf_reboot >> --- a/scripts/opensaf_reboot >> +++ b/scripts/opensaf_reboot >> @@ -40,10 +40,17 @@ NODE_ID_FILE=$pkglocalstatedir/node_id >> >> node_id=$1 >> ee_name=$2 >> +safe_reboot=$3 >> >> # Run commands through sudo when not superuser >> test $(id -u) -ne 0 && icmd=$(which sudo 2> /dev/null) >> >> +opensaf_safe_reboot() >> +{ >> + logger -t "opensaf_reboot" "Rebooting local node using shutdown" >> + $icmd /sbin/shutdown -r now >> +} >> + >> ## Use stonith for remote fencing >> opensaf_reboot_with_remote_fencing() >> { >> @@ -91,8 +98,12 @@ temp_node_id=`cat "$NODE_ID_FILE"` >> temp_node_id=`echo "$temp_node_id" |sed -e 's:^0[bBxX]::'| sed -e >> 's:^:0x:'` >> self_node_id=`printf "%d" $temp_node_id` >> >> -# A node ID of zero(0) means an order to reboot the local node -if [ >> "$self_node_id" = "$node_id" ] || [ $node_id = 0 ]; then >> + >> +if [ "$safe_reboot" = 1 ]; then >> + opensaf_safe_reboot >> +else >> + # A node ID of zero(0) means an order to reboot the local node >> + if [ "$self_node_id" = "$node_id" ] || [ $node_id = 0 ]; then >> # uncomment the following line if debugging errors that keep restarting >> the node >> # exit 0 >> >> @@ -114,8 +125,8 @@ if [ "$self_node_id" = "$node_id" ] || [ >> >> # Reboot (not shutdown) system WITH file system sync >> $icmd /sbin/reboot -f >> -else >> - if [ "$FMS_USE_REMOTE_FENCING" = "1" ]; then >> + else >> + if [ "$FMS_USE_REMOTE_FENCING" = 1 ]; then >> opensaf_reboot_with_remote_fencing >> else >> if [ ":$ee_name" != ":" ]; then >> @@ -133,4 +144,5 @@ else >> logger -t "opensaf_reboot" "Rebooting remote node in >> the absence of PLM is outside the scope of OpenSAF" >> fi >> fi >> -fi >> + fi >> +fi >> \ No newline at end of file ------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel