One reason I can guess is that for this admin op, we don't need a "failfast" kind of reboot but rather a normal reboot, and a reboot that does not skip the .rc scripts.
Thanks, Mathi. > -----Original Message----- > From: Mathivanan Naickan Palanivelu > Sent: Friday, September 30, 2016 1:13 PM > To: Hans Nordeback; anders.wid...@ericsson.com > Cc: opensaf-devel@lists.sourceforge.net > Subject: Re: [devel] [PATCH 1 of 1] clm: add support for cluster reboot V3 > [#2053] > > Hi Hans, > > Could you provide some background on the need for > opensaf_safe_reboot(). > What would be the need for this? > > Thanks, > Mathi. > > > > -----Original Message----- > > From: Hans Nordeback [mailto:hans.nordeb...@ericsson.com] > > Sent: Wednesday, September 28, 2016 5:26 PM > > To: anders.wid...@ericsson.com; Mathivanan Naickan Palanivelu > > Cc: opensaf-devel@lists.sourceforge.net > > Subject: [PATCH 1 of 1] clm: add support for cluster reboot V3 [#2053] > > > > osaf/libs/common/clmsv/include/clmsv_msg.h | 6 +++ > > osaf/libs/core/common/include/osaf_utility.h | 5 +++ > > osaf/libs/core/common/osaf_utility.c | 22 +++++++++++++ > > osaf/services/saf/clmsv/clms/clms.h | 3 +- > > osaf/services/saf/clmsv/clms/clms_imm.c | 18 ++++++++++ > > osaf/services/saf/clmsv/clms/clms_mds.c | 46 > > +++++++++++++++++++++++++++- > > osaf/services/saf/clmsv/clms/clms_util.c | 12 +++++++ > > osaf/services/saf/clmsv/nodeagent/main.c | 12 +++++++ > > scripts/opensaf_reboot | 22 ++++++++++--- > > 9 files changed, 139 insertions(+), 7 deletions(-) > > > > > > Admin command to request cluster reboot: > > immadm -o 1 safCluster=myClmCluster > > > > diff --git a/osaf/libs/common/clmsv/include/clmsv_msg.h > > b/osaf/libs/common/clmsv/include/clmsv_msg.h > > --- a/osaf/libs/common/clmsv/include/clmsv_msg.h > > +++ b/osaf/libs/common/clmsv/include/clmsv_msg.h > > @@ -23,6 +23,7 @@ typedef enum clms_msg_type { > > CLMSV_CLMS_TO_CLMA_CBK_MSG, > > CLMSV_CLMS_TO_CLMA_API_RESP_MSG, > > CLMSV_CLMS_TO_CLMA_IS_MEMBER_MSG, > > + CLMSV_CLMS_TO_CLMNA_REBOOT_MSG, > > CLMSV_MSG_MAX > > } CLMSV_MSG_TYPE; > > > > @@ -174,6 +175,10 @@ typedef struct clmsv_is_member_info_t { > > SaUint32T 
client_id; > > }CLMSV_IS_MEMBER_INFO; > > > > +typedef struct clmsv_reboot_info_t { > > + SaClmNodeIdT node_id; > > +} CLMSV_REBOOT_INFO; > > + > > /* Top Level CLMSv MDS message structure for use between CLMS-> > CLMA > > && CLMA -> CLMS */ typedef struct clmsv_msg_t { > > struct clmsv_msg_t *next; /* Mailbox processing */ > > @@ -183,6 +188,7 @@ typedef struct clmsv_msg_t { > > CLMSV_CBK_INFO cbk_info; /* Callback Messages from CLMS to CLA > > */ > > CLMSV_API_RESP_INFO api_resp_info; /* Response Messages from > > CLMS to CLA */ > > CLMSV_IS_MEMBER_INFO is_member_info; /*Is node member or not > > Message from CLMS to CLA*/ > > + CLMSV_REBOOT_INFO reboot_info; /* Reboot request from > > CLMS to CLMNA */ > > } info; > > } CLMSV_MSG; > > > > diff --git a/osaf/libs/core/common/include/osaf_utility.h > > b/osaf/libs/core/common/include/osaf_utility.h > > --- a/osaf/libs/core/common/include/osaf_utility.h > > +++ b/osaf/libs/core/common/include/osaf_utility.h > > @@ -24,6 +24,8 @@ > > #ifndef OPENSAF_CORE_OSAF_UTILITY_H_ > > #define OPENSAF_CORE_OSAF_UTILITY_H_ > > > > +#define USE_SAFE_REBOOT 1 > > + > > #include <pthread.h> > > > > #ifdef __cplusplus > > @@ -68,6 +70,9 @@ extern void osaf_abort(long i_cause) #endif > > nothrow, noreturn)); > > > > +extern void osaf_safe_reboot() > > + __attribute__ ((nothrow)); > > + > > static inline void osaf_mutex_lock_ordie(pthread_mutex_t* io_mutex) { > > int result = pthread_mutex_lock(io_mutex); > > if (result != 0) osaf_abort(result); diff --git > > a/osaf/libs/core/common/osaf_utility.c > > b/osaf/libs/core/common/osaf_utility.c > > --- a/osaf/libs/core/common/osaf_utility.c > > +++ b/osaf/libs/core/common/osaf_utility.c > > @@ -16,9 +16,12 @@ > > */ > > > > #include "osaf_utility.h" > > +#include "ncssysf_def.h" > > +#include "configmake.h" > > #include <stdlib.h> > > #include <errno.h> > > #include <syslog.h> > > +#include <stdio.h> > > > > void osaf_abort(long i_cause) > > { > > @@ -26,3 +29,22 @@ void osaf_abort(long 
i_cause) > > i_cause, __builtin_return_address(0), errno); > > abort(); > > } > > + > > +void osaf_safe_reboot() > > +{ > > + char str[256]; > > + > > + snprintf(str, sizeof(str), PKGLIBDIR "/opensaf_reboot %u %s %u", 0, > > "not_used", USE_SAFE_REBOOT); > > + syslog(LOG_NOTICE, "Reboot ordered using command: %s", str); > > + > > + int rc = system(str); > > + if (rc < 0) { > > + syslog(LOG_CRIT, "Node reboot failure: exit code %d", > > WEXITSTATUS(rc)); > > + } else { > > + if (WIFEXITED(rc) && WEXITSTATUS(rc) == 0) { > > + syslog(LOG_NOTICE, "Command: %s successfully > > executed", str); > > + } else { > > + syslog(LOG_CRIT, "Command: %s failed with exit > > code %d", str, WEXITSTATUS(rc)); > > + } > > + } > > +} > > diff --git a/osaf/services/saf/clmsv/clms/clms.h > > b/osaf/services/saf/clmsv/clms/clms.h > > --- a/osaf/services/saf/clmsv/clms/clms.h > > +++ b/osaf/services/saf/clmsv/clms/clms.h > > @@ -99,6 +99,7 @@ extern uint32_t clms_mds_msg_send(CLMS_C > > MDS_DEST *dest, > > MDS_SYNC_SND_CTXT *mds_ctxt, > > MDS_SEND_PRIORITY_TYPE prio, NCSMDS_SVC_ID svc_id); > > > > +extern uint32_t clms_mds_msg_bcast(CLMS_CB *cb, CLMSV_MSG > > *bcast_msg); > > extern SaAisErrorT clms_imm_activate(CLMS_CB * cb); extern uint32_t > > clms_node_trackresplist_empty(CLMS_CLUSTER_NODE * op_node); > extern > > uint32_t clms_send_cbk_start_sub(CLMS_CB * cb, CLMS_CLUSTER_NODE > * > > node); @@ -125,5 +126,5 @@ extern void clms_cb_dump(void); extern > > uint32_t clms_send_is_member_info(CLMS_CB * cb, SaClmNodeIdT > node_id, > > SaBoolT member, SaBoolT is_configured); extern void > > clm_imm_reinit_bg(CLMS_CB * cb); extern void > > proc_downs_during_rolechange (void); > > - > > +extern void clms_cluster_reboot(); > > #endif /* ifndef CLMS_H */ > > diff --git a/osaf/services/saf/clmsv/clms/clms_imm.c > > b/osaf/services/saf/clmsv/clms/clms_imm.c > > --- a/osaf/services/saf/clmsv/clms/clms_imm.c > > +++ b/osaf/services/saf/clmsv/clms/clms_imm.c > > @@ -19,6 +19,7 @@ > > > > #include 
"clms.h" > > #include "osaf_extended_name.h" > > +#include "osaf_utility.h" > > > > extern struct ImmutilWrapperProfile immutilWrapperProfile; > > > > @@ -886,6 +887,23 @@ static void clms_imm_admin_op_callback(S > > > > TRACE_ENTER2("Admin callback for nodename:%s, opId:%llu", > > objectName->value, opId); > > > > + // E.g. immadm -o 1 safCluster=myClmCluster > > + if (strncmp(osaf_extended_name_borrow(objectName), > > + osaf_extended_name_borrow(&osaf_cluster->name), > > + osaf_extended_name_length(objectName)) == 0) { > > + if (opId == 1) { > > + LOG_WA("Cluster reboot requested. Ordering > > cluster reboot"); > > + // MDS broadcast/multi cast call is synchronous > > + clms_cluster_reboot(); > > + sleep(1); > > + osaf_safe_reboot(); > > + } else { > > + LOG_ER("Admin Operation not supported for %s", > > osaf_extended_name_borrow(objectName)); > > + > > immutil_saImmOiAdminOperationResult(immOiHandle, invocation, > > SA_AIS_ERR_INVALID_PARAM); > > + } > > + goto done; > > + } > > + > > /*Lookup by the node_name and get the cluster node for CLM > Admin > > oper */ > > nodeop = clms_node_get_by_name(objectName); > > if (nodeop == NULL) { > > diff --git a/osaf/services/saf/clmsv/clms/clms_mds.c > > b/osaf/services/saf/clmsv/clms/clms_mds.c > > --- a/osaf/services/saf/clmsv/clms/clms_mds.c > > +++ b/osaf/services/saf/clmsv/clms/clms_mds.c > > @@ -659,7 +659,17 @@ uint32_t clms_mds_enc(struct ncsmds_call > > ncs_enc_claim_space(uba, 4); > > total_bytes += 4; > > > > - if (CLMSV_CLMS_TO_CLMA_API_RESP_MSG == msg->evt_type) { > > + if (CLMSV_CLMS_TO_CLMNA_REBOOT_MSG == msg->evt_type) { > > + /* encode the reboot msg **/ > > + p8 = ncs_enc_reserve_space(uba, 4); > > + if (!p8) { > > + TRACE("ncs_enc_reserve_space failed"); > > + goto err; > > + } > > + ncs_encode_32bit(&p8, msg->info.reboot_info.node_id); > > + ncs_enc_claim_space(uba, 4); > > + total_bytes += 4; > > + } else if (CLMSV_CLMS_TO_CLMA_API_RESP_MSG == msg- > > >evt_type) { > > /** encode the API RSP msg 
subtype **/ > > p8 = ncs_enc_reserve_space(uba, 4); > > if (!p8) { > > @@ -1517,3 +1527,37 @@ uint32_t clms_mds_msg_send(CLMS_CB * cb, > > TRACE_LEAVE(); > > return rc; > > } > > + > > > +/********************************************************* > > ******************* > > + Name : clms_mds_msg_bcast > > + > > + Description : This routine sends a broadcast message to CLMNA. > > + > > + Arguments : cb - ptr to the CLMA CB > > + bcast_msg - ptr to the CLMSv broadcast message > > + > > + Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE > > + > > + Notes : None. > > > +********************************************************* > > ************** > > +*******/ uint32_t clms_mds_msg_bcast(CLMS_CB *cb, CLMSV_MSG > > *bcast_msg) > > +{ > > + NCSMDS_INFO snd_mds = {0}; > > + uint32_t rc; > > + > > + snd_mds.i_mds_hdl = cb->mds_hdl; > > + snd_mds.i_svc_id = NCSMDS_SVC_ID_CLMS; > > + snd_mds.i_op = MDS_SEND; > > + snd_mds.info.svc_send.i_msg = (NCSCONTEXT)bcast_msg; > > + snd_mds.info.svc_send.i_to_svc = NCSMDS_SVC_ID_CLMNA; > > + snd_mds.info.svc_send.i_priority = MDS_SEND_PRIORITY_HIGH; > > + snd_mds.info.svc_send.i_sendtype = MDS_SENDTYPE_BCAST; > > + snd_mds.info.svc_send.info.bcast.i_bcast_scope = > > NCSMDS_SCOPE_NONE; > > + > > + if ((rc = ncsmds_api(&snd_mds)) != NCSCC_RC_SUCCESS) { > > + LOG_ER("%s: ncsmds_api MDS_SEND failed %u", > > __FUNCTION__ ,rc); > > + return rc; > > + } > > + > > + return NCSCC_RC_SUCCESS; > > +} > > \ No newline at end of file > > diff --git a/osaf/services/saf/clmsv/clms/clms_util.c > > b/osaf/services/saf/clmsv/clms/clms_util.c > > --- a/osaf/services/saf/clmsv/clms/clms_util.c > > +++ b/osaf/services/saf/clmsv/clms/clms_util.c > > @@ -1200,3 +1200,15 @@ bool ip_matched(uint16_t family1, uint8_ > > return true; > > } > > > > +// > > +void clms_cluster_reboot() > > +{ > > + CLMSV_MSG bcast_msg; > > + bcast_msg.evt_type = CLMSV_CLMS_TO_CLMNA_REBOOT_MSG; > > + bcast_msg.info.reboot_info.node_id = clms_cb->node_id; > > + if 
(clms_mds_msg_bcast(clms_cb, &bcast_msg) == > > NCSCC_RC_SUCCESS) { > > + LOG_NO("Sending cluster reboot broadcast message > > succeeded"); > > + } else { > > + LOG_ER("Sending cluster reboot broadcast message failed"); > > + } > > +} > > diff --git a/osaf/services/saf/clmsv/nodeagent/main.c > > b/osaf/services/saf/clmsv/nodeagent/main.c > > --- a/osaf/services/saf/clmsv/nodeagent/main.c > > +++ b/osaf/services/saf/clmsv/nodeagent/main.c > > @@ -114,6 +114,18 @@ static uint32_t clmna_mds_dec(struct ncs > > total_bytes += 4; > > > > switch (msg->evt_type) { > > + case CLMSV_CLMS_TO_CLMNA_REBOOT_MSG: > > + { > > + p8 = ncs_dec_flatten_space(uba, local_data, 4); > > + msg->info.reboot_info.node_id = > > ncs_decode_32bit(&p8); > > + ncs_dec_skip_space(uba, 4); > > + total_bytes += 4; > > + // Reboot will be performed by CLMS for this node. > > + if (clmna_cb->node_info.node_id != msg- > > >info.reboot_info.node_id) { > > + osaf_safe_reboot(); > > + } > > + break; > > + } > > case CLMSV_CLMS_TO_CLMA_API_RESP_MSG: > > { > > p8 = ncs_dec_flatten_space(uba, local_data, 8); diff - > -git > > a/scripts/opensaf_reboot b/scripts/opensaf_reboot > > --- a/scripts/opensaf_reboot > > +++ b/scripts/opensaf_reboot > > @@ -40,10 +40,17 @@ NODE_ID_FILE=$pkglocalstatedir/node_id > > > > node_id=$1 > > ee_name=$2 > > +safe_reboot=$3 > > > > # Run commands through sudo when not superuser test $(id -u) -ne 0 > > && icmd=$(which sudo 2> /dev/null) > > > > +opensaf_safe_reboot() > > +{ > > + logger -t "opensaf_reboot" "Rebooting local node using shutdown" > > + $icmd /sbin/shutdown -r now > > +} > > + > > ## Use stonith for remote fencing > > opensaf_reboot_with_remote_fencing() > > { > > @@ -91,8 +98,12 @@ temp_node_id=`cat "$NODE_ID_FILE"` > > temp_node_id=`echo "$temp_node_id" |sed -e 's:^0[bBxX]::'| sed -e > > 's:^:0x:'` self_node_id=`printf "%d" $temp_node_id` > > > > -# A node ID of zero(0) means an order to reboot the local node -if [ > > "$self_node_id" = "$node_id" ] || [ $node_id 
= 0 ]; then > > + > > +if [ "$safe_reboot" = 1 ]; then > > + opensaf_safe_reboot > > +else > > + # A node ID of zero(0) means an order to reboot the local node > > + if [ "$self_node_id" = "$node_id" ] || [ $node_id = 0 ]; then > > # uncomment the following line if debugging errors that keep > > restarting the node > > # exit 0 > > > > @@ -114,8 +125,8 @@ if [ "$self_node_id" = "$node_id" ] || [ > > > > # Reboot (not shutdown) system WITH file system sync > > $icmd /sbin/reboot -f > > -else > > - if [ "$FMS_USE_REMOTE_FENCING" = "1" ]; then > > + else > > + if [ "$FMS_USE_REMOTE_FENCING" = 1 ]; then > > opensaf_reboot_with_remote_fencing > > else > > if [ ":$ee_name" != ":" ]; then > > @@ -133,4 +144,5 @@ else > > logger -t "opensaf_reboot" "Rebooting remote node in the > > absence of PLM is outside the scope of OpenSAF" > > fi > > fi > > -fi > > + fi > > +fi > > \ No newline at end of file > > ------------------------------------------------------------------------------ > _______________________________________________ > Opensaf-devel mailing list > Opensaf-devel@lists.sourceforge.net > https://lists.sourceforge.net/lists/listinfo/opensaf-devel ------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel