Hi Hans, could you provide some background on opensaf_safe_reboot()? What is the need for it?
Thanks, Mathi. > -----Original Message----- > From: Hans Nordeback [mailto:hans.nordeb...@ericsson.com] > Sent: Wednesday, September 28, 2016 5:26 PM > To: anders.wid...@ericsson.com; Mathivanan Naickan Palanivelu > Cc: opensaf-devel@lists.sourceforge.net > Subject: [PATCH 1 of 1] clm: add support for cluster reboot V3 [#2053] > > osaf/libs/common/clmsv/include/clmsv_msg.h | 6 +++ > osaf/libs/core/common/include/osaf_utility.h | 5 +++ > osaf/libs/core/common/osaf_utility.c | 22 +++++++++++++ > osaf/services/saf/clmsv/clms/clms.h | 3 +- > osaf/services/saf/clmsv/clms/clms_imm.c | 18 ++++++++++ > osaf/services/saf/clmsv/clms/clms_mds.c | 46 > +++++++++++++++++++++++++++- > osaf/services/saf/clmsv/clms/clms_util.c | 12 +++++++ > osaf/services/saf/clmsv/nodeagent/main.c | 12 +++++++ > scripts/opensaf_reboot | 22 ++++++++++--- > 9 files changed, 139 insertions(+), 7 deletions(-) > > > Admin command to request cluster reboot: > immadm -o 1 safCluster=myClmCluster > > diff --git a/osaf/libs/common/clmsv/include/clmsv_msg.h > b/osaf/libs/common/clmsv/include/clmsv_msg.h > --- a/osaf/libs/common/clmsv/include/clmsv_msg.h > +++ b/osaf/libs/common/clmsv/include/clmsv_msg.h > @@ -23,6 +23,7 @@ typedef enum clms_msg_type { > CLMSV_CLMS_TO_CLMA_CBK_MSG, > CLMSV_CLMS_TO_CLMA_API_RESP_MSG, > CLMSV_CLMS_TO_CLMA_IS_MEMBER_MSG, > + CLMSV_CLMS_TO_CLMNA_REBOOT_MSG, > CLMSV_MSG_MAX > } CLMSV_MSG_TYPE; > > @@ -174,6 +175,10 @@ typedef struct clmsv_is_member_info_t { > SaUint32T client_id; > }CLMSV_IS_MEMBER_INFO; > > +typedef struct clmsv_reboot_info_t { > + SaClmNodeIdT node_id; > +} CLMSV_REBOOT_INFO; > + > /* Top Level CLMSv MDS message structure for use between CLMS-> CLMA > && CLMA -> CLMS */ typedef struct clmsv_msg_t { > struct clmsv_msg_t *next; /* Mailbox processing */ > @@ -183,6 +188,7 @@ typedef struct clmsv_msg_t { > CLMSV_CBK_INFO cbk_info; /* Callback Messages from CLMS to CLA > */ > CLMSV_API_RESP_INFO api_resp_info; /* Response Messages from > CLMS to CLA */ > 
CLMSV_IS_MEMBER_INFO is_member_info; /*Is node member or not > Message from CLMS to CLA*/ > + CLMSV_REBOOT_INFO reboot_info; /* Reboot request from > CLMS to CLMNA */ > } info; > } CLMSV_MSG; > > diff --git a/osaf/libs/core/common/include/osaf_utility.h > b/osaf/libs/core/common/include/osaf_utility.h > --- a/osaf/libs/core/common/include/osaf_utility.h > +++ b/osaf/libs/core/common/include/osaf_utility.h > @@ -24,6 +24,8 @@ > #ifndef OPENSAF_CORE_OSAF_UTILITY_H_ > #define OPENSAF_CORE_OSAF_UTILITY_H_ > > +#define USE_SAFE_REBOOT 1 > + > #include <pthread.h> > > #ifdef __cplusplus > @@ -68,6 +70,9 @@ extern void osaf_abort(long i_cause) #endif > nothrow, noreturn)); > > +extern void osaf_safe_reboot() > + __attribute__ ((nothrow)); > + > static inline void osaf_mutex_lock_ordie(pthread_mutex_t* io_mutex) { > int result = pthread_mutex_lock(io_mutex); > if (result != 0) osaf_abort(result); > diff --git a/osaf/libs/core/common/osaf_utility.c > b/osaf/libs/core/common/osaf_utility.c > --- a/osaf/libs/core/common/osaf_utility.c > +++ b/osaf/libs/core/common/osaf_utility.c > @@ -16,9 +16,12 @@ > */ > > #include "osaf_utility.h" > +#include "ncssysf_def.h" > +#include "configmake.h" > #include <stdlib.h> > #include <errno.h> > #include <syslog.h> > +#include <stdio.h> > > void osaf_abort(long i_cause) > { > @@ -26,3 +29,22 @@ void osaf_abort(long i_cause) > i_cause, __builtin_return_address(0), errno); > abort(); > } > + > +void osaf_safe_reboot() > +{ > + char str[256]; > + > + snprintf(str, sizeof(str), PKGLIBDIR "/opensaf_reboot %u %s %u", 0, > "not_used", USE_SAFE_REBOOT); > + syslog(LOG_NOTICE, "Reboot ordered using command: %s", str); > + > + int rc = system(str); > + if (rc < 0) { > + syslog(LOG_CRIT, "Node reboot failure: exit code %d", > WEXITSTATUS(rc)); > + } else { > + if (WIFEXITED(rc) && WEXITSTATUS(rc) == 0) { > + syslog(LOG_NOTICE, "Command: %s successfully > executed", str); > + } else { > + syslog(LOG_CRIT, "Command: %s failed with exit > code %d", str, 
WEXITSTATUS(rc)); > + } > + } > +} > diff --git a/osaf/services/saf/clmsv/clms/clms.h > b/osaf/services/saf/clmsv/clms/clms.h > --- a/osaf/services/saf/clmsv/clms/clms.h > +++ b/osaf/services/saf/clmsv/clms/clms.h > @@ -99,6 +99,7 @@ extern uint32_t clms_mds_msg_send(CLMS_C > MDS_DEST *dest, > MDS_SYNC_SND_CTXT *mds_ctxt, > MDS_SEND_PRIORITY_TYPE prio, NCSMDS_SVC_ID svc_id); > > +extern uint32_t clms_mds_msg_bcast(CLMS_CB *cb, CLMSV_MSG > *bcast_msg); > extern SaAisErrorT clms_imm_activate(CLMS_CB * cb); extern uint32_t > clms_node_trackresplist_empty(CLMS_CLUSTER_NODE * op_node); extern > uint32_t clms_send_cbk_start_sub(CLMS_CB * cb, CLMS_CLUSTER_NODE * > node); @@ -125,5 +126,5 @@ extern void clms_cb_dump(void); extern > uint32_t clms_send_is_member_info(CLMS_CB * cb, SaClmNodeIdT > node_id, SaBoolT member, SaBoolT is_configured); extern void > clm_imm_reinit_bg(CLMS_CB * cb); extern void > proc_downs_during_rolechange (void); > - > +extern void clms_cluster_reboot(); > #endif /* ifndef CLMS_H */ > diff --git a/osaf/services/saf/clmsv/clms/clms_imm.c > b/osaf/services/saf/clmsv/clms/clms_imm.c > --- a/osaf/services/saf/clmsv/clms/clms_imm.c > +++ b/osaf/services/saf/clmsv/clms/clms_imm.c > @@ -19,6 +19,7 @@ > > #include "clms.h" > #include "osaf_extended_name.h" > +#include "osaf_utility.h" > > extern struct ImmutilWrapperProfile immutilWrapperProfile; > > @@ -886,6 +887,23 @@ static void clms_imm_admin_op_callback(S > > TRACE_ENTER2("Admin callback for nodename:%s, opId:%llu", > objectName->value, opId); > > + // E.g. immadm -o 1 safCluster=myClmCluster > + if (strncmp(osaf_extended_name_borrow(objectName), > + osaf_extended_name_borrow(&osaf_cluster->name), > + osaf_extended_name_length(objectName)) == 0) { > + if (opId == 1) { > + LOG_WA("Cluster reboot requested. 
Ordering > cluster reboot"); > + // MDS broadcast/multi cast call is synchronous > + clms_cluster_reboot(); > + sleep(1); > + osaf_safe_reboot(); > + } else { > + LOG_ER("Admin Operation not supported for %s", > osaf_extended_name_borrow(objectName)); > + > immutil_saImmOiAdminOperationResult(immOiHandle, invocation, > SA_AIS_ERR_INVALID_PARAM); > + } > + goto done; > + } > + > /*Lookup by the node_name and get the cluster node for CLM > Admin oper */ > nodeop = clms_node_get_by_name(objectName); > if (nodeop == NULL) { > diff --git a/osaf/services/saf/clmsv/clms/clms_mds.c > b/osaf/services/saf/clmsv/clms/clms_mds.c > --- a/osaf/services/saf/clmsv/clms/clms_mds.c > +++ b/osaf/services/saf/clmsv/clms/clms_mds.c > @@ -659,7 +659,17 @@ uint32_t clms_mds_enc(struct ncsmds_call > ncs_enc_claim_space(uba, 4); > total_bytes += 4; > > - if (CLMSV_CLMS_TO_CLMA_API_RESP_MSG == msg->evt_type) { > + if (CLMSV_CLMS_TO_CLMNA_REBOOT_MSG == msg->evt_type) { > + /* encode the reboot msg **/ > + p8 = ncs_enc_reserve_space(uba, 4); > + if (!p8) { > + TRACE("ncs_enc_reserve_space failed"); > + goto err; > + } > + ncs_encode_32bit(&p8, msg->info.reboot_info.node_id); > + ncs_enc_claim_space(uba, 4); > + total_bytes += 4; > + } else if (CLMSV_CLMS_TO_CLMA_API_RESP_MSG == msg- > >evt_type) { > /** encode the API RSP msg subtype **/ > p8 = ncs_enc_reserve_space(uba, 4); > if (!p8) { > @@ -1517,3 +1527,37 @@ uint32_t clms_mds_msg_send(CLMS_CB * cb, > TRACE_LEAVE(); > return rc; > } > + > +/********************************************************* > ******************* > + Name : clms_mds_msg_bcast > + > + Description : This routine sends a broadcast message to CLMNA. > + > + Arguments : cb - ptr to the CLMA CB > + bcast_msg - ptr to the CLMSv broadcast message > + > + Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE > + > + Notes : None. 
> +********************************************************* > ************** > +*******/ uint32_t clms_mds_msg_bcast(CLMS_CB *cb, CLMSV_MSG > *bcast_msg) > +{ > + NCSMDS_INFO snd_mds = {0}; > + uint32_t rc; > + > + snd_mds.i_mds_hdl = cb->mds_hdl; > + snd_mds.i_svc_id = NCSMDS_SVC_ID_CLMS; > + snd_mds.i_op = MDS_SEND; > + snd_mds.info.svc_send.i_msg = (NCSCONTEXT)bcast_msg; > + snd_mds.info.svc_send.i_to_svc = NCSMDS_SVC_ID_CLMNA; > + snd_mds.info.svc_send.i_priority = MDS_SEND_PRIORITY_HIGH; > + snd_mds.info.svc_send.i_sendtype = MDS_SENDTYPE_BCAST; > + snd_mds.info.svc_send.info.bcast.i_bcast_scope = > NCSMDS_SCOPE_NONE; > + > + if ((rc = ncsmds_api(&snd_mds)) != NCSCC_RC_SUCCESS) { > + LOG_ER("%s: ncsmds_api MDS_SEND failed %u", > __FUNCTION__ ,rc); > + return rc; > + } > + > + return NCSCC_RC_SUCCESS; > +} > \ No newline at end of file > diff --git a/osaf/services/saf/clmsv/clms/clms_util.c > b/osaf/services/saf/clmsv/clms/clms_util.c > --- a/osaf/services/saf/clmsv/clms/clms_util.c > +++ b/osaf/services/saf/clmsv/clms/clms_util.c > @@ -1200,3 +1200,15 @@ bool ip_matched(uint16_t family1, uint8_ > return true; > } > > +// > +void clms_cluster_reboot() > +{ > + CLMSV_MSG bcast_msg; > + bcast_msg.evt_type = CLMSV_CLMS_TO_CLMNA_REBOOT_MSG; > + bcast_msg.info.reboot_info.node_id = clms_cb->node_id; > + if (clms_mds_msg_bcast(clms_cb, &bcast_msg) == > NCSCC_RC_SUCCESS) { > + LOG_NO("Sending cluster reboot broadcast message > succeeded"); > + } else { > + LOG_ER("Sending cluster reboot broadcast message failed"); > + } > +} > diff --git a/osaf/services/saf/clmsv/nodeagent/main.c > b/osaf/services/saf/clmsv/nodeagent/main.c > --- a/osaf/services/saf/clmsv/nodeagent/main.c > +++ b/osaf/services/saf/clmsv/nodeagent/main.c > @@ -114,6 +114,18 @@ static uint32_t clmna_mds_dec(struct ncs > total_bytes += 4; > > switch (msg->evt_type) { > + case CLMSV_CLMS_TO_CLMNA_REBOOT_MSG: > + { > + p8 = ncs_dec_flatten_space(uba, local_data, 4); > + msg->info.reboot_info.node_id = > 
ncs_decode_32bit(&p8); > + ncs_dec_skip_space(uba, 4); > + total_bytes += 4; > + // Reboot will be performed by CLMS for this node. > + if (clmna_cb->node_info.node_id != msg- > >info.reboot_info.node_id) { > + osaf_safe_reboot(); > + } > + break; > + } > case CLMSV_CLMS_TO_CLMA_API_RESP_MSG: > { > p8 = ncs_dec_flatten_space(uba, local_data, 8); diff - > -git a/scripts/opensaf_reboot b/scripts/opensaf_reboot > --- a/scripts/opensaf_reboot > +++ b/scripts/opensaf_reboot > @@ -40,10 +40,17 @@ NODE_ID_FILE=$pkglocalstatedir/node_id > > node_id=$1 > ee_name=$2 > +safe_reboot=$3 > > # Run commands through sudo when not superuser test $(id -u) -ne 0 && > icmd=$(which sudo 2> /dev/null) > > +opensaf_safe_reboot() > +{ > + logger -t "opensaf_reboot" "Rebooting local node using shutdown" > + $icmd /sbin/shutdown -r now > +} > + > ## Use stonith for remote fencing > opensaf_reboot_with_remote_fencing() > { > @@ -91,8 +98,12 @@ temp_node_id=`cat "$NODE_ID_FILE"` > temp_node_id=`echo "$temp_node_id" |sed -e 's:^0[bBxX]::'| sed -e > 's:^:0x:'` self_node_id=`printf "%d" $temp_node_id` > > -# A node ID of zero(0) means an order to reboot the local node -if [ > "$self_node_id" = "$node_id" ] || [ $node_id = 0 ]; then > + > +if [ "$safe_reboot" = 1 ]; then > + opensaf_safe_reboot > +else > + # A node ID of zero(0) means an order to reboot the local node > + if [ "$self_node_id" = "$node_id" ] || [ $node_id = 0 ]; then > # uncomment the following line if debugging errors that keep > restarting the node > # exit 0 > > @@ -114,8 +125,8 @@ if [ "$self_node_id" = "$node_id" ] || [ > > # Reboot (not shutdown) system WITH file system sync > $icmd /sbin/reboot -f > -else > - if [ "$FMS_USE_REMOTE_FENCING" = "1" ]; then > + else > + if [ "$FMS_USE_REMOTE_FENCING" = 1 ]; then > opensaf_reboot_with_remote_fencing > else > if [ ":$ee_name" != ":" ]; then > @@ -133,4 +144,5 @@ else > logger -t "opensaf_reboot" "Rebooting remote node in the > absence of PLM is outside the scope of OpenSAF" > fi 
> fi > -fi > + fi > +fi > \ No newline at end of file ------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel