osaf/libs/common/clmsv/include/clmsv_msg.h | 6 +++ osaf/libs/core/common/include/osaf_utility.h | 5 +++ osaf/libs/core/common/osaf_utility.c | 22 +++++++++++++ osaf/services/saf/clmsv/clms/clms.h | 3 +- osaf/services/saf/clmsv/clms/clms_imm.c | 18 ++++++++++ osaf/services/saf/clmsv/clms/clms_mds.c | 46 +++++++++++++++++++++++++++- osaf/services/saf/clmsv/clms/clms_util.c | 12 +++++++ osaf/services/saf/clmsv/nodeagent/main.c | 12 +++++++ scripts/opensaf_reboot | 22 ++++++++++--- 9 files changed, 139 insertions(+), 7 deletions(-)
Admin command to request cluster reboot: immadm -o 1 safCluster=myClmCluster diff --git a/osaf/libs/common/clmsv/include/clmsv_msg.h b/osaf/libs/common/clmsv/include/clmsv_msg.h --- a/osaf/libs/common/clmsv/include/clmsv_msg.h +++ b/osaf/libs/common/clmsv/include/clmsv_msg.h @@ -23,6 +23,7 @@ typedef enum clms_msg_type { CLMSV_CLMS_TO_CLMA_CBK_MSG, CLMSV_CLMS_TO_CLMA_API_RESP_MSG, CLMSV_CLMS_TO_CLMA_IS_MEMBER_MSG, + CLMSV_CLMS_TO_CLMNA_REBOOT_MSG, CLMSV_MSG_MAX } CLMSV_MSG_TYPE; @@ -174,6 +175,10 @@ typedef struct clmsv_is_member_info_t { SaUint32T client_id; }CLMSV_IS_MEMBER_INFO; +typedef struct clmsv_reboot_info_t { + SaClmNodeIdT node_id; +} CLMSV_REBOOT_INFO; + /* Top Level CLMSv MDS message structure for use between CLMS-> CLMA && CLMA -> CLMS */ typedef struct clmsv_msg_t { struct clmsv_msg_t *next; /* Mailbox processing */ @@ -183,6 +188,7 @@ typedef struct clmsv_msg_t { CLMSV_CBK_INFO cbk_info; /* Callback Messages from CLMS to CLA */ CLMSV_API_RESP_INFO api_resp_info; /* Response Messages from CLMS to CLA */ CLMSV_IS_MEMBER_INFO is_member_info; /*Is node member or not Message from CLMS to CLA*/ + CLMSV_REBOOT_INFO reboot_info; /* Reboot request from CLMS to CLMNA */ } info; } CLMSV_MSG; diff --git a/osaf/libs/core/common/include/osaf_utility.h b/osaf/libs/core/common/include/osaf_utility.h --- a/osaf/libs/core/common/include/osaf_utility.h +++ b/osaf/libs/core/common/include/osaf_utility.h @@ -24,6 +24,8 @@ #ifndef OPENSAF_CORE_OSAF_UTILITY_H_ #define OPENSAF_CORE_OSAF_UTILITY_H_ +#define USE_SAFE_REBOOT 1 + #include <pthread.h> #ifdef __cplusplus @@ -68,6 +70,9 @@ extern void osaf_abort(long i_cause) #endif nothrow, noreturn)); +extern void osaf_safe_reboot() + __attribute__ ((nothrow)); + static inline void osaf_mutex_lock_ordie(pthread_mutex_t* io_mutex) { int result = pthread_mutex_lock(io_mutex); if (result != 0) osaf_abort(result); diff --git a/osaf/libs/core/common/osaf_utility.c b/osaf/libs/core/common/osaf_utility.c --- 
a/osaf/libs/core/common/osaf_utility.c
+++ b/osaf/libs/core/common/osaf_utility.c
@@ -16,9 +16,13 @@
  */
 
 #include "osaf_utility.h"
+#include "ncssysf_def.h"
+#include "configmake.h"
 #include <stdlib.h>
 #include <errno.h>
 #include <syslog.h>
+#include <stdio.h>
+#include <sys/wait.h>
 
 void osaf_abort(long i_cause)
 {
@@ -26,3 +30,34 @@ void osaf_abort(long i_cause)
 		i_cause, __builtin_return_address(0), errno);
 	abort();
 }
+
+/**
+ * Reboot the local node by running the opensaf_reboot script with its third
+ * argument set to USE_SAFE_REBOOT. The command and its outcome are written to
+ * syslog; no status is returned to the caller.
+ */
+void osaf_safe_reboot(void)
+{
+	char str[256];
+
+	int len = snprintf(str, sizeof(str), PKGLIBDIR "/opensaf_reboot %u %s %u",
+			   0, "not_used", USE_SAFE_REBOOT);
+	if (len < 0 || (size_t)len >= sizeof(str)) {
+		syslog(LOG_CRIT, "Node reboot failure: reboot command could not be built");
+		return;
+	}
+	syslog(LOG_NOTICE, "Reboot ordered using command: %s", str);
+
+	int rc = system(str);
+	if (rc == -1) {
+		/* system() itself failed, so rc holds no wait status to decode */
+		syslog(LOG_CRIT, "Node reboot failure: system() failed, errno: %d", errno);
+	} else if (WIFEXITED(rc) && WEXITSTATUS(rc) == 0) {
+		syslog(LOG_NOTICE, "Command: %s successfully executed", str);
+	} else if (WIFEXITED(rc)) {
+		syslog(LOG_CRIT, "Command: %s failed with exit code %d", str, WEXITSTATUS(rc));
+	} else {
+		/* Not a normal exit (e.g. killed by a signal): log the raw wait status */
+		syslog(LOG_CRIT, "Command: %s terminated abnormally, wait status 0x%x", str, (unsigned)rc);
+	}
+}
diff --git a/osaf/services/saf/clmsv/clms/clms.h b/osaf/services/saf/clmsv/clms/clms.h
--- a/osaf/services/saf/clmsv/clms/clms.h
+++ b/osaf/services/saf/clmsv/clms/clms.h
@@ -99,6 +99,7 @@ extern uint32_t clms_mds_msg_send(CLMS_C
 				  MDS_DEST *dest,
 				  MDS_SYNC_SND_CTXT *mds_ctxt,
 				  MDS_SEND_PRIORITY_TYPE prio, NCSMDS_SVC_ID svc_id);
+extern uint32_t clms_mds_msg_bcast(CLMS_CB *cb, CLMSV_MSG *bcast_msg);
 extern SaAisErrorT clms_imm_activate(CLMS_CB * cb);
 extern uint32_t clms_node_trackresplist_empty(CLMS_CLUSTER_NODE * op_node);
 extern uint32_t clms_send_cbk_start_sub(CLMS_CB * cb, CLMS_CLUSTER_NODE * node);
@@ -125,5 +126,5 @@ extern void clms_cb_dump(void);
 extern uint32_t clms_send_is_member_info(CLMS_CB * cb, SaClmNodeIdT node_id, SaBoolT member, SaBoolT is_configured);
 extern void clm_imm_reinit_bg(CLMS_CB * cb);
 extern void proc_downs_during_rolechange (void);
-
+extern void clms_cluster_reboot();
 #endif /* ifndef CLMS_H */
diff --git a/osaf/services/saf/clmsv/clms/clms_imm.c b/osaf/services/saf/clmsv/clms/clms_imm.c
--- 
a/osaf/services/saf/clmsv/clms/clms_imm.c +++ b/osaf/services/saf/clmsv/clms/clms_imm.c @@ -19,6 +19,7 @@ #include "clms.h" #include "osaf_extended_name.h" +#include "osaf_utility.h" extern struct ImmutilWrapperProfile immutilWrapperProfile; @@ -886,6 +887,23 @@ static void clms_imm_admin_op_callback(S TRACE_ENTER2("Admin callback for nodename:%s, opId:%llu", objectName->value, opId); + // E.g. immadm -o 1 safCluster=myClmCluster + if (strncmp(osaf_extended_name_borrow(objectName), + osaf_extended_name_borrow(&osaf_cluster->name), + osaf_extended_name_length(objectName)) == 0) { + if (opId == 1) { + LOG_WA("Cluster reboot requested. Ordering cluster reboot"); + // MDS broadcast/multi cast call is synchronous + clms_cluster_reboot(); + sleep(1); + osaf_safe_reboot(); + } else { + LOG_ER("Admin Operation not supported for %s", osaf_extended_name_borrow(objectName)); + immutil_saImmOiAdminOperationResult(immOiHandle, invocation, SA_AIS_ERR_INVALID_PARAM); + } + goto done; + } + /*Lookup by the node_name and get the cluster node for CLM Admin oper */ nodeop = clms_node_get_by_name(objectName); if (nodeop == NULL) { diff --git a/osaf/services/saf/clmsv/clms/clms_mds.c b/osaf/services/saf/clmsv/clms/clms_mds.c --- a/osaf/services/saf/clmsv/clms/clms_mds.c +++ b/osaf/services/saf/clmsv/clms/clms_mds.c @@ -659,7 +659,17 @@ uint32_t clms_mds_enc(struct ncsmds_call ncs_enc_claim_space(uba, 4); total_bytes += 4; - if (CLMSV_CLMS_TO_CLMA_API_RESP_MSG == msg->evt_type) { + if (CLMSV_CLMS_TO_CLMNA_REBOOT_MSG == msg->evt_type) { + /* encode the reboot msg **/ + p8 = ncs_enc_reserve_space(uba, 4); + if (!p8) { + TRACE("ncs_enc_reserve_space failed"); + goto err; + } + ncs_encode_32bit(&p8, msg->info.reboot_info.node_id); + ncs_enc_claim_space(uba, 4); + total_bytes += 4; + } else if (CLMSV_CLMS_TO_CLMA_API_RESP_MSG == msg->evt_type) { /** encode the API RSP msg subtype **/ p8 = ncs_enc_reserve_space(uba, 4); if (!p8) { @@ -1517,3 +1527,37 @@ uint32_t clms_mds_msg_send(CLMS_CB * 
cb,
 	TRACE_LEAVE();
 	return rc;
 }
+
+/****************************************************************************
+  Name          : clms_mds_msg_bcast
+
+  Description   : This routine sends a broadcast message to CLMNA.
+
+  Arguments     : cb  - ptr to the CLMS CB
+                  bcast_msg - ptr to the CLMSv broadcast message
+
+  Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE
+
+  Notes         : None.
+******************************************************************************/
+uint32_t clms_mds_msg_bcast(CLMS_CB *cb, CLMSV_MSG *bcast_msg)
+{
+	NCSMDS_INFO snd_mds = {0};
+	uint32_t rc;
+
+	snd_mds.i_mds_hdl = cb->mds_hdl;
+	snd_mds.i_svc_id = NCSMDS_SVC_ID_CLMS;
+	snd_mds.i_op = MDS_SEND;
+	snd_mds.info.svc_send.i_msg = (NCSCONTEXT)bcast_msg;
+	snd_mds.info.svc_send.i_to_svc = NCSMDS_SVC_ID_CLMNA;
+	snd_mds.info.svc_send.i_priority = MDS_SEND_PRIORITY_HIGH;
+	snd_mds.info.svc_send.i_sendtype = MDS_SENDTYPE_BCAST;
+	snd_mds.info.svc_send.info.bcast.i_bcast_scope = NCSMDS_SCOPE_NONE;
+
+	if ((rc = ncsmds_api(&snd_mds)) != NCSCC_RC_SUCCESS) {
+		LOG_ER("%s: ncsmds_api MDS_SEND failed %u", __FUNCTION__ ,rc);
+		return rc;
+	}
+
+	return NCSCC_RC_SUCCESS;
+}
diff --git a/osaf/services/saf/clmsv/clms/clms_util.c b/osaf/services/saf/clmsv/clms/clms_util.c
--- a/osaf/services/saf/clmsv/clms/clms_util.c
+++ b/osaf/services/saf/clmsv/clms/clms_util.c
@@ -1200,3 +1200,22 @@ bool ip_matched(uint16_t family1, uint8_
 
 	return true;
 }
+
+/**
+ * Broadcast a cluster reboot request to the CLM node agents. The message
+ * carries this node's id (clms_cb->node_id). The result of the send is only
+ * logged.
+ */
+void clms_cluster_reboot(void)
+{
+	/* Zero the message so 'next' and any field not set below start out as zero */
+	CLMSV_MSG bcast_msg = {0};
+
+	bcast_msg.evt_type = CLMSV_CLMS_TO_CLMNA_REBOOT_MSG;
+	bcast_msg.info.reboot_info.node_id = clms_cb->node_id;
+	if (clms_mds_msg_bcast(clms_cb, &bcast_msg) == NCSCC_RC_SUCCESS) {
+		LOG_NO("Sending cluster reboot broadcast message succeeded");
+	} else {
+		LOG_ER("Sending cluster reboot broadcast message failed");
+	}
+}
diff --git a/osaf/services/saf/clmsv/nodeagent/main.c b/osaf/services/saf/clmsv/nodeagent/main.c
--- a/osaf/services/saf/clmsv/nodeagent/main.c
+++ 
b/osaf/services/saf/clmsv/nodeagent/main.c @@ -114,6 +114,18 @@ static uint32_t clmna_mds_dec(struct ncs total_bytes += 4; switch (msg->evt_type) { + case CLMSV_CLMS_TO_CLMNA_REBOOT_MSG: + { + p8 = ncs_dec_flatten_space(uba, local_data, 4); + msg->info.reboot_info.node_id = ncs_decode_32bit(&p8); + ncs_dec_skip_space(uba, 4); + total_bytes += 4; + // Reboot will be performed by CLMS for this node. + if (clmna_cb->node_info.node_id != msg->info.reboot_info.node_id) { + osaf_safe_reboot(); + } + break; + } case CLMSV_CLMS_TO_CLMA_API_RESP_MSG: { p8 = ncs_dec_flatten_space(uba, local_data, 8); diff --git a/scripts/opensaf_reboot b/scripts/opensaf_reboot --- a/scripts/opensaf_reboot +++ b/scripts/opensaf_reboot @@ -40,10 +40,17 @@ NODE_ID_FILE=$pkglocalstatedir/node_id node_id=$1 ee_name=$2 +safe_reboot=$3 # Run commands through sudo when not superuser test $(id -u) -ne 0 && icmd=$(which sudo 2> /dev/null) +opensaf_safe_reboot() +{ + logger -t "opensaf_reboot" "Rebooting local node using shutdown" + $icmd /sbin/shutdown -r now +} + ## Use stonith for remote fencing opensaf_reboot_with_remote_fencing() { @@ -91,8 +98,12 @@ temp_node_id=`cat "$NODE_ID_FILE"` temp_node_id=`echo "$temp_node_id" |sed -e 's:^0[bBxX]::'| sed -e 's:^:0x:'` self_node_id=`printf "%d" $temp_node_id` -# A node ID of zero(0) means an order to reboot the local node -if [ "$self_node_id" = "$node_id" ] || [ $node_id = 0 ]; then + +if [ "$safe_reboot" = 1 ]; then + opensaf_safe_reboot +else + # A node ID of zero(0) means an order to reboot the local node + if [ "$self_node_id" = "$node_id" ] || [ $node_id = 0 ]; then # uncomment the following line if debugging errors that keep restarting the node # exit 0 @@ -114,8 +125,8 @@ if [ "$self_node_id" = "$node_id" ] || [ # Reboot (not shutdown) system WITH file system sync $icmd /sbin/reboot -f -else - if [ "$FMS_USE_REMOTE_FENCING" = "1" ]; then + else + if [ "$FMS_USE_REMOTE_FENCING" = 1 ]; then opensaf_reboot_with_remote_fencing else if [ ":$ee_name" != 
":" ]; then @@ -133,4 +144,5 @@ else logger -t "opensaf_reboot" "Rebooting remote node in the absence of PLM is outside the scope of OpenSAF" fi fi -fi + fi +fi \ No newline at end of file ------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel