Hi Hans,

Could you provide some background on the need for opensaf_safe_reboot()?
What would it be used for?

Thanks,
Mathi.


> -----Original Message-----
> From: Hans Nordeback [mailto:hans.nordeb...@ericsson.com]
> Sent: Wednesday, September 28, 2016 5:26 PM
> To: anders.wid...@ericsson.com; Mathivanan Naickan Palanivelu
> Cc: opensaf-devel@lists.sourceforge.net
> Subject: [PATCH 1 of 1] clm: add support for cluster reboot V3 [#2053]
> 
>  osaf/libs/common/clmsv/include/clmsv_msg.h   |   6 +++
>  osaf/libs/core/common/include/osaf_utility.h |   5 +++
>  osaf/libs/core/common/osaf_utility.c         |  22 +++++++++++++
>  osaf/services/saf/clmsv/clms/clms.h          |   3 +-
>  osaf/services/saf/clmsv/clms/clms_imm.c      |  18 ++++++++++
>  osaf/services/saf/clmsv/clms/clms_mds.c      |  46
> +++++++++++++++++++++++++++-
>  osaf/services/saf/clmsv/clms/clms_util.c     |  12 +++++++
>  osaf/services/saf/clmsv/nodeagent/main.c     |  12 +++++++
>  scripts/opensaf_reboot                       |  22 ++++++++++---
>  9 files changed, 139 insertions(+), 7 deletions(-)
> 
> 
> Admin command to request cluster reboot:
> immadm -o 1 safCluster=myClmCluster
> 
> diff --git a/osaf/libs/common/clmsv/include/clmsv_msg.h
> b/osaf/libs/common/clmsv/include/clmsv_msg.h
> --- a/osaf/libs/common/clmsv/include/clmsv_msg.h
> +++ b/osaf/libs/common/clmsv/include/clmsv_msg.h
> @@ -23,6 +23,7 @@ typedef enum clms_msg_type {
>    CLMSV_CLMS_TO_CLMA_CBK_MSG,
>    CLMSV_CLMS_TO_CLMA_API_RESP_MSG,
>    CLMSV_CLMS_TO_CLMA_IS_MEMBER_MSG,
> +  CLMSV_CLMS_TO_CLMNA_REBOOT_MSG,
>    CLMSV_MSG_MAX
>  } CLMSV_MSG_TYPE;
> 
> @@ -174,6 +175,10 @@ typedef struct clmsv_is_member_info_t {
>    SaUint32T client_id;
>  }CLMSV_IS_MEMBER_INFO;
> 
> +typedef struct clmsv_reboot_info_t {
> +  SaClmNodeIdT node_id;
> +} CLMSV_REBOOT_INFO;
> +
>  /* Top Level CLMSv MDS message structure for use between CLMS-> CLMA
> && CLMA -> CLMS */  typedef struct clmsv_msg_t {
>    struct clmsv_msg_t *next;       /* Mailbox processing */
> @@ -183,6 +188,7 @@ typedef struct clmsv_msg_t {
>      CLMSV_CBK_INFO cbk_info;        /* Callback Messages from CLMS to CLA
> */
>      CLMSV_API_RESP_INFO api_resp_info;      /* Response Messages from
> CLMS to CLA */
>      CLMSV_IS_MEMBER_INFO is_member_info;    /*Is node member or not
> Message from CLMS to CLA*/
> +    CLMSV_REBOOT_INFO reboot_info;   /* Reboot request from
> CLMS to CLMNA */
>    } info;
>  } CLMSV_MSG;
> 
> diff --git a/osaf/libs/core/common/include/osaf_utility.h
> b/osaf/libs/core/common/include/osaf_utility.h
> --- a/osaf/libs/core/common/include/osaf_utility.h
> +++ b/osaf/libs/core/common/include/osaf_utility.h
> @@ -24,6 +24,8 @@
>  #ifndef OPENSAF_CORE_OSAF_UTILITY_H_
>  #define OPENSAF_CORE_OSAF_UTILITY_H_
> 
> +#define USE_SAFE_REBOOT 1
> +
>  #include <pthread.h>
> 
>  #ifdef  __cplusplus
> @@ -68,6 +70,9 @@ extern void osaf_abort(long i_cause)  #endif
>          nothrow, noreturn));
> 
> +extern void osaf_safe_reboot()
> +    __attribute__ ((nothrow));
> +
>  static inline void osaf_mutex_lock_ordie(pthread_mutex_t* io_mutex) {
>    int result = pthread_mutex_lock(io_mutex);
>    if (result != 0) osaf_abort(result);
> diff --git a/osaf/libs/core/common/osaf_utility.c
> b/osaf/libs/core/common/osaf_utility.c
> --- a/osaf/libs/core/common/osaf_utility.c
> +++ b/osaf/libs/core/common/osaf_utility.c
> @@ -16,9 +16,12 @@
>   */
> 
>  #include "osaf_utility.h"
> +#include "ncssysf_def.h"
> +#include "configmake.h"
>  #include <stdlib.h>
>  #include <errno.h>
>  #include <syslog.h>
> +#include <stdio.h>
> 
>  void osaf_abort(long i_cause)
>  {
> @@ -26,3 +29,22 @@ void osaf_abort(long i_cause)
>               i_cause, __builtin_return_address(0), errno);
>       abort();
>  }
> +
> +void osaf_safe_reboot()
> +{
> +     char str[256];
> +
> +     snprintf(str, sizeof(str), PKGLIBDIR "/opensaf_reboot %u %s %u", 0,
> "not_used", USE_SAFE_REBOOT);
> +     syslog(LOG_NOTICE, "Reboot ordered using command: %s", str);
> +
> +     int rc = system(str);
> +     if (rc < 0) {
> +             syslog(LOG_CRIT, "Node reboot failure: exit code %d",
> WEXITSTATUS(rc));
> +     } else {
> +              if (WIFEXITED(rc) && WEXITSTATUS(rc) == 0) {
> +                     syslog(LOG_NOTICE, "Command: %s successfully
> executed", str);
> +             } else {
> +                     syslog(LOG_CRIT, "Command: %s failed with exit
> code %d", str, WEXITSTATUS(rc));
> +             }
> +     }
> +}
> diff --git a/osaf/services/saf/clmsv/clms/clms.h
> b/osaf/services/saf/clmsv/clms/clms.h
> --- a/osaf/services/saf/clmsv/clms/clms.h
> +++ b/osaf/services/saf/clmsv/clms/clms.h
> @@ -99,6 +99,7 @@ extern uint32_t clms_mds_msg_send(CLMS_C
>                                    MDS_DEST *dest,
>                                    MDS_SYNC_SND_CTXT *mds_ctxt,
> MDS_SEND_PRIORITY_TYPE prio, NCSMDS_SVC_ID svc_id);
> 
> +extern uint32_t clms_mds_msg_bcast(CLMS_CB *cb, CLMSV_MSG
> *bcast_msg);
>  extern SaAisErrorT clms_imm_activate(CLMS_CB * cb);  extern uint32_t
> clms_node_trackresplist_empty(CLMS_CLUSTER_NODE * op_node);  extern
> uint32_t clms_send_cbk_start_sub(CLMS_CB * cb, CLMS_CLUSTER_NODE *
> node); @@ -125,5 +126,5 @@ extern void clms_cb_dump(void);  extern
> uint32_t clms_send_is_member_info(CLMS_CB * cb, SaClmNodeIdT
> node_id,  SaBoolT member, SaBoolT is_configured);  extern void
> clm_imm_reinit_bg(CLMS_CB * cb);  extern void
> proc_downs_during_rolechange (void);
> -
> +extern void clms_cluster_reboot();
>  #endif   /* ifndef CLMS_H */
> diff --git a/osaf/services/saf/clmsv/clms/clms_imm.c
> b/osaf/services/saf/clmsv/clms/clms_imm.c
> --- a/osaf/services/saf/clmsv/clms/clms_imm.c
> +++ b/osaf/services/saf/clmsv/clms/clms_imm.c
> @@ -19,6 +19,7 @@
> 
>  #include "clms.h"
>  #include "osaf_extended_name.h"
> +#include "osaf_utility.h"
> 
>  extern struct ImmutilWrapperProfile immutilWrapperProfile;
> 
> @@ -886,6 +887,23 @@ static void clms_imm_admin_op_callback(S
> 
>       TRACE_ENTER2("Admin callback for nodename:%s, opId:%llu",
> objectName->value, opId);
> 
> +     // E.g. immadm -o 1 safCluster=myClmCluster
> +     if (strncmp(osaf_extended_name_borrow(objectName),
> +                  osaf_extended_name_borrow(&osaf_cluster->name),
> +                  osaf_extended_name_length(objectName)) == 0) {
> +             if (opId == 1) {
> +                     LOG_WA("Cluster reboot requested. Ordering
> cluster reboot");
> +                     // MDS broadcast/multi cast call is synchronous
> +                     clms_cluster_reboot();
> +                     sleep(1);
> +                     osaf_safe_reboot();
> +             } else {
> +                     LOG_ER("Admin Operation not supported for %s",
> osaf_extended_name_borrow(objectName));
> +
>       immutil_saImmOiAdminOperationResult(immOiHandle, invocation,
> SA_AIS_ERR_INVALID_PARAM);
> +             }
> +             goto done;
> +     }
> +
>       /*Lookup by the node_name and get the cluster node for CLM
> Admin oper */
>       nodeop = clms_node_get_by_name(objectName);
>       if (nodeop == NULL) {
> diff --git a/osaf/services/saf/clmsv/clms/clms_mds.c
> b/osaf/services/saf/clmsv/clms/clms_mds.c
> --- a/osaf/services/saf/clmsv/clms/clms_mds.c
> +++ b/osaf/services/saf/clmsv/clms/clms_mds.c
> @@ -659,7 +659,17 @@ uint32_t clms_mds_enc(struct ncsmds_call
>       ncs_enc_claim_space(uba, 4);
>       total_bytes += 4;
> 
> -     if (CLMSV_CLMS_TO_CLMA_API_RESP_MSG == msg->evt_type) {
> +     if (CLMSV_CLMS_TO_CLMNA_REBOOT_MSG == msg->evt_type) {
> +             /* encode the reboot msg **/
> +             p8 = ncs_enc_reserve_space(uba, 4);
> +             if (!p8) {
> +                     TRACE("ncs_enc_reserve_space failed");
> +                     goto err;
> +             }
> +             ncs_encode_32bit(&p8, msg->info.reboot_info.node_id);
> +             ncs_enc_claim_space(uba, 4);
> +             total_bytes += 4;
> +     } else if (CLMSV_CLMS_TO_CLMA_API_RESP_MSG == msg-
> >evt_type) {
>       /** encode the API RSP msg subtype **/
>               p8 = ncs_enc_reserve_space(uba, 4);
>               if (!p8) {
> @@ -1517,3 +1527,37 @@ uint32_t clms_mds_msg_send(CLMS_CB * cb,
>       TRACE_LEAVE();
>       return rc;
>  }
> +
> +/*********************************************************
> *******************
> +  Name          : clms_mds_msg_bcast
> +
> +  Description   : This routine sends a broadcast message to CLMNA.
> +
> +  Arguments     : cb  - ptr to the CLMA CB
> +                  bcast_msg - ptr to the CLMSv broadcast message
> +
> +  Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE
> +
> +  Notes         : None.
> +*********************************************************
> **************
> +*******/ uint32_t clms_mds_msg_bcast(CLMS_CB *cb, CLMSV_MSG
> *bcast_msg)
> +{
> +     NCSMDS_INFO snd_mds = {0};
> +     uint32_t rc;
> +
> +     snd_mds.i_mds_hdl = cb->mds_hdl;
> +     snd_mds.i_svc_id = NCSMDS_SVC_ID_CLMS;
> +     snd_mds.i_op = MDS_SEND;
> +     snd_mds.info.svc_send.i_msg = (NCSCONTEXT)bcast_msg;
> +     snd_mds.info.svc_send.i_to_svc = NCSMDS_SVC_ID_CLMNA;
> +     snd_mds.info.svc_send.i_priority = MDS_SEND_PRIORITY_HIGH;
> +     snd_mds.info.svc_send.i_sendtype = MDS_SENDTYPE_BCAST;
> +     snd_mds.info.svc_send.info.bcast.i_bcast_scope =
> NCSMDS_SCOPE_NONE;
> +
> +     if ((rc = ncsmds_api(&snd_mds)) != NCSCC_RC_SUCCESS) {
> +             LOG_ER("%s: ncsmds_api MDS_SEND failed %u",
> __FUNCTION__ ,rc);
> +             return rc;
> +     }
> +
> +     return NCSCC_RC_SUCCESS;
> +}
> \ No newline at end of file
> diff --git a/osaf/services/saf/clmsv/clms/clms_util.c
> b/osaf/services/saf/clmsv/clms/clms_util.c
> --- a/osaf/services/saf/clmsv/clms/clms_util.c
> +++ b/osaf/services/saf/clmsv/clms/clms_util.c
> @@ -1200,3 +1200,15 @@ bool ip_matched(uint16_t family1, uint8_
>       return true;
>  }
> 
> +//
> +void clms_cluster_reboot()
> +{
> +     CLMSV_MSG bcast_msg;
> +     bcast_msg.evt_type = CLMSV_CLMS_TO_CLMNA_REBOOT_MSG;
> +     bcast_msg.info.reboot_info.node_id = clms_cb->node_id;
> +     if (clms_mds_msg_bcast(clms_cb, &bcast_msg) ==
> NCSCC_RC_SUCCESS) {
> +             LOG_NO("Sending cluster reboot broadcast message
> succeeded");
> +     } else {
> +             LOG_ER("Sending cluster reboot broadcast message failed");
> +     }
> +}
> diff --git a/osaf/services/saf/clmsv/nodeagent/main.c
> b/osaf/services/saf/clmsv/nodeagent/main.c
> --- a/osaf/services/saf/clmsv/nodeagent/main.c
> +++ b/osaf/services/saf/clmsv/nodeagent/main.c
> @@ -114,6 +114,18 @@ static uint32_t clmna_mds_dec(struct ncs
>       total_bytes += 4;
> 
>       switch (msg->evt_type) {
> +     case    CLMSV_CLMS_TO_CLMNA_REBOOT_MSG:
> +             {
> +                     p8 = ncs_dec_flatten_space(uba, local_data, 4);
> +                     msg->info.reboot_info.node_id =
> ncs_decode_32bit(&p8);
> +                     ncs_dec_skip_space(uba, 4);
> +                     total_bytes += 4;
> +                     // Reboot will be performed by CLMS for this node.
> +                     if (clmna_cb->node_info.node_id != msg-
> >info.reboot_info.node_id) {
> +                             osaf_safe_reboot();
> +                     }
> +                     break;
> +             }
>       case CLMSV_CLMS_TO_CLMA_API_RESP_MSG:
>               {
>                       p8 = ncs_dec_flatten_space(uba, local_data, 8); diff -
> -git a/scripts/opensaf_reboot b/scripts/opensaf_reboot
> --- a/scripts/opensaf_reboot
> +++ b/scripts/opensaf_reboot
> @@ -40,10 +40,17 @@ NODE_ID_FILE=$pkglocalstatedir/node_id
> 
>  node_id=$1
>  ee_name=$2
> +safe_reboot=$3
> 
>  # Run commands through sudo when not superuser  test $(id -u) -ne 0 &&
> icmd=$(which sudo 2> /dev/null)
> 
> +opensaf_safe_reboot()
> +{
> +    logger -t "opensaf_reboot" "Rebooting local node using shutdown"
> +    $icmd /sbin/shutdown -r now
> +}
> +
>  ## Use stonith for remote fencing
>  opensaf_reboot_with_remote_fencing()
>  {
> @@ -91,8 +98,12 @@ temp_node_id=`cat "$NODE_ID_FILE"`
> temp_node_id=`echo "$temp_node_id" |sed -e 's:^0[bBxX]::'| sed -e
> 's:^:0x:'`  self_node_id=`printf "%d" $temp_node_id`
> 
> -# A node ID of zero(0) means an order to reboot the local node -if [
> "$self_node_id" = "$node_id" ] || [ $node_id = 0 ]; then
> +
> +if [ "$safe_reboot" = 1 ]; then
> +    opensaf_safe_reboot
> +else
> +    # A node ID of zero(0) means an order to reboot the local node
> +    if [ "$self_node_id" = "$node_id" ] || [ $node_id = 0 ]; then
>       # uncomment the following line if debugging errors that keep
> restarting the node
>       # exit 0
> 
> @@ -114,8 +125,8 @@ if [ "$self_node_id" = "$node_id" ] || [
> 
>       # Reboot (not shutdown) system WITH file system sync
>       $icmd /sbin/reboot -f
> -else
> -     if [ "$FMS_USE_REMOTE_FENCING" = "1" ]; then
> +    else
> +     if [ "$FMS_USE_REMOTE_FENCING" = 1 ]; then
>               opensaf_reboot_with_remote_fencing
>       else
>               if [ ":$ee_name" != ":" ]; then
> @@ -133,4 +144,5 @@ else
>                       logger -t "opensaf_reboot" "Rebooting remote node in the
> absence of PLM is outside the scope of OpenSAF"
>               fi
>       fi
> -fi
> +    fi
> +fi
> \ No newline at end of file

------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to