--- scripts/opensaf_reboot | 33 +++++++++++++++++++++++++++------ src/amf/amfd/ndproc.cc | 4 ++-- src/base/ncssysf_def.h | 6 ++++++ src/base/sysf_def.c | 10 ++++++++++ src/fm/fmd/fm_main.cc | 6 +++--- src/fm/fmd/fm_rda.cc | 5 ++--- src/rde/rded/rde_main.cc | 6 ++---- 7 files changed, 52 insertions(+), 18 deletions(-)
diff --git a/scripts/opensaf_reboot b/scripts/opensaf_reboot index 727272e1d..2f7a7daeb 100644 --- a/scripts/opensaf_reboot +++ b/scripts/opensaf_reboot @@ -31,7 +31,7 @@ export LD_LIBRARY_PATH=$libdir:$LD_LIBRARY_PATH # Node fencing: OpenSAF cannot reboot a node when there's no CLM node to # PLM EE mapping in the information model. In such cases rebooting would be done -# through proprietary mechanisms, i.e. not through PLM. Node_id is (the only +# through proprietary mechanisms, i.e. not through PLM. Node_id is (the only # entity) at the disposal of such a mechanism. if [ -f "$pkgsysconfdir/fmd.conf" ]; then @@ -81,7 +81,6 @@ opensaf_reboot_with_remote_fencing() #if plm exists in the system,then the reboot is performed using the eename. opensaf_reboot_with_plm() { - immadm -o 7 $ee_name retval=$? if [ $retval != 0 ]; then @@ -96,12 +95,29 @@ opensaf_reboot_with_plm() logger -t "opensaf_reboot" "abrupt restart failed for $ee_name: unable to restart remote node" exit 1 fi - fi + fi fi #Note: Operation Id SA_PLM_ADMIN_RESTART=7 #In the example the $ee_name would expand to (for eg:-) safEE=my_linux_os,safHE=64bitmulticore,safDomain=my_domain } +# Force local node reboot as fast as possible +quick_local_node_reboot() +{ + logger -t "opensaf_reboot" "Do quick local node reboot" + + $icmd /bin/echo -n 'b' 2> /dev/null > /proc/sysrq-trigger + ret_code=$? + + if [ $ret_code != 0 ] && [ -x /bin/systemctl ]; then + $icmd /bin/systemctl --force --force reboot + ret_code=$? + fi + + if [ $ret_code != 0 ]; then + $icmd /sbin/reboot -f + fi +} if ! test -f "$NODE_ID_FILE"; then logger -t "opensaf_reboot" "$NODE_ID_FILE doesnt exists,reboot failed " @@ -112,8 +128,13 @@ temp_node_id=`cat "$NODE_ID_FILE"` temp_node_id=`echo "$temp_node_id" |sed -e 's:^0[bBxX]::'| sed -e 's:^:0x:'` self_node_id=`printf "%d" $temp_node_id` -# If clm cluster reboot requested argument one and two are set but not used, argument 3 is set to 1, "safe reboot" request -if [ "$safe_reboot" = 1 ]; then +# If no argument is provided, forcing node reboot immediately without log +# flushing, process terminating, disk un-mounting. +# If clm cluster reboot requested argument one and two are set but not used, +# argument 3 is set to 1, "safe reboot" request. +if [ "$#" = 0 ]; then + quick_local_node_reboot +elif [ "$safe_reboot" = 1 ]; then opensaf_safe_reboot else # A node ID of zero(0) means an order to reboot the local node @@ -165,7 +186,7 @@ else logger -t "opensaf_reboot" "Not rebooting remote node $ee_name as it is not in INSTANTIATED state" elif [ $plm_node_state != 2 ]; then opensaf_reboot_with_plm - else + else logger -t "opensaf_reboot" "Not rebooting remote node $ee_name as it is already in locked state" fi else diff --git a/src/amf/amfd/ndproc.cc b/src/amf/amfd/ndproc.cc index ec347fc71..c5060e67b 100644 --- a/src/amf/amfd/ndproc.cc +++ b/src/amf/amfd/ndproc.cc @@ -1262,7 +1262,7 @@ void check_quorum(AVD_CL_CB *cb) { // remote fencing is disabled and we have lost write access // reboot this node to prevent split brain - opensaf_reboot(0, nullptr, - "Quorum lost. Rebooting this node to prevent split-brain"); + opensaf_quick_reboot("Quorum lost. Rebooting this node to " + "prevent split-brain"); } } diff --git a/src/base/ncssysf_def.h b/src/base/ncssysf_def.h index 3df8b221e..3de6f44f6 100644 --- a/src/base/ncssysf_def.h +++ b/src/base/ncssysf_def.h @@ -76,6 +76,12 @@ void opensaf_reboot_prepare(void); */ void opensaf_reboot(unsigned node_id, const char* ee_name, const char* reason); +/** + * Do quick local node reboot - without first unmounting file systems + * or syncing disks attached to the system. +*/ +void opensaf_quick_reboot(const char* reason); + /***************************************************************************** ** ** ** ncs_os_get_time_stamp: Return the current timestamp as "time_t" in ** diff --git a/src/base/sysf_def.c b/src/base/sysf_def.c index d0cfc94ef..880f08ec4 100644 --- a/src/base/sysf_def.c +++ b/src/base/sysf_def.c @@ -271,6 +271,16 @@ void opensaf_reboot(unsigned node_id, const char *ee_name, const char *reason) } } +void opensaf_quick_reboot(const char *reason) +{ + syslog(LOG_CRIT, "Quick local node rebooting, Reason: %s ", reason); + int reboot_result = system(PKGLIBDIR "/opensaf_reboot"); + if (reboot_result != EXIT_SUCCESS) { + syslog(LOG_CRIT, "node reboot failure: exit code %d", + WEXITSTATUS(reboot_result)); + } +} + /** * syslog and abort to generate a core dump (if enabled) * @param __file diff --git a/src/fm/fmd/fm_main.cc b/src/fm/fmd/fm_main.cc index 122b2175c..b473b68aa 100644 --- a/src/fm/fmd/fm_main.cc +++ b/src/fm/fmd/fm_main.cc @@ -665,9 +665,9 @@ static void fm_mbx_msg_handler(FM_CB *fm_cb, FM_EVT *fm_mbx_evt) { "Two active controllers observed in a cluster, newActive: %x and " "old-Active: %x", unsigned(fm_cb->node_id), unsigned(fm_cb->peer_node_id)); - opensaf_reboot(0, NULL, - "Received svc up from peer node (old-active is not " - "fully DOWN), hence rebooting the new Active"); + opensaf_quick_reboot("Received svc up from peer node (old-active " + "is not fully DOWN), " + "hence rebooting the new Active"); } } diff --git a/src/fm/fmd/fm_rda.cc b/src/fm/fmd/fm_rda.cc index 2a9f1664f..028bfa380 100644 --- a/src/fm/fmd/fm_rda.cc +++ b/src/fm/fmd/fm_rda.cc @@ -105,9 +105,8 @@ uint32_t fm_rda_set_role(FM_CB *fm_cb, PCS_RDA_ROLE role) { LOG_ER( "A controller is already active. We were separated from the " "cluster?"); - opensaf_reboot(0, nullptr, - "A controller is already active. We were separated " - "from the cluster?"); + opensaf_quick_reboot("A controller is already active. We were separated " + "from the cluster?"); } } diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc index 2d9aa51dc..b4c842942 100644 --- a/src/rde/rded/rde_main.cc +++ b/src/rde/rded/rde_main.cc @@ -140,9 +140,7 @@ static void handle_mbx_event() { active_controller.c_str()); if (consensus_service.IsRemoteFencingEnabled() == false) { LOG_ER("Probable split-brain. Rebooting this node"); - - opensaf_reboot(0, nullptr, - "Split-brain detected by consensus service"); + opensaf_quick_reboot("Split-brain detected by consensus service"); } } @@ -258,7 +256,7 @@ static void CheckForSplitBrain(const rde_msg *msg) { PCS_RDA_ROLE own_role = role->role(); PCS_RDA_ROLE other_role = msg->info.peer_info.ha_role; if (own_role == PCS_RDA_ACTIVE && other_role == PCS_RDA_ACTIVE) { - opensaf_reboot(0, nullptr, "Split-brain detected"); + opensaf_quick_reboot("Split-brain detected"); } } -- 2.19.2 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel