On 2008-09-09T15:28:31, NAKAHIRA Kazutomo <[EMAIL PROTECTED]> wrote:

> Hi, Lars
>
> Thank you for your advice.
>
> I decided to use the sbd command with "-W" option to enable watchdog.
> It operates well when starting from the command line.
>
> However, I encountered another problem when the sbd watch process is
> started by Heartbeat using the respawn directive in ha.cf.

Yes, that is a side-effect of starting it there. It really should be
started via the init script, as I do with the init script on SuSE. I'm
attaching the script for reference.

> (snip)
> Sep  9 11:15:56 dl380g5a kernel: SoftDog: Unexpected close, not stopping 
> watchdog!
> (snip)
>
> It seems that the sbd watch process was stopped
> before watchdog_close() had completed, so the watchdog rebooted
> the system.

Yes. heartbeat sends a kill signal and doesn't allow sbd to recover;
also, sbd really should continue running even if heartbeat crashes and
must continue running during hb shutdown.

Regards,
    Lars

-- 
Teamlead Kernel, SuSE Labs, Research and Development
SUSE LINUX Products GmbH, GF: Markus Rex, HRB 16746 (AG Nürnberg)
"Experience is the name everyone gives to their mistakes." -- Oscar Wilde

#!/bin/sh
#
# heartbeat     Start high-availability services
#
# Author:       Lars Marowsky-Bree <[EMAIL PROTECTED]>
#
# chkconfig: 2345 @HB_INITSTARTPRI@ @HB_INITSTOPPRI@
# description: Startup script high-availability services.
# processname: heartbeat
# pidfile: @localstatedir@/run/heartbeat.pid
# config: @sysconfdir@/ha.d/ha.cf
#
### BEGIN INIT INFO
# Provides: heartbeat
# Required-Start: $network $syslog $named
# Should-Start: drbd sshd xendomains o2cb evms ocfs2
# Required-Stop: $network $syslog $named
# Should-Stop: drbd sshd xendomains o2cb evms ocfs2
# Default-Start:  3 5
# Default-Stop:   0 1 2 6
# Description:    Start heartbeat HA services
### END INIT INFO

# Heartbeat configuration directory; exported so child processes see it too.
HA_DIR=/etc/ha.d; export HA_DIR
# Main heartbeat configuration file; its presence is checked further below.
CONFIG=$HA_DIR/ha.cf
# Shared heartbeat shell helpers.
# NOTE(review): $HA_BIN, Echo and ha_parameter, which this script uses, are
# presumably defined by this file -- confirm.
. $HA_DIR/shellfuncs

# Setup SuSE specific variables
# NOTE(review): rc_reset, rc_status, rc_failed and rc_exit are presumably
# provided by /etc/rc.status. rc_reset is called even when that file is not
# readable, in which case it would be an unknown command.
[ -r /etc/rc.status ] && . /etc/rc.status
rc_reset

# The heartbeat binary must be present and executable. A "stop" request exits
# 0 right away when it is not; every other action goes through rc_status -s
# and rc_exit.
if [ ! -x $HA_BIN/heartbeat ]; then
        echo -n "High-Availability services not installed (heartbeat)"
        if [ "$1" = "stop" ]; then exit 0; fi
        rc_status -s
        rc_exit
fi

# Same check for the logging daemon binary, which StartLogd/StopLogd invoke.
if [ ! -x $HA_BIN/ha_logd ]; then
        echo -n "High-Availability services not installed (ha_logd)"
        if [ "$1" = "stop" ]; then exit 0; fi
        rc_status -s
        rc_exit
fi

# Optional sbd settings. The file is sourced only if it exists; StartSBD and
# StopSBD read SBD_DEVICE and SBD_OPTS, which are expected to come from here.
SBD_CONFIG=/etc/sysconfig/sbd
if [ -f $SBD_CONFIG ]; then
        . $SBD_CONFIG
fi

# Launch the sbd "watch" command on the configured device.
# Globals:   SBD_DEVICE (read) - device passed to "sbd -d"; empty means
#                                sbd is not in use and nothing is done
#            SBD_OPTS   (read) - extra sbd options, split into words
# Returns:   0 when sbd is unconfigured or the sbd command succeeds;
#            otherwise rc_failed and rc_exit are invoked.
StartSBD() {
        # No device configured: sbd is not used on this node.
        [ -n "$SBD_DEVICE" ] || return 0

        # Both variables are expanded unquoted so that SBD_OPTS can carry
        # several options.
        sbd -d $SBD_DEVICE -D $SBD_OPTS watch && return 0

        rc_failed
        rc_exit
}

# Send the "exit" message to the LOCAL slot via sbd on the configured device.
# Globals:   SBD_DEVICE (read) - device passed to "sbd -d"; empty means
#                                sbd is not in use and nothing is done
#            SBD_OPTS   (read) - extra sbd options, split into words
# Returns:   0 when sbd is unconfigured or the sbd command succeeds;
#            otherwise rc_failed and rc_exit are invoked.
StopSBD() {
        # No device configured: there is no sbd process to notify.
        [ -n "$SBD_DEVICE" ] || return 0

        # Both variables are expanded unquoted so that SBD_OPTS can carry
        # several options.
        sbd -d $SBD_DEVICE -D $SBD_OPTS message LOCAL exit && return 0

        rc_failed
        rc_exit
}

# Start the heartbeat logging daemon unless it is already running.
# Globals:   HA_BIN (read) - directory containing the ha_logd binary
# Outputs:   a status message via Echo when logd is already running or
#            when starting it fails
# Returns:   0 when logd is already running. Otherwise a failed start is
#            only reported, and the status is that of the last command run
#            (0 after a successful start).
StartLogd() {
    # Probe silently: exit status 0 from "ha_logd -s" is treated as "running".
    # stdout must be redirected BEFORE 2>&1, otherwise stderr is duplicated
    # onto the original stdout and the probe's diagnostics leak to the caller.
    if "$HA_BIN/ha_logd" -s >/dev/null 2>&1; then
       Echo "logd is already running"
       return 0
    fi

    if ! "$HA_BIN/ha_logd" -d; then
       Echo "starting logd failed"
    fi
}

# Stop the heartbeat logging daemon if it is running.
# Globals:   HA_BIN (read) - directory containing the ha_logd binary
# Outputs:   a status message via Echo when logd is already stopped or
#            when stopping it fails
# Returns:   0 when logd is already stopped. Otherwise a failed stop is
#            only reported, and the status is that of the last command run
#            (0 after a successful stop).
StopLogd() {
    # Probe silently: a non-zero exit from "ha_logd -s" is treated as
    # "not running". stdout must be redirected BEFORE 2>&1, otherwise stderr
    # is duplicated onto the original stdout and leaks to the caller.
    if ! "$HA_BIN/ha_logd" -s >/dev/null 2>&1; then
          Echo "logd is already stopped"
          return 0
    fi

    if ! "$HA_BIN/ha_logd" -k; then
       Echo "stopping logd failed"
    fi
}


# Query heartbeat's status without producing any output.
# Globals:   HA_BIN (read) - directory containing the heartbeat binary
# Returns:   the exit status of "heartbeat -s"
StatusHA() {
  # Both output streams are discarded; only the exit code is of interest.
  $HA_BIN/heartbeat -s 1>/dev/null 2>&1
  return $?
}

# Ask a running heartbeat to enter standby mode, if the configuration allows.
# Globals:   HA_BIN (read) - directory containing hb_standby
#            auto_failback, nice_failback (written) - lower-cased values
#            obtained from ha_parameter
# Outputs:   a refusal message, or the auto_failback value followed by a
#            progress message (without trailing newline), on stdout
# Returns:   1 when the configuration forbids standby or hb_standby fails;
#            0 when hb_standby succeeds or heartbeat is not running.
StandbyHA() {
  auto_failback=$(ha_parameter auto_failback | tr 'A-Z' 'a-z')
  nice_failback=$(ha_parameter nice_failback | tr 'A-Z' 'a-z')

  case "$auto_failback" in
    *legacy*)
      echo "auto_failback is set to legacy.  Cannot enter standby."
      return 1
      ;;
  esac
  case "$nice_failback" in
    *off*)
      echo "nice_failback is disabled.  Cannot enter standby."
      return 1
      ;;
  esac
  # Neither parameter is set at all: refuse, reporting a legacy default.
  case "${auto_failback}${nice_failback}" in
    "")
      echo "auto_failback defaulted to legacy.  Cannot enter standby."
      return 1
      ;;
  esac

  echo "auto_failback: $auto_failback"

  # Nothing to put into standby when heartbeat is down; not an error.
  if ! StatusHA; then
    echo -n "heartbeat is not currently running."
    return 0
  fi

  echo -n "Attempting to enter standby mode."
  # Any hb_standby failure is normalised to status 1.
  if "$HA_BIN/hb_standby"; then
    return 0
  fi
  return 1
}

# Nothing can be managed without the main configuration file ($CONFIG).
# A "stop" request exits 0 right away; every other action goes through
# rc_status -u and rc_exit.
# NOTE(review): "-u" presumably marks the service as unused/unconfigured --
# confirm against /etc/rc.status.
if [ ! -f $CONFIG ]; then
        echo -n "High-Availability services not configured"
        if [ "$1" = "stop" ]; then exit 0; fi
        rc_status -u
        rc_exit
fi


# Dispatch on the requested action (first command-line argument).
# NOTE(review): rc_status presumably evaluates the exit status of the command
# run immediately before it, so the statement order in each arm matters --
# confirm against /etc/rc.status.
case "$1" in
  start)
        echo -n "Starting High-Availability services"
        # logd and (if configured) the sbd watcher come up before heartbeat.
        StartLogd
        StartSBD
        
        # With a non-empty haresources file, run ResourceManager's
        # "verifyallidle" first; its exit status is not checked here.
        if [ -s $HA_DIR/haresources ]; then     
                $HA_BIN/ResourceManager verifyallidle
        fi
        
        $HA_BIN/heartbeat
        
        rc_status -v
        
        ;;

  stop)
        echo -n "Stopping High-Availability services"

        # Heartbeat is stopped (and its result reported) first; sbd and logd
        # are only shut down afterwards, so they keep running during the
        # heartbeat shutdown.
        $HA_BIN/heartbeat -k
        
        rc_status -v
        
        StopSBD
        StopLogd

        ;;

  status)
        echo -n "Checking for High-Availability services"

        checkproc $HA_BIN/heartbeat
        
        rc_status -v
        ;;

  standby)
        StandbyHA
        rc_status -v
        ;;

  restart)
        # Read deadtime before stopping, then wait that long (plus a fixed
        # 10 seconds) between the stop and the start.
        # NOTE(review): if ha_parameter prints nothing, "sleep" is called
        # without an operand.
        sleeptime=`ha_parameter deadtime`
        $0 stop
        sleep $sleeptime
        sleep 10 # allow resource takeover to complete (hopefully).
        $0 start
        
        rc_status
        ;;

  force-reload|reload)
        echo -n "Reloading High-Availability services "

        # Restart, and keep your resources
        $HA_BIN/heartbeat -r
        rc_status -v
        ;;

  *)
        # Unknown or missing action: print usage and fail.
        Echo "Usage: $0 {start|stop|status|restart|reload|force-reload|standby}"
        exit 1
esac

# NOTE(review): rc_exit presumably terminates the script with the status
# recorded by the rc_status calls above -- confirm against /etc/rc.status.
rc_exit

_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems

Reply via email to