On 2008-09-09T15:28:31, NAKAHIRA Kazutomo <[EMAIL PROTECTED]> wrote:
> Hi, Lars
>
> Thank you for your advice.
>
> I decided to use the sbd command with the "-W" option to enable the watchdog.
> It operates well when starting from the command line.
>
> But I encountered another problem when the sbd watch process is started
> by Heartbeat using the respawn directive in ha.cf.
Yes, that is a side-effect of starting it there. It really should be
started via the init script, as I do with the init script on SuSE. I'm
attaching the script for reference.
> (snip)
> Sep 9 11:15:56 dl380g5a kernel: SoftDog: Unexpected close, not stopping
> watchdog!
> (snip)
>
> It seems that the sbd watch process was stopped
> before watchdog_close() had completed, and the watchdog rebooted
> the system.
Yes. heartbeat sends a kill signal and doesn't allow sbd to recover;
also, sbd really should continue running even if heartbeat crashes and
must continue running during hb shutdown.
Regards,
Lars
--
Teamlead Kernel, SuSE Labs, Research and Development
SUSE LINUX Products GmbH, GF: Markus Rex, HRB 16746 (AG Nürnberg)
"Experience is the name everyone gives to their mistakes." -- Oscar Wilde
#!/bin/sh
#
# heartbeat Start high-availability services
#
# Author: Lars Marowsky-Bree <[EMAIL PROTECTED]>
#
# chkconfig: 2345 @HB_INITSTARTPRI@ @HB_INITSTOPPRI@
# description: Startup script high-availability services.
# processname: heartbeat
# pidfile: @localstatedir@/run/heartbeat.pid
# config: @sysconfdir@/ha.d/ha.cf
#
### BEGIN INIT INFO
# Provides: heartbeat
# Required-Start: $network $syslog $named
# Should-Start: drbd sshd xendomains o2cb evms ocfs2
# Required-Stop: $network $syslog $named
# Should-Stop: drbd sshd xendomains o2cb evms ocfs2
# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Description: Start heartbeat HA services
### END INIT INFO
# Base directory of the heartbeat configuration. It is exported so that
# child processes see it as well.
HA_DIR=/etc/ha.d
export HA_DIR

# Main configuration file; its presence is checked before any action runs.
CONFIG="${HA_DIR}/ha.cf"

# Shared shell helpers shipped in the configuration directory.
# NOTE(review): HA_BIN, ha_parameter and Echo are used below but not defined
# in this script -- presumably they come from this file; confirm.
. "${HA_DIR}/shellfuncs"

# Setup SuSE specific variables (only when the file is readable).
# NOTE(review): the rc_* helpers used below are presumably defined here.
test -r /etc/rc.status && . /etc/rc.status
rc_reset
# Both executables must be present under $HA_BIN. For the first one that is
# missing, print a notice naming it; a "stop" request then exits 0 right away,
# any other action goes through rc_status -s and rc_exit.
for ha_required_bin in heartbeat ha_logd; do
  if [ ! -x $HA_BIN/$ha_required_bin ]; then
    echo -n "High-Availability services not installed ($ha_required_bin)"
    case "$1" in
      stop) exit 0 ;;
    esac
    rc_status -s
    rc_exit
  fi
done
# Optional sbd settings file; it is sourced only when it exists as a regular
# file. StartSBD/StopSBD read SBD_DEVICE and SBD_OPTS, which are presumably
# set in this file (TODO confirm).
SBD_CONFIG=/etc/sysconfig/sbd
[ ! -f "$SBD_CONFIG" ] || . "$SBD_CONFIG"
# StartSBD - run "sbd ... watch" when an SBD device is configured.
#
# Globals:  SBD_DEVICE (read) - device passed to "sbd -d"; empty disables sbd
#           SBD_OPTS   (read) - extra sbd options, split on whitespace
# Returns:  0 when SBD_DEVICE is empty or sbd succeeds. When sbd fails, the
#           failure is recorded with rc_failed and rc_exit is called.
StartSBD() {
  [ -n "$SBD_DEVICE" ] || return 0

  # SBD_DEVICE is quoted so the path reaches sbd as exactly one argument
  # (no word splitting, no globbing). SBD_OPTS is deliberately left unquoted:
  # it carries a list of options that must be split into separate words.
  if ! sbd -d "$SBD_DEVICE" -D $SBD_OPTS watch; then
    rc_failed
    rc_exit
  fi
}
# StopSBD - send "message LOCAL exit" through sbd when an SBD device is
# configured.
#
# Globals:  SBD_DEVICE (read) - device passed to "sbd -d"; empty disables sbd
#           SBD_OPTS   (read) - extra sbd options, split on whitespace
# Returns:  0 when SBD_DEVICE is empty or sbd succeeds. When sbd fails, the
#           failure is recorded with rc_failed and rc_exit is called.
StopSBD() {
  [ -n "$SBD_DEVICE" ] || return 0

  # SBD_DEVICE is quoted so the path reaches sbd as exactly one argument
  # (no word splitting, no globbing). SBD_OPTS is deliberately left unquoted:
  # it carries a list of options that must be split into separate words.
  if ! sbd -d "$SBD_DEVICE" -D $SBD_OPTS message LOCAL exit; then
    rc_failed
    rc_exit
  fi
}
# StartLogd - start ha_logd unless it is already running.
#
# Globals:  HA_BIN (read) - directory containing ha_logd
# Outputs:  a one-line notice via Echo when logd was already running or when
#           starting it failed
# Returns:  0 when logd is running afterwards, 1 when "ha_logd -d" failed
StartLogd() {
  # Status probe. stdout must be redirected BEFORE "2>&1"; the reverse order
  # leaves stderr attached to the terminal, so the probe's diagnostics leak.
  if "$HA_BIN/ha_logd" -s >/dev/null 2>&1; then
    Echo "logd is already running"
    return 0
  fi

  if ! "$HA_BIN/ha_logd" -d; then
    Echo "starting logd failed"
    # Report the failure to the caller instead of returning Echo's status.
    return 1
  fi
}
# StopLogd - stop ha_logd unless it is already stopped.
#
# Globals:  HA_BIN (read) - directory containing ha_logd
# Outputs:  a one-line notice via Echo when logd was already stopped or when
#           stopping it failed
# Returns:  0 when logd is stopped afterwards, 1 when "ha_logd -k" failed
StopLogd() {
  # Status probe. stdout must be redirected BEFORE "2>&1"; the reverse order
  # leaves stderr attached to the terminal, so the probe's diagnostics leak.
  if ! "$HA_BIN/ha_logd" -s >/dev/null 2>&1; then
    Echo "logd is already stopped"
    return 0
  fi

  if ! "$HA_BIN/ha_logd" -k; then
    Echo "stopping logd failed"
    # Report the failure to the caller instead of returning Echo's status.
    return 1
  fi
}
# StatusHA - silent status probe: runs "heartbeat -s" with stdout and stderr
# discarded and hands its exit status back to the caller. StandbyHA treats a
# zero status as "heartbeat is running".
StatusHA() {
  $HA_BIN/heartbeat -s 2>/dev/null 1>&2
  return $?
}
# StandbyHA - ask the local node to enter standby mode.
#
# Reads the auto_failback and nice_failback settings through ha_parameter
# (lower-cased before matching) and refuses to continue when the configuration
# rules out standby. Otherwise hb_standby is run if heartbeat is running.
#
# Globals:  HA_BIN (read); auto_failback, nice_failback (written)
# Outputs:  progress / refusal messages on stdout
# Returns:  1 when the configuration forbids standby or hb_standby fails;
#           0 when hb_standby succeeds or heartbeat is not running at all.
StandbyHA() {
  auto_failback=$(ha_parameter auto_failback | tr 'A-Z' 'a-z')
  nice_failback=$(ha_parameter nice_failback | tr 'A-Z' 'a-z')

  case "$auto_failback" in
    *legacy*)
      echo "auto_failback is set to legacy. Cannot enter standby."
      return 1
      ;;
  esac

  case "$nice_failback" in
    *off*)
      echo "nice_failback is disabled. Cannot enter standby."
      return 1
      ;;
  esac

  # Neither option is set at all: the message below treats that as the
  # legacy default.
  case "${auto_failback}${nice_failback}" in
    "")
      echo "auto_failback defaulted to legacy. Cannot enter standby."
      return 1
      ;;
  esac

  echo "auto_failback: $auto_failback"

  if ! StatusHA; then
    echo -n "heartbeat is not currently running."
    return 0
  fi

  echo -n "Attempting to enter standby mode."
  # Normalise hb_standby's exit status to 0/1. Every path above returns, so
  # the old rc_status/rc_exit fall-through after this point was unreachable
  # and has been dropped.
  if "$HA_BIN/hb_standby"; then
    return 0
  fi
  return 1
}
# Refuse to do anything without the main configuration file. A "stop" request
# exits 0 immediately after the notice; every other action goes through
# rc_status -u and rc_exit.
if [ ! -f $CONFIG ]; then
  echo -n "High-Availability services not configured"
  [ "$1" != "stop" ] || exit 0
  rc_status -u
  rc_exit
fi
# Dispatch on the action passed as the first argument.
# NOTE(review): each rc_status call presumably evaluates the exit status of
# the command immediately before it -- keep those pairs adjacent when editing
# (confirm against /etc/rc.status).
case "$1" in
# start: bring up logd, then the sbd watcher (if a device is configured),
# then heartbeat itself.
start)
echo -n "Starting High-Availability services"
StartLogd
StartSBD
# Only when haresources exists and is non-empty: run ResourceManager's
# "verifyallidle" step before heartbeat is launched.
if [ -s $HA_DIR/haresources ]; then
$HA_BIN/ResourceManager verifyallidle
fi
$HA_BIN/heartbeat
rc_status -v
;;
# stop: heartbeat is told to stop first; sbd and logd are stopped only
# afterwards, so both stay up while heartbeat shuts down.
stop)
echo -n "Stopping High-Availability services"
$HA_BIN/heartbeat -k
rc_status -v
StopSBD
StopLogd
;;
# status: report on the heartbeat process via checkproc.
status)
echo -n "Checking for High-Availability services"
checkproc $HA_BIN/heartbeat
rc_status -v
;;
# standby: delegate to StandbyHA; its return value feeds rc_status.
standby)
StandbyHA
rc_status -v
;;
# restart: re-invoke this script for stop and start, pausing in between for
# the configured deadtime plus a fixed 10 seconds.
# NOTE(review): sleeptime is expanded unquoted; if ha_parameter prints nothing,
# sleep is called without an operand -- verify this cannot happen.
restart)
sleeptime=`ha_parameter deadtime`
$0 stop
sleep $sleeptime
sleep 10 # allow resource takeover to complete (hopefully).
$0 start
rc_status
;;
# reload / force-reload: both map to "heartbeat -r".
force-reload|reload)
echo -n "Reloading High-Availability services "
# Restart, and keep your resources
$HA_BIN/heartbeat -r
rc_status -v
;;
# Anything else: print the usage line and exit with status 1.
*)
Echo "Usage: $0 {start|stop|status|restart|reload|force-reload|standby}"
exit 1
esac
# Leave the script through rc_exit once the selected action has run.
rc_exit
_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems