Thanks for the response, Pushkar! I'm a bit of a novice when it comes to stuff like this, so I'm not sure. Here is my /etc/init.d/heartbeat file:
#!/bin/sh
#
#
# heartbeat     Start high-availability services
#
# Author:       Alan Robertson <[email protected]>
# License:      GNU General Public License (GPL)
#
#               This script works correctly under SuSE, Debian,
#               Conectiva, Red Hat and a few others. Please let me know if it
#               doesn't work under your distribution, and we'll fix it.
#               We don't hate anyone, and like for everyone to use
#               our software, no matter what OS or distribution you're using.
#
# chkconfig: 2345 75 05
# description: Startup script high-availability services.
# processname: heartbeat
# pidfile: /var/run/heartbeat.pid
# config: /etc/ha.d/ha.cf
#
### BEGIN INIT INFO
# Description: heartbeat is a basic high-availability subsystem.
#       It will start services at initialization, and when machines go up
#       or down. This version will also perform IP address takeover using
#       gratuitous ARPs. It works correctly for a 2-node configuration,
#       and is extensible to larger configurations.
#
#       It implements the following kinds of heartbeats:
#               - Bidirectional Serial Rings ("raw" serial ports)
#               - UDP/IP broadcast (ethernet, etc)
#               - UDP/IP multicast (ethernet, etc)
#               - Unicast heartbeats
#               - "ping" heartbeats (for routers, switches, etc.)
#                 (to be used for breaking ties in 2-node systems
#                  and monitoring networking availability)
#
# Short-Description: High-availability services.
# Provides: heartbeat HA
# Required-Start: $network $time $syslog
# Should-Start: ntp
# Required-Stop: $network $time $syslog
# Default-Start: 3 5
# Default-Stop: 0 6
### END INIT INFO

HA_DIR=/etc/ha.d; export HA_DIR
CONFIG=$HA_DIR/ha.cf
# NOTE(review): Echo, ha_parameter, HA_BIN and HA_NOARCHBIN are used below but
# never defined in this script; presumably they come from shellfuncs - confirm.
. $HA_DIR/shellfuncs

LOCKDIR=/var/lock/subsys
RUNDIR=/var/run

# Echo without putting a newline on the end
EchoNoNl() {
  Echo "$@"
}

# Echo with escapes enabled...
EchoEsc() {
  Echo "$@"
}

# Report a failure; $1 is the return code, which is also returned.
echo_failure() {
  EchoEsc " Heartbeat failure [rc=$1]. $rc_failed"
  return $1
}

# Report success using the distribution's "done" string.
echo_success() {
  : Cool! It started!
  EchoEsc "$rc_done"
}

if
  [ -r /etc/SuSE-release ]
then
  # rc.status is new since SuSE 7.0
  [ -r /etc/rc.status ] && . /etc/rc.status
  [ -r /etc/rc.config ] && . /etc/rc.config

  # Determine the base and follow a runlevel link name.
  base=${0##*/}
  link=${base#*[SK][0-9][0-9]}
fi

# Fall back to plain status strings when nothing above defined them.
if
  [ -z "$rc_done" ]
then
  rc_done="Done."
  rc_failed="Failed."
  rc_skipped="Skipped."
fi

# exec 2>>/var/log/ha-debug

# This should probably be it's own autoconf parameter
# because RH has moved it from time to time...
# and I suspect Conectiva and Mandrake also supply it.

DISTFUNCS=/etc/rc.d/init.d/functions
SUBSYS=heartbeat
MODPROBE=/sbin/modprobe
US=`uname -n`

# Set this to a 1 if you want to automatically load kernel modules
USE_MODULES=1

[ -x $HA_BIN/heartbeat ] || exit 0

#
# Some environments like it if we use their functions...
#
if
  [ ! -x $DISTFUNCS ]
then
  # Provide our own versions of these functions
  status() {
    $HA_BIN/heartbeat -s
  }
  echo_failure() {
    EchoEsc " Heartbeat failure [rc=$1]. $rc_failed"
    return $1
  }
  echo_success() {
    : Cool! It started!
    EchoEsc "$rc_done"
  }
else
  . $DISTFUNCS
fi

#
# See if they've configured things yet...
#
if
  [ ! -f $CONFIG ]
then
  EchoNoNl "Heartbeat not configured: $CONFIG not found."
  echo_failure 1
  exit 0
fi

# Succeeds when the lower-cased "crm" parameter is one of the enabling values.
CrmEnabled() {
  case `ha_parameter crm | tr '[A-Z]' '[a-z]'` in
    y|yes|enable|on|true|1|manual) true;;
    *) false;;
  esac
}

# Start ha_logd unless "ha_logd -s" reports it is already running.
StartLogd() {
  $HA_BIN/ha_logd -s >/dev/null 2>&1

  if
    [ $? -eq 0 ]
  then
    Echo "logd is already running"
    return 0
  fi

  $HA_BIN/ha_logd -d >/dev/null 2>&1
  if
    [ $? -ne 0 ]
  then
    Echo "starting logd failed"
  fi
}

# Stop ha_logd unless "ha_logd -s" reports it is already stopped.
StopLogd() {
  $HA_BIN/ha_logd -s >/dev/null 2>&1

  if
    [ $? -ne 0 ]
  then
    Echo "logd is already stopped"
    return 0
  fi

  $HA_BIN/ha_logd -k >/dev/null 2>&1
  if
    [ $? -ne 0 ]
  then
    Echo "stopping logd failed"
  fi
}

# Run the Linux watchdog setup only when /proc/devices exists and modprobe
# is executable.
init_watchdog() {
  if
    [ -f /proc/devices -a -x $MODPROBE ]
  then
    init_watchdog_linux
  fi
}

#
# Install the softdog module if we need to
#
init_watchdog_linux() {
  #
  # We need to install it if watchdog is specified in $CONFIG, and
  # /dev/watchdog refers to a softdog device, or it /dev/watchdog
  # doesn't exist at all.
  #
  # If we need /dev/watchdog, then we'll make it if necessary.
  #
  # Whatever the user says we should use for watchdog device, that's
  # what we'll check for, use and create if necessary. If they misspell
  # it, or don't put it under /dev, so will we.
  # Hope they do it right :-)
  #
  #
  insmod=no
  # What do they think /dev/watchdog is named?
  MISCDEV=`grep ' misc$' /proc/devices | cut -c1-4`
  MISCDEV=`Echo $MISCDEV`
  WATCHDEV=`ha_parameter watchdog`
  WATCHDEV=`Echo $WATCHDEV`
  if
    [ "X$WATCHDEV" != X ]
  then
    : Watchdog requested by $CONFIG file
    #
    # We try and modprobe the module if there's no dev or the dev exists
    # and points to the softdog major device.
    #
    if
      [ ! -c "$WATCHDEV" ]
    then
      insmod=yes
    else
      case `ls -l "$WATCHDEV" 2>/dev/null` in
        *$MISCDEV,*)
          insmod=yes;;
        *) : "$WATCHDEV isn't a softdog device (wrong major)" ;;
      esac
    fi
  else
    : No watchdog device specified in $CONFIG file.
  fi
  case $insmod in
    yes)
      if
        grep softdog /proc/modules >/dev/null 2>&1
      then
        : softdog already loaded
      else
        $MODPROBE softdog nowayout=0 >/dev/null 2>&1
      fi;;
  esac
  # Create the device node when it was requested but does not exist yet.
  if
    [ "X$WATCHDEV" != X -a ! -c "$WATCHDEV" -a "$insmod" = yes ]
  then
    minor=`cat /proc/misc | grep watchdog | cut -c1-4`
    mknod -m 600 "$WATCHDEV" c $MISCDEV $minor
  fi
} # init_watchdog_linux()

#
# Start the heartbeat daemon...
#
# Captures the daemon's combined output in ERROR and returns its exit status.
start_heartbeat() {
  if
    ERROR=`$HA_BIN/heartbeat 2>&1`
  then
    : OK
  else
    return $?
  fi
}

#
# Start Linux-HA
#
StartHA() {
  EchoNoNl "Starting High-Availability services: "

  if
    CrmEnabled
  then
    : OK
  else
    $HA_NOARCHBIN/ResourceManager verifyallidle
  fi
  if
    [ $USE_MODULES = 1 ]
  then
    # Create /dev/watchdog and load module if we should
    init_watchdog
  fi
  rm -f $RUNDIR/ppp.d/*
  if
    [ ! -d $RUNDIR/heartbeat ]
  then
    mkdir -p $RUNDIR/heartbeat/ccm
    mkdir -p $RUNDIR/heartbeat/crm
    chown -R hacluster:haclient $RUNDIR/heartbeat
    chmod -R 750 $RUNDIR/heartbeat
  fi
  # Migrate a legacy ipresources file when no haresources file exists.
  if
    [ -f $HA_DIR/ipresources -a ! -f $HA_DIR/haresources ]
  then
    mv $HA_DIR/ipresources $HA_DIR/haresources
  fi
  # Start heartbeat daemon
  if
    start_heartbeat
  then
    echo_success
    return 0
  else
    RC=$?
    echo_failure $RC
    if [ ! -z "$ERROR" ]; then
      Echo
      Echo "$ERROR"
    fi
    return $RC
  fi
}

#
# Ask heartbeat to stop. It will give up its resources...
#
StopHA() {
  EchoNoNl "Stopping High-Availability services: "

  # This script runs under #!/bin/sh. The bash-only "&>" redirection is
  # parsed by a POSIX shell as "run in background" followed by "> /dev/null",
  # which makes the test always succeed, so the portable form is used.
  if
    $HA_BIN/heartbeat -k >/dev/null 2>&1 # Kill it
  then
    echo_success
    return 0
  else
    RC=$?
    echo_failure $RC
    return $RC
  fi
}

# Report daemon status via "heartbeat -s".
StatusHA() {
  $HA_BIN/heartbeat -s
}

# Ask a running heartbeat to enter standby; exits 1 when the failback
# settings forbid it or heartbeat is not running.
StandbyHA() {
  auto_failback=`ha_parameter auto_failback | tr '[A-Z]' '[a-z]'`
  nice_failback=`ha_parameter nice_failback | tr '[A-Z]' '[a-z]'`

  case "$auto_failback" in
    *legacy*)
      echo "auto_failback is set to legacy. Cannot enter standby."
      exit 1;;
  esac
  case "$nice_failback" in
    *off*)
      echo "nice_failback is disabled. Cannot enter standby."
      exit 1;;
  esac
  case "${auto_failback}${nice_failback}" in
    "")
      echo "auto_failback defaulted to legacy. Cannot enter standby."
      exit 1;;
  esac

  echo "auto_failback: $auto_failback"
  if
    StatusHA >/dev/null 2>&1
  then
    EchoNoNl "Attempting to enter standby mode"
    if
      $HA_NOARCHBIN/hb_standby
    then
      # It's impossible to tell how long this will take.
      echo_success
    else
      echo_failure $?
    fi
  else
    Echo "Heartbeat is not currently running."
    exit 1
  fi
}

#
# Ask heartbeat to restart. It will *keep* its resources
#
ReloadHA() {
  EchoNoNl "Reloading High-Availability services: "

  if
    $HA_BIN/heartbeat -r # Restart, and keep your resources
  then
    echo_success
    return 0
  else
    RC=$?
    echo_failure $RC
    return $RC
  fi
}

# Invoke the optional site hook with the given arguments.
RunStartStop() {
  # Run pre-startup script if it exists
  if
    [ -f $HA_DIR/resource.d/startstop ]
  then
    $HA_DIR/resource.d/startstop "$@"
  fi
}

RC=0
# See how we were called.

case "$1" in
  start)
    StartLogd
    RunStartStop pre-start
    StartHA
    RC=$?
    Echo
    if
      [ $RC -eq 0 ]
    then
      [ ! -d $LOCKDIR ] && mkdir -p $LOCKDIR
      touch $LOCKDIR/$SUBSYS
    fi
    RunStartStop post-start $RC
    ;;

  standby)
    StandbyHA
    RC=$?;;

  status)
    StatusHA
    RC=$?;;

  stop)
    RunStartStop "pre-stop"
    StopHA
    RC=$?
    Echo
    if
      [ $RC -eq 0 ]
    then
      rm -f $LOCKDIR/$SUBSYS
    fi
    RunStartStop post-stop $RC
    StopLogd
    ;;

  restart)
    sleeptime=`ha_parameter deadtime`
    StopHA
    Echo
    EchoNoNl Waiting to allow resource takeover to complete:
    sleep $sleeptime
    sleep 10 # allow resource takeover to complete (hopefully).
    echo_success
    Echo
    StartHA
    # Record the start result so a failed restart is not reported as success.
    RC=$?
    Echo
    ;;

  force-reload|reload)
    ReloadHA
    # Capture the status before Echo runs; otherwise $? is Echo's status.
    RC=$?
    Echo
    ;;

  *)
    Echo "Usage: $0 {start|stop|status|standby|restart|reload|force-reload}"
    exit 1
esac

exit $RC

Here is /etc/init.d/network

#! /bin/bash
#
# network       Bring up/down networking
#
# chkconfig: 2345 10 90
# description: Activates/Deactivates all network interfaces configured to \
#              start at boot time.
#
### BEGIN INIT INFO
# Provides: $network
### END INIT INFO

# Source function library.
. /etc/init.d/functions

if [ ! -f /etc/sysconfig/network ]; then
  exit 0
fi

. /etc/sysconfig/network

if [ -f /etc/sysconfig/pcmcia ]; then
  . /etc/sysconfig/pcmcia
fi

# Check that networking is up.
[ "${NETWORKING}" = "no" ] && exit 0

# if the ip configuration utility isn't around we can't function.
[ -x /sbin/ip ] || exit 1

# Even if IPX is configured, without the utilities we can't do much
[ ! -x /sbin/ipx_internal_net -o ! -x /sbin/ipx_configure ] && IPX=

# Even if VLAN is configured, without the utility we can't do much
[ ! -x /sbin/vconfig ] && VLAN=

CWD=`pwd`
cd /etc/sysconfig/network-scripts

. ./network-functions

# find all the interfaces besides loopback.
# ignore aliases, alternative configurations, and editor backup files
interfaces=$(ls ifcfg* | \
  LANG=C sed -e "$__sed_discard_ignored_files" \
    -e '/\(ifcfg-lo\|:\|ifcfg-.*-range\)/d' \
    -e '/ifcfg-[A-Za-z0-9\._-]\+$/ { s/^ifcfg-//g;s/[0-9]/ &/}' | \
  LANG=C sort -k 1,1 -k 2n | \
  LANG=C sed 's/ //')
rc=0

# See how we were called.
case "$1" in start) rc=0 # IPv6 hook (pre IPv4 start) if [ "$NETWORKING_IPV6" = "yes" ]; then if [ -x /etc/sysconfig/network-scripts/init.ipv6-global ]; then /etc/sysconfig/network-scripts/init.ipv6-global start pre fi fi sysctl -e -p /etc/sysctl.conf >/dev/null 2>&1 # bring up loopback interface action $"Bringing up loopback interface: " ./ifup ifcfg-lo case "$IPX" in yes|true) /sbin/ipx_configure --auto_primary=$IPXAUTOPRIMARY \ --auto_interface=$IPXAUTOFRAME if [ "$IPXINTERNALNETNUM" != "0" ]; then /sbin/ipx_internal_net add $IPXINTERNALNETNUM $IPXINTERNALNODENUM fi ;; esac case "$VLAN" in yes) if [ -d /proc/net/vlan ] || modprobe 8021q >/dev/null 2>&1 ; then test -z "$VLAN_NAME_TYPE" && VLAN_NAME_TYPE=DEV_PLUS_VID_NO_PAD action $"Setting 802.1Q VLAN parameters: " /sbin/vconfig set_name_type "$VLAN_NAME_TYPE" else echo $"No 802.1Q VLAN support available in kernel." fi ;; esac vlaninterfaces="" vpninterfaces="" xdslinterfaces="" bridgeinterfaces="" # bring up all other interfaces configured to come up at boot time for i in $interfaces; do unset DEVICE TYPE SLAVE eval $(LANG=C fgrep "DEVICE=" ifcfg-$i) eval $(LANG=C fgrep "TYPE=" ifcfg-$i) eval $(LANG=C fgrep "SLAVE=" ifcfg-$i) if [ -z "$DEVICE" ] ; then DEVICE="$i"; fi if [ "${DEVICE##cipcb}" != "$DEVICE" ] ; then vpninterfaces="$vpninterfaces $i" continue fi if [ "$TYPE" = "xDSL" ]; then xdslinterfaces="$xdslinterfaces $i" continue fi if [ "$TYPE" = "Bridge" ]; then bridgeinterfaces="$bridgeinterfaces $i" continue fi if [ "$TYPE" = "IPSEC" ]; then vpninterfaces="$vpninterfaces $i" continue fi if [ "${DEVICE%%.*}" != "$DEVICE" -o "${DEVICE##vlan}" != "$DEVICE" ] ; then vlaninterfaces="$vlaninterfaces $i" continue fi if [ "$SLAVE" = "yes" ]; then continue fi if LANG=C egrep -L "^ONBOOT=['\"]?[Nn][Oo]['\"]?" ifcfg-$i > /dev/null ; then # this loads the module, to preserve ordering is_available $i continue fi # If we're in confirmation mode, get user confirmation. if [ -f /var/run/confirm ]; then confirm $i test $? 
= 1 && continue fi action $"Bringing up interface $i: " ./ifup $i boot rc=$((rc+$?)) done # Bring up xDSL and VPN interfaces for i in $vlaninterfaces $bridgeinterfaces $xdslinterfaces $vpninterfaces ; do if ! LANG=C egrep -L "^ONBOOT=['\"]?[Nn][Oo]['\"]?" ifcfg-$i >/dev/null 2>&1 ; then # If we're in confirmation mode, get user confirmation. if [ -f /var/run/confirm ]; then confirm $i test $? = 1 && continue fi action $"Bringing up interface $i: " ./ifup $i boot rc=$((rc+$?)) fi done # Add non interface-specific static-routes. if [ -f /etc/sysconfig/static-routes ]; then grep "^any" /etc/sysconfig/static-routes | while read ignore args ; do /sbin/route add -$args done fi # IPv6 hook (post IPv4 start) if [ "$NETWORKING_IPV6" = "yes" ]; then if [ -x /etc/sysconfig/network-scripts/init.ipv6-global ]; then /etc/sysconfig/network-scripts/init.ipv6-global start post fi fi # Run this again to catch any interface-specific actions sysctl -e -p /etc/sysctl.conf >/dev/null 2>&1 touch /var/lock/subsys/network [ -n "${NETWORKDELAY}" ] && /bin/sleep ${NETWORKDELAY} ;; stop) # Don't shut the network down if root is on NFS or a network # block device. 
rootfs=$(awk '{ if ($1 !~ /^[ \t]*#/ && $2 == "/" && $3 != "rootfs") { print $3; }}' /proc/mounts) rootopts=$(awk '{ if ($1 !~ /^[ \t]*#/ && $2 == "/") { print $4; }}' /etc/mtab) if [[ "$rootfs" =~ "^nfs" ]] || [[ "$rootopts" =~ "_netdev|_rnetdev" ]] ; then exit 1 fi # If this is a final shutdown/halt, check for network FS, # and unmount them even if the user didn't turn on netfs if [ "$RUNLEVEL" = "6" -o "$RUNLEVEL" = "0" -o "$RUNLEVEL" = "1" ]; then NFSMTAB=`LC_ALL=C awk '$3 ~ /^nfs/ { print $2 }' /proc/mounts` SMBMTAB=`LC_ALL=C awk '$3 == "smbfs" { print $2 }' /proc/mounts` NCPMTAB=`LC_ALL=C awk '$3 == "ncpfs" { print $2 }' /proc/mounts` if [ -n "$NFSMTAB" -o -n "$SMBMTAB" -o -n "$NCPMTAB" ] ; then /etc/init.d/netfs stop fi fi # IPv6 hook (pre IPv4 stop) if [ "$NETWORKING_IPV6" = "yes" ]; then if [ -x /etc/sysconfig/network-scripts/init.ipv6-global ]; then /etc/sysconfig/network-scripts/init.ipv6-global stop pre fi fi vlaninterfaces="" vpninterfaces="" xdslinterfaces="" bridgeinterfaces="" remaining="" rc=0 # get list of bonding, vpn, and xdsl interfaces for i in $interfaces; do unset DEVICE TYPE eval $(LANG=C fgrep "DEVICE=" ifcfg-$i) eval $(LANG=C fgrep "TYPE=" ifcfg-$i) if [ -z "$DEVICE" ] ; then DEVICE="$i"; fi if [ "${DEVICE##cipcb}" != "$DEVICE" ] ; then vpninterfaces="$vpninterfaces $i" continue fi if [ "$TYPE" = "IPSEC" ]; then vpninterfaces="$vpninterfaces $i" continue fi if [ "$TYPE" = "Bridge" ]; then bridgeinterfaces="$bridgeinterfaces $i" continue fi if [ "$TYPE" = "xDSL" ]; then xdslinterfaces="$xdslinterfaces $i" continue fi if [ "${DEVICE%%.*}" != "$DEVICE" -o "${DEVICE##vlan}" != "$DEVICE" ] ; then vlaninterfaces="$vlaninterfaces $i" continue fi remaining="$remaining $i" done for i in $vpninterfaces $xdslinterfaces $bridgeinterfaces $vlaninterfaces $remaining; do (. ifcfg-$i if [ -z "$DEVICE" ] ; then DEVICE="$i"; fi if ! 
check_device_down $DEVICE; then action $"Shutting down interface $i: " ./ifdown $i boot rc=$((rc+$?)) fi ) done case "$IPX" in yes|true) if [ "$IPXINTERNALNETNUM" != "0" ]; then /sbin/ipx_internal_net del fi ;; esac action $"Shutting down loopback interface: " ./ifdown ifcfg-lo if [ -d /proc/sys/net/ipv4 ]; then if [ -f /proc/sys/net/ipv4/ip_forward ]; then if [ `cat /proc/sys/net/ipv4/ip_forward` != 0 ]; then action $"Disabling IPv4 packet forwarding: " sysctl -w net.ipv4.ip_forward=0 fi fi if [ -f /proc/sys/net/ipv4/ip_always_defrag ]; then if [ `cat /proc/sys/net/ipv4/ip_always_defrag` != 0 ]; then action $"Disabling IPv4 automatic defragmentation: " sysctl -w net.ipv4.ip_always_defrag=0 fi fi fi # IPv6 hook (post IPv4 stop) if [ "$NETWORKING_IPV6" = "yes" ]; then if [ -x /etc/sysconfig/network-scripts/init.ipv6-global ]; then /etc/sysconfig/network-scripts/init.ipv6-global stop post fi fi rm -f /var/lock/subsys/network ;; status) echo $"Configured devices:" echo lo $interfaces echo $"Currently active devices:" echo $(/sbin/ip -o link show up | awk -F ": " '{ print $2 }') ;; restart|reload) cd "$CWD" $0 stop $0 start ;; *) echo $"Usage: $0 {start|stop|restart|reload|status}" exit 1 esac exit $rc From: linux-ha-bounces at lists.linux-ha.org <http://lists.linux-ha.org/mailman/listinfo/linux-ha> on behalf of Jim Sent: Fri 8/6/2010 2:25 AM To: linux-ha at lists.linux-ha.org <http://lists.linux-ha.org/mailman/listinfo/linux-ha> Subject: Re: [Linux-HA] heartbeat startup causes shared IP to stop responding Forgive me if this is a lengthy email, this is my first HA issue and I've included some logs at the end. For the sake of privacy, I've used dummy IPs here. My master is 192.168.1.101, slave is 192.168.1.102, shared IP is 192.168.1.103 Hello Jim, I did not analyze the logs in detail but perhaps you have configured your system to start HA or your virtual IP twice. Do you have your init.d scripts setup to start the virtual IP? 
pushkar _______________________________________________ Linux-HA mailing list [email protected] http://lists.linux-ha.org/mailman/listinfo/linux-ha See also: http://linux-ha.org/ReportingProblems
