Hi,
On Thu, Nov 04, 2010 at 09:31:04AM +0100, [email protected] wrote:
> # HG changeset patch
> # User Holger Teutsch <[email protected]>
> # Date 1288857475 -3600
> # Node ID 2ff375ca321554cf146bcf5be197f73fcbe28975
> # Parent 554ebfef6e9513178ea04cc4093710b65311934a
> Guard against a hanging db2stop by spawning this into the background. Use
> db2_kill after grace period.
>
> diff -r 554ebfef6e95 -r 2ff375ca3215 heartbeat/db2
> --- a/heartbeat/db2 Thu Nov 04 08:53:37 2010 +0100
> +++ b/heartbeat/db2 Thu Nov 04 08:57:55 2010 +0100
> @@ -211,16 +211,11 @@ db2_start() {
> done
> }
>
> -#
> -# db2_stop: Stop the given db2 database instance
> -#
> -db2_stop() {
> - # We ignore the instance, the info we need is already in $vars
> +# helper function in a spawned invocation of this script
> +# so we can detect a hang of the db2stop command
> +db2_stop_bg() {
> rc=$OCF_SUCCESS
> - db2_status || {
> - ocf_log info "DB2 UDB instance $1 already stopped"
> - return $rc
> - }
> +
> if
> output=`runasdb2 $db2adm/db2stop force`
> then
> @@ -236,17 +231,89 @@ db2_stop() {
> rc=$OCF_ERR_GENERIC;;
> esac
> fi
> - logasdb2 $db2db2 terminate
> - if [ -x $db2bin/db2_kill ]; then
> - logasdb2 $db2bin/db2_kill
> - elif [ -x $db2bin/db2nkill ]; then
> - logasdb2 $db2bin/db2nkill $db2node
> +
> + return $rc
> +}
> +
> +#
> +# db2_stop: Stop the given db2 database instance
> +#
> +db2_stop() {
> + # We ignore the instance, the info we need is already in $vars
> +
> + rc=$OCF_SUCCESS
> +
> + db2_status || {
> + ocf_log info "DB2 UDB instance $1 already stopped"
> + return $rc
> + }
> +
> + if [ -n "$OCF_RESKEY_stop_timeout" ]
> + then
> + stop_timeout=$OCF_RESKEY_stop_timeout
> + elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
> + stop_timeout=$OCF_RESKEY_CRM_meta_timeout
> + else
> + stop_timeout=20000
> fi
> +
> + # grace_timeout is 4/5 of stop_timeout (stop_timeout is in ms, grace_timeout in seconds)
> + grace_timeout=$((stop_timeout/1250))
> +
> + # start db2stop in background as this may hang
> + sh $0 db2_stop_bg &
There is no need to re-invoke the script here; running the function directly in the background should be OK:
db2_stop_bg &
> + stop_bg_pid=$!
> +
> + # wait for grace_timeout
> + i=0
> + while [ $i -lt $grace_timeout ]
> + do
> + kill -0 $stop_bg_pid 2>/dev/null || break;
> + sleep 1
> + i=$((i+1))
> + done
> +
> + # collect exit status but don't hang
> + if kill -0 $stop_bg_pid 2>/dev/null
> + then
> + stoprc=1
> + kill -9 $stop_bg_pid 2>/dev/null
> + else
> + wait $stop_bg_pid
> + stoprc=$?
> + fi
> +
> + if [ $stoprc -ne 0 ]
> + then
> + ocf_log warn "db2stop of $instance failed, using db2nkill"
> +
> + # db2nkill kills *all* partitions on the node
> + if [ -x $db2bin/db2nkill ]; then
> + logasdb2 $db2bin/db2nkill $db2node
> + elif [ -x $db2bin/db2_kill ]; then
> + logasdb2 $db2bin/db2_kill
> + fi
> +
> + # let the processes die
> + sleep 2
> +
> + if db2_status
> + then
> + ocf_log info "DB2 UDB instance $1 can not be killed with db2nkill"
> + rc=$OCF_ERR_GENERIC
> + else
> + ocf_log info "DB2 UDB instance $1 is now dead"
> + fi
It is perhaps safer to wait in a loop until the processes are gone:
sleep 1
while db2_status; do
ocf_log info "waiting for DB2 UDB instance $1 processes to exit"
sleep 1
done
ocf_log info "DB2 UDB instance $1 is now dead"
Cheers,
Dejan
> + fi
> + # db2jd has been deprecated since v8.x and doesn't exist
> + # anymore in v9.x
> pids=`our_db2_ps | grep db2jd | cut -d' ' -f1`
> for j in $pids
> do
> runasdb2 kill -9 $j
> done
> +
> return $rc
> }
>
> @@ -373,6 +440,9 @@ case "$1" in
> stop) db2_stop $instance
> exit $?;;
>
> + db2_stop_bg) db2_stop_bg $instance
> + exit $?;;
> +
> status) if
> db2_status $instance
> then
> ___________________________________________________________
> GRATIS! Movie-FLAT mit über 300 Videos.
> Jetzt freischalten unter http://movieflat.web.de
> _______________________________________________________
> Linux-HA-Dev: [email protected]
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/
_______________________________________________________
Linux-HA-Dev: [email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/