Hi,
On Mon, Nov 01, 2010 at 11:52:33AM +0100, [email protected] wrote:
> Hi,
> I would like to submit an upgrade of the db2 resource for review.
Can you please split the patch into two (or more), each of which
would contain a single functional change?
> 1) db2stop logic
> If db2stop succeeds there is no need to call db2_kill. If the
> database has some problems a typical behaviour is that db2stop
> hangs. The resolution then is db2_kill to bring down the system
> the hard way. db2_kill generally succeeds. Therefore db2stop is
> spawned in a subprocess and monitored. If it succeeds, fine; in
> case of failure or timeout, db2_kill is invoked.
OK. Are there any bad effects from running db2_kill anyway? If
not, perhaps we should just keep it. Or are you absolutely sure
that all processes would terminate if db2stop succeeds?
> 2) make the resource agent multipartition aware
> Replace calls to db2start/db2stop with the partition specific
> versions and introduce instance parameter dbpartitionnum
> defaulting to 0 if not specified. Each partition should then be
> configured as a separate instance.
What is the difference between "db2 db2start" and db2start (as a
program)? Are there any docs describing the difference?
Cheers,
Dejan
> Thanx for feedback
> - holger
> diff -r e09ec5fd6691 heartbeat/db2
> --- a/heartbeat/db2 Thu Oct 21 23:00:21 2010 +0200
> +++ b/heartbeat/db2 Mon Nov 01 11:49:51 2010 +0100
> @@ -79,6 +79,13 @@ The admin user of the instance.
> <shortdesc lang="en">admin</shortdesc>
> <content type="string" default="" />
> </parameter>
> +<parameter name="dbpartitionnum" unique="0" required="0">
> +<longdesc lang="en">
> +The number of the partion (DBPARTITIONNUM) to be managed.
> +</longdesc>
> +<shortdesc lang="en">number of partion</shortdesc>
> +<content type="string" default="0" />
> +</parameter>
> </parameters>
>
> <actions>
> @@ -123,7 +130,6 @@ db2info() {
> db2ctrl=$db2sql/ctrl
> db2bin=$db2sql/bin
> db2db2=$db2bin/db2
> - db2node=0 # single node instances are supported
>
> # Let's make sure a few important things are there...
> if
> @@ -172,10 +178,10 @@ logasdb2() {
> #
> db2_start() {
> if
> - output=`runasdb2 $db2adm/db2start`
> + output=`runasdb2 $db2db2 db2start dbpartitionnum $db2node`
> then
> : Hurray! DB2 started OK
> - ocf_log info "DB2 UDB instance $1 started: $output"
> + ocf_log info "DB2 UDB instance $1($db2node) started: $output"
> else
> case $output in
> SQL1026N*|*"is already active"*)
> @@ -185,7 +191,7 @@ db2_start() {
> esac
> fi
> db2_status "$1" || {
> - ocf_log err "DB2 UDB instance $1 not active!"
> + ocf_log err "DB2 UDB instance $1($db2node) not active!"
> return $OCF_ERR_GENERIC
> }
> # db2jstrt has been deprecated since v8.x and doesn't exist
> @@ -196,13 +202,17 @@ db2_start() {
> return $OCF_ERR_GENERIC
> }
> fi
> +
> + [ $db2node = 0 ] || return 0
> + # activate DB only on node 0
> +
> for DB in `db2_dblist`
> do
> if output=`runasdb2 $db2db2 activate database $DB`; then
> ocf_log info "DB2 UDB database $DB activated"
> else
> case $output in
> - SQL1490W*|*"already been activated"*)
> + SQL1490W*|*"already been activated"*|SQL1497W*)
> ocf_log info "DB2 UDB database $DB already activated: $output";;
>
> *) ocf_log err "DB2 UDB database $DB didn't activate: $output"; return $OCF_ERR_GENERIC;;
> @@ -211,21 +221,16 @@ db2_start() {
> done
> }
>
> -#
> -# db2_stop: Stop the given db2 database instance
> -#
> -db2_stop() {
> - # We ignore the instance, the info we need is already in $vars
> +# helper function in a spawned invocation of this script
> +# so we can detect a hang of the db2stop command
> +db2_stop_bg() {
> rc=$OCF_SUCCESS
> - db2_status || {
> - ocf_log info "DB2 UDB instance $1 already stopped"
> - return $rc
> - }
> +
> if
> - output=`runasdb2 $db2adm/db2stop force`
> + output=`runasdb2 $db2db2 db2stop force dbpartitionnum $db2node`
> then
> : DB2 stopped OK
> - ocf_log info "DB2 UDB instance $1 stopped: $output"
> + ocf_log info "DB2 UDB instance $1($db2node) stopped: $output"
> else
> case $output in
>
> @@ -236,17 +241,89 @@ db2_stop() {
> rc=$OCF_ERR_GENERIC;;
> esac
> fi
> - logasdb2 $db2db2 terminate
> - if [ -x $db2bin/db2_kill ]; then
> - logasdb2 $db2bin/db2_kill
> - elif [ -x $db2bin/db2nkill ]; then
> - logasdb2 $db2bin/db2nkill $db2node
> +
> + return $rc
> +}
> +
> +#
> +# db2_stop: Stop the given db2 database instance
> +#
> +db2_stop() {
> + # We ignore the instance, the info we need is already in $vars
> +
> + rc=$OCF_SUCCESS
> +
> + db2_status || {
> + ocf_log info "DB2 UDB instance $1($db2node) already stopped"
> + return $rc
> + }
> +
> + if [ -n "$OCF_RESKEY_stop_timeout" ]
> + then
> + stop_timeout=$OCF_RESKEY_stop_timeout
> + elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
> + stop_timeout=$OCF_RESKEY_CRM_meta_timeout
> + else
> + stop_timeout=20000
> fi
> +
> + # grace_time is 4/5 (unit is ms)
> + grace_timeout=$((stop_timeout/1250))
> +
> + # start db2stop in background as this may hang
> + sh $0 db2_stop_bg &
> + stop_bg_pid=$!
> +
> + # wait for grace_timeout
> + i=0
> + while [ $i -lt $grace_timeout ]
> + do
> + kill -0 $stop_bg_pid 2>/dev/null || break;
> + sleep 1
> + i=$((i+1))
> + done
> +
> + # collect exit status but don't hang
> + if kill -0 $stop_bg_pid 2>/dev/null
> + then
> + stoprc=1
> + kill -9 $stop_bg_pid 2>/dev/null
> + else
> + wait $stop_bg_pid
> + stoprc=$?
> + fi
> +
> + if [ $stoprc -ne 0 ]
> + then
> + ocf_log warn "db2stop of $instance($db2node) failed, using db2nkill"
> +
> + # db2nkill kills *all* partions on the node
> + if [ -x $db2bin/db2nkill ]; then
> + logasdb2 $db2bin/db2nkill $db2node
> + elif [ -x $db2bin/db2_kill ]; then
> + logasdb2 $db2bin/db2_kill
> + fi
> +
> + # let the processes die
> + sleep 2
> +
> + if db2_status
> + then
> + ocf_log info "DB2 UDB instance $1($db2node) can not be killed with db2nkill"
> + rc=$OCF_ERR_GENERIC
> + else
> + ocf_log info "DB2 UDB instance $1($db2node) is now dead"
> + fi
> + fi
> +
> + # db2jd has been deprecated since v8.x and doesn't exist
> + # anymore in v9.x
> pids=`our_db2_ps | grep db2jd | cut -d' ' -f1`
> for j in $pids
> do
> runasdb2 kill -9 $j
> done
> +
> return $rc
> }
>
> @@ -275,6 +352,9 @@ db2_dblist() {
> # db2_monitor: Can the given db2 instance do anything useful?
> #
> db2_monitor() {
> + [ $db2node = 0 ] || return 0
> + # monitoring only for partition 0
> +
> # We ignore the instance, the info we need is already in $vars
> for DB in `db2_dblist`
> do
> @@ -337,6 +417,7 @@ fi
>
> instance=$OCF_RESKEY_instance
> db2admin=${OCF_RESKEY_admin:-$instance}
> +db2node=${OCF_RESKEY_dbpartitionnum:-0}
>
> US=`id -u -n`
> US=`echo $US`
> @@ -373,6 +454,9 @@ case "$1" in
> stop) db2_stop $instance
> exit $?;;
>
> + db2_stop_bg) db2_stop_bg $instance
> + exit $?;;
> +
> status) if
> db2_status $instance
> then
> ___________________________________________________________
> Neu: WEB.DE De-Mail - Einfach wie E-Mail, sicher wie ein Brief!
> Jetzt De-Mail-Adresse reservieren: https://produkte.web.de/go/demail02
> diff -r e09ec5fd6691 heartbeat/db2
> --- a/heartbeat/db2 Thu Oct 21 23:00:21 2010 +0200
> +++ b/heartbeat/db2 Mon Nov 01 11:51:15 2010 +0100
> @@ -79,6 +79,13 @@ The admin user of the instance.
> <shortdesc lang="en">admin</shortdesc>
> <content type="string" default="" />
> </parameter>
> +<parameter name="dbpartitionnum" unique="0" required="0">
> +<longdesc lang="en">
> +The number of the partion (DBPARTITIONNUM) to be managed.
> +</longdesc>
> +<shortdesc lang="en">number of partion</shortdesc>
> +<content type="string" default="0" />
> +</parameter>
> </parameters>
>
> <actions>
> @@ -123,7 +130,6 @@ db2info() {
> db2ctrl=$db2sql/ctrl
> db2bin=$db2sql/bin
> db2db2=$db2bin/db2
> - db2node=0 # single node instances are supported
>
> # Let's make sure a few important things are there...
> if
> @@ -172,10 +178,10 @@ logasdb2() {
> #
> db2_start() {
> if
> - output=`runasdb2 $db2adm/db2start`
> + output=`runasdb2 $db2db2 db2start dbpartitionnum $db2node`
> then
> : Hurray! DB2 started OK
> - ocf_log info "DB2 UDB instance $1 started: $output"
> + ocf_log info "DB2 UDB instance $1($db2node) started: $output"
> else
> case $output in
> SQL1026N*|*"is already active"*)
> @@ -185,7 +191,7 @@ db2_start() {
> esac
> fi
> db2_status "$1" || {
> - ocf_log err "DB2 UDB instance $1 not active!"
> + ocf_log err "DB2 UDB instance $1($db2node) not active!"
> return $OCF_ERR_GENERIC
> }
> # db2jstrt has been deprecated since v8.x and doesn't exist
> @@ -196,13 +202,17 @@ db2_start() {
> return $OCF_ERR_GENERIC
> }
> fi
> +
> + [ $db2node = 0 ] || return 0
> + # activate DB only on node 0
> +
> for DB in `db2_dblist`
> do
> if output=`runasdb2 $db2db2 activate database $DB`; then
> ocf_log info "DB2 UDB database $DB activated"
> else
> case $output in
> - SQL1490W*|*"already been activated"*)
> + SQL1490W*|*"already been activated"*|SQL1497W*)
> ocf_log info "DB2 UDB database $DB already activated: $output";;
>
> *) ocf_log err "DB2 UDB database $DB didn't activate: $output";
> return $OCF_ERR_GENERIC;;
> @@ -211,21 +221,16 @@ db2_start() {
> done
> }
>
> -#
> -# db2_stop: Stop the given db2 database instance
> -#
> -db2_stop() {
> - # We ignore the instance, the info we need is already in $vars
> +# helper function in a spawned invocation of this script
> +# so we can detect a hang of the db2stop command
> +db2_stop_bg() {
> rc=$OCF_SUCCESS
> - db2_status || {
> - ocf_log info "DB2 UDB instance $1 already stopped"
> - return $rc
> - }
> +
> if
> - output=`runasdb2 $db2adm/db2stop force`
> + output=`runasdb2 $db2db2 db2stop force dbpartitionnum $db2node`
> then
> : DB2 stopped OK
> - ocf_log info "DB2 UDB instance $1 stopped: $output"
> + ocf_log info "DB2 UDB instance $1($db2node) stopped: $output"
> else
> case $output in
>
> @@ -236,17 +241,89 @@ db2_stop() {
> rc=$OCF_ERR_GENERIC;;
> esac
> fi
> - logasdb2 $db2db2 terminate
> - if [ -x $db2bin/db2_kill ]; then
> - logasdb2 $db2bin/db2_kill
> - elif [ -x $db2bin/db2nkill ]; then
> - logasdb2 $db2bin/db2nkill $db2node
> +
> + return $rc
> +}
> +
> +#
> +# db2_stop: Stop the given db2 database instance
> +#
> +db2_stop() {
> + # We ignore the instance, the info we need is already in $vars
> +
> + rc=$OCF_SUCCESS
> +
> + db2_status || {
> + ocf_log info "DB2 UDB instance $1($db2node) already stopped"
> + return $rc
> + }
> +
> + if [ -n "$OCF_RESKEY_stop_timeout" ]
> + then
> + stop_timeout=$OCF_RESKEY_stop_timeout
> + elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
> + stop_timeout=$OCF_RESKEY_CRM_meta_timeout
> + else
> + stop_timeout=20000
> fi
> +
> + # grace_time is 4/5 (unit is ms)
> + grace_timeout=$((stop_timeout/1250))
> +
> + # start db2stop in background as this may hang
> + sh $0 db2_stop_bg &
> + stop_bg_pid=$!
> +
> + # wait for grace_timeout
> + i=0
> + while [ $i -lt $grace_timeout ]
> + do
> + kill -0 $stop_bg_pid 2>/dev/null || break;
> + sleep 1
> + i=$((i+1))
> + done
> +
> + # collect exit status but don't hang
> + if kill -0 $stop_bg_pid 2>/dev/null
> + then
> + stoprc=1
> + kill -9 $stop_bg_pid 2>/dev/null
> + else
> + wait $stop_bg_pid
> + stoprc=$?
> + fi
> +
> + if [ $stoprc -ne 0 ]
> + then
> + ocf_log warn "db2stop of $instance($db2node) failed, using db2nkill"
> +
> + # db2nkill kills *all* partions on the node
> + if [ -x $db2bin/db2nkill ]; then
> + logasdb2 $db2bin/db2nkill $db2node
> + elif [ -x $db2bin/db2_kill ]; then
> + logasdb2 $db2bin/db2_kill
> + fi
> +
> + # let the processes die
> + sleep 2
> +
> + if db2_status
> + then
> + ocf_log info "DB2 UDB instance $1($db2node) can not be killed with db2nkill"
> + rc=$OCF_ERR_GENERIC
> + else
> + ocf_log info "DB2 UDB instance $1($db2node) is now dead"
> + fi
> + fi
> +
> + # db2jd has been deprecated since v8.x and doesn't exist
> + # anymore in v9.x
> pids=`our_db2_ps | grep db2jd | cut -d' ' -f1`
> for j in $pids
> do
> runasdb2 kill -9 $j
> done
> +
> return $rc
> }
>
> @@ -275,6 +352,9 @@ db2_dblist() {
> # db2_monitor: Can the given db2 instance do anything useful?
> #
> db2_monitor() {
> + [ $db2node = 0 ] || return 0
> + # monitoring only for partition 0
> +
> # We ignore the instance, the info we need is already in $vars
> for DB in `db2_dblist`
> do
> @@ -337,6 +417,7 @@ fi
>
> instance=$OCF_RESKEY_instance
> db2admin=${OCF_RESKEY_admin:-$instance}
> +db2node=${OCF_RESKEY_dbpartitionnum:-0}
>
> US=`id -u -n`
> US=`echo $US`
> @@ -373,6 +454,9 @@ case "$1" in
> stop) db2_stop $instance
> exit $?;;
>
> + db2_stop_bg) db2_stop_bg $instance
> + exit $?;;
> +
> status) if
> db2_status $instance
> then
> _______________________________________________________
> Linux-HA-Dev: [email protected]
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/
_______________________________________________________
Linux-HA-Dev: [email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/