Hi,
I would like to submit an upgrade of the db2 resource for review.
1) db2stop logic
If db2stop succeeds there is no need to call db2_kill. If the database has some 
problems, a typical behaviour is that db2stop hangs. The resolution then is 
db2_kill, which brings down the system the hard way. db2_kill generally succeeds. 
Therefore db2stop is spawned in a subprocess and monitored. If it succeeds, nothing 
more needs to be done; in case of failure or timeout db2_kill is invoked.
2) make the resource agent multi-partition aware
Replace calls to db2start/db2stop with the partition-specific versions and 
introduce the instance parameter dbpartitionnum, defaulting to 0 if not specified. 
Each partition should then be configured as a separate instance.
Thanks for any feedback
- holger
diff -r e09ec5fd6691 heartbeat/db2
--- a/heartbeat/db2    Thu Oct 21 23:00:21 2010 +0200
+++ b/heartbeat/db2    Mon Nov 01 11:49:51 2010 +0100
@@ -79,6 +79,13 @@ The admin user of the instance.
 <shortdesc lang="en">admin</shortdesc>
 <content type="string" default="" />
 </parameter>
+<parameter name="dbpartitionnum" unique="0" required="0">
+<longdesc lang="en">
+The number of the partion (DBPARTITIONNUM) to be managed.
+</longdesc>
+<shortdesc lang="en">number of partion</shortdesc>
+<content type="string" default="0" />
+</parameter>
 </parameters>
 
 <actions>
@@ -123,7 +130,6 @@ db2info() {
 db2ctrl=$db2sql/ctrl
 db2bin=$db2sql/bin
 db2db2=$db2bin/db2
-    db2node=0 # single node instances are supported
 
 #    Let's make sure a few important things are there...
 if
@@ -172,10 +178,10 @@ logasdb2() {
 #
 db2_start() {
 if
-    output=`runasdb2 $db2adm/db2start`
+    output=`runasdb2 $db2db2 db2start dbpartitionnum $db2node`
 then
 : Hurray! DB2 started OK
-    ocf_log info "DB2 UDB instance $1 started: $output"
+    ocf_log info "DB2 UDB instance $1($db2node) started: $output"
 else
 case $output in
 SQL1026N*|*"is already active"*)
@@ -185,7 +191,7 @@ db2_start() {
 esac
 fi
 db2_status "$1" || {
-    ocf_log err "DB2 UDB instance $1 not active!"
+    ocf_log err "DB2 UDB instance $1($db2node) not active!"
 return $OCF_ERR_GENERIC
 }
 # db2jstrt has been deprecated since v8.x and doesn't exist
@@ -196,13 +202,17 @@ db2_start() {
 return $OCF_ERR_GENERIC
 }
 fi
+
+  [ $db2node = 0 ] || return 0
+  # activate DB only on node 0
+
 for DB in `db2_dblist`
 do
 if output=`runasdb2 $db2db2 activate database $DB`; then
 ocf_log info "DB2 UDB database $DB activated"
 else
 case $output in
-        SQL1490W*|*"already been activated"*)
+        SQL1490W*|*"already been activated"*|SQL1497W*)
 ocf_log info "DB2 UDB database $DB already activated: $output";;
 
 *) ocf_log err "DB2 UDB database $DB didn't activate: $output"; return 
$OCF_ERR_GENERIC;;
@@ -211,21 +221,16 @@ db2_start() {
 done
 }
 
-#
-# db2_stop: Stop the given db2 database instance
-#
-db2_stop() {
-  # We ignore the instance, the info we need is already in $vars
+# helper function in a spawned invocation of this script
+# so we can detect a hang of the db2stop command
+db2_stop_bg() {
 rc=$OCF_SUCCESS
-  db2_status || {
-    ocf_log info "DB2 UDB instance $1 already stopped"
-    return $rc
-  }
+
 if
-    output=`runasdb2 $db2adm/db2stop force`
+    output=`runasdb2 $db2db2 db2stop force dbpartitionnum $db2node`
 then
 : DB2 stopped OK
-    ocf_log info "DB2 UDB instance $1 stopped: $output"
+    ocf_log info "DB2 UDB instance $1($db2node) stopped: $output"
 else
 case $output in
 
@@ -236,17 +241,89 @@ db2_stop() {
 rc=$OCF_ERR_GENERIC;;
 esac
 fi
-  logasdb2 $db2db2 terminate
-  if [ -x $db2bin/db2_kill ]; then
-    logasdb2 $db2bin/db2_kill
-  elif [ -x $db2bin/db2nkill ]; then
-    logasdb2 $db2bin/db2nkill $db2node
+
+  return $rc
+}
+
+#
+# db2_stop: Stop the given db2 database instance
+#
+db2_stop() {
+  # We ignore the instance, the info we need is already in $vars
+
+  rc=$OCF_SUCCESS
+
+  db2_status || {
+    ocf_log info "DB2 UDB instance $1($db2node) already stopped"
+    return $rc
+  }
+
+  if [ -n "$OCF_RESKEY_stop_timeout" ]
+  then
+      stop_timeout=$OCF_RESKEY_stop_timeout
+  elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
+      stop_timeout=$OCF_RESKEY_CRM_meta_timeout
+  else
+      stop_timeout=20000
 fi
+
+  # grace_time is 4/5 (unit is ms)
+  grace_timeout=$((stop_timeout/1250))
+
+  # start db2stop in background as this may hang
+  sh $0 db2_stop_bg &
+  stop_bg_pid=$!
+
+  # wait for grace_timeout
+  i=0
+  while [ $i -lt $grace_timeout ]
+  do
+      kill -0 $stop_bg_pid 2>/dev/null || break;
+      sleep 1
+      i=$((i+1))
+  done
+
+  # collect exit status but don't hang
+  if kill -0 $stop_bg_pid 2>/dev/null
+  then
+      stoprc=1
+      kill -9 $stop_bg_pid 2>/dev/null
+  else
+      wait $stop_bg_pid
+      stoprc=$?
+  fi
+
+  if [ $stoprc -ne 0 ]
+  then
+      ocf_log warn "db2stop of $instance($db2node) failed, using db2nkill"
+
+      # db2nkill kills *all* partions on the node
+      if [ -x $db2bin/db2nkill ]; then
+          logasdb2 $db2bin/db2nkill $db2node
+      elif [ -x $db2bin/db2_kill ]; then
+          logasdb2 $db2bin/db2_kill
+      fi
+
+      # let the processes die
+      sleep 2
+
+      if db2_status
+      then
+          ocf_log info "DB2 UDB instance $1($db2node) can not be killed with 
db2nkill"
+          rc=$OCF_ERR_GENERIC
+      else
+          ocf_log info "DB2 UDB instance $1($db2node) is now dead"
+      fi
+  fi
+
+  # db2jd has been deprecated since v8.x and doesn't exist
+  # anymore in v9.x
 pids=`our_db2_ps | grep db2jd | cut -d' ' -f1`
 for j in $pids
 do
 runasdb2 kill -9 $j
 done
+
 return $rc
 }
 
@@ -275,6 +352,9 @@ db2_dblist() {
 # db2_monitor: Can the given db2 instance do anything useful?
 #
 db2_monitor() {
+  [ $db2node = 0 ] || return 0
+  # monitoring only for partition 0
+
 # We ignore the instance, the info we need is already in $vars
 for DB in `db2_dblist`
 do
@@ -337,6 +417,7 @@ fi
 
 instance=$OCF_RESKEY_instance
 db2admin=${OCF_RESKEY_admin:-$instance}
+db2node=${OCF_RESKEY_dbpartitionnum:-0}
 
 US=`id -u -n`
 US=`echo $US`
@@ -373,6 +454,9 @@ case "$1" in
 stop)        db2_stop $instance
 exit $?;;
 
+  db2_stop_bg)  db2_stop_bg $instance
+                exit $?;;
+
 status)    if
 db2_status $instance
 then
___________________________________________________________
Neu: WEB.DE De-Mail - Einfach wie E-Mail, sicher wie ein Brief!  
Jetzt De-Mail-Adresse reservieren: https://produkte.web.de/go/demail02
diff -r e09ec5fd6691 heartbeat/db2
--- a/heartbeat/db2	Thu Oct 21 23:00:21 2010 +0200
+++ b/heartbeat/db2	Mon Nov 01 11:51:15 2010 +0100
@@ -79,6 +79,13 @@ The admin user of the instance.
 <shortdesc lang="en">admin</shortdesc>
 <content type="string" default="" />
 </parameter>
+<parameter name="dbpartitionnum" unique="0" required="0">
+<longdesc lang="en">
+The number of the partion (DBPARTITIONNUM) to be managed.
+</longdesc>
+<shortdesc lang="en">number of partion</shortdesc>
+<content type="string" default="0" />
+</parameter>
 </parameters>
 
 <actions>
@@ -123,7 +130,6 @@ db2info() {
 	db2ctrl=$db2sql/ctrl
 	db2bin=$db2sql/bin
 	db2db2=$db2bin/db2
-	db2node=0 # single node instances are supported
 
 	#	Let's make sure a few important things are there...
 	if
@@ -172,10 +178,10 @@ logasdb2() {
 #
 db2_start() {
   if
-    output=`runasdb2 $db2adm/db2start`
+    output=`runasdb2 $db2db2 db2start dbpartitionnum $db2node`
   then
     : Hurray! DB2 started OK
-    ocf_log info "DB2 UDB instance $1 started: $output"
+    ocf_log info "DB2 UDB instance $1($db2node) started: $output"
   else
     case $output in
       SQL1026N*|*"is already active"*)
@@ -185,7 +191,7 @@ db2_start() {
     esac
   fi
   db2_status "$1" || {
-    ocf_log err "DB2 UDB instance $1 not active!"
+    ocf_log err "DB2 UDB instance $1($db2node) not active!"
     return $OCF_ERR_GENERIC
   }
   # db2jstrt has been deprecated since v8.x and doesn't exist
@@ -196,13 +202,17 @@ db2_start() {
 	  return $OCF_ERR_GENERIC
     }
   fi
+
+  [ $db2node = 0 ] || return 0
+  # activate DB only on node 0
+
   for DB in `db2_dblist`
   do
 	if output=`runasdb2 $db2db2 activate database $DB`; then
       ocf_log info "DB2 UDB database $DB activated"
     else
       case $output in
-        SQL1490W*|*"already been activated"*)
+        SQL1490W*|*"already been activated"*|SQL1497W*)
            ocf_log info "DB2 UDB database $DB already activated: $output";;
 
         *) ocf_log err "DB2 UDB database $DB didn't activate: $output"; return $OCF_ERR_GENERIC;;
@@ -211,21 +221,16 @@ db2_start() {
   done
 }
 
-#
-# db2_stop: Stop the given db2 database instance
-#
-db2_stop() {
-  # We ignore the instance, the info we need is already in $vars
+# helper function in a spawned invocation of this script
+# so we can detect a hang of the db2stop command
+db2_stop_bg() {
   rc=$OCF_SUCCESS
-  db2_status || {
-    ocf_log info "DB2 UDB instance $1 already stopped"
-    return $rc
-  }
+
   if
-    output=`runasdb2 $db2adm/db2stop force`
+    output=`runasdb2 $db2db2 db2stop force dbpartitionnum $db2node`
   then
     : DB2 stopped OK
-    ocf_log info "DB2 UDB instance $1 stopped: $output"
+    ocf_log info "DB2 UDB instance $1($db2node) stopped: $output"
   else
     case $output in
 
@@ -236,17 +241,89 @@ db2_stop() {
 		rc=$OCF_ERR_GENERIC;;
     esac
   fi
-  logasdb2 $db2db2 terminate
-  if [ -x $db2bin/db2_kill ]; then
-    logasdb2 $db2bin/db2_kill
-  elif [ -x $db2bin/db2nkill ]; then
-    logasdb2 $db2bin/db2nkill $db2node
+
+  return $rc
+}
+
+#
+# db2_stop: Stop the given db2 database instance
+#
+db2_stop() {
+  # We ignore the instance, the info we need is already in $vars
+
+  rc=$OCF_SUCCESS
+
+  db2_status || {
+    ocf_log info "DB2 UDB instance $1($db2node) already stopped"
+    return $rc
+  }
+
+  if [ -n "$OCF_RESKEY_stop_timeout" ]
+  then
+      stop_timeout=$OCF_RESKEY_stop_timeout
+  elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
+      stop_timeout=$OCF_RESKEY_CRM_meta_timeout
+  else
+      stop_timeout=20000
   fi
+
+  # grace_time is 4/5 (unit is ms)
+  grace_timeout=$((stop_timeout/1250))
+
+  # start db2stop in background as this may hang
+  sh $0 db2_stop_bg &
+  stop_bg_pid=$!
+
+  # wait for grace_timeout
+  i=0
+  while [ $i -lt $grace_timeout ]
+  do
+      kill -0 $stop_bg_pid 2>/dev/null || break;
+      sleep 1
+      i=$((i+1))
+  done
+
+  # collect exit status but don't hang
+  if kill -0 $stop_bg_pid 2>/dev/null
+  then
+      stoprc=1
+      kill -9 $stop_bg_pid 2>/dev/null
+  else
+      wait $stop_bg_pid
+      stoprc=$?
+  fi
+
+  if [ $stoprc -ne 0 ]
+  then
+      ocf_log warn "db2stop of $instance($db2node) failed, using db2nkill"
+
+      # db2nkill kills *all* partions on the node
+      if [ -x $db2bin/db2nkill ]; then
+          logasdb2 $db2bin/db2nkill $db2node
+      elif [ -x $db2bin/db2_kill ]; then
+          logasdb2 $db2bin/db2_kill
+      fi
+
+      # let the processes die
+      sleep 2
+
+      if db2_status
+      then
+          ocf_log info "DB2 UDB instance $1($db2node) can not be killed with db2nkill"
+          rc=$OCF_ERR_GENERIC
+      else
+          ocf_log info "DB2 UDB instance $1($db2node) is now dead"
+      fi
+  fi
+
+  # db2jd has been deprecated since v8.x and doesn't exist
+  # anymore in v9.x
   pids=`our_db2_ps | grep db2jd | cut -d' ' -f1`
   for j in $pids
   do
     runasdb2 kill -9 $j
   done
+
   return $rc
 }
 
@@ -275,6 +352,9 @@ db2_dblist() {
 # db2_monitor: Can the given db2 instance do anything useful?
 #
 db2_monitor() {
+  [ $db2node = 0 ] || return 0
+  # monitoring only for partition 0
+
   # We ignore the instance, the info we need is already in $vars
   for DB in `db2_dblist`
   do
@@ -337,6 +417,7 @@ fi
 
 instance=$OCF_RESKEY_instance
 db2admin=${OCF_RESKEY_admin:-$instance}
+db2node=${OCF_RESKEY_dbpartitionnum:-0}
 
 US=`id -u -n`
 US=`echo $US`
@@ -373,6 +454,9 @@ case "$1" in
   stop)		db2_stop $instance
 		exit $?;;
 
+  db2_stop_bg)  db2_stop_bg $instance
+                exit $?;;
+
   status)	if
 		  db2_status $instance
 		then
_______________________________________________________
Linux-HA-Dev: [email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/

Reply via email to