Hi,

On Mon, Nov 01, 2010 at 11:52:33AM +0100, [email protected] wrote:
> Hi,
> I would like to submit an upgrade of the db2 resource for review.

Can you please split the patch into two (or more), each of which
would contain a single functional change?

> 1) db2stop logic
> If db2stop succeeds there is no need to call db2_kill. If the
> database has some problems a typical behaviour is that db2stop
> hangs. The resolution then is db2_kill to bring down the system
> the hard way. db2_kill generally succeeds. Therefore db2stop is
> spawned in a subprocess and monitored. If it succeeds, fine; in
> case of failure or timeout db2_kill is invoked.

OK. Are there any bad effects from running db2_kill anyway? If
not, perhaps we should just keep it. Or are you absolutely sure
that all processes would terminate if db2stop succeeds?

> 2) make the resource agent multipartition aware
> Replace calls to db2start/db2stop with the partition specific
> versions and introduce instance parameter dbpartitionnum
> defaulting to 0 if not specified. Each partition should then be
> configured as a separate instance.

What is the difference between "db2 db2start" and db2start (as a
program)? Are there any docs describing the difference?

Cheers,

Dejan

> Thanx for feedback
> - holger
> diff -r e09ec5fd6691 heartbeat/db2
> --- a/heartbeat/db2    Thu Oct 21 23:00:21 2010 +0200
> +++ b/heartbeat/db2    Mon Nov 01 11:49:51 2010 +0100
> @@ -79,6 +79,13 @@ The admin user of the instance.
>  <shortdesc lang="en">admin</shortdesc>
>  <content type="string" default="" />
>  </parameter>
> +<parameter name="dbpartitionnum" unique="0" required="0">
> +<longdesc lang="en">
> +The number of the partion (DBPARTITIONNUM) to be managed.
> +</longdesc>
> +<shortdesc lang="en">number of partion</shortdesc>
> +<content type="string" default="0" />
> +</parameter>
>  </parameters>
>  
>  <actions>
> @@ -123,7 +130,6 @@ db2info() {
>  db2ctrl=$db2sql/ctrl
>  db2bin=$db2sql/bin
>  db2db2=$db2bin/db2
> -    db2node=0 # single node instances are supported
>  
>  #    Let's make sure a few important things are there...
>  if
> @@ -172,10 +178,10 @@ logasdb2() {
>  #
>  db2_start() {
>  if
> -    output=`runasdb2 $db2adm/db2start`
> +    output=`runasdb2 $db2db2 db2start dbpartitionnum $db2node`
>  then
>  : Hurray! DB2 started OK
> -    ocf_log info "DB2 UDB instance $1 started: $output"
> +    ocf_log info "DB2 UDB instance $1($db2node) started: $output"
>  else
>  case $output in
>  SQL1026N*|*"is already active"*)
> @@ -185,7 +191,7 @@ db2_start() {
>  esac
>  fi
>  db2_status "$1" || {
> -    ocf_log err "DB2 UDB instance $1 not active!"
> +    ocf_log err "DB2 UDB instance $1($db2node) not active!"
>  return $OCF_ERR_GENERIC
>  }
>  # db2jstrt has been deprecated since v8.x and doesn't exist
> @@ -196,13 +202,17 @@ db2_start() {
>  return $OCF_ERR_GENERIC
>  }
>  fi
> +
> +  [ $db2node = 0 ] || return 0
> +  # activate DB only on node 0
> +
>  for DB in `db2_dblist`
>  do
>  if output=`runasdb2 $db2db2 activate database $DB`; then
>  ocf_log info "DB2 UDB database $DB activated"
>  else
>  case $output in
> -        SQL1490W*|*"already been activated"*)
> +        SQL1490W*|*"already been activated"*|SQL1497W*)
>  ocf_log info "DB2 UDB database $DB already activated: $output";;
>  
>  *) ocf_log err "DB2 UDB database $DB didn't activate: $output"; return 
> $OCF_ERR_GENERIC;;
> @@ -211,21 +221,16 @@ db2_start() {
>  done
>  }
>  
> -#
> -# db2_stop: Stop the given db2 database instance
> -#
> -db2_stop() {
> -  # We ignore the instance, the info we need is already in $vars
> +# helper function in a spawned invocation of this script
> +# so we can detect a hang of the db2stop command
> +db2_stop_bg() {
>  rc=$OCF_SUCCESS
> -  db2_status || {
> -    ocf_log info "DB2 UDB instance $1 already stopped"
> -    return $rc
> -  }
> +
>  if
> -    output=`runasdb2 $db2adm/db2stop force`
> +    output=`runasdb2 $db2db2 db2stop force dbpartitionnum $db2node`
>  then
>  : DB2 stopped OK
> -    ocf_log info "DB2 UDB instance $1 stopped: $output"
> +    ocf_log info "DB2 UDB instance $1($db2node) stopped: $output"
>  else
>  case $output in
>  
> @@ -236,17 +241,89 @@ db2_stop() {
>  rc=$OCF_ERR_GENERIC;;
>  esac
>  fi
> -  logasdb2 $db2db2 terminate
> -  if [ -x $db2bin/db2_kill ]; then
> -    logasdb2 $db2bin/db2_kill
> -  elif [ -x $db2bin/db2nkill ]; then
> -    logasdb2 $db2bin/db2nkill $db2node
> +
> +  return $rc
> +}
> +
> +#
> +# db2_stop: Stop the given db2 database instance
> +#
> +db2_stop() {
> +  # We ignore the instance, the info we need is already in $vars
> +
> +  rc=$OCF_SUCCESS
> +
> +  db2_status || {
> +    ocf_log info "DB2 UDB instance $1($db2node) already stopped"
> +    return $rc
> +  }
> +
> +  if [ -n "$OCF_RESKEY_stop_timeout" ]
> +  then
> +      stop_timeout=$OCF_RESKEY_stop_timeout
> +  elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
> +      stop_timeout=$OCF_RESKEY_CRM_meta_timeout
> +  else
> +      stop_timeout=20000
>  fi
> +
> +  # grace_time is 4/5 (unit is ms)
> +  grace_timeout=$((stop_timeout/1250))
> +
> +  # start db2stop in background as this may hang
> +  sh $0 db2_stop_bg &
> +  stop_bg_pid=$!
> +
> +  # wait for grace_timeout
> +  i=0
> +  while [ $i -lt $grace_timeout ]
> +  do
> +      kill -0 $stop_bg_pid 2>/dev/null || break;
> +      sleep 1
> +      i=$((i+1))
> +  done
> +
> +  # collect exit status but don't hang
> +  if kill -0 $stop_bg_pid 2>/dev/null
> +  then
> +      stoprc=1
> +      kill -9 $stop_bg_pid 2>/dev/null
> +  else
> +      wait $stop_bg_pid
> +      stoprc=$?
> +  fi
> +
> +  if [ $stoprc -ne 0 ]
> +  then
> +      ocf_log warn "db2stop of $instance($db2node) failed, using db2nkill"
> +
> +      # db2nkill kills *all* partions on the node
> +      if [ -x $db2bin/db2nkill ]; then
> +          logasdb2 $db2bin/db2nkill $db2node
> +      elif [ -x $db2bin/db2_kill ]; then
> +          logasdb2 $db2bin/db2_kill
> +      fi
> +
> +      # let the processes die
> +      sleep 2
> +
> +      if db2_status
> +      then
> +          ocf_log info "DB2 UDB instance $1($db2node) can not be killed with 
> db2nkill"
> +          rc=$OCF_ERR_GENERIC
> +      else
> +          ocf_log info "DB2 UDB instance $1($db2node) is now dead"
> +      fi
> +  fi
> +
> +  # db2jd has been deprecated since v8.x and doesn't exist
> +  # anymore in v9.x
>  pids=`our_db2_ps | grep db2jd | cut -d' ' -f1`
>  for j in $pids
>  do
>  runasdb2 kill -9 $j
>  done
> +
>  return $rc
>  }
>  
> @@ -275,6 +352,9 @@ db2_dblist() {
>  # db2_monitor: Can the given db2 instance do anything useful?
>  #
>  db2_monitor() {
> +  [ $db2node = 0 ] || return 0
> +  # monitoring only for partition 0
> +
>  # We ignore the instance, the info we need is already in $vars
>  for DB in `db2_dblist`
>  do
> @@ -337,6 +417,7 @@ fi
>  
>  instance=$OCF_RESKEY_instance
>  db2admin=${OCF_RESKEY_admin:-$instance}
> +db2node=${OCF_RESKEY_dbpartitionnum:-0}
>  
>  US=`id -u -n`
>  US=`echo $US`
> @@ -373,6 +454,9 @@ case "$1" in
>  stop)        db2_stop $instance
>  exit $?;;
>  
> +  db2_stop_bg)  db2_stop_bg $instance
> +                exit $?;;
> +
>  status)    if
>  db2_status $instance
>  then
> ___________________________________________________________
> Neu: WEB.DE De-Mail - Einfach wie E-Mail, sicher wie ein Brief!  
> Jetzt De-Mail-Adresse reservieren: https://produkte.web.de/go/demail02

> diff -r e09ec5fd6691 heartbeat/db2
> --- a/heartbeat/db2   Thu Oct 21 23:00:21 2010 +0200
> +++ b/heartbeat/db2   Mon Nov 01 11:51:15 2010 +0100
> @@ -79,6 +79,13 @@ The admin user of the instance.
>  <shortdesc lang="en">admin</shortdesc>
>  <content type="string" default="" />
>  </parameter>
> +<parameter name="dbpartitionnum" unique="0" required="0">
> +<longdesc lang="en">
> +The number of the partion (DBPARTITIONNUM) to be managed.
> +</longdesc>
> +<shortdesc lang="en">number of partion</shortdesc>
> +<content type="string" default="0" />
> +</parameter>
>  </parameters>
>  
>  <actions>
> @@ -123,7 +130,6 @@ db2info() {
>       db2ctrl=$db2sql/ctrl
>       db2bin=$db2sql/bin
>       db2db2=$db2bin/db2
> -     db2node=0 # single node instances are supported
>  
>       #       Let's make sure a few important things are there...
>       if
> @@ -172,10 +178,10 @@ logasdb2() {
>  #
>  db2_start() {
>    if
> -    output=`runasdb2 $db2adm/db2start`
> +    output=`runasdb2 $db2db2 db2start dbpartitionnum $db2node`
>    then
>      : Hurray! DB2 started OK
> -    ocf_log info "DB2 UDB instance $1 started: $output"
> +    ocf_log info "DB2 UDB instance $1($db2node) started: $output"
>    else
>      case $output in
>        SQL1026N*|*"is already active"*)
> @@ -185,7 +191,7 @@ db2_start() {
>      esac
>    fi
>    db2_status "$1" || {
> -    ocf_log err "DB2 UDB instance $1 not active!"
> +    ocf_log err "DB2 UDB instance $1($db2node) not active!"
>      return $OCF_ERR_GENERIC
>    }
>    # db2jstrt has been deprecated since v8.x and doesn't exist
> @@ -196,13 +202,17 @@ db2_start() {
>         return $OCF_ERR_GENERIC
>      }
>    fi
> +
> +  [ $db2node = 0 ] || return 0
> +  # activate DB only on node 0
> +
>    for DB in `db2_dblist`
>    do
>       if output=`runasdb2 $db2db2 activate database $DB`; then
>        ocf_log info "DB2 UDB database $DB activated"
>      else
>        case $output in
> -        SQL1490W*|*"already been activated"*)
> +        SQL1490W*|*"already been activated"*|SQL1497W*)
>             ocf_log info "DB2 UDB database $DB already activated: $output";;
>  
>          *) ocf_log err "DB2 UDB database $DB didn't activate: $output"; 
> return $OCF_ERR_GENERIC;;
> @@ -211,21 +221,16 @@ db2_start() {
>    done
>  }
>  
> -#
> -# db2_stop: Stop the given db2 database instance
> -#
> -db2_stop() {
> -  # We ignore the instance, the info we need is already in $vars
> +# helper function in a spawned invocation of this script
> +# so we can detect a hang of the db2stop command
> +db2_stop_bg() {
>    rc=$OCF_SUCCESS
> -  db2_status || {
> -    ocf_log info "DB2 UDB instance $1 already stopped"
> -    return $rc
> -  }
> +
>    if
> -    output=`runasdb2 $db2adm/db2stop force`
> +    output=`runasdb2 $db2db2 db2stop force dbpartitionnum $db2node`
>    then
>      : DB2 stopped OK
> -    ocf_log info "DB2 UDB instance $1 stopped: $output"
> +    ocf_log info "DB2 UDB instance $1($db2node) stopped: $output"
>    else
>      case $output in
>  
> @@ -236,17 +241,89 @@ db2_stop() {
>               rc=$OCF_ERR_GENERIC;;
>      esac
>    fi
> -  logasdb2 $db2db2 terminate
> -  if [ -x $db2bin/db2_kill ]; then
> -    logasdb2 $db2bin/db2_kill
> -  elif [ -x $db2bin/db2nkill ]; then
> -    logasdb2 $db2bin/db2nkill $db2node
> +
> +  return $rc
> +}
> +
> +#
> +# db2_stop: Stop the given db2 database instance
> +#
> +db2_stop() {
> +  # We ignore the instance, the info we need is already in $vars
> +
> +  rc=$OCF_SUCCESS
> +
> +  db2_status || {
> +    ocf_log info "DB2 UDB instance $1($db2node) already stopped"
> +    return $rc
> +  }
> +
> +  if [ -n "$OCF_RESKEY_stop_timeout" ]
> +  then
> +      stop_timeout=$OCF_RESKEY_stop_timeout
> +  elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
> +      stop_timeout=$OCF_RESKEY_CRM_meta_timeout
> +  else
> +      stop_timeout=20000
>    fi
> +
> +  # grace_time is 4/5 (unit is ms)
> +  grace_timeout=$((stop_timeout/1250))
> +
> +  # start db2stop in background as this may hang
> +  sh $0 db2_stop_bg &
> +  stop_bg_pid=$!
> +
> +  # wait for grace_timeout
> +  i=0
> +  while [ $i -lt $grace_timeout ]
> +  do
> +      kill -0 $stop_bg_pid 2>/dev/null || break;
> +      sleep 1
> +      i=$((i+1))
> +  done
> +
> +  # collect exit status but don't hang
> +  if kill -0 $stop_bg_pid 2>/dev/null
> +  then
> +      stoprc=1
> +      kill -9 $stop_bg_pid 2>/dev/null
> +  else
> +      wait $stop_bg_pid
> +      stoprc=$?
> +  fi
> +
> +  if [ $stoprc -ne 0 ]
> +  then
> +      ocf_log warn "db2stop of $instance($db2node) failed, using db2nkill"
> +
> +      # db2nkill kills *all* partions on the node
> +      if [ -x $db2bin/db2nkill ]; then
> +          logasdb2 $db2bin/db2nkill $db2node
> +      elif [ -x $db2bin/db2_kill ]; then
> +          logasdb2 $db2bin/db2_kill
> +      fi
> +
> +      # let the processes die
> +      sleep 2
> +
> +      if db2_status
> +      then
> +          ocf_log info "DB2 UDB instance $1($db2node) can not be killed with 
> db2nkill"
> +          rc=$OCF_ERR_GENERIC
> +      else
> +          ocf_log info "DB2 UDB instance $1($db2node) is now dead"
> +      fi
> +  fi
> +
> +  # db2jd has been deprecated since v8.x and doesn't exist
> +  # anymore in v9.x
>    pids=`our_db2_ps | grep db2jd | cut -d' ' -f1`
>    for j in $pids
>    do
>      runasdb2 kill -9 $j
>    done
> +
>    return $rc
>  }
>  
> @@ -275,6 +352,9 @@ db2_dblist() {
>  # db2_monitor: Can the given db2 instance do anything useful?
>  #
>  db2_monitor() {
> +  [ $db2node = 0 ] || return 0
> +  # monitoring only for partition 0
> +
>    # We ignore the instance, the info we need is already in $vars
>    for DB in `db2_dblist`
>    do
> @@ -337,6 +417,7 @@ fi
>  
>  instance=$OCF_RESKEY_instance
>  db2admin=${OCF_RESKEY_admin:-$instance}
> +db2node=${OCF_RESKEY_dbpartitionnum:-0}
>  
>  US=`id -u -n`
>  US=`echo $US`
> @@ -373,6 +454,9 @@ case "$1" in
>    stop)              db2_stop $instance
>               exit $?;;
>  
> +  db2_stop_bg)  db2_stop_bg $instance
> +                exit $?;;
> +
>    status)    if
>                 db2_status $instance
>               then

> _______________________________________________________
> Linux-HA-Dev: [email protected]
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/

_______________________________________________________
Linux-HA-Dev: [email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/

Reply via email to