I've been adminning java-based sites on linux platforms for a year and a half now, 
and in my experience there is only one way to make sure your servlet is available at 
all times:

  1) create a servlet in a zone only accessible from localhost and your trusted 
gateway, and make sure YOU DO NOT CACHE THIS SERVLET
  2) this servlet makes the same system calls as the one that's public, but it only 
needs to return values from those calls. The values need to be predictable; i.e., you 
need to be able to compare the returned value to a known good expected value
  3) write a shell script (you could do it in perl, but why load another interpreter 
every 60 seconds? it's a waste of resources on your server) that calls the private 
servlet, and checks the returned values against the expected values. 
  4) run this script from cron every 60 seconds. Any more often, and you're 
unnecessarily loading the server. Any less often, and you have too much downtime.
  5) if they match, life is grand! exit 0 
  6) if they don't match, log the diagnostics and restart the JVM.


  I've attached a script that I use to monitor my application servers. It can detect a 
downed JVM within one second, and have it restarted within 3 (it takes a couple of 
seconds for the JVM to grab 1.5G of heap size).


Today Thomas Eitzenberger spoke thusly:

: Date: Thu, 07 Dec 2000 15:00:57 +0100
: From: Thomas Eitzenberger <[EMAIL PROTECTED]>
: Reply-To: [EMAIL PROTECTED]
: To: "Lacerda, Wellington (AFIS)" <[EMAIL PROTECTED]>
: Cc: "'[EMAIL PROTECTED]'" <[EMAIL PROTECTED]>
: Subject: Re: How to design a application context servlet
: 
: <Refining Request for Help>
: We need to have this running in a servlet as we are working in a high available
: context and that way we could rely on the JSP Engine to provide the servlet
: under any circumstances. We could of course create such processes outside but
: would have to implement some kind of task manager / watchdog to be HA again :o(
: 
: </Refining>
: 
: <A HREF="mailto:[EMAIL PROTECTED]">I know how to help ya :o)</a>
: 
: help me PLEASE
: 
: ET
: 
: "Lacerda, Wellington (AFIS)" wrote:

_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
Robert Miller
Unix Systems Administrator
Internet Operations
PictureIQ Corporation
 - fix your photos online!
http://www.pictureiq.com
 - - - - - - - - - - - - - - - - - - - - -

#!/bin/bash2

# this script examines PictureIQStatusServlet and logs its trials and tribulations

# [EMAIL PROTECTED] 11-00

# ###
# ### globals
# ###

# channels to monitor; each name is also the prefix of the per-channel
# <name>_ChannelDir and <name>_HostURL variables below
channelList=(a b)

# per-channel jserv directories
  # the lock_${thisChannel} file name is assembled when we check for it
a_ChannelDir="/etc/httpd/conf/jserv/"
b_ChannelDir="/etc/httpd/bhost/jserv/"

# per-channel URL of the status servlet
a_HostURL="http://localhost/PictureIQStatusServlet"
b_HostURL="http://localhost:8080/PictureIQStatusServlet"

# command-name prefixes; f_restart invokes them as <handler>.<channel>
httpdHandler="apachectl"
jvmHandler="jserv"

# where we log, the lock marking a running instance of this script, and
# the per-run ($$ = our pid) file the servlet's answer is saved to
logFile="/var/log/httpd/keepalive"
kaLockFile="/tmp/kaLockFile"
statusReturnFile="/tmp/jvmstatus.$$"

# ###
# ### functions
# ###

# ###################################################
f_isJVMup(){
  # Logs whether a java_${thisChannel} process shows up in `ps ax`.
  # This is purely for logging purposes; nothing is returned to the caller.
  # Globals: thisChannel (read), logFile (appended to)
  local jvmProcs

    # take ONE snapshot so the test and the logged lines cannot disagree
  jvmProcs=$(ps ax | grep "java_${thisChannel}" | grep -v grep)

    # quoted: several matching processes must not explode into several
    # words inside [ ], which made the test itself error out
  if [ -z "${jvmProcs}" ]
    then
      echo "$(date) java_${thisChannel} is NOT running" >> "${logFile}"
    else
      echo "$(date) ${jvmProcs}" >> "${logFile}"
  fi
}; # end f_isJVMup
# ###################################################

# ###################################################
f_restart(){
  # Restarts the httpd and the JVM of the current channel, logging the
  # whole sordid affair.
  # Globals: thisChannel, httpdHandler, jvmHandler (read); logFile (appended to)
  # Returns: 0 once both daemons were started,
  #          1 if httpd could not be stopped (nothing else is attempted).
  # Exits:   the whole script with status 1 if either start command fails.

  if ! "${httpdHandler}.${thisChannel}" stop
    then
      echo -e "$(date) ${httpdHandler}.${thisChannel} stop failed\n" >> "${logFile}"
      return 1
  fi

    # killall exits non-zero when no process matched -- which is exactly the
    # state a crashed JVM leaves behind. Log it, but carry on: bailing out
    # here would leave httpd stopped and the channel down for good.
  if ! killall -9 "java_${thisChannel}"
    then
      echo -e "$(date) killall -9 java_${thisChannel} failed\n" >> "${logFile}"
  fi

    # make sure each startup command exits 0
  if ! "${httpdHandler}.${thisChannel}" start
    then
      echo -e "$(date) ${httpdHandler}.${thisChannel} start failed\n" >> "${logFile}"
      exit 1
  fi

  if ! "${jvmHandler}.${thisChannel}" start
    then
      echo -e "$(date) ${jvmHandler}.${thisChannel} start failed\n" >> "${logFile}"
      exit 1
  fi

    # and then make sure everyone knows about it
  echo -e "$(date) channel ${thisChannel} servers restarted\n" >> "${logFile}"
}; # end f_restart()
# ###################################################

# ###################################################
f_checkPIQStatus(){
  # This is where we actually query the servlet: fetch ${thisURL}, compare
  # the answer against the known-good marker and, when it is missing, log a
  # diagnosis and restart the channel.
  # Globals: thisURL, statusReturnFile (read); logFile (appended to);
  #          wgetBin (optional, read) - wget to use, default /usr/local/bin/wget
  # Calls:   f_isJVMup (only for the "0 BYTES" and "500" cases), f_restart
  # Returns: 0 when the servlet answered 'PictureIQ OK', otherwise the
  #          status of f_restart.
  local errorLabel logJvmState=no

    # wget opts= quiet, timeout 3 secs, 1 try, output > ${statusReturnFile}
  "${wgetBin:-/usr/local/bin/wget}" -q -T 3 -t 1 -O "${statusReturnFile}" "${thisURL}"

    # -q everywhere: we only want grep's verdict, not the matching lines on
    # stdout every time cron runs us
  if grep -q -e 'PictureIQ OK' "${statusReturnFile}" 2>/dev/null
    then
        # everything is peachykeen
      return 0
  fi

    # something is awry
    # a first line that is missing or blank counts as "nothing came back"
  if [ -z "$(head -n 1 "${statusReturnFile}" 2>/dev/null | tr -d ' ')" ]
    then
      echo -e "\n$(date) ${thisURL} returned 0 BYTES" >> "${logFile}"
      f_isJVMup
      f_restart
      return
  fi

    # anything from here down is an error we can id
  if grep -q -e '500' "${statusReturnFile}"
    then
      errorLabel='500 INTERNAL SERVER ERROR'
      logJvmState=yes
  elif grep -q -e 'initURL unsuccessful' "${statusReturnFile}"
    then
      errorLabel='SERVLET INITIALIZATION ERROR'
  elif grep -q -e 'RENDERSIZE NULL' "${statusReturnFile}"
    then
      errorLabel='RENDERSIZE NULL'
  elif grep -q -e 'RENDERSIZE UNEXPECTED' "${statusReturnFile}"
    then
      errorLabel='RENDERSIZE UNEXPECTED'
    else
        # PUNT!!
      errorLabel='UNKNOWN ERROR'
  fi

  echo -e "\n$(date) ${thisURL} returned ${errorLabel}" >> "${logFile}"
    # keep the offending answer around under a timestamped name
  mv "${statusReturnFile}" "${statusReturnFile}.$(date | tr ' ' '_' | tr ':' '-')"
  if [ "${logJvmState}" = yes ]
    then
      f_isJVMup
  fi
  f_restart
}; # end f_checkPIQStatus()
# ###################################################

# ###
# ### main
# ###

  # this should be the only instance running
# Take the instance lock, then probe every channel in ${channelList} that
# ops have not locked out, and clean up after ourselves on the way out.
# Exit status: 1 if another instance holds ${kaLockFile}, else 0 (f_restart
# may still end the script with 1 if a daemon refuses to start).

  # noclobber makes "test and create" a single atomic step, so two cron
  # runs starting together cannot both get the lock
if ! ( set -o noclobber; : > "${kaLockFile}" ) 2>/dev/null
  then
      # we're a day late and a buck short
    exit 1
fi

  # clean up our detrius on EVERY way out, including the exit 1 inside
  # f_restart; a lock left behind would silently disable all later runs.
  # Installed only now, so a run that lost the race never removes the
  # winner's lock.
trap 'rm -f "${statusReturnFile}" "${kaLockFile}"' EXIT

  # set up a loop because we do exactly the same thing for each channel
for thisChannel in "${channelList[@]}"
  do
      # look up this channel's settings: <channel>_ChannelDir / <channel>_HostURL
    channelDirVar="${thisChannel}_ChannelDir"
    hostURLVar="${thisChannel}_HostURL"
    channelDir="${!channelDirVar}"
    thisURL="${!hostURLVar}"

      # we may be the only instance, but ops may have a jvm down intentionally;
      # skip just that channel and keep checking the others
    if [ -n "${channelDir}" ] && [ -f "${channelDir}/lock_${thisChannel}" ]
      then
        continue
    fi

      # a channel without a URL cannot be probed; don't reuse a stale one
    if [ -z "${thisURL}" ]
      then
        echo "$(date) no ${hostURLVar} defined for channel ${thisChannel}" >> "${logFile}"
        continue
    fi

    f_checkPIQStatus
  done

exit 0

Reply via email to