assuming things are broken without external help ....
i've been having trouble with dns infinitely extending
the life of queries when a "srvfail" is returned by an authoritative
server. eventually one query to a broken ns will hold up all the threads
available on the server. this happens a lot on reverse lookups. i fire
this script every 10 minutes to help ease the pain until i have the time
to figure out exactly what's going wrong.
- erik
#!/bin/rc
# restartdns: restart ndb/dns when its processes are Broken or piled up
# waiting on a query lock; with -f, restart unconditionally.

# run in a private copy of the environment and name space
rfork en

# a single newline, used as ifs to split command output into lines
nl='
'
# set to 1 when -f is given
fflag=0
# intended recipient of the mailed report
mailuser=guywhogetstocheckonthisstuff
# usage: print the usage line on standard error and exit with
# status 'usage'.
fn usage {
	>[1=2] echo 'usage: restartdns [-f]'
	exit usage
}
# why: print the reason dns is about to be restarted.
# Reads the global lists nbroken and nwait (set at top level before why
# is called).  Each non-empty list is reported on its own, so deadlocked
# queries are still listed when Broken processes are present as well.
fn why{
	if(! ~ $#nbroken 0)
		echo getting mediæval on $#nbroken broken dns processes.
	if(! ~ $#nwait 0){
		echo getting mediæval on $#nwait deadlocked dns processes.
		# one element of nwait per line, so the counts stay readable
		for(w in $nwait)
			echo $w
	}
}
# parse arguments: -f sets fflag; anything else is a usage error
for(arg in $*){
	if(~ $arg -f)
		fflag=1
	if not
		usage
}
# Unless -f was given, restart only when some dns process is Broken or
# when more than two processes are waiting on the query lock for the
# same thing; otherwise exit with status 'none broken'.
if(~ $fflag 0){
	# split on newlines so each element is one ps line and $#nbroken
	# counts processes rather than words
	ifs=$nl nbroken=`{ps -a | grep dns | grep Broken}
	# keep the text following 'query lock wait for', count identical
	# entries, and keep only those seen more than twice
	ifs=$nl nwait=`{ps -a | sed -n 's/.* +dns \[query lock wait for(.*)\]/\1/gp' | sort | uniq -c | awk '$1>2'}
	if(~ $#nbroken 0 && ~ $#nwait 0)
		exit 'none broken'
	why
	# when $service is rx, also mail the report to $mailuser
	if(~ $service rx)
		{date; echo; why; echo; ps -a | grep dns} | mail $mailuser
}
# slay prints the commands that kill the dns processes; rc runs them
slay dns | rc
# start both dns servers again: the plain one, and the one given
# /net.alt and /lib/ndb/external (see ndb(8) for the flags)
ndb/dns -s
ndb/dns -Rrsx /net.alt -f /lib/ndb/external