i'm seeing dns break about every 6 hrs. i can't leave
the process hanging about, so i have an automated dns killer.
unfortunately this results in no useful debugging information.
i'll upgrade the script (following) to snap the broken processes.
dennis 1206083 0:00 0:00 472K Await dns [/net restarter]
dennis 1206084 0:00 0:00 6316K Pread dns [636 /net/dns Twrites of 4798 9p rpcs read; 16754 alarms]
dennis 1206085 0:08 0:26 6316K Pread dns [served 74205 udp; 16753 alarms]
dennis 1206087 0:00 0:00 388K Await dns [/net.alt restarter]
dennis 1206088 0:00 0:00 616K Pread dns [0 /net.alt/dns Twrites of 982 9p rpcs read; 177 alarms]
dennis 1206089 1:54 0:06 616K Pread dns [served 7775 udp; 177 alarms]
dennis 1234972 0:00 0:00 6316K Pread dns [reading inside reply from 208.122.22.2: some.name.com ip from 1.2.3.4]
dennis 1235413 0:00 0:00 6316K Broken dns [netquery: a1040.b.akamai.net]
sorry for the anonymization, and the current lack
of detail.
- erik
#!/bin/rc
# restartdns: slay and restart the dns server when its processes are
# broken or deadlocked (or unconditionally with -f).

# work in a private copy of the environment and name space
rfork en

# command used to start the resolver; its last path element is the
# process name handed to slay
dns=ndb/dns

# address for the report mail
mailto=quanstro

# 0: restart only when a problem is detected; 1 (-f): restart regardless
fflag=0

# systems that additionally run a resolver on /net.alt; extrec lists
# those started with the extra -R and -r flags.  both lists are empty here.
ext=()
extrec=()

# a single newline, used as ifs to split command output into lines
nl='
'
# Print the usage line on standard error and exit with status 'usage'.
fn usage{
	echo >[1=2] 'usage: restartdns [-f]'
	exit usage
}
# Explain on standard output why dns is about to be restarted.
# Reads the global lists $nbroken and $nwait:
#   - if $nbroken is non-empty, print how many entries it holds;
#   - otherwise print how many entries $nwait holds, followed by each
#     entry on its own line.
fn why{
	if(! ~ $#nbroken 0)
		echo getting mediæval on $#nbroken broken dns processes.
	if not{
		echo getting mediæval on $#nwait deadlocked dns processes.
		for(i in $nwait)
			echo $i
	}
}
# Argument handling: -f forces a restart; anything else is a usage error.
for(arg in $*){
	if(~ $arg -f)
		fflag=1
	if not
		usage
}
# Unless forced with -f, go ahead only on one of the known systems and
# only when ps shows broken or deadlocked dns processes.
if(~ $fflag 0){
	if(! ~ $sysname atlas baron rb2)
		exit 'wrong system'

	# split command output on newlines only, so every list element is
	# one whole line and $#nbroken counts processes rather than words
	ifs=$nl

	# ps lines for dns processes in the Broken state
	nbroken=`{ps -a | grep ' dns ' | grep Broken}

	# take the text following 'query lock wait for' from each dns process
	# status, and keep the values that occur more than twice
	nwait=`{ps -a | sed -n 's/.* +dns \[query lock wait for(.*)\]/\1/gp' | sort | uniq -c | awk '$1>2'}

	if(~ $#nbroken 0 && ~ $#nwait 0)
		exit 'none broken'

	why
	# when $service is rx, also mail the report with a dns process listing
	if(~ $service rx)
		{date; echo; why; echo; ps -a | grep dns} | mail -s 'restartdns: '^$sysname $mailto
}
# Kill the running resolver and start it again.
# The process name to slay is the last path element of $dns.
victim=`{echo $dns | sed 's:.*/::g'}

# slay's output is fed to rc, which executes it
slay $victim | rc

# restart the resolver on the default network
$dns -s

# systems listed in $ext also get a resolver on /net.alt using the
# external database
if(~ $sysname $ext)
	$dns -sx /net.alt -f /lib/ndb/external

# systems listed in $extrec get the same, started with -R and -r as well
if(~ $sysname $extrec)
	$dns -Rrsx /net.alt -f /lib/ndb/external