Hi, I noticed a constant load of 1 on both of our two varnish servers and was going to look into this. They are running CentOS release 5.4, x86_64 and varnish-2.1.2-1 (recompiled from the spec file in the released tar.gz) with the attached vcl and the following command line:
# /usr/sbin/varnishd -P /var/run/varnish.pid -a :6081 -f
/etc/varnish/frontend_maptpserver.maptp42.vcl -T 127.0.0.1:6082 -t 120 -w
1,1000,120 -u varnish -g varnish -s
file,/var/lib/varnish/varnish_storage.bin,196G
Then I got distracted when I strace'd to the varnish child and it
immediately exited with:
r...@paris [~] $ strace -p 15023
Process 15023 attached - interrupt to quit
close(7) = 0
close(8) = 0
write(1, "Child dies", 10) = 10
write(1, "\n", 1) = 1
brk(0) = 0x19bfe000
brk(0x19c1f000) = 0x19c1f000
exit_group(1) = ?
Process 15023 detached
r...@paris [~] $
D'oh, all cache gone! It does that every times, so it seems unrelated to
the load I see on the system. In /var/log/messages I found:
Jul 19 14:34:09 paris varnishd[5102]: child (15023) Started
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Child starts
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said managed to mmap
210453397504 bytes of 210453397504
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Probe("GET
/exodus.html HTTP/1.1
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Host:
apoll.map24.local.
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris last message repeated 2 times
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Probe("GET
/exodus.html HTTP/1.1
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Host:
gotthard.map24.local.
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris last message repeated 2 times
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Probe("GET
/exodus.html HTTP/1.1
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Host:
hybris.map24.local.
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris last message repeated 2 times
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Probe("GET
/exodus.html HTTP/1.1
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Host: cure.map24.local.
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris last message repeated 2 times
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Probe("GET
/exodus.html HTTP/1.1
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Host:
hardy.map24.local.
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris last message repeated 2 times
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Probe("GET
/exodus.html HTTP/1.1
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Host:
goblin.map24.local.
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris last message repeated 2 times
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Probe("GET
/exodus.html HTTP/1.1
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Host: leto.map24.local.
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris last message repeated 2 times
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Probe("GET
/exodus.html HTTP/1.1
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Host:
krantor.map24.local.
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris last message repeated 2 times
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Probe("GET
/exodus.html HTTP/1.1
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Host:
solons.map24.local.
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris last message repeated 2 times
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Probe("GET
/exodus.html HTTP/1.1
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Host:
white.map24.local.
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris last message repeated 2 times
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:39 paris varnishd[5102]: Child (15023) said Child dies
Jul 19 14:34:39 paris varnishd[5102]: Child (15023) died
I think 14:34:10 is all from the startup and at 14:34:39 I'm starting
strace. That looks like the health check to the backend servers, but
other then that I have no idea what it means. I run varnishlog to a
file and strace'd the child process again. The file is 3MB, so I have
uploaded it FTP: ftp://ftp.lbsp.navteq.com/outgoing/2XyKTXBY/. I cannot
read much from it, but I think the point where it restarts is somewhere
around:
[...]
122 ReqEnd c 887294864 1279552540.103346109 1279552540.504165888
0.000036001 0.000036955 0.400782824
160 Debug c "herding"
0 CLI - EOF on CLI connection, worker stops
0 WorkThread - 0x45559e60 start
0 CLI - Rd vcl.load "boot" ./vcl.1P9zoqAU.so
0 Backend_health - apoll Still sick ------H 1 3 5 0.000000 0.000000
0 Backend_health - apoll Still sick ------H 2 3 5 0.000000 0.000000
[...]
I don't know what "EOF on CLI" means. Does CLI refer to the management
interface via the -T parameter? That was definitely not in use at this
time. Any ideas? Should I be able to strace the PID? (I think so.) If
not how can I debug the load issue I have?
(Oh, BTW: the load is gone from the system after the child died. No, I
don't like that way of solving this problem.)
Thanks in advance.
Kind regards,
--
Navteq (DE) GmbH
Frank Gruellich
Map24 Systems and Networks
Duesseldorfer Strasse 40a
65760 Eschborn
Germany
Phone: +49 6196 77756-414
Fax: +49 6196 77756-100
HRB 46215, Local Court Frankfurt am Main
Managing Directors: Thomas Golob, Hans Pieter Gieszen, Martin Robert Stockman
USt-ID-No.: DE 197947163
backend apoll {
.host = "apoll.map24.local.";
.port = "80";
.probe = {
.url = "/exodus.html";
.timeout = 30 ms;
.interval = 1s;
.window = 5;
.threshold = 3;
}
}
backend cure {
.host = "cure.map24.local.";
.port = "80";
.probe = {
.url = "/exodus.html";
.timeout = 30 ms;
.interval = 1s;
.window = 5;
.threshold = 3;
}
}
backend goblin {
.host = "goblin.map24.local.";
.port = "80";
.probe = {
.url = "/exodus.html";
.timeout = 30 ms;
.interval = 1s;
.window = 5;
.threshold = 3;
}
}
backend gotthard {
.host = "gotthard.map24.local.";
.port = "80";
.probe = {
.url = "/exodus.html";
.timeout = 30 ms;
.interval = 1s;
.window = 5;
.threshold = 3;
}
}
backend hardy {
.host = "hardy.map24.local.";
.port = "80";
.probe = {
.url = "/exodus.html";
.timeout = 30 ms;
.interval = 1s;
.window = 5;
.threshold = 3;
}
}
backend hybris {
.host = "hybris.map24.local.";
.port = "80";
.probe = {
.url = "/exodus.html";
.timeout = 30 ms;
.interval = 1s;
.window = 5;
.threshold = 3;
}
}
backend krantor {
.host = "krantor.map24.local.";
.port = "80";
.probe = {
.url = "/exodus.html";
.timeout = 30 ms;
.interval = 1s;
.window = 5;
.threshold = 3;
}
}
backend leto {
.host = "leto.map24.local.";
.port = "80";
.probe = {
.url = "/exodus.html";
.timeout = 30 ms;
.interval = 1s;
.window = 5;
.threshold = 3;
}
}
backend solons {
.host = "solons.map24.local.";
.port = "80";
.probe = {
.url = "/exodus.html";
.timeout = 30 ms;
.interval = 1s;
.window = 5;
.threshold = 3;
}
}
backend white {
.host = "white.map24.local.";
.port = "80";
.probe = {
.url = "/exodus.html";
.timeout = 30 ms;
.interval = 1s;
.window = 5;
.threshold = 3;
}
}
director maptp42 round-robin {
{ .backend = apoll; }
{ .backend = cure; }
{ .backend = goblin; }
{ .backend = gotthard; }
{ .backend = hardy; }
{ .backend = hybris; }
{ .backend = krantor; }
{ .backend = leto; }
{ .backend = solons; }
{ .backend = white; }
}
sub vcl_recv {
set req.backend = maptp42;
if (req.request != "GET" &&
req.request != "HEAD" &&
req.request != "PUT" &&
req.request != "POST" &&
req.request != "TRACE" &&
req.request != "OPTIONS" &&
req.request != "DELETE") {
/* Non-RFC2616 or CONNECT which is weird. */
return (pipe);
}
if (req.request != "GET" && req.request != "HEAD") {
/* We only deal with GET and HEAD by default */
return (pass);
}
if (req.url == "/exodus.html" ) {
if (req.backend.healthy) {
error 200 "OK";
} else {
error 404 "Not Found";
}
}
if ( ! ( req.url ~ "^/lbsp/webservices1\.5\?cgi=Map24MGI.*quadkey=.*$"
|| req.url ~ "^/lbsp/webservices1\.5\?.*&cgi=Map24MapSearch&.*$" ) ) {
/* Does not look like a quadkey request, pass to backend. How
* did it get here?
*/
return (pass);
}
if (req.http.Authorization) {
/* Not cacheable by default */
return (pass);
}
if ( req.url ~ "^/lbsp/webservices1\.5\?.*&cgi=Map24MapSearch&.*$" ) {
set req.url = regsub(req.url, "\&sid=[^&]*\&", "\&");
}
# normalize the Host: header
if (req.http.host ~ ".*\.ctl.maptp42\.map24\.com$") {
set req.http.host = "0.ctl.maptp42.map24.com";
}
return (lookup);
}
sub vcl_fetch {
if (beresp.cacheable) {
/* Remove Expires from backend, it's not long enough */
unset beresp.http.expires;
/* Set the clients TTL on this object */
set beresp.http.cache-control = "max-age = 900";
/* Set how long Varnish will keep it */
set beresp.ttl = 1d;
/* marker for vcl_deliver to reset Age: */
set beresp.http.magicmarker = "1";
}
}
sub vcl_deliver {
if (resp.http.magicmarker) {
/* Remove the magic marker */
unset resp.http.magicmarker;
/* By definition we have a fresh object */
set resp.http.age = "0";
}
}
# vim:ft=vcl
signature.asc
Description: OpenPGP digital signature
_______________________________________________ varnish-misc mailing list [email protected] http://lists.varnish-cache.org/mailman/listinfo/varnish-misc
