Hi,

I noticed a constant load of 1 on both of our two varnish servers and
was going to look into this.  They are running CentOS release 5.4,
x86_64 and varnish-2.1.2-1 (recompiled from the spec file in the
released tar.gz) with the attached vcl and the following command line:

 # /usr/sbin/varnishd -P /var/run/varnish.pid -a :6081 -f 
/etc/varnish/frontend_maptpserver.maptp42.vcl -T 127.0.0.1:6082 -t 120 -w 
1,1000,120 -u varnish -g varnish -s 
file,/var/lib/varnish/varnish_storage.bin,196G

Then I got distracted when I strace'd to the varnish child and it
immediately exited with:

r...@paris [~] $ strace -p 15023
Process 15023 attached - interrupt to quit
close(7)                                = 0
close(8)                                = 0
write(1, "Child dies", 10)              = 10
write(1, "\n", 1)                       = 1
brk(0)                                  = 0x19bfe000
brk(0x19c1f000)                         = 0x19c1f000
exit_group(1)                           = ?
Process 15023 detached
r...@paris [~] $

D'oh, all cache gone!  It does that every times, so it seems unrelated to
the load I see on the system.  In /var/log/messages I found:

Jul 19 14:34:09 paris varnishd[5102]: child (15023) Started
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Child starts
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said managed to mmap 
210453397504 bytes of 210453397504
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Probe("GET 
/exodus.html HTTP/1.1
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Host: 
apoll.map24.local.
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris last message repeated 2 times
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Probe("GET 
/exodus.html HTTP/1.1
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Host: 
gotthard.map24.local.
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris last message repeated 2 times
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Probe("GET 
/exodus.html HTTP/1.1
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Host: 
hybris.map24.local.
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris last message repeated 2 times
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Probe("GET 
/exodus.html HTTP/1.1
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Host: cure.map24.local.
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris last message repeated 2 times
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Probe("GET 
/exodus.html HTTP/1.1
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said Host: 
hardy.map24.local.
Jul 19 14:34:09 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris last message repeated 2 times
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Probe("GET 
/exodus.html HTTP/1.1
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Host: 
goblin.map24.local.
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris last message repeated 2 times
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Probe("GET 
/exodus.html HTTP/1.1
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Host: leto.map24.local.
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris last message repeated 2 times
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Probe("GET 
/exodus.html HTTP/1.1
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Host: 
krantor.map24.local.
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris last message repeated 2 times
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Probe("GET 
/exodus.html HTTP/1.1
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Host: 
solons.map24.local.
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris last message repeated 2 times
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Probe("GET 
/exodus.html HTTP/1.1
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Host: 
white.map24.local.
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said Connection: close
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said
Jul 19 14:34:10 paris last message repeated 2 times
Jul 19 14:34:10 paris varnishd[5102]: Child (15023) said ", 0.03, 1)
Jul 19 14:34:39 paris varnishd[5102]: Child (15023) said Child dies
Jul 19 14:34:39 paris varnishd[5102]: Child (15023) died

I think 14:34:10 is all from the startup and at 14:34:39 I'm starting
strace.  That looks like the health check to the backend servers, but
other then that I have no idea what it means.  I run varnishlog to a
file and strace'd the child process again.  The file is 3MB, so I have
uploaded it FTP: ftp://ftp.lbsp.navteq.com/outgoing/2XyKTXBY/.  I cannot
read much from it, but I think the point where it restarts is somewhere
around:

[...]
  122 ReqEnd       c 887294864 1279552540.103346109 1279552540.504165888 
0.000036001 0.000036955 0.400782824
  160 Debug        c "herding"
    0 CLI          - EOF on CLI connection, worker stops
    0 WorkThread   - 0x45559e60 start
    0 CLI          - Rd vcl.load "boot" ./vcl.1P9zoqAU.so
    0 Backend_health - apoll Still sick ------H 1 3 5 0.000000 0.000000
    0 Backend_health - apoll Still sick ------H 2 3 5 0.000000 0.000000
[...]

I don't know what "EOF on CLI" means.  Does CLI refer to the management
interface via the -T parameter?  That was definitely not in use at this
time.  Any ideas?  Should I be able to strace the PID?  (I think so.) If
not how can I debug the load issue I have?

(Oh, BTW: the load is gone from the system after the child died.  No, I
don't like that way of solving this problem.)

Thanks in advance.

Kind regards,
-- 
Navteq (DE) GmbH
Frank Gruellich
Map24 Systems and Networks

Duesseldorfer Strasse 40a
65760 Eschborn
Germany

Phone:      +49 6196 77756-414
Fax:        +49 6196 77756-100

HRB 46215, Local Court Frankfurt am Main
Managing Directors: Thomas Golob, Hans Pieter Gieszen, Martin Robert Stockman
USt-ID-No.: DE 197947163
backend apoll {
        .host   = "apoll.map24.local.";
        .port   = "80";
        .probe  = {
                .url = "/exodus.html";
                .timeout = 30 ms;
                .interval = 1s;
                .window = 5;
                .threshold = 3;
        }
}
backend cure {
        .host   = "cure.map24.local.";
        .port   = "80";
        .probe  = {
                .url = "/exodus.html";
                .timeout = 30 ms;
                .interval = 1s;
                .window = 5;
                .threshold = 3;
        }
}
backend goblin {
        .host   = "goblin.map24.local.";
        .port   = "80";
        .probe  = {
                .url = "/exodus.html";
                .timeout = 30 ms;
                .interval = 1s;
                .window = 5;
                .threshold = 3;
        }
}
backend gotthard {
        .host   = "gotthard.map24.local.";
        .port   = "80";
        .probe  = {
                .url = "/exodus.html";
                .timeout = 30 ms;
                .interval = 1s;
                .window = 5;
                .threshold = 3;
        }
}
backend hardy {
        .host   = "hardy.map24.local.";
        .port   = "80";
        .probe  = {
                .url = "/exodus.html";
                .timeout = 30 ms;
                .interval = 1s;
                .window = 5;
                .threshold = 3;
        }
}
backend hybris {
        .host   = "hybris.map24.local.";
        .port   = "80";
        .probe  = {
                .url = "/exodus.html";
                .timeout = 30 ms;
                .interval = 1s;
                .window = 5;
                .threshold = 3;
        }
}
backend krantor {
        .host   = "krantor.map24.local.";
        .port   = "80";
        .probe  = {
                .url = "/exodus.html";
                .timeout = 30 ms;
                .interval = 1s;
                .window = 5;
                .threshold = 3;
        }
}
backend leto {
        .host   = "leto.map24.local.";
        .port   = "80";
        .probe  = {
                .url = "/exodus.html";
                .timeout = 30 ms;
                .interval = 1s;
                .window = 5;
                .threshold = 3;
        }
}
backend solons {
        .host   = "solons.map24.local.";
        .port   = "80";
        .probe  = {
                .url = "/exodus.html";
                .timeout = 30 ms;
                .interval = 1s;
                .window = 5;
                .threshold = 3;
        }
}
backend white {
        .host   = "white.map24.local.";
        .port   = "80";
        .probe  = {
                .url = "/exodus.html";
                .timeout = 30 ms;
                .interval = 1s;
                .window = 5;
                .threshold = 3;
        }
}

director maptp42 round-robin {
        { .backend = apoll;     }
        { .backend = cure;      }
        { .backend = goblin;    }
        { .backend = gotthard;  }
        { .backend = hardy;     }
        { .backend = hybris;    }
        { .backend = krantor;   }
        { .backend = leto;      }
        { .backend = solons;    }
        { .backend = white;     }
}

sub vcl_recv {
        set req.backend = maptp42;

        if (req.request != "GET" &&
                req.request != "HEAD" &&
                req.request != "PUT" &&
                req.request != "POST" &&
                req.request != "TRACE" &&
                req.request != "OPTIONS" &&
                req.request != "DELETE") {
                        /* Non-RFC2616 or CONNECT which is weird. */
                        return (pipe);
        }
        if (req.request != "GET" && req.request != "HEAD") {
                /* We only deal with GET and HEAD by default */
                return (pass);
        }
        if (req.url == "/exodus.html" ) {
                if (req.backend.healthy) {
                        error 200 "OK";
                } else {
                        error 404 "Not Found";
                }
        }

        if ( ! ( req.url ~ "^/lbsp/webservices1\.5\?cgi=Map24MGI.*quadkey=.*$" 
|| req.url ~ "^/lbsp/webservices1\.5\?.*&cgi=Map24MapSearch&.*$" ) ) {
                /* Does not look like a quadkey request, pass to backend.  How
                 * did it get here?
                 */
                return (pass);
        }
        if (req.http.Authorization) {
                /* Not cacheable by default */
                return (pass);
        }
        if ( req.url ~ "^/lbsp/webservices1\.5\?.*&cgi=Map24MapSearch&.*$" ) {
                set req.url = regsub(req.url, "\&sid=[^&]*\&", "\&");
        }

        # normalize the Host: header
        if (req.http.host ~ ".*\.ctl.maptp42\.map24\.com$") {
                set req.http.host = "0.ctl.maptp42.map24.com";
        }

        return (lookup);

}


sub vcl_fetch {
        if (beresp.cacheable) {
                /* Remove Expires from backend, it's not long enough */
                unset beresp.http.expires;

                /* Set the clients TTL on this object */
                set beresp.http.cache-control = "max-age = 900";

                /* Set how long Varnish will keep it */
                set beresp.ttl = 1d;

                /* marker for vcl_deliver to reset Age: */
                set beresp.http.magicmarker = "1";
        }
}

sub vcl_deliver {
        if (resp.http.magicmarker) {
                /* Remove the magic marker */
                unset resp.http.magicmarker;

                /* By definition we have a fresh object */
                set resp.http.age = "0";
        }
}
# vim:ft=vcl

Attachment: signature.asc
Description: OpenPGP digital signature

_______________________________________________
varnish-misc mailing list
[email protected]
http://lists.varnish-cache.org/mailman/listinfo/varnish-misc

Reply via email to