Hi Guys,
This is my first message in this list, I began working for a new
company some months ago and I found this infrastructure:
+---------+ +---------+ +---------+ +---------+
| VARNISH | | VARNISH | | VARNISH | | VARNISH |
+---------+ +---------+ +---------+ +---------+
| | | |
+------------+------------+------------+
| |
+------+-+ +--+-----+
| APACHE | | APACHE |
+--------+ +--------+
Varnish servers are HP DL360 G6 with 66Gb RAM and 4 Quad-Core Xeon CPUs,
running varnish 2.1.6 (Updated from 2.0.5 1 month ago). They're serving
content for up to 450Mbit/s during peaks.
It's happening often that they freeze serving contents. and I noticed a
common pattern: the content that get stuck is always one of the most
served, like a css or js file, or some component of the page layout, and
it never happens to an image part of the content.
It's really weird, because css should be always cached.
I'm running Centos 5.5 64bit and here's my varnish startup parameters:
DAEMON_OPTS=" -a ${VARNISH_LISTEN_ADDRESS}:${VARNISH_LISTEN_PORT} \
-f ${VARNISH_VCL_CONF} \
-T 0.0.0.0:6082 \
-t 604800 \
-u varnish -g varnish \
-s malloc,54G \
-p thread_pool_add_delay=2 \
-p thread_pools=16 \
-p thread_pool_min=50 \
-p thread_pool_max=4000 \
-p listen_depth=4096 \
-p lru_interval=600 \
-hclassic,500009 \
-p log_hashstring=off \
-p shm_workspace=16384 \
-p ping_interval=2 \
-p default_grace=3600 \
-p pipe_timeout=10 \
-p sess_timeout=6 \
-p send_timeout=10"
In attach there is my vcl and the varnishstat -1 output after a 24h run
of 1 of the servers. Do you notice something bad?
In the meanwhile I'm running through the documentation, but it's for us
an high priority issue as we're talking about the production environment
and there's not time now to wait for me to completely understand how
does varnish work and find out a solution.
Hope someone can help me
Thanks in advance
Diego
client_conn 20485094 241.68 Client connections accepted
client_drop 0 0.00 Connection dropped, no sess/wrk
client_req 86782883 1023.85 Client requests received
cache_hit 79748094 940.86 Cache hits
cache_hitpass 0 0.00 Cache hits for pass
cache_miss 6909286 81.51 Cache misses
backend_conn 193081 2.28 Backend conn. success
backend_unhealthy 0 0.00 Backend conn. not attempted
backend_busy 0 0.00 Backend conn. too many
backend_fail 1 0.00 Backend conn. failures
backend_reuse 6805997 80.30 Backend conn. reuses
backend_toolate 10882 0.13 Backend conn. was closed
backend_recycle 6818150 80.44 Backend conn. recycles
backend_unused 0 0.00 Backend conn. unused
fetch_head 0 0.00 Fetch head
fetch_length 6993290 82.51 Fetch with Length
fetch_chunked 7026 0.08 Fetch chunked
fetch_eof 0 0.00 Fetch EOF
fetch_bad 0 0.00 Fetch had bad headers
fetch_close 0 0.00 Fetch wanted close
fetch_oldhttp 0 0.00 Fetch pre HTTP/1.1 closed
fetch_zero 0 0.00 Fetch zero len
fetch_failed 0 0.00 Fetch failed
n_sess_mem 4406 . N struct sess_mem
n_sess 1915 . N struct sess
n_object 4487398 . N struct object
n_vampireobject 0 . N unresurrected objects
n_objectcore 4487638 . N struct objectcore
n_objecthead 4108162 . N struct objecthead
n_smf 0 . N struct smf
n_smf_frag 0 . N small free smf
n_smf_large 0 . N large free smf
n_vbe_conn 6 . N struct vbe_conn
n_wrk 800 . N worker threads
n_wrk_create 800 0.01 N worker threads created
n_wrk_failed 0 0.00 N worker threads not created
n_wrk_max 2430 0.03 N worker threads limited
n_wrk_queue 0 0.00 N queued work requests
n_wrk_overflow 449 0.01 N overflowed work requests
n_wrk_drop 0 0.00 N dropped work requests
n_backend 4 . N backends
losthdr 0 0.00 HTTP header overflows
n_objsendfile 0 0.00 Objects sent with sendfile
n_objwrite 84751546 999.89 Objects sent with write
n_objoverflow 0 0.00 Objects overflowing workspace
s_sess 20485084 241.68 Total Sessions
s_req 86782883 1023.85 Total Requests
s_pipe 0 0.00 Total pipe
s_pass 125607 1.48 Total pass
s_fetch 7000176 82.59 Total fetch
s_hdrbytes 34123882151 402589.42 Total header bytes
s_bodybytes 387496967606 4571642.24 Total body bytes
sess_closed 1168804 13.79 Session Closed
sess_pipeline 306074 3.61 Session Pipeline
sess_readahead 148010 1.75 Session Read Ahead
sess_linger 85660304 1010.61 Session Linger
sess_herd 80938258 954.90 Session herd
shm_records 3912135692 46154.90 SHM records
shm_writes 243351032 2871.03 SHM writes
shm_flushes 0 0.00 SHM flushes due to overflow
shm_cont 1635868 19.30 SHM MTX contention
shm_cycles 1477 0.02 SHM cycles through buffer
sm_nreq 0 0.00 allocator requests
sm_nobj 0 . outstanding allocations
sm_balloc 0 . bytes allocated
sm_bfree 0 . bytes free
sma_nreq 16255630 191.78 SMA allocator requests
sma_nobj 8974798 . SMA outstanding allocations
sma_nbytes 57981956173 . SMA outstanding bytes
sma_balloc 98113072064 . SMA bytes allocated
sma_bfree 40131115891 . SMA bytes free
sms_nreq 34612 0.41 SMS allocator requests
sms_nobj 0 . SMS outstanding allocations
sms_nbytes 0 . SMS outstanding bytes
sms_balloc 11420541 . SMS bytes allocated
sms_bfree 11420541 . SMS bytes freed
backend_req 7000349 82.59 Backend requests made
n_vcl 1 0.00 N vcl total
n_vcl_avail 1 0.00 N vcl available
n_vcl_discard 0 0.00 N vcl discarded
n_purge 1 . N total active purges
n_purge_add 1 0.00 N new purges added
n_purge_retire 0 0.00 N old purges deleted
n_purge_obj_test 0 0.00 N objects tested
n_purge_re_test 0 0.00 N regexps tested against
n_purge_dups 0 0.00 N duplicate purges removed
hcb_nolock 0 0.00 HCB Lookups without lock
hcb_lock 0 0.00 HCB Lookups with lock
hcb_insert 0 0.00 HCB Inserts
esi_parse 0 0.00 Objects ESI parsed (unlock)
esi_errors 0 0.00 ESI parse errors (unlock)
accept_fail 0 0.00 Accept failures
client_drop_late 0 0.00 Connection dropped late
uptime 84761 1.00 Client uptime
backend_retry 39 0.00 Backend conn. retry
dir_dns_lookups 0 0.00 DNS director lookups
dir_dns_failed 0 0.00 DNS director failed lookups
dir_dns_hit 0 0.00 DNS director cached lookups hit
dir_dns_cache_full 0 0.00 DNS director full dnscache
fetch_1xx 0 0.00 Fetch no body (1xx)
fetch_204 0 0.00 Fetch no body (204)
fetch_304 0 0.00 Fetch no body (304)
backend img1 {
.host = "172.16.22.11";
.port = "82";
.connect_timeout = 5s;
.first_byte_timeout = 5s;
.between_bytes_timeout = 2s;
.probe = {
.url = "/images/testfiledonotdelete.html";
.interval = 5s;
.timeout = 1 s;
.window = 5;
.threshold = 3;
}
}
backend img2 {
.host = "172.16.22.13";
.port = "82";
.connect_timeout = 5s;
.first_byte_timeout = 5s;
.between_bytes_timeout = 2s;
.probe = {
.url = "/images/testfiledonotdelete.html";
.interval = 5s;
.timeout = 1 s;
.window = 5;
.threshold = 3;
}
}
backend www1 {
.host = "172.16.22.2";
.port = "80";
.connect_timeout = 5s;
.first_byte_timeout = 5s;
.between_bytes_timeout = 2s;
.probe = {
.url = "/robots.txt";
.interval = 5s;
.timeout = 1 s;
.window = 5;
.threshold = 3;
}
}
backend www2 {
.host = "172.16.22.3";
.port = "80";
.connect_timeout = 5s;
.first_byte_timeout = 5s;
.between_bytes_timeout = 2s;
.probe = {
.url = "/robots.txt";
.interval = 5s;
.timeout = 1 s;
.window = 5;
.threshold = 3;
}
}
director img_director round-robin {
{ .backend = img1; }
{ .backend = img2; }
}
director www_director round-robin {
{ .backend = www1; }
{ .backend = www2; }
}
sub vcl_recv {
unset req.http.Cookie;
if (req.http.Accept-Encoding) {
if (req.url ~ "\.(jpg|png|gif)$") {
# No point in compressing these
remove req.http.Accept-Encoding;
} elsif (req.http.Accept-Encoding ~ "gzip") {
set req.http.Accept-Encoding = "gzip";
} elsif (req.http.Accept-Encoding ~ "deflate") {
set req.http.Accept-Encoding = "deflate";
} else {
# unkown algorithm
remove req.http.Accept-Encoding;
}
}
if ( req.url ~ "/templates/common/" ) {
set req.backend = www_director;
} else {
set req.backend = img_director;
}
if (req.backend.healthy) {
set req.grace = 30s;
}
else {
set req.grace = 1h;
}
if (req.request != "GET" && req.request != "HEAD")
{
return (pass);
}
return (lookup);
}
sub vcl_pipe {
# Note that only the first request to the backend will have
# X-Forwarded-For set. If you use X-Forwarded-For and want to
# have it set for all requests, make sure to have:
# set req.http.connection = "close";
# here. It is not set by default as it might break some broken web
# applications, like IIS with NTLM authentication.
return (pipe);
}
sub vcl_pass {
return (pass);
}
sub vcl_miss {
if (req.http.user-agent ~ "spider" ||
req.http.user-agent ~ "googlebot" ||
req.http.user-agent ~ "crawl" ||
req.http.user-agent ~ "msnbot") {
error 503 "Not presently in cache";
}
return (fetch);
}
sub vcl_fetch
{
set beresp.ttl = 45d;
if (beresp.status == 500) {
set beresp.saintmode = 10s;
restart;
}
set beresp.grace = 30m;
if (beresp.cacheable) {
/* marker for vcl_deliver to reset Age: */
set beresp.http.magicmarker = "1";
}
if (req.request == "GET" ) {
unset beresp.http.Set-Cookie;
set beresp.ttl = 45d;
return (deliver);
}
if (!beresp.cacheable)
{
return (pass);
}
if (beresp.http.Set-Cookie)
{
return (pass);
}
if (beresp.ttl < 24h) {
# force minimum ttl of 1 hour for all cached objects.
set beresp.ttl = 45d;
}
# We don't want to cache 404. If status differs from 200 and 302
# then we issue a restart causing varnish to fetch the image again
# from the defined director.
if (beresp.status != 200 && beresp.status != 302) {
restart;
}
return (deliver);
}
sub vcl_deliver {
set resp.http.Via = "Varnish";
if (resp.http.magicmarker) {
#/* Remove the magic marker */
unset resp.http.magicmarker;
#/* By definition we have a fresh object */
set resp.http.Age = "0";
}
if (obj.hits > 0) {
set resp.http.X-Cache = "HIT";
}
else {
set resp.http.X-Cache = "MISS";
}
return (deliver);
}
sub vcl_error {
set obj.http.Content-Type = "text/html; charset=utf-8";
synthetic {"
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
<title>"} obj.status " " obj.response {"</title>
</head>
<body>
<h1>Error "} obj.status " " obj.response {"</h1>
<p>"} obj.response {"</p>
</body>
</html>
"};
return (deliver);
}
_______________________________________________
varnish-misc mailing list
[email protected]
http://www.varnish-cache.org/lists/mailman/listinfo/varnish-misc