We fix this by front-ending Varnish with Apache. Not the best solution, but we still get to compress.
-j On Mar 2, 2009, at 1:48 PM, Cloude Porteus wrote: > Artur, > What is the issue with ESI & gzip? > > Does this mean that if we want to use ESI, we can't gzip the pages > that have ESI includes? But we could still gzip the pages that are > included by ESI. > > thanks, > cloude > > On Mon, Mar 2, 2009 at 1:40 PM, Artur Bergman <s...@crucially.net> > wrote: >> >> On Feb 27, 2009, at 2:24 PM, John Adams wrote: >> >>> cc'ing the varnish dev list for comments... >>> >>> On Feb 27, 2009, at 1:33 PM, Cloude Porteus wrote: >>> >>>> John, >>>> Good to hear from you. You must be slammed at Twitter. I'm happy to >>>> hear that ESI is holding up for you. It's been in my backlog >>>> since you >>>> mentioned it to me pre-Twitter. >>>> >>>> Any performance info would be great. >>>> >>> >>> Any comments on our setup are welcome. You may also choose to call >>> us >>> crazypants. Many, many thanks to Artur Bergman of Wikia for >>> helping us get >>> this configuration straightened out. >>> >> >> Thanks John :) >> >> I'll describe the settings we use. (We don't use ESI because of gzip) >> >> The first important step is that we put the shmlog on tmpfs >> >> tmpfs /usr/var/varnish/ tmpfs noatime,defaults,size=150M >> 0 0 >> /dev/md0 /var/lib/varnish ext2 >> noatime,nodiratime,norelatime 0 >> 0 >> >> Notice also ext2; we don't care about journaling. (Ignore the broken >> paths) >> >> This is because linux will asynchronously write the log to disk, >> this puts a >> large io pressure on the system (interfering with your normal reads >> if you >> use the same disks) It also scales the IO load with traffic and not >> working >> set. 
>> >> # Maximum number of open files (for ulimit -n) >> NFILES=131072 >> >> # Locked shared memory (for ulimit -l) >> # Default log size is 82MB + header >> MEMLOCK=90000 >> >> DAEMON_COREFILE_LIMIT="unlimited" >> >> >> DAEMON_OPTS="-a :80 \ >> -T localhost:6082 \ >> -f /etc/varnish/wikia.vcl \ >> -p obj_workspace=4096 \ >> # We have lots of objects >> -p sess_workspace=32768 \ >> # Need lots of sessoin space >> -p listen_depth=8192 \ >> -p ping_interval=1 \ >> -s file,/var/lib/varnish/mmap,120G \ >> # lots of mmap >> -p log_hashstring=off \ >> -h classic,250007 \ >> # 2.5 mmilion objects >> -p thread_pool_max=4000 \ >> -p lru_interval=60 \ >> -p esi_syntax=0x00000003 \ >> -p sess_timeout=10 \ >> -p thread_pools=4 \ >> -p thread_pool_min=500 \ >> # we force 4000 threads pre-created >> # otherwise we run into overflows >> -p shm_workspace=32768 \ >> # avoid shm_mtx >> -p srcaddr_ttl=0" >> # avoid hash lookup >> >> # we link geoip into the vcl >> CC_COMMAND='cc_command=exec cc -fpic -shared -Wl,-x -L/usr/local/lib/ >> -lGeoIP -o %o %s' >> >> #### VCL >> >> # declare the function signature >> # so we can use them >> C{ >> #include <string.h> >> double TIM_real(void); >> void TIM_format(double t, char *p); >> }C >> >> >> >> # init GeoIP code >> C{ >> #include <dlfcn.h> >> #include <stdlib.h> >> #include <stdio.h> >> #include <string.h> >> #include <GeoIPCity.h> >> #include <pthread.h> >> >> pthread_mutex_t geoip_mutex = PTHREAD_MUTEX_INITIALIZER; >> >> GeoIP* gi; >> void geo_init () { >> if(!gi) { >> gi = >> GeoIP_open_type(GEOIP_CITY_EDITION_REV1,GEOIP_MEMORY_CACHE); >> } >> } >> }C >> >> vcl_recv { >> >> set req.url = regsub(req.url, "http://[^/]*",""); >> #will normalize proxied requests, specificl curl -x foo:80 >> >> # get out error handler for geoiplookup >> if(req.http.host == "geoiplookup.wikia.com") { >> error 200 "Ok"; >> } >> >> # lvs check >> if (req.url == "/lvscheck.html") { >> error 200 "Ok"; >> } >> >> # normalize Accept-Encoding to reduce vary >> if 
(req.http.Accept-Encoding) { >> if (req.http.Accept-Encoding ~ "gzip") { >> set req.http.Accept-Encoding = "gzip"; >> } elsif (req.http.Accept-Encoding ~ "deflate") { >> set req.http.Accept-Encoding = "deflate"; >> } else { >> unset req.http.Accept-Encoding; >> } >> } >> >> >> # Yahoo uses this to check for 404 >> if (req.url ~ "^/SlurpConfirm404") { >> error 404 "Not found"; >> } >> >> set req.grace = 360000s; #if the backend is down, just serve >> >> >> # check for specific cookies, otherwise nuke them >> # save them so we can re-inject them later in pipe or miss >> set req.http.X-Orig-Cookie = req.http.Cookie; >> if(req.http.Cookie ~ "(session|UserID|UserName|Token|LoggedOut)") { >> # dont do anything, the user is logged in >> } else { >> # dont care about any other cookies >> unset req.http.Cookie; >> } >> >> >> } >> >> # varnish XFF is broken, it doesn't chain them >> # if you have chained varnishes, or trust AOL, you need to append >> them >> sub vcl_pipe { >> # do the right XFF processing >> set bereq.http.X-Forwarded-For = req.http.X-Forwarded-For; >> set bereq.http.X-Forwarded-For = regsub(bereq.http.X-Forwarded- >> For, "$", ", >> "); >> set bereq.http.X-Forwarded-For = regsub(bereq.http.X-Forwarded- >> For, "$", >> client.ip); >> set bereq.http.Cookie = req.http.X-Orig-Cookie; >> } >> >> >> # this implements purging (we purge all 3 versions of the accept- >> encoding, >> none,gzip,deflate) >> sub vcl_hit { >> if (req.request == "PURGE") { >> set obj.ttl = 0s; >> error 200 "Purged."; >> } >> } >> >> sub vcl_miss { >> >> if (req.request == "PURGE") { >> error 404 "Not purged"; >> } >> >> set bereq.http.X-Forwarded-For = req.http.X-Forwarded-For; >> set bereq.http.X-Forwarded-For = regsub(bereq.http.X-Forwarded- >> For, "$", ", >> "); >> set bereq.http.X-Forwarded-For = regsub(bereq.http.X-Forwarded- >> For, "$", >> client.ip); >> } >> >> >> # this marks if something is cacheable or not, if it isn't >> # say why >> vcl_fetch { >> # so we have access to this 
in deliver >> set obj.http.X-Orighost = req.http.host; >> set obj.http.X-Origurl = req.url; >> if (!obj.cacheable) { >> set obj.http.X-Cacheable = "NO:Not-Cacheable"; >> pass; >> } >> if (obj.http.Cache-Control ~ "private") { >> if(req.http.Cookie ~"(UserID|_session)") { >> set obj.http.X-Cacheable = "NO:Got Session"; >> } else { >> set obj.http.X-Cacheable = >> "NO:Cache-Control=private"; >> } >> pass; >> } >> if (obj.http.Set-Cookie ~ "(UserID|_session)") { >> set obj.http.X-Cacheable = "NO:Set-Cookie"; >> pass; >> } >> >> set obj.http.X-Cacheable = "YES"; >> set obj.grace = 360000s; >> >> >> } >> >> >> #Following sets X-Served-By, if it is already set it appends it >> # it also says if it is a HIT, and how many hits >> >> sub vcl_deliver { >> >> #add or append Served By >> if(!resp.http.X-Served-By) { >> set resp.http.X-Served-By = "varnish8"; >> if (obj.hits > 0) { >> set resp.http.X-Cache = "HIT"; >> } else { >> set resp.http.X-Cache = "MISS"; >> } >> set resp.http.X-Cache-Hits = obj.hits; >> } else { >> # append current data >> set resp.http.X-Served-By = regsub(resp.http.X-Served-By, "$", ", >> varnish8"); >> if (obj.hits > 0) { >> set resp.http.X-Cache = regsub(resp.http.X-Cache, "$", ", HIT"); >> } else { >> set resp.http.X-Cache = regsub(resp.http.X-Cache, "$" , ", >> MISS"); >> } >> set resp.http.X-Cache-Hits = regsub(resp.http.X-Cache-Hits, "$", >> ", "); >> set resp.http.X-Cache-Hits = regsub(resp.http.X-Cache-Hits, "$", >> obj.hits); >> } >> >> # >> >> # if the client is another DC, just remove stuff and deliver >> if ( client.ip ~ LON >> || client.ip ~ SJC >> || client.ip ~ IOWA >> ) { >> unset resp.http.X-CPU-Time; >> unset resp.http.X-Real-Time; >> unset resp.http.X-Served-By-Backend; >> unset resp.http.X-User-Id; >> unset resp.http.X-Namespace-Number; >> unset resp.http.X-Orighost; >> unset resp.http.X-Origurl; >> deliver; >> } >> # else do cache-control >> # nuke the headers since they were generally meant for varnish >> # these rules are mostly 
based on mediawiki rules >> if ( resp.http.X-Pass-Cache-Control ) { >> set resp.http.Cache-Control = resp.http.X-Pass-Cache-Control; >> } elsif ( resp.status == 304 ) { >> # no headers on if-modified since >> } elsif ( resp.http.X-Origurl ~ ".*/index\.php.*(css|js)" >> || resp.http.X-Origurl ~ "raw") { >> # dont touch it let mediawiki decide >> } elsif (resp.http.X-Orighost ~ "images.wikia.com") { >> # lighttpd knows what it is doing >> } elsif (resp.http.X-Orighost ~ "geoiplookup") { >> } else { >> #follow squid content here >> set resp.http.Cache-Control = "private, s-maxage=0, max-age=0, >> must-revalidate"; >> } >> >> # this will calculate an Expire headers which is based on now+max-age >> # if you cache the Expire header, then it won't match max-age since >> it is >> static >> if (!resp.status == 304) { >> C{ >> char *cache = VRT_GetHdr(sp, HDR_REQ, "\016cache-control:"); >> char date[40]; >> int max_age; >> int want_equals = 0; >> if(cache) { >> while(*cache != '\0') { >> if (want_equals && *cache == '=') { >> cache++; >> max_age = strtoul(cache, 0, 0); >> break; >> } >> >> if (*cache == 'm' && !memcmp(cache, "max-age", 7)) { >> cache += 7; >> want_equals = 1; >> continue; >> } >> cache++; >> } >> if (max_age) { >> TIM_format(TIM_real() + max_age, date); >> VRT_SetHdr(sp, HDR_RESP, "\010Expires:", date, >> vrt_magic_string_end); >> } >> } >> }C >> #; >> } >> >> } >> >> >> vcl_error { >> # this implements geoip lookups inside varnish >> # so clients can get the data without hitting the backend >> if(req.http.host == "geoiplookup.wikia.com" || req.url == >> "/__varnish/geoip") { >> set obj.http.Content-Type = "text/plain"; >> set obj.http.cache-control = "private, s-maxage=0, max-age=360"; >> set obj.http.X-Orighost = req.http.host; >> C{ >> char *ip = VRT_IP_string(sp, VRT_r_client_ip(sp)); >> char date[40]; >> char json[255]; >> >> pthread_mutex_lock(&geoip_mutex); >> >> if(!gi) { geo_init(); } >> >> GeoIPRecord *record = GeoIP_record_by_addr(gi, ip); >> 
if(record) { >> snprintf(json, 255, "Geo = >> {\"city\":\"%s\",\"country\":\"%s\",\"lat\":\"%f\",\"lon\":\"%f\", >> \"classC\":\"%s\",\"netmask\":\"%d\"}", >> record->city, >> record->country_code, >> record->latitude, >> record->longitude, >> ip, >> GeoIP_last_netmask(gi) >> ); >> pthread_mutex_unlock(&geoip_mutex); >> VRT_synth_page(sp, 0, json, vrt_magic_string_end); >> } else { >> pthread_mutex_unlock(&geoip_mutex); >> VRT_synth_page(sp, 0, "Geo = {}", vrt_magic_string_end); >> } >> >> >> TIM_format(TIM_real(), date); >> VRT_SetHdr(sp, HDR_OBJ, "\016Last-Modified:", date, >> vrt_magic_string_end); >> }C >> # check if site is working >> if(req.url ~ "lvscheck.html") { >> synthetic {"varnish is okay"}; >> deliver; >> } >> >> deliver; >> >> } >> >> >> ############# >> >> sysctl >> >> net.ipv4.ip_local_port_range = 1024 65536 >> net.core.rmem_max=16777216 >> net.core.wmem_max=16777216 >> net.ipv4.tcp_rmem=4096 87380 16777216 >> net.ipv4.tcp_wmem=4096 65536 16777216 >> net.ipv4.tcp_fin_timeout = 3 >> net.ipv4.tcp_tw_recycle = 1 >> net.core.netdev_max_backlog = 30000 >> net.ipv4.tcp_no_metrics_save=1 >> net.core.somaxconn = 262144 >> net.ipv4.tcp_syncookies = 0 >> net.ipv4.tcp_max_orphans = 262144 >> net.ipv4.tcp_max_syn_backlog = 262144 >> net.ipv4.tcp_synack_retries = 2 >> net.ipv4.tcp_syn_retries = 2 >> >> These are mostly cargo culted from previous emails here. >> >> Cheers >> Artur >> > > > > -- > VP of Product Development > Instructables.com > > http://www.instructables.com/member/lebowski --- John Adams Twitter Operations j...@twitter.com http://twitter.com/netik _______________________________________________ varnish-dev mailing list varnish-dev@projects.linpro.no http://projects.linpro.no/mailman/listinfo/varnish-dev