We considered that, but it is important that our backend traffic is gzipped. So we would end up with apache->varnish->apache->apache->varnish->apache, which is suboptimal at best!
On Mar 2, 2009, at 2:39 PM, John Adams wrote: > We fix this by front-ending varnish with apache. Not the best > solution but we still get to compress. > > -j > > On Mar 2, 2009, at 1:48 PM, Cloude Porteus wrote: > >> Artur, >> What is the issue with ESI & gzip? >> >> Does this mean that if we want to use ESI, we can't gzip the pages >> that have ESI includes? But we could still gzip the pages that are >> included by ESI. >> >> thanks, >> cloude >> >> On Mon, Mar 2, 2009 at 1:40 PM, Artur Bergman <s...@crucially.net> >> wrote: >>> >>> On Feb 27, 2009, at 2:24 PM, John Adams wrote: >>> >>>> cc'ing the varnish dev list for comments... >>>> >>>> On Feb 27, 2009, at 1:33 PM, Cloude Porteus wrote: >>>> >>>>> John, >>>>> Good to hear from you. You must be slammed at Twitter. I'm happy to >>>>> hear that ESI is holding up for you. It's been in my backlog >>>>> since you >>>>> mentioned it to me pre-Twitter. >>>>> >>>>> Any performance info would be great. >>>>> >>>> >>>> Any comments on our setup are welcome. You may also choose to >>>> call us >>>> crazypants. Many, many thanks to Artur Bergman of Wikia for >>>> helping us get >>>> this configuration straightened out. >>>> >>> >>> Thanks John :) >>> >>> I'll describe the settings we use. (We don't use ESI because of >>> gzip) >>> >>> The first important step is that we put the shmlog on tmpfs >>> >>> tmpfs /usr/var/varnish/ tmpfs >>> noatime,defaults,size=150M 0 0 >>> /dev/md0 /var/lib/varnish ext2 >>> noatime,nodiratime,norelatime 0 >>> 0 >>> >>> Notice also ext2: we don't care about journaling. (Ignore the >>> broken paths) >>> >>> This is because linux will asynchronously write the log to disk; >>> this puts a >>> large io pressure on the system (interfering with your normal >>> reads if you >>> use the same disks). It also scales the IO load with traffic and >>> not working >>> set. 
>>> >>> # Maximum number of open files (for ulimit -n) >>> NFILES=131072 >>> >>> # Locked shared memory (for ulimit -l) >>> # Default log size is 82MB + header >>> MEMLOCK=90000 >>> >>> DAEMON_COREFILE_LIMIT="unlimited" >>> >>> >>> DAEMON_OPTS="-a :80 \ >>> -T localhost:6082 \ >>> -f /etc/varnish/wikia.vcl \ >>> -p obj_workspace=4096 \ >>> # We have lots of objects >>> -p sess_workspace=32768 \ >>> # Need lots of sessoin space >>> -p listen_depth=8192 \ >>> -p ping_interval=1 \ >>> -s file,/var/lib/varnish/mmap,120G \ >>> # lots of mmap >>> -p log_hashstring=off \ >>> -h classic,250007 \ >>> # 2.5 mmilion objects >>> -p thread_pool_max=4000 \ >>> -p lru_interval=60 \ >>> -p esi_syntax=0x00000003 \ >>> -p sess_timeout=10 \ >>> -p thread_pools=4 \ >>> -p thread_pool_min=500 \ >>> # we force 4000 threads pre-created >>> # otherwise we run into overflows >>> -p shm_workspace=32768 \ >>> # avoid shm_mtx >>> -p srcaddr_ttl=0" >>> # avoid hash lookup >>> >>> # we link geoip into the vcl >>> CC_COMMAND='cc_command=exec cc -fpic -shared -Wl,-x -L/usr/local/ >>> lib/ >>> -lGeoIP -o %o %s' >>> >>> #### VCL >>> >>> # declare the function signature >>> # so we can use them >>> C{ >>> #include <string.h> >>> double TIM_real(void); >>> void TIM_format(double t, char *p); >>> }C >>> >>> >>> >>> # init GeoIP code >>> C{ >>> #include <dlfcn.h> >>> #include <stdlib.h> >>> #include <stdio.h> >>> #include <string.h> >>> #include <GeoIPCity.h> >>> #include <pthread.h> >>> >>> pthread_mutex_t geoip_mutex = PTHREAD_MUTEX_INITIALIZER; >>> >>> GeoIP* gi; >>> void geo_init () { >>> if(!gi) { >>> gi = >>> GeoIP_open_type(GEOIP_CITY_EDITION_REV1,GEOIP_MEMORY_CACHE); >>> } >>> } >>> }C >>> >>> vcl_recv { >>> >>> set req.url = regsub(req.url, "http://[^/]*",""); >>> #will normalize proxied requests, specificl curl -x foo:80 >>> >>> # get out error handler for geoiplookup >>> if(req.http.host == "geoiplookup.wikia.com") { >>> error 200 "Ok"; >>> } >>> >>> # lvs check >>> if (req.url == 
"/lvscheck.html") { >>> error 200 "Ok"; >>> } >>> >>> # normalize Accept-Encoding to reduce vary >>> if (req.http.Accept-Encoding) { >>> if (req.http.Accept-Encoding ~ "gzip") { >>> set req.http.Accept-Encoding = "gzip"; >>> } elsif (req.http.Accept-Encoding ~ "deflate") { >>> set req.http.Accept-Encoding = "deflate"; >>> } else { >>> unset req.http.Accept-Encoding; >>> } >>> } >>> >>> >>> # Yahoo uses this to check for 404 >>> if (req.url ~ "^/SlurpConfirm404") { >>> error 404 "Not found"; >>> } >>> >>> set req.grace = 360000s; #if the backend is down, just serve >>> >>> >>> # check for specific cookies, otherwise nuke them >>> # save them so we can re-inject them later in pipe or miss >>> set req.http.X-Orig-Cookie = req.http.Cookie; >>> if(req.http.Cookie ~ "(session|UserID|UserName|Token|LoggedOut)") { >>> # dont do anything, the user is logged in >>> } else { >>> # dont care about any other cookies >>> unset req.http.Cookie; >>> } >>> >>> >>> } >>> >>> # varnish XFF is broken, it doesn't chain them >>> # if you have chained varnishes, or trust AOL, you need to append >>> them >>> sub vcl_pipe { >>> # do the right XFF processing >>> set bereq.http.X-Forwarded-For = req.http.X-Forwarded-For; >>> set bereq.http.X-Forwarded-For = regsub(bereq.http.X-Forwarded- >>> For, "$", ", >>> "); >>> set bereq.http.X-Forwarded-For = regsub(bereq.http.X-Forwarded- >>> For, "$", >>> client.ip); >>> set bereq.http.Cookie = req.http.X-Orig-Cookie; >>> } >>> >>> >>> # this implements purging (we purge all 3 versions of the accept- >>> encoding, >>> none,gzip,deflate) >>> sub vcl_hit { >>> if (req.request == "PURGE") { >>> set obj.ttl = 0s; >>> error 200 "Purged."; >>> } >>> } >>> >>> sub vcl_miss { >>> >>> if (req.request == "PURGE") { >>> error 404 "Not purged"; >>> } >>> >>> set bereq.http.X-Forwarded-For = req.http.X-Forwarded-For; >>> set bereq.http.X-Forwarded-For = regsub(bereq.http.X-Forwarded- >>> For, "$", ", >>> "); >>> set bereq.http.X-Forwarded-For = 
regsub(bereq.http.X-Forwarded- >>> For, "$", >>> client.ip); >>> } >>> >>> >>> # this marks if something is cacheable or not, if it isn't >>> # say why >>> vcl_fetch { >>> # so we have access to this in deliver >>> set obj.http.X-Orighost = req.http.host; >>> set obj.http.X-Origurl = req.url; >>> if (!obj.cacheable) { >>> set obj.http.X-Cacheable = "NO:Not-Cacheable"; >>> pass; >>> } >>> if (obj.http.Cache-Control ~ "private") { >>> if(req.http.Cookie ~"(UserID|_session)") { >>> set obj.http.X-Cacheable = "NO:Got Session"; >>> } else { >>> set obj.http.X-Cacheable = >>> "NO:Cache-Control=private"; >>> } >>> pass; >>> } >>> if (obj.http.Set-Cookie ~ "(UserID|_session)") { >>> set obj.http.X-Cacheable = "NO:Set-Cookie"; >>> pass; >>> } >>> >>> set obj.http.X-Cacheable = "YES"; >>> set obj.grace = 360000s; >>> >>> >>> } >>> >>> >>> #Following sets X-Served-By, if it is already set it appends it >>> # it also says if it is a HIT, and how many hits >>> >>> sub vcl_deliver { >>> >>> #add or append Served By >>> if(!resp.http.X-Served-By) { >>> set resp.http.X-Served-By = "varnish8"; >>> if (obj.hits > 0) { >>> set resp.http.X-Cache = "HIT"; >>> } else { >>> set resp.http.X-Cache = "MISS"; >>> } >>> set resp.http.X-Cache-Hits = obj.hits; >>> } else { >>> # append current data >>> set resp.http.X-Served-By = regsub(resp.http.X-Served-By, "$", ", >>> varnish8"); >>> if (obj.hits > 0) { >>> set resp.http.X-Cache = regsub(resp.http.X-Cache, "$", ", HIT"); >>> } else { >>> set resp.http.X-Cache = regsub(resp.http.X-Cache, "$" , ", >>> MISS"); >>> } >>> set resp.http.X-Cache-Hits = regsub(resp.http.X-Cache-Hits, "$", >>> ", "); >>> set resp.http.X-Cache-Hits = regsub(resp.http.X-Cache-Hits, "$", >>> obj.hits); >>> } >>> >>> # >>> >>> # if the client is another DC, just remove stuff and deliver >>> if ( client.ip ~ LON >>> || client.ip ~ SJC >>> || client.ip ~ IOWA >>> ) { >>> unset resp.http.X-CPU-Time; >>> unset resp.http.X-Real-Time; >>> unset resp.http.X-Served-By-Backend; 
>>> unset resp.http.X-User-Id; >>> unset resp.http.X-Namespace-Number; >>> unset resp.http.X-Orighost; >>> unset resp.http.X-Origurl; >>> deliver; >>> } >>> # else do cache-control >>> # nuke the headers since they were generally meant for varnish >>> # these rules are mostly based on mediawiki rules >>> if ( resp.http.X-Pass-Cache-Control ) { >>> set resp.http.Cache-Control = resp.http.X-Pass-Cache-Control; >>> } elsif ( resp.status == 304 ) { >>> # no headers on if-modified since >>> } elsif ( resp.http.X-Origurl ~ ".*/index\.php.*(css|js)" >>> || resp.http.X-Origurl ~ "raw") { >>> # dont touch it let mediawiki decide >>> } elsif (resp.http.X-Orighost ~ "images.wikia.com") { >>> # lighttpd knows what it is doing >>> } elsif (resp.http.X-Orighost ~ "geoiplookup") { >>> } else { >>> #follow squid content here >>> set resp.http.Cache-Control = "private, s-maxage=0, max-age=0, >>> must-revalidate"; >>> } >>> >>> # this will calculate an Expire headers which is based on now+max- >>> age >>> # if you cache the Expire header, then it won't match max-age >>> since it is >>> static >>> if (!resp.status == 304) { >>> C{ >>> char *cache = VRT_GetHdr(sp, HDR_REQ, "\016cache-control:"); >>> char date[40]; >>> int max_age; >>> int want_equals = 0; >>> if(cache) { >>> while(*cache != '\0') { >>> if (want_equals && *cache == '=') { >>> cache++; >>> max_age = strtoul(cache, 0, 0); >>> break; >>> } >>> >>> if (*cache == 'm' && !memcmp(cache, "max-age", 7)) { >>> cache += 7; >>> want_equals = 1; >>> continue; >>> } >>> cache++; >>> } >>> if (max_age) { >>> TIM_format(TIM_real() + max_age, date); >>> VRT_SetHdr(sp, HDR_RESP, "\010Expires:", date, >>> vrt_magic_string_end); >>> } >>> } >>> }C >>> #; >>> } >>> >>> } >>> >>> >>> vcl_error { >>> # this implements geoip lookups inside varnish >>> # so clients can get the data without hitting the backend >>> if(req.http.host == "geoiplookup.wikia.com" || req.url == >>> "/__varnish/geoip") { >>> set obj.http.Content-Type = "text/plain"; 
>>> set obj.http.cache-control = "private, s-maxage=0, max-age=360"; >>> set obj.http.X-Orighost = req.http.host; >>> C{ >>> char *ip = VRT_IP_string(sp, VRT_r_client_ip(sp)); >>> char date[40]; >>> char json[255]; >>> >>> pthread_mutex_lock(&geoip_mutex); >>> >>> if(!gi) { geo_init(); } >>> >>> GeoIPRecord *record = GeoIP_record_by_addr(gi, ip); >>> if(record) { >>> snprintf(json, 255, "Geo = >>> {\"city\":\"%s\",\"country\":\"%s\",\"lat\":\"%f\",\"lon\":\"%f\", >>> \"classC\":\"%s\",\"netmask\":\"%d\"}", >>> record->city, >>> record->country_code, >>> record->latitude, >>> record->longitude, >>> ip, >>> GeoIP_last_netmask(gi) >>> ); >>> pthread_mutex_unlock(&geoip_mutex); >>> VRT_synth_page(sp, 0, json, vrt_magic_string_end); >>> } else { >>> pthread_mutex_unlock(&geoip_mutex); >>> VRT_synth_page(sp, 0, "Geo = {}", vrt_magic_string_end); >>> } >>> >>> >>> TIM_format(TIM_real(), date); >>> VRT_SetHdr(sp, HDR_OBJ, "\016Last-Modified:", date, >>> vrt_magic_string_end); >>> }C >>> # check if site is working >>> if(req.url ~ "lvscheck.html") { >>> synthetic {"varnish is okay"}; >>> deliver; >>> } >>> >>> deliver; >>> >>> } >>> >>> >>> ############# >>> >>> sysctl >>> >>> net.ipv4.ip_local_port_range = 1024 65536 >>> net.core.rmem_max=16777216 >>> net.core.wmem_max=16777216 >>> net.ipv4.tcp_rmem=4096 87380 16777216 >>> net.ipv4.tcp_wmem=4096 65536 16777216 >>> net.ipv4.tcp_fin_timeout = 3 >>> net.ipv4.tcp_tw_recycle = 1 >>> net.core.netdev_max_backlog = 30000 >>> net.ipv4.tcp_no_metrics_save=1 >>> net.core.somaxconn = 262144 >>> net.ipv4.tcp_syncookies = 0 >>> net.ipv4.tcp_max_orphans = 262144 >>> net.ipv4.tcp_max_syn_backlog = 262144 >>> net.ipv4.tcp_synack_retries = 2 >>> net.ipv4.tcp_syn_retries = 2 >>> >>> These are mostly cargo culted from previous emails here. 
>>> >>> Cheers >>> Artur >>> >> >> >> >> -- >> VP of Product Development >> Instructables.com >> >> http://www.instructables.com/member/lebowski > > --- > John Adams > Twitter Operations > j...@twitter.com > http://twitter.com/netik > > > > _______________________________________________ varnish-dev mailing list varnish-dev@projects.linpro.no http://projects.linpro.no/mailman/listinfo/varnish-dev