Hi,

I've written a patch for something that has annoyed me for a long time:

It's nice to have a really long grace period, such that even when all backend
servers go down, Varnish can keep most of a website up.

But setting a (really) long grace period (to catch most of the long tail) means
that expiry by TTL (the object's time_when is actually ttl + grace) is
effectively disabled, and objects mostly get nuked by LRU instead.
varnishstat figures for such a configuration might look like this:

     3451931          .            .   N expired objects
   239097244          .            .   N LRU nuked objects
  1059383291          .            .   N LRU moved objects
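
To illustrate the point (a minimal sketch, not code from the tree: the
predicate and its name obj_beyond_grace are made up, while o->ttl, o->grace
and HSH_Grace() are the names used in cache_hash.c), an object only becomes
eligible for expiry once ttl + grace has passed, so with a grace of, say, a
week, nearly everything gets evicted by LRU long before it can expire:

    /*
     * Sketch only (not code from the tree): the effective "may this
     * object expire?" test.  With a long grace it stays false long
     * after the TTL itself has passed, so objects get LRU-nuked
     * instead of expiring.
     */
    static int
    obj_beyond_grace(const struct object *o, double now)
    {
            return (o->ttl + HSH_Grace(o->grace) < now);
    }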


Even more importantly, the cache will contain many copies of each object, so it
will become unnecessarily huge (at least until the backend refresh
implementation is done, which we're working on). I am not using -spersistent, so
I can't tell from my own experience, but with persistent storage, using a long
grace time will probably be infeasible.

The attached patch basically shuffles some of phk's code around in cache_hash.c
(I guess I might have written half a dozen lines myself :) ) to collect unneeded
graced objects while looking for a graced object: all graced objects that match
the other criteria (think Vary) but are older than the youngest matching one
get released.
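
Roughly, the selection works like this (a simplified sketch only: locking,
busy objects, the Vary check and the workspace bookkeeping are left out, and
collect() is just a stand-in for the collect_oc() macro in the patch):

    VTAILQ_FOREACH_SAFE(oc, &oh->objcs, list, oct) {
            o = oc->obj;
            if (o->ttl >= sp->t_req)
                    break;          /* still fresh: use this object */
            if (o->ttl + HSH_Grace(o->grace) >= sp->t_req &&
                (grace_oc == NULL || grace_oc->obj->entered < o->entered)) {
                    /* found a younger graced copy: drop the older one */
                    if (grace_oc != NULL && grace_oc->refcnt == 0) {
                            VTAILQ_REMOVE(&oh->objcs, grace_oc, list);
                            collect(grace_oc);
                    }
                    grace_oc = oc;
            } else if (oc->refcnt == 0) {
                    /* neither fresh nor the best graced copy: drop it */
                    VTAILQ_REMOVE(&oh->objcs, oc, list);
                    collect(oc);
            }
    }

In the patch itself the collected cores are remembered on the worker workspace
and released via hsh_release() only after the objhead lock has been dropped.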

This should dramatically reduce the effective cache size on systems with long
grace, bring up the expired / nuked ratio, and speed up cache lookup, because
the object list hanging off the object head should shrink significantly.

The additional code will only kick in when a graced object needs to be looked
up, so it should not lower efficiency for the standard case. But I still think
a better place for this might be some point after inserting an object into the
cache.

The patch passes all tests, but I HAVE NOT TESTED IT IN PRODUCTION. Be warned.
Any feedback from pre-production or production tests is more than welcome.

Thanks, Nils
Index: bin/varnishd/cache_hash.c
===================================================================
--- bin/varnishd/cache_hash.c   (revision 5515)
+++ bin/varnishd/cache_hash.c   (working copy)
@@ -71,6 +71,10 @@
 
 static const struct hash_slinger *hash;
 
+void
+hsh_release(struct worker *w, struct object *o, struct objcore *oc, struct objhead *oh);
+
+
 double
 HSH_Grace(double g)
 {
@@ -95,6 +99,7 @@
                sp->obj->objstore->stevedore->object(sp);
 }
 
+
 /* Precreate an objhead and object for later use */
 void
 HSH_Prealloc(const struct sess *sp)
@@ -311,6 +316,20 @@
        return (oc);
 }
 
+/*
+ * Collect object core oc
+ * in the array ocp, which has spc bytes
+ * of space left and nobj entries so far.
+ *
+ */
+#define collect_oc(ocp, oc, spc, nobj) do {            \
+               xxxassert(spc >= sizeof *ocp);          \
+               oc->refcnt++;                           \
+               spc -= sizeof *ocp;                     \
+               ocp[nobj++] = oc;                       \
+       } while(0)
+
+
 /**********************************************************************
  */
 
@@ -319,10 +338,14 @@
 {
        struct worker *w;
        struct objhead *oh;
-       struct objcore *oc;
+       struct objcore *oc, **ocp, *oct;
        struct objcore *busy_oc, *grace_oc;
-       struct object *o;
+       struct object *o;
+       unsigned spc, nobj, n;
+       double grace_oc_entered;
 
+       grace_oc_entered = NAN;
+
        CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
        CHECK_OBJ_NOTNULL(sp->wrk, WORKER_MAGIC);
        CHECK_OBJ_NOTNULL(sp->http, HTTP_MAGIC);
@@ -350,12 +373,17 @@
                        w->nobjhead = NULL;
        }
 
+       /* prepare space to remember object cores we don't need any more */
+       spc = WS_Reserve(w->ws, 0);
+       ocp = (void*)w->ws->f;
+       nobj = 0;
+
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
        Lck_Lock(&oh->mtx);
        assert(oh->refcnt > 0);
        busy_oc = NULL;
        grace_oc = NULL;
-       VTAILQ_FOREACH(oc, &oh->objcs, list) {
+       VTAILQ_FOREACH_SAFE(oc, &oh->objcs, list, oct) {
                /* Must be at least our own ref + the objcore we examine */
                assert(oh->refcnt > 1);
                CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
@@ -385,9 +413,41 @@
                if (o->ttl >= sp->t_req)
                        break;
 
-               /* Remember any matching objects inside their grace period */
-               if (o->ttl + HSH_Grace(o->grace) >= sp->t_req)
+               /*
+                * Remember any matching objects inside their grace period
+                *
+                * XXX: Note on finding the best grace object: Ideally, we
+                *      could assume that the object list is ordered by
+                *      o->entered (from young to old), so we could simply break
+                *      when we have found the first grace object (which would
+                *      be the youngest).
+                *
+                *      But as long as HSH_Insert inserts Vampires out of
+                *      order, we have to be safe and not assume that the list
+                *      is ordered, so we choose the best grace_oc by the time
+                *      it entered the cache.
+                *
+                *      While at it, we throw away any old grace objects which
+                *      we no longer need.
+                *
+                *      We probably want to move this to some point during cache
+                *      insertion in order to make this process more efficient.
+                */
+
+               if ((o->ttl + HSH_Grace(o->grace) >= sp->t_req) &&
+                   (isnan(grace_oc_entered) || (grace_oc_entered < o->entered))) {
+                       /* XXX: Do we need the safety check for busy_oc ? */
+                       if (grace_oc && (grace_oc != busy_oc) &&
+                           (grace_oc->refcnt == 0)) {
+                               VTAILQ_REMOVE(&oh->objcs, grace_oc, list);
+                               collect_oc(ocp, grace_oc, spc, nobj);
+                       }
                        grace_oc = oc;
+                       grace_oc_entered = o->entered;
+               } else if (oc->refcnt == 0) {
+                       VTAILQ_REMOVE(&oh->objcs, oc, list);
+                       collect_oc(ocp, oc, spc, nobj);
+               }
        }
 
        /*
@@ -423,8 +483,7 @@
                assert(oh->refcnt > 1);
                Lck_Unlock(&oh->mtx);
                assert(hash->deref(oh));
-               *poh = oh;
-               return (oc);
+               goto return_oc;
        }
 
        if (busy_oc != NULL && !sp->hash_ignore_busy) {
@@ -443,7 +502,7 @@
                sp->objhead = oh;
                sp->wrk = NULL;
                Lck_Unlock(&oh->mtx);
-               return (NULL);
+               goto return_null;
        }
 
        /* Insert (precreated) objcore in objecthead */
@@ -456,7 +515,44 @@
        oc->objhead = oh;
        /* NB: do not deref objhead the new object inherits our reference */
        Lck_Unlock(&oh->mtx);
+
+       goto return_oc;
+
+  return_oc:
        *poh = oh;
+       goto return_finish;
+
+  return_null:
+       oc = NULL;
+
+  return_finish:
+
+       /*
+        * XXX: Cleaning up this way should be more efficient than keeping
+        * around a lot of objects which we'll eventually nuke (rather than
+        * expire) due to a full cache, but we could still be more efficient
+        * if we had the mechanics to destroy several objects at once.
+        *
+        * For instance, hash->deref(oh) will grab oh_mtx, which we had
+        * already acquired above, but we do need *some* level of isolation...
+        */
+
+       for (n = 0; n < nobj; n++) {
+               struct objcore *ooc;
+               struct object *oo;
+
+               ooc = ocp[n];
+               CHECK_OBJ_NOTNULL(ooc, OBJCORE_MAGIC);
+               oo = ooc->obj;
+               CHECK_OBJ_NOTNULL(oo, OBJECT_MAGIC);
+
+               hsh_release(w, oo, ooc, oh);
+       }
+
+       ocp = NULL;
+       nobj = 0;
+       WS_Release(w->ws, 0);
+
        return (oc);
 }
 
@@ -510,14 +606,11 @@
        VTAILQ_FOREACH(oc, &oh->objcs, list) {
                CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
                assert(oc->objhead == oh);
-
+
                if (oc->flags & OC_F_PERSISTENT)
                        SMP_Fixup(sp, oh, oc);
 
-               xxxassert(spc >= sizeof *ocp);
-               oc->refcnt++;
-               spc -= sizeof *ocp;
-               ocp[nobj++] = oc;
+               collect_oc(ocp, oc, spc, nobj);
        }
        Lck_Unlock(&oh->mtx);
 
@@ -676,7 +769,49 @@
        *oc = oc2;
 }
 
+/*
+ * last phase of HSH_Deref: Release an object which is not referenced any more
+ */
+
 void
+hsh_release(struct worker *w, struct object *o, struct objcore *oc, struct objhead *oh)
+{
+       if (oh != NULL)
+               BAN_DestroyObj(o);
+       AZ(o->ban);
+       DSL(0x40, SLT_Debug, 0, "Object %u workspace min free %u",
+           o->xid, WS_Free(o->ws_o));
+
+       if (o->esidata != NULL)
+               ESI_Destroy(o);
+       if (o->objcore != NULL && o->objcore->smp_seg != NULL) {
+               SMP_FreeObj(o);
+       } else {
+               HSH_Freestore(o);
+               if (o->objstore != NULL)
+                       STV_free(o->objstore);
+               else
+                       FREE_OBJ(o);
+       }
+       o = NULL;
+       w->stats.n_object--;
+
+       if (oc == NULL) {
+               AZ(oh);
+               return;
+       }
+       AN(oh);
+       FREE_OBJ(oc);
+       w->stats.n_objectcore--;
+
+       /* Drop our ref on the objhead */
+       assert(oh->refcnt > 0);
+       if (hash->deref(oh))
+               return;
+       HSH_DeleteObjHead(w, oh);
+}
+
+void
 HSH_Deref(struct worker *w, struct object **oo)
 {
        struct object *o;
@@ -711,38 +846,7 @@
        if (r != 0)
                return;
 
-       if (oh != NULL)
-               BAN_DestroyObj(o);
-       AZ(o->ban);
-       DSL(0x40, SLT_Debug, 0, "Object %u workspace min free %u",
-           o->xid, WS_Free(o->ws_o));
-
-       if (o->esidata != NULL)
-               ESI_Destroy(o);
-       if (o->objcore != NULL && o->objcore->smp_seg != NULL) {
-               SMP_FreeObj(o);
-       } else {
-               HSH_Freestore(o);
-               if (o->objstore != NULL)
-                       STV_free(o->objstore);
-               else
-                       FREE_OBJ(o);
-       }
-       o = NULL;
-       w->stats.n_object--;
-
-       if (oc == NULL) {
-               AZ(oh);
-               return;
-       }
-       AN(oh);
-       FREE_OBJ(oc);
-       w->stats.n_objectcore--;
-       /* Drop our ref on the objhead */
-       assert(oh->refcnt > 0);
-       if (hash->deref(oh))
-               return;
-       HSH_DeleteObjHead(w, oh);
+       hsh_release(w, o, oc, oh);
 }
 
 void