Attached patch modifies mod_disk_cache to handle the 'Vary' header more gracefully.

It creates a new ".vary" file, that contains a list of all the varied headers. When the cache is serving out, it reads the ".vary" file, and computes a new URL hash using the Varied Headers and the Client's Values. This new hash is used to store the actual ".header" and ".data" files for that URL. This means each URL will have many Hashes when combined with the Varied headers.

If any varied page isn't fresh, it will fetch a new version of the page. When any new version is fetched, it will replace the ".vary" file atomically.

This is also discussed in PR 35211:
http://issues.apache.org/bugzilla/show_bug.cgi?id=35211

I would really love some testing on this. It seems to work on my simple test cases here, but it does some hackish things with the file paths, so I would really like someone to review it before I commit it to trunk.

This does not fix mod_mem_cache. A significantly different approach would need to be taken to fix it at the mod_cache level. I found it much easier to do it at the mod_disk_cache level.

-Paul
Index: modules/cache/mod_disk_cache.c
===================================================================
--- modules/cache/mod_disk_cache.c      (revision 189999)
+++ modules/cache/mod_disk_cache.c      (working copy)
@@ -63,10 +63,13 @@
     int dirlevels;              /* Number of levels of subdirectories */
     int dirlength;            /* Length of subdirectory names */
 #endif
+    int varied;
+    char *varyfile;          /* name of file where the vary info will go */
     char *datafile;          /* name of file where the data will go */
     char *hdrsfile;          /* name of file where the hdrs will go */
     char *hashfile;          /* Computed hash key for this URI */
     char *name;
+    char *key;
     apr_file_t *fd;          /* data file */
     apr_file_t *hfd;         /* headers file */
     apr_file_t *tfd;         /* temporary file for data */
@@ -96,18 +99,37 @@
 
 module AP_MODULE_DECLARE_DATA disk_cache_module;
 
+#ifndef DISK_CACHE_DEBUG
+#define DISK_CACHE_DEBUG 0
+#endif
+
 /* Forward declarations */
 static int remove_entity(cache_handle_t *h);
 static apr_status_t store_headers(cache_handle_t *h, request_rec *r, 
cache_info *i);
 static apr_status_t store_body(cache_handle_t *h, request_rec *r, 
apr_bucket_brigade *b);
 static apr_status_t recall_headers(cache_handle_t *h, request_rec *r);
 static apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, 
apr_bucket_brigade *bb);
+static apr_status_t read_array(request_rec *r, apr_array_header_t* arr, 
+                               apr_file_t *file);
 
 /*
  * Local static functions
  */
+#define CACHE_VARY_SUFFIX   ".vary"
 #define CACHE_HEADER_SUFFIX ".header"
 #define CACHE_DATA_SUFFIX   ".data"
+
+static char *vary_file(apr_pool_t *p, disk_cache_conf *conf,
+                         disk_cache_object_t *dobj, const char *name)
+{
+    if (!dobj->hashfile) {
+        dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels, 
+                                                conf->dirlength, name);
+    }
+    return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
+                       CACHE_VARY_SUFFIX, NULL);
+}
+
 static char *header_file(apr_pool_t *p, disk_cache_conf *conf,
                          disk_cache_object_t *dobj, const char *name)
 {
@@ -246,6 +268,59 @@
     return APR_SUCCESS;
 }
 
+static char* regen_key(apr_pool_t* p, apr_table_t* headers,
+                       apr_array_header_t* varray, const char* oldkey)
+{
+    struct iovec *iov;
+    int i,k;
+    int nvec;
+    const char *header;
+    const char **elts;
+
+    nvec = (varray->nelts * 2) + 2;
+    iov = apr_palloc(p, sizeof(struct iovec) * nvec);
+    elts = (const char **) varray->elts;
+    for(i=0, k=0; i < varray->nelts; i++) {
+        header = apr_table_get(headers, elts[i]);
+        if (!header) {
+            header = "";
+        }
+        iov[k].iov_base = (char*) elts[i];
+        iov[k].iov_len = strlen(elts[i]);
+        k++;
+        iov[k].iov_base = (char*) header;
+        iov[k].iov_len = strlen(header);
+        k++;
+    }
+    iov[k].iov_base = (char*)"\001";
+    iov[k].iov_len = 1;
+    k++;
+    iov[k].iov_base = (char*) oldkey;
+    iov[k].iov_len = strlen(oldkey);
+    k++;
+
+    return apr_pstrcatv(p, iov, k, NULL);
+}
+
+static int array_alphasort(const void *fn1, const void *fn2)
+{
+    return strcmp(*(char**)fn1, *(char**)fn2);
+}
+
+static void tokens_to_array(apr_pool_t* p, const char* data,
+                            apr_array_header_t* arr)
+{
+    char *token;
+
+    while ((token = ap_get_list_item(p, &data)) != NULL) {
+        *((const char **) apr_array_push(arr)) = token;
+    }
+
+    /* Sort it so that "Vary: A, B" and "Vary: B, A" are stored the same. */
+    qsort((void *) arr->elts, arr->nelts,
+         sizeof(char *), array_alphasort);
+}
+
 /*
  * Hook and mod_cache callback functions
  */
@@ -258,6 +333,8 @@
     disk_cache_object_t *dobj;
 
     if (conf->cache_root == NULL) {
+            ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
+                         "disk_cache,create_entity: Cannot cache files to disk 
without a CacheRoot specified.");
         return DECLINED;
     }
 
@@ -268,6 +345,8 @@
     obj->key = apr_pstrdup(r->pool, key);
 
     dobj->name = obj->key;
+    dobj->varied = 0;
+    dobj->varyfile = vary_file(r->pool, conf, dobj, key);
     dobj->datafile = data_file(r->pool, conf, dobj, key);
     dobj->hdrsfile = header_file(r->pool, conf, dobj, key);
     dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
@@ -277,6 +356,8 @@
 
 static int open_entity(cache_handle_t *h, request_rec *r, const char *key)
 {
+    char* nkey;
+    apr_file_t* vfd;
     apr_status_t rc;
     static int error_logged = 0;
     disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
@@ -304,10 +385,38 @@
     obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t));
 
     info = &(obj->info);
-    obj->key = (char *) key;
-    dobj->name = (char *) key;
-    dobj->datafile = data_file(r->pool, conf, dobj, key);
-    dobj->hdrsfile = header_file(r->pool, conf, dobj, key);
+
+    /* Attempt to open the varied headers file */
+    flags = APR_READ|APR_BINARY|APR_BUFFERED;
+    dobj->varyfile = vary_file(r->pool, conf, dobj, key);
+    dobj->varied = 0;
+    dobj->name = (char*)key;
+
+    rc = apr_file_open(&vfd, dobj->varyfile, flags, 0, r->pool);
+
+    if (rc == APR_SUCCESS) {
+        apr_array_header_t* varray;
+
+        varray = apr_array_make(r->pool, 5, sizeof(char*));
+        rc = read_array(r, varray, vfd);
+        if (rc != APR_SUCCESS) {
+            ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server,
+                         "disk_cache: Cannot parse vary header file: %s", 
+                         dobj->varyfile);
+            return DECLINED;
+        }
+        nkey = regen_key(r->pool, r->headers_in, varray, dobj->name);
+        dobj->hashfile = NULL;
+        dobj->varied = 1;
+    }
+    else {
+        nkey = apr_pstrdup(r->pool, key);
+    }
+
+    obj->key = nkey;
+    dobj->key = nkey;
+    dobj->datafile = data_file(r->pool, conf, dobj, nkey);
+    dobj->hdrsfile = header_file(r->pool, conf, dobj, nkey);
     dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
 
     /* Open the data file */
@@ -360,6 +469,72 @@
     return OK;
 }
 
+static apr_status_t read_array(request_rec *r, apr_array_header_t* arr, 
+                               apr_file_t *file)
+{
+    char w[MAX_STRING_LEN];
+    int p;
+    apr_status_t rv;
+
+    while (1) {
+        rv = apr_file_gets(w, MAX_STRING_LEN - 1, file);
+        if (rv != APR_SUCCESS) {
+            ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
+                          "Premature end of vary array.");
+            return rv;
+        }
+
+        p = strlen(w);
+        if (p > 0 && w[p - 1] == '\n') {
+            if (p > 1 && w[p - 2] == CR) {
+                w[p - 2] = '\0';
+            }
+            else {
+                w[p - 1] = '\0';
+            }
+        }
+
+        /* If we've finished reading the array, break out of the loop. */
+        if (w[0] == '\0') {
+            break;
+        }
+
+       *((const char **) apr_array_push(arr)) = apr_pstrdup(r->pool, w);
+    }
+
+    return APR_SUCCESS;
+}
+
+static apr_status_t store_array(apr_file_t *fd, apr_array_header_t* arr)
+{
+    int i;
+    apr_status_t rv;
+    struct iovec iov[2];
+    apr_size_t amt;
+    const char **elts;
+
+    elts = (const char **) arr->elts;
+
+    for (i = 0; i < arr->nelts; i++) {
+        iov[0].iov_base = (char*) elts[i];
+        iov[0].iov_len = strlen(elts[i]);
+        iov[1].iov_base = CRLF;
+        iov[1].iov_len = sizeof(CRLF) - 1;
+
+        rv = apr_file_writev(fd, (const struct iovec *) &iov, 2,
+                             &amt);
+        if (rv != APR_SUCCESS) {
+            return rv;
+        }
+    }
+
+    iov[0].iov_base = CRLF;
+    iov[0].iov_len = sizeof(CRLF) - 1;
+
+    return apr_file_writev(fd, (const struct iovec *) &iov, 1,
+                         &amt);
+}
+
 static apr_status_t read_table(cache_handle_t *handle, request_rec *r,
                                apr_table_t *table, apr_file_t *file)
 {
@@ -532,9 +707,62 @@
     disk_cache_info_t disk_info;
     struct iovec iov[2];
 
+#if DISK_CACHE_DEBUG
+    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
+                 "disk_cache: Storing headers for URL %s",  dobj->name);
+#endif
+
     /* This is flaky... we need to manage the cache_info differently */
     h->cache_obj->info = *info;
 
+    if (r->headers_out) {
+        const char *tmp;
+
+        tmp = apr_table_get(r->headers_out, "Vary");
+
+        if (tmp) {
+            apr_array_header_t* varray;
+
+            mkdir_structure(conf, dobj->varyfile, r->pool);
+
+            rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile,
+                             APR_CREATE | APR_WRITE | APR_BINARY |
+                             APR_BUFFERED | APR_EXCL, r->pool);
+
+            if (rv != APR_SUCCESS) {
+                return rv;
+            }
+
+            varray = apr_array_make(r->pool, 6, sizeof(char*));
+            tokens_to_array(r->pool, tmp, varray);
+
+#if DISK_CACHE_DEBUG
+            ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
+                 "disk_cache: varry array contents: %s", 
apr_array_pstrcat(r->pool, varray, ','));
+#endif
+            store_array(dobj->tfd, varray);
+
+            apr_file_close(dobj->tfd);
+
+            dobj->tfd = NULL;
+
+            rv = apr_file_rename(dobj->tempfile, dobj->varyfile, r->pool);
+            if (rv != APR_SUCCESS) {
+                ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
+                    "disk_cache: rename tempfile to varyfile failed: %s -> %s",
+                    dobj->tempfile, dobj->varyfile);
+                return rv;
+            }
+
+            dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, 
AP_TEMPFILE, NULL);
+            tmp = regen_key(r->pool, r->headers_in, varray, dobj->name);
+            dobj->varied = 1;
+            dobj->hashfile = NULL;
+            dobj->datafile = data_file(r->pool, conf, dobj, tmp);
+            dobj->hdrsfile = header_file(r->pool, conf, dobj, tmp);
+        }
+    }
+
     /* Remove old file with the same name. If remove fails, then
      * perhaps we need to create the directory tree where we are
      * about to write the new headers file.
@@ -607,8 +835,10 @@
     
     apr_file_close(dobj->hfd); /* flush and close */
 
+#if DISK_CACHE_DEBUG
     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
                  "disk_cache: Stored headers for URL %s",  dobj->name);
+#endif
     return APR_SUCCESS;
 }
 

Reply via email to