Too many changes in one patch. Break this up into multiple consumable in 15 minute patches and I'll review them.
Some more work, analysis, and tests yielded apr_file_gets() and MD5 as two more bottlenecks.
I've already committed a fix to APR HEAD for apr_file_gets() - what it would do was constantly call apr_file_read() for one byte - but there was an enormous overhead implied in that. And, mod_disk_cache was relying upon that to load the headers from the disk. That was a super-big bottleneck and is responsible for the big jump here in performance. Modules like mod_negotiation also go *a lot* faster after that change.
The other bottleneck I looked at was MD5 as the on-disk naming scheme. I think MD5 is a poor choice here because it's not very fast. However, we were calling MD5 *twice* - one for the header and one for the data file - and each had the same MD5 input! So, I implemented a lazy caching routine there. Ideally, switching to a variant of the times-33 hash might work out better. *shrug*
Here's some completely unscientific benchmarks on my Mac (flood/localhost):
No cache: Requests: 3500 Time: 27.02 Req/Sec: 129.66 mod_mem_cache: Requests: 3500 Time: 25.18 Req/Sec: 139.23 mod_disk_cache: Requests: 3500 Time: 13.58 Req/Sec: 257.83
(My test case is using mod_autoindex, mod_negotiation, and small files.)
* modules/experimental/mod_disk_cache.c: Don't call MD5 twice for the same value.
Index: modules/experimental/mod_disk_cache.c
===================================================================
RCS file: /home/cvs/httpd-2.0/modules/experimental/mod_disk_cache.c,v
retrieving revision 1.52
diff -u -r1.52 mod_disk_cache.c
--- modules/experimental/mod_disk_cache.c 18 Mar 2004 21:40:12 -0000 1.52
+++ modules/experimental/mod_disk_cache.c 2 Aug 2004 10:03:23 -0000
@@ -36,6 +36,7 @@
#endif
char *datafile; /* name of file where the data will go */
char *hdrsfile; /* name of file where the hdrs will go */
+ char *hashfile; /* Computed hash key for this URI */
char *name;
apr_time_t version; /* update count of the file */
apr_file_t *fd; /* data file */
@@ -88,20 +89,26 @@
*/
#define CACHE_HEADER_SUFFIX ".header"
#define CACHE_DATA_SUFFIX ".data"
-static char *header_file(apr_pool_t *p, int dirlevels, int dirlength,
- const char *root, const char *name)
+static char *header_file(apr_pool_t *p, disk_cache_conf *conf,
+ disk_cache_object_t *dobj, const char *name)
{
- char *hashfile;
- hashfile = generate_name(p, dirlevels, dirlength, name);
- return apr_pstrcat(p, root, "/", hashfile, CACHE_HEADER_SUFFIX, NULL);
+ if (!dobj->hashfile) {
+ dobj->hashfile = generate_name(p, conf->dirlevels, conf->dirlength,
+ name);
+ }
+ return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
+ CACHE_HEADER_SUFFIX, NULL);
}-static char *data_file(apr_pool_t *p, int dirlevels, int dirlength,
- const char *root, const char *name)
+static char *data_file(apr_pool_t *p, disk_cache_conf *conf,
+ disk_cache_object_t *dobj, const char *name)
{
- char *hashfile;
- hashfile = generate_name(p, dirlevels, dirlength, name);
- return apr_pstrcat(p, root, "/", hashfile, CACHE_DATA_SUFFIX, NULL);
+ if (!dobj->hashfile) {
+ dobj->hashfile = generate_name(p, conf->dirlevels, conf->dirlength,
+ name);
+ }
+ return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
+ CACHE_DATA_SUFFIX, NULL);
}static void mkdir_structure(disk_cache_conf *conf, char *file, apr_pool_t *pool)
@@ -136,8 +143,7 @@
if (dobj->fd) {
apr_file_flush(dobj->fd);
if (!dobj->datafile) {
- dobj->datafile = data_file(r->pool, conf->dirlevels, conf->dirlength,
- conf->cache_root, h->cache_obj->key);
+ dobj->datafile = data_file(r->pool, conf, dobj, h->cache_obj->key);
}
/* Remove old file with the same name. If remove fails, then
* perhaps we need to create the directory tree where we are
@@ -362,8 +368,6 @@
static int error_logged = 0;
disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
&disk_cache_module);
- char *data;
- char *headers;
apr_file_t *fd;
apr_file_t *hfd;
apr_finfo_t finfo;
@@ -387,44 +391,39 @@
return DECLINED;
}
- data = data_file(r->pool, conf->dirlevels, conf->dirlength, - conf->cache_root, key); - headers = header_file(r->pool, conf->dirlevels, conf->dirlength, - conf->cache_root, key); + /* Create and init the cache object */ + h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(cache_object_t)); + obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t)); + + info = &(obj->info); + obj->key = (char *) key; + dobj->name = (char *) key; + dobj->datafile = data_file(r->pool, conf, dobj, key); + dobj->hdrsfile = header_file(r->pool, conf, dobj, key);
/* Open the data file */
- rc = apr_file_open(&fd, data, APR_READ|APR_BINARY, 0, r->pool);
+ rc = apr_file_open(&dobj->fd, dobj->datafile, APR_READ|APR_BINARY, 0,
+ r->pool);
if (rc != APR_SUCCESS) {
/* XXX: Log message */
return DECLINED;
} /* Open the headers file */
- rc = apr_file_open(&hfd, headers, APR_READ|APR_BINARY, 0, r->pool);
+ rc = apr_file_open(&dobj->hfd, dobj->hdrsfile, APR_READ|APR_BINARY, 0,
+ r->pool);
if (rc != APR_SUCCESS) {
/* XXX: Log message */
return DECLINED;
}- /* Create and init the cache object */
- h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(cache_object_t));
- obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t));
-
- info = &(obj->info);
- obj->key = (char *) key;
- dobj->name = (char *) key;
- dobj->fd = fd;
- dobj->hfd = hfd;
- dobj->datafile = data;
- dobj->hdrsfile = headers;
-
- rc = apr_file_info_get(&finfo, APR_FINFO_SIZE, fd);
+ rc = apr_file_info_get(&finfo, APR_FINFO_SIZE, dobj->fd);
if (rc == APR_SUCCESS) {
dobj->file_size = finfo.size;
} /* Read the bytes to setup the cache_info fields */
- rc = file_cache_read_mydata(hfd, info, dobj);
+ rc = file_cache_read_mydata(dobj->hfd, info, dobj);
if (rc != APR_SUCCESS) {
/* XXX log message */
return DECLINED;
@@ -544,10 +543,7 @@ if (!hfd) {
if (!dobj->hdrsfile) {
- dobj->hdrsfile = header_file(r->pool,
- conf->dirlevels,
- conf->dirlength,
- conf->cache_root,
+ dobj->hdrsfile = header_file(r->pool, conf, dobj,
h->cache_obj->key);
}
