Hi,
This patch converts the mod_disk_cache cache directory structure to a
uniformly distributed two-level hierarchy. The admin specifies the number
of level-1 and level-2 directories, and the cache files are scattered
evenly across the level-2 directories by hashing each cache key.
Also, with this patch it is possible to place individual cache directories
on separate partitions, because the temporary files are now created in the
destination directory instead of in the cache root.
For example, after running Apache (proxy + cache) for a small network, the level-1 directories are populated as follows:
[EMAIL PROTECTED] cache]# sh files-per-directory.sh
dir: 00/ subs: 139 files: 632 size: 4.8M
dir: 01/ subs: 156 files: 765 size: 5.7M
dir: 02/ subs: 144 files: 626 size: 4.8M
dir: 03/ subs: 160 files: 714 size: 6.1M
dir: 04/ subs: 169 files: 820 size: 5.9M
dir: 05/ subs: 131 files: 590 size: 4.1M
dir: 06/ subs: 148 files: 677 size: 5.3M
dir: 07/ subs: 142 files: 644 size: 5.8M
dir: 08/ subs: 148 files: 749 size: 5.8M
dir: 09/ subs: 158 files: 711 size: 6.3M
dir: 0A/ subs: 146 files: 666 size: 5.1M
dir: 0B/ subs: 157 files: 701 size: 5.1M
dir: 0C/ subs: 157 files: 671 size: 5.2M
dir: 0D/ subs: 157 files: 711 size: 5.7M
dir: 0E/ subs: 149 files: 704 size: 5.6M
dir: 0F/ subs: 158 files: 742 size: 5.8M
--
Davi Arnaut
Index: modules/cache/cache_util.c
===================================================================
--- modules/cache/cache_util.c (revision 423984)
+++ modules/cache/cache_util.c (working copy)
@@ -19,6 +19,7 @@
#include "mod_cache.h"
#include <ap_provider.h>
+#include <util_md5.h>
/* -------------------------------------------------------------- */
@@ -489,54 +490,31 @@
y[sizeof(j) * 2] = '\0';
}
-static void cache_hash(const char *it, char *val, int ndepth, int nlength)
+static unsigned int cdb_string_hash(const char *str)
{
- apr_md5_ctx_t context;
- unsigned char digest[16];
- char tmp[22];
- int i, k, d;
- unsigned int x;
- static const char enc_table[64] =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_@";
+ unsigned int hash = 5381;
- apr_md5_init(&context);
- apr_md5_update(&context, (const unsigned char *) it, strlen(it));
- apr_md5_final(digest, &context);
+ while (*str)
+ hash = 33 * hash + *str++;
- /* encode 128 bits as 22 characters, using a modified uuencoding
- * the encoding is 3 bytes -> 4 characters* i.e. 128 bits is
- * 5 x 3 bytes + 1 byte -> 5 * 4 characters + 2 characters
- */
- for (i = 0, k = 0; i < 15; i += 3) {
- x = (digest[i] << 16) | (digest[i + 1] << 8) | digest[i + 2];
- tmp[k++] = enc_table[x >> 18];
- tmp[k++] = enc_table[(x >> 12) & 0x3f];
- tmp[k++] = enc_table[(x >> 6) & 0x3f];
- tmp[k++] = enc_table[x & 0x3f];
- }
-
- /* one byte left */
- x = digest[15];
- tmp[k++] = enc_table[x >> 2]; /* use up 6 bits */
- tmp[k++] = enc_table[(x << 4) & 0x3f];
-
- /* now split into directory levels */
- for (i = k = d = 0; d < ndepth; ++d) {
- memcpy(&val[i], &tmp[k], nlength);
- k += nlength;
- val[i + nlength] = '/';
- i += nlength + 1;
- }
- memcpy(&val[i], &tmp[k], 22 - k);
- val[i + 22 - k] = '\0';
+ return hash;
}
-CACHE_DECLARE(char *)ap_cache_generate_name(apr_pool_t *p, int dirlevels,
- int dirlength, const char *name)
+CACHE_DECLARE(char *)ap_cache_generate_name(apr_pool_t *p, unsigned int L1,
+ unsigned int L2, const char *name)
{
- char hashfile[66];
- cache_hash(name, hashfile, dirlevels, dirlength);
- return apr_pstrdup(p, hashfile);
+ char *key;
+ char *md5_hash;
+ unsigned int cdb_hash;
+
+ md5_hash = ap_md5_binary(p, (unsigned char *) name, (int) strlen(name));
+
+ cdb_hash = cdb_string_hash(md5_hash) / L2;
+
+ key = apr_psprintf(p, "%02X/%02X/%s", (cdb_hash / L2) % L1,
+ cdb_hash % L2, md5_hash);
+
+ return key;
}
/* Create a new table consisting of those elements from an input
Index: modules/cache/mod_cache.h
===================================================================
--- modules/cache/mod_cache.h (revision 423984)
+++ modules/cache/mod_cache.h (working copy)
@@ -274,8 +274,8 @@
CACHE_DECLARE(apr_time_t) ap_cache_hex2usec(const char *x);
CACHE_DECLARE(void) ap_cache_usec2hex(apr_time_t j, char *y);
-CACHE_DECLARE(char *) ap_cache_generate_name(apr_pool_t *p, int dirlevels,
- int dirlength,
+CACHE_DECLARE(char *) ap_cache_generate_name(apr_pool_t *p, unsigned int L1,
+ unsigned int L2,
const char *name);
CACHE_DECLARE(cache_provider_list *)ap_cache_get_providers(request_rec *r,
cache_server_conf *conf, apr_uri_t uri);
CACHE_DECLARE(int) ap_cache_liststr(apr_pool_t *p, const char *list,
Index: modules/cache/mod_disk_cache.c
===================================================================
--- modules/cache/mod_disk_cache.c (revision 423984)
+++ modules/cache/mod_disk_cache.c (working copy)
@@ -66,17 +66,38 @@
* Local static functions
*/
+static apr_status_t disk_mktemp(apr_file_t **fp, const char *dest, char
**tempfile,
+ apr_int32_t flags, apr_size_t cache_root_len,
+ apr_pool_t *p)
+{
+ apr_status_t rv;
+ struct iovec iov[2];
+
+ iov[0].iov_base = (char *) dest;
+ iov[0].iov_len = cache_root_len + DIR_LEVELS_LEN;
+
+ iov[1].iov_base = AP_TEMPFILE;
+ iov[1].iov_len = sizeof AP_TEMPFILE;
+
+ *tempfile = apr_pstrcatv(p, iov, 2, NULL);
+
+ rv = apr_file_mktemp(fp, *tempfile, flags, p);
+
+ return rv;
+}
+
static char *header_file(apr_pool_t *p, disk_cache_conf *conf,
disk_cache_object_t *dobj, const char *name)
{
if (!dobj->hashfile) {
- dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
- conf->dirlength, name);
+ dobj->hashfile = ap_cache_generate_name(p, conf->dirlevel1,
+ conf->dirlevel2, name);
}
if (dobj->prefix) {
return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
- dobj->hashfile, CACHE_HEADER_SUFFIX, NULL);
+ dobj->hashfile + DIR_LEVELS_LEN,
+ CACHE_HEADER_SUFFIX, NULL);
}
else {
return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
@@ -88,13 +109,14 @@
disk_cache_object_t *dobj, const char *name)
{
if (!dobj->hashfile) {
- dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
- conf->dirlength, name);
+ dobj->hashfile = ap_cache_generate_name(p, conf->dirlevel1,
+ conf->dirlevel2, name);
}
if (dobj->prefix) {
return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
- dobj->hashfile, CACHE_DATA_SUFFIX, NULL);
+ dobj->hashfile + DIR_LEVELS_LEN,
+ CACHE_DATA_SUFFIX, NULL);
}
else {
return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
@@ -359,7 +381,6 @@
dobj->root_len = conf->cache_root_len;
dobj->datafile = data_file(r->pool, conf, dobj, key);
dobj->hdrsfile = header_file(r->pool, conf, dobj, key);
- dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
return OK;
}
@@ -467,7 +488,6 @@
dobj->key = nkey;
dobj->name = key;
dobj->datafile = data_file(r->pool, conf, dobj, nkey);
- dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
/* Open the data file */
flags = APR_READ|APR_BINARY;
@@ -843,9 +863,9 @@
mkdir_structure(conf, dobj->hdrsfile, r->pool);
- rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile,
- APR_CREATE | APR_WRITE | APR_BINARY |
APR_EXCL,
- r->pool);
+ rv = disk_mktemp(&dobj->tfd, dobj->hdrsfile, &dobj->tempfile,
+ APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL,
+ conf->cache_root_len, r->pool);
if (rv != APR_SUCCESS) {
return rv;
@@ -876,7 +896,6 @@
return rv;
}
- dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root,
AP_TEMPFILE, NULL);
tmp = regen_key(r->pool, r->headers_in, varray, dobj->name);
dobj->prefix = dobj->hdrsfile;
dobj->hashfile = NULL;
@@ -885,11 +904,10 @@
}
}
+ rv = disk_mktemp(&dobj->hfd, dobj->hdrsfile, &dobj->tempfile,
+ APR_CREATE | APR_WRITE | APR_BINARY | APR_BUFFERED |
+ APR_EXCL, conf->cache_root_len, r->pool);
- rv = apr_file_mktemp(&dobj->hfd, dobj->tempfile,
- APR_CREATE | APR_WRITE | APR_BINARY |
- APR_BUFFERED | APR_EXCL, r->pool);
-
if (rv != APR_SUCCESS) {
return rv;
}
@@ -969,8 +987,6 @@
return rv;
}
- dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
-
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
"disk_cache: Stored headers for URL %s", dobj->name);
return APR_SUCCESS;
@@ -989,9 +1005,10 @@
* in file_cache_el_final().
*/
if (!dobj->tfd) {
- rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile,
- APR_CREATE | APR_WRITE | APR_BINARY |
- APR_BUFFERED | APR_EXCL, r->pool);
+ rv = disk_mktemp(&dobj->tfd, dobj->datafile, &dobj->tempfile,
+ APR_CREATE | APR_WRITE | APR_BINARY |
+ APR_BUFFERED | APR_EXCL, conf->cache_root_len,
+ r->pool);
if (rv != APR_SUCCESS) {
return rv;
}
@@ -1072,8 +1089,8 @@
disk_cache_conf *conf = apr_pcalloc(p, sizeof(disk_cache_conf));
/* XXX: Set default values */
- conf->dirlevels = DEFAULT_DIRLEVELS;
- conf->dirlength = DEFAULT_DIRLENGTH;
+ conf->dirlevel1 = DEFAULT_DIRLEVEL1;
+ conf->dirlevel2 = DEFAULT_DIRLEVEL2;
conf->maxfs = DEFAULT_MAX_FILE_SIZE;
conf->minfs = DEFAULT_MIN_FILE_SIZE;
@@ -1105,33 +1122,22 @@
* filename = "/key % prime1 /key %prime2/key %prime3"
*/
static const char
-*set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr, const char *arg)
+*set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr, const char *arg1,
+ const char *arg2)
{
disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
&disk_cache_module);
- int val = atoi(arg);
- if (val < 1)
+ int val1 = atoi(arg1);
+ int val2 = atoi(arg2);
+
+ if (val1 < 1 || val2 < 1)
return "CacheDirLevels value must be an integer greater than 0";
- if (val * conf->dirlength > CACHEFILE_LEN)
- return "CacheDirLevels*CacheDirLength value must not be higher than
20";
- conf->dirlevels = val;
- return NULL;
-}
-static const char
-*set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr, const char *arg)
-{
- disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
- &disk_cache_module);
- int val = atoi(arg);
- if (val < 1)
- return "CacheDirLength value must be an integer greater than 0";
- if (val * conf->dirlevels > CACHEFILE_LEN)
- return "CacheDirLevels*CacheDirLength value must not be higher than
20";
- conf->dirlength = val;
+ conf->dirlevel1 = val1;
+ conf->dirlevel2 = val2;
+
return NULL;
}
-
static const char
*set_cache_minfs(cmd_parms *parms, void *in_struct_ptr, const char *arg)
{
@@ -1153,10 +1159,8 @@
{
AP_INIT_TAKE1("CacheRoot", set_cache_root, NULL, RSRC_CONF,
"The directory to store cache files"),
- AP_INIT_TAKE1("CacheDirLevels", set_cache_dirlevels, NULL, RSRC_CONF,
+ AP_INIT_TAKE2("CacheDirLevels", set_cache_dirlevels, NULL, RSRC_CONF,
"The number of levels of subdirectories in the cache"),
- AP_INIT_TAKE1("CacheDirLength", set_cache_dirlength, NULL, RSRC_CONF,
- "The number of characters in subdirectory names"),
AP_INIT_TAKE1("CacheMinFileSize", set_cache_minfs, NULL, RSRC_CONF,
"The minimum file size to cache a document"),
AP_INIT_TAKE1("CacheMaxFileSize", set_cache_maxfs, NULL, RSRC_CONF,
Index: modules/cache/mod_disk_cache.h
===================================================================
--- modules/cache/mod_disk_cache.h (revision 423984)
+++ modules/cache/mod_disk_cache.h (working copy)
@@ -24,6 +24,8 @@
#define VARY_FORMAT_VERSION 3
#define DISK_FORMAT_VERSION 4
+#define DIR_LEVELS_LEN 6
+
#define CACHE_HEADER_SUFFIX ".header"
#define CACHE_DATA_SUFFIX ".data"
#define CACHE_VDIR_SUFFIX ".vary"
@@ -78,16 +80,16 @@
*/
/* TODO: Make defaults OS specific */
#define CACHEFILE_LEN 20 /* must be less than HASH_LEN/2 */
-#define DEFAULT_DIRLEVELS 3
-#define DEFAULT_DIRLENGTH 2
+#define DEFAULT_DIRLEVEL1 16
+#define DEFAULT_DIRLEVEL2 256
#define DEFAULT_MIN_FILE_SIZE 1
#define DEFAULT_MAX_FILE_SIZE 1000000
typedef struct {
const char* cache_root;
apr_size_t cache_root_len;
- int dirlevels; /* Number of levels of subdirectories */
- int dirlength; /* Length of subdirectory names */
+ unsigned int dirlevel1; /* Number of level 1 directories */
+ unsigned int dirlevel2; /* Number of level 2 subdirectories */
apr_size_t minfs; /* minumum file size for cached files */
apr_size_t maxfs; /* maximum file size for cached files */
} disk_cache_conf;