Justin Erenkrantz wrote:
Feel free to submit a patch that efficiently allows the constraint of
the cache size. I just don't see a way to do that as mod_disk_cache
does not have any indexing.
IMHO, instead of making a false promise, we should remove it. If we
were to add such a feature later, we can add such directives
accordingly. -- justin
After looking at the code and thinking about the performance hits involved
I do believe Justin is right. Therefore I created a little "htcacheclean"
utility that does the job of cache cleaning outside of Apache itself.
The utility is right now not complete and intended to be a base for further
discussion. Please see below for the code (based on 2.0.52).
--
Andreas Steinmetz SPAMmers use [EMAIL PROTECTED]
/* Copyright 2001-2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* htcacheclean.c: simple program for cleaning of
* the disk cache of the Apache HTTP server
*
* Contributed by Andreas Steinmetz <[EMAIL PROTECTED]>
* 8 Oct 2004
*/
#include "apr.h"
#include "apr_lib.h"
#include "apr_strings.h"
#include "apr_file_io.h"
#include "apr_file_info.h"
#include "apr_pools.h"
#include "apr_hash.h"
/* While DEBUG is defined, delete_file() and delete_entry() only print the
 * path they would remove ("would delete ...") instead of calling
 * apr_file_remove(). Remove this definition to enable real deletion. */
#define DEBUG
/* mod_disk_cache.c extract start */
/* Value compared against disk_cache_info_t.format by process_dir(); header
 * files carrying a different value are not added to the entry list. */
#define DISK_FORMAT_VERSION 0
/* Fixed-size record that process_dir() reads from the beginning of every
 * ".header" file.
 * NOTE(review): this is a copy of a definition from mod_disk_cache.c (see
 * the extract markers); presumably it has to match the layout written by
 * the server being cleaned -- confirm against the server version in use. */
typedef struct {
/* Indicates the format of the header struct stored on-disk. */
int format;
/* The HTTP status code returned for this response. */
int status;
/* The size of the entity name that follows. */
apr_size_t name_len;
/* The number of times we've cached this entity. */
apr_size_t entity_version;
/* Miscellaneous time values. */
apr_time_t date;
apr_time_t expire;
apr_time_t request_time;
apr_time_t response_time;
} disk_cache_info_t;
/* mod_disk_cache.c extract end */
/* File attributes requested from apr_dir_read() for each directory entry. */
#define DIRINFO (APR_FINFO_MTIME|APR_FINFO_SIZE|APR_FINFO_TYPE|APR_FINFO_NAME)

/* Bit flags stored in DIRENTRY.type describing which files were seen. */
enum {
    HEADER     = 1,             /* a ".header" file was found */
    DATA       = 2,             /* a ".data" file was found */
    TEMP       = 4,             /* an "aptmp*" temporary file */
    HEADERDATA = HEADER | DATA  /* both halves of a cache entry are present */
};
/* Per-directory bookkeeping record used by process_dir() while scanning a
 * single directory. Records are kept in a hash table and live only in the
 * scan's temporary sub-pool. */
typedef struct
{
/* Name relative to the cache root; for header/data files the extension
 * has been stripped. */
char *basename;
/* Bitwise OR of HEADER, DATA and TEMP. */
int type;
/* Modification time of the ".header" file (0 if none was seen). */
apr_time_t htime;
/* Modification time of the ".data" file (0 if none was seen). */
apr_time_t dtime;
/* Size in bytes of the ".header" file. */
apr_off_t hsize;
/* Size in bytes of the ".data" file. */
apr_off_t dsize;
} DIRENTRY;
/*
 * One indexed cache entry (a readable header/data pair). process_dir()
 * allocates these from the long-lived pool and prepends them to the list
 * headed by `anchor`; purge() walks that list to decide what to delete.
 *
 * The name is stored inline after the fixed fields, so an ENTRY must be
 * allocated with sizeof(ENTRY) + strlen(name) + 1 bytes. purge() marks an
 * entry as already deleted by setting basename[0] to '\0'.
 */
typedef struct _entry
{
    struct _entry *next;        /* next entry in the list, or NULL */
    apr_time_t expire;          /* `expire` value read from the header file */
    apr_time_t response_time;   /* `response_time` read from the header file */
    apr_time_t htime;           /* mtime of the ".header" file */
    apr_time_t dtime;           /* mtime of the ".data" file */
    apr_off_t hsize;            /* size of the ".header" file in bytes */
    apr_off_t dsize;            /* size of the ".data" file in bytes */
    /* Flexible array member (ISO C99) instead of the GNU-only [0] form. */
    char basename[];            /* name relative to the cache root */
} ENTRY;
/* Non-zero when -r was given: process_dir() then also deletes incomplete
 * or unreadable entries and temporary files. */
static int realclean;
/* Non-zero when -v was given: printstats() prints its summary. */
static int verbose;
/* Head of the singly linked list of indexed entries built by process_dir(). */
static ENTRY *anchor;
/* Time sampled once at startup in main(); purge() compares entry times
 * against it. */
static apr_time_t now;
/* Standard error stream, opened in main(); all diagnostics go here.
 * NOTE(review): unlike the other globals this one is not static -- confirm
 * whether external linkage is intended. */
apr_file_t *errfile;
/*
 * Remove the single file "<path>/<basename>".
 *
 * The full name is built in pool p. When DEBUG is defined nothing is
 * removed; the name is only reported on errfile.
 */
static void delete_file(char *path, char *basename, apr_pool_t *p)
{
    char *target = apr_pstrcat(p, path, "/", basename, NULL);

#ifdef DEBUG
    apr_file_printf(errfile, "would delete %s\n", target);
#else
    apr_file_remove(target, p);
#endif
}
/*
 * Remove both files of a cache entry: "<path>/<basename>.header" first,
 * then "<path>/<basename>.data".
 *
 * The file names are built in pool p. When DEBUG is defined nothing is
 * removed; each name is only reported on errfile.
 */
static void delete_entry(char *path, char *basename, apr_pool_t *p)
{
    static const char *const suffixes[] = { ".header", ".data" };
    char *target;
    int n;

    for (n = 0; n < 2; n++) {
        target = apr_pstrcat(p, path, "/", basename, suffixes[n], NULL);
#ifdef DEBUG
        apr_file_printf(errfile, "would delete %s\n", target);
#else
        apr_file_remove(target, p);
#endif
    }
}
/*
 * Scan one directory of the cache tree and index what it contains.
 *
 * Sub-directories (except those whose name starts with '.') are processed
 * recursively. Regular files are grouped by their name with the ".header"
 * or ".data" extension removed; extension-less files whose name starts with
 * "aptmp" are recorded as temporary files.
 *
 * After the scan, every header/data pair whose header file can be read and
 * whose format field equals DISK_FORMAT_VERSION is prepended to the global
 * `anchor` list (allocated from `pool`, so it outlives this call). When
 * `realclean` is set, pairs that could not be indexed, lone header or data
 * files and temporary files are deleted.
 *
 * path     directory to scan. NOTE: it is truncated in place to `baselen`
 *          characters once the directory has been read.
 * baselen  length of the cache root prefix at the start of `path`.
 * pool     long-lived pool; a sub-pool is used for all temporary data.
 *
 * Returns 0 on success, 1 if this directory or any directory below it
 * could not be opened.
 */
static int process_dir(char *path, int baselen, apr_pool_t *pool)
{
    apr_dir_t *dir;
    apr_pool_t *p;
    apr_hash_t *h;
    apr_hash_index_t *i;
    apr_file_t *fd;
    apr_finfo_t info;
    apr_size_t len;
    char *nextpath;
    char *ext;
    DIRENTRY *d;
    ENTRY *e;
    int skip;
    disk_cache_info_t disk_info;

    if (apr_dir_open(&dir, path, pool) != APR_SUCCESS)
        return 1;

    apr_pool_create(&p, pool);
    h = apr_hash_make(p);
    fd = NULL;

    /* Number of leading characters of `path` to drop to obtain a name
     * relative to the cache root (also drops the '/' after the root when
     * scanning a sub-directory). */
    skip = baselen;
    if (path[baselen])
        skip++;

    while (apr_dir_read(&info, DIRINFO, dir) == APR_SUCCESS) {
        if (info.filetype == APR_DIR) {
            if (info.name[0] == '.')
                continue;
            nextpath = apr_pstrcat(p, path, "/", info.name, NULL);
            if (process_dir(nextpath, baselen, pool)) {
                /* Release what this level holds before giving up. */
                apr_dir_close(dir);
                apr_pool_destroy(p);
                return 1;
            }
            continue;
        }
        if (info.filetype != APR_REG)
            continue;

        ext = strrchr(info.name, '.');
        if (!ext) {
            if (!strncasecmp(info.name, "aptmp", 5)) {
                d = apr_pcalloc(p, sizeof(DIRENTRY));
                d->basename = apr_pstrcat(p, path + skip, "/", info.name, NULL);
                d->type = TEMP;
                /* Key on the entry's own name; `nextpath` is not valid
                 * for this file at this point. */
                apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
            }
            continue;
        }

        if (!strcasecmp(ext, ".header")) {
            *ext = '\0';    /* strip the extension in place */
            nextpath = apr_pstrcat(p, path, "/", info.name, NULL);
            d = apr_hash_get(h, nextpath + skip, APR_HASH_KEY_STRING);
            if (!d) {
                d = apr_pcalloc(p, sizeof(DIRENTRY));
                d->basename = apr_pstrcat(p, path + skip, "/", info.name, NULL);
                d->type = HEADER;
            }
            else
                d->type |= HEADER;
            d->htime = info.mtime;
            d->hsize = info.size;
            apr_hash_set(h, nextpath + skip, APR_HASH_KEY_STRING, d);
            continue;
        }

        if (!strcasecmp(ext, ".data")) {
            *ext = '\0';    /* strip the extension in place */
            nextpath = apr_pstrcat(p, path, "/", info.name, NULL);
            d = apr_hash_get(h, nextpath + skip, APR_HASH_KEY_STRING);
            if (!d) {
                d = apr_pcalloc(p, sizeof(DIRENTRY));
                d->basename = apr_pstrcat(p, path + skip, "/", info.name, NULL);
                d->type = DATA;
            }
            else
                d->type |= DATA;
            d->dtime = info.mtime;
            d->dsize = info.size;
            apr_hash_set(h, nextpath + skip, APR_HASH_KEY_STRING, d);
            continue;
        }
    }
    apr_dir_close(dir);

    /* From here on `path` is the cache root; the recorded basenames are
     * relative to it. */
    path[baselen] = '\0';

    for (i = apr_hash_first(p, h); i; i = apr_hash_next(i)) {
        apr_hash_this(i, NULL, NULL, (void **)(&d));
        if (d->type == HEADERDATA) {
            nextpath = apr_pstrcat(p, path, "/", d->basename, ".header", NULL);
            if (apr_file_open(&fd, nextpath, APR_READ, APR_OS_DEFAULT, p)
                == APR_SUCCESS) {
                len = sizeof(disk_cache_info_t);
                if (apr_file_read_full(fd, &disk_info, len, &len)
                    == APR_SUCCESS) {
                    apr_file_close(fd);
                    if (disk_info.format == DISK_FORMAT_VERSION) {
                        /* The name is stored inline after the struct. */
                        e = apr_palloc(pool, sizeof(ENTRY) +
                                       strlen(d->basename) + 1);
                        e->next = anchor;
                        anchor = e;
                        e->expire = disk_info.expire;
                        e->response_time = disk_info.response_time;
                        e->htime = d->htime;
                        e->dtime = d->dtime;
                        e->hsize = d->hsize;
                        e->dsize = d->dsize;
                        strcpy(e->basename, d->basename);
                        continue;
                    }
                }
                else
                    apr_file_close(fd);
            }
            /* Header unreadable, too short, or of an unknown format. */
            if (realclean)
                delete_entry(path, d->basename, p);
        }
        else if (realclean) {
            if (d->type == HEADER || d->type == DATA)
                delete_entry(path, d->basename, p);
            else
                delete_file(path, d->basename, p);
        }
    }

    apr_pool_destroy(p);
    return 0;
}
/*
 * Print a one-line size summary to errfile; does nothing unless the global
 * `verbose` flag is set.
 *
 * total  sum of entry sizes before purging, in bytes
 * sum    sum of entry sizes after purging, in bytes
 * max    configured size limit, in bytes
 *
 * All three values are printed in units of 1024 bytes. They are formatted
 * as apr_off_t (not narrowed to int) so that very large caches are not
 * reported with truncated numbers.
 */
static void printstats(apr_off_t total, apr_off_t sum, apr_off_t max)
{
    if (!verbose)
        return;

    apr_file_printf(errfile,
                    "Statistics: total was %" APR_OFF_T_FMT
                    "K, total now %" APR_OFF_T_FMT
                    "K, limit %" APR_OFF_T_FMT "K\n",
                    total / 1024, sum / 1024, max / 1024);
}
/*
 * Delete indexed cache entries until their combined size is at most `max`.
 *
 * path  cache root directory; entry names are relative to it
 * pool  parent pool for the short-lived pools used while deleting
 * max   size limit in bytes
 *
 * Works on the global `anchor` list in up to three passes:
 *   1. entries whose response time or file mtimes lie in the future
 *      (relative to `now`) are deleted;
 *   2. if still over the limit, all entries whose expiry time has passed
 *      are deleted;
 *   3. if still over the limit, the entry with the oldest data-file mtime
 *      is deleted repeatedly until the limit is met.
 * A deleted entry stays in the list but is marked by an empty basename.
 * Statistics are reported through printstats() before returning.
 */
static void purge(char *path, apr_pool_t *pool, apr_off_t max)
{
    apr_pool_t *p;
    apr_off_t sum;
    apr_off_t total;
    ENTRY *e;
    ENTRY *oldest;

    sum = 0;
    for (e = anchor; e; e = e->next) {
        sum += e->hsize;
        sum += e->dsize;
    }
    total = sum;
    if (sum <= max) {
        printstats(total, sum, max);
        return;
    }

    /* Pass 1: entries with timestamps in the future. */
    apr_pool_create(&p, pool);
    for (e = anchor; e; e = e->next) {
        if (e->response_time > now || e->htime > now || e->dtime > now) {
            delete_entry(path, e->basename, p);
            sum -= e->hsize;
            sum -= e->dsize;
            e->basename[0] = '\0';
        }
    }
    apr_pool_destroy(p);
    if (sum <= max) {
        printstats(total, sum, max);
        return;
    }

    /* apr_off_t must be printed with APR_OFF_T_FMT; "%d" mismatches the
     * argument size wherever apr_off_t is wider than int. */
    apr_file_printf(errfile,
                    "sum %" APR_OFF_T_FMT ", max %" APR_OFF_T_FMT "\n",
                    sum, max);

    /* Pass 2: expired entries that have not been deleted yet. */
    apr_pool_create(&p, pool);
    for (e = anchor; e; e = e->next) {
        if (e->expire < now && e->basename[0]) {
            delete_entry(path, e->basename, p);
            sum -= e->hsize;
            sum -= e->dsize;
            e->basename[0] = '\0';
        }
    }
    apr_pool_destroy(p);

    /* Pass 3: oldest data file first, until the limit is met. */
    apr_pool_create(&p, pool);
    while (sum > max) {
        oldest = NULL;
        for (e = anchor; e; e = e->next) {
            if (e->basename[0] && (!oldest || e->dtime < oldest->dtime))
                oldest = e;
        }
        if (!oldest)
            break;      /* nothing left to delete; never dereference NULL */
        delete_entry(path, oldest->basename, p);
        sum -= oldest->hsize;
        sum -= oldest->dsize;
        oldest->basename[0] = '\0';
    }
    apr_pool_destroy(p);

    printstats(total, sum, max);
}
/*
 * Write the command-line help text to stderr and terminate the process
 * with exit status 1. Does not return.
 */
static void usage(void)
{
    static const char *const help[] = {
        "htcacheclean -- program for cleaning the disk cache.\n",
        "Usage: htcacheclean [-vr] -pPATH -lLIMIT\n",
        "Options:\n",
        " -v Be verbose and print statistics.\n",
        " -r Clean thoroughly. This assumes that the Apache "
        "web server\n is not running.\n",
        " -p Specify PATH as the root of the disk cache.\n",
        " -l Specify LIMIT as the total disk cache size "
        "limit in KBytes.\n",
        NULL
    };
    const char *const *line;

    for (line = help; *line != NULL; line++)
        fputs(*line, stderr);

    exit(1);
}
int main(int argc, const char * const argv[])
{
apr_off_t max;
int i;
const char *arg;
apr_pool_t *pool;
char *proxypath;
char *path;
max = 0;
verbose = 0;
realclean = 0;
proxypath = NULL;
anchor = NULL;
apr_app_initialize(&argc, &argv, NULL);
apr_pool_create(&pool, NULL);
apr_file_open_stderr(&errfile, pool);
now = apr_time_now();
for (i = 1; i < argc; i++) {
arg = argv[i];
if (*arg != '-')
break;
while (*++arg != '\0') {
switch (*arg) {
case 'v':
verbose = 1;
break;
case 'r':
realclean = 1;
break;
case 'l':
max = apr_atoi64(++arg);
while (*arg != '\0')
++arg;
--arg;
break;
case 'p':
proxypath = apr_pstrdup(pool, ++arg);
apr_filepath_set(proxypath, pool);
while (*arg != '\0')
++arg;
--arg;
break;
}
}
}
if (!proxypath || max<=0) {
usage();
}
apr_filepath_get(&path, 0, pool);
if (!process_dir(path, strlen(path), pool)) {
purge(path, pool, max*1024);
} else {
apr_file_printf(errfile,
"An error occurred, cache cleaning aborted.\n");
apr_pool_destroy(pool);
return 1;
}
apr_pool_destroy(pool);
return 0;
}