Justin Erenkrantz wrote:
Feel free to submit a patch that efficiently allows the constraint of the cache size. I just don't see a way to do that as mod_disk_cache does not have any indexing.

IMHO, instead of making a false promise, we should remove it. If we were to add such a feature later, we can add such directives accordingly. -- justin

After looking at the code and thinking about the performance hits involved, I do believe Justin is right. Therefore I created a little "htcacheclean" utility that does the job of cache cleaning outside of Apache itself. The utility is not yet complete and is intended to be a base for further discussion. Please see below for the code (based on 2.0.52). -- Andreas Steinmetz SPAMmers use [EMAIL PROTECTED]


/* Copyright 2001-2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * htcacheclean.c: simple program for cleaning of
 * the disk cache of the Apache HTTP server
 *
 * Contributed by Andreas Steinmetz <[EMAIL PROTECTED]>
 * 8 Oct 2004
 */

#include "apr.h"
#include "apr_lib.h"
#include "apr_strings.h"
#include "apr_file_io.h"
#include "apr_file_info.h"
#include "apr_pools.h"
#include "apr_hash.h"
#include "apr_general.h"

/* string (strlen, strrchr, strcasecmp, ...) and stdio (fprintf) prototypes */
#define APR_WANT_STRFUNC
#define APR_WANT_STDIO
#include "apr_want.h"

/* exit(), atexit() */
#include <stdlib.h>

/*
 * Dry-run switch: while DEBUG is defined, delete_file() and delete_entry()
 * only print "would delete <path>" to errfile instead of removing anything.
 */
#define DEBUG

/* mod_disk_cache.c extract start */

/*
 * Fixed-size record that process_dir() reads from the beginning of every
 * ".header" file. Headers whose `format` field differs from
 * DISK_FORMAT_VERSION are not accepted as valid cache entries.
 * NOTE(review): this is a copy of the definition in mod_disk_cache.c and
 * presumably has to be kept in sync with it -- confirm when updating.
 */
#define DISK_FORMAT_VERSION 0
typedef struct {
    /* Indicates the format of the header struct stored on-disk. */
    int format;
    /* The HTTP status code returned for this response.  */
    int status;
    /* The size of the entity name that follows. */
    apr_size_t name_len;
    /* The number of times we've cached this entity. */
    apr_size_t entity_version;
    /* Miscellaneous time values. */
    apr_time_t date;
    apr_time_t expire;
    apr_time_t request_time;
    apr_time_t response_time;
} disk_cache_info_t;

/* mod_disk_cache.c extract end */

#define DIRINFO (APR_FINFO_MTIME|APR_FINFO_SIZE|APR_FINFO_TYPE|APR_FINFO_NAME)

#define HEADER 1
#define DATA 2
#define TEMP 4
#define HEADERDATA (HEADER|DATA)

/*
 * Per-directory bookkeeping record, stored in the hash table that
 * process_dir() builds while reading one directory.
 */
typedef struct
{
    char *basename;     /* entry name relative to the cache root, no extension */
    int type;           /* combination of HEADER / DATA / TEMP */
    apr_time_t htime;   /* mtime of the .header file */
    apr_time_t dtime;   /* mtime of the .data file */
    apr_off_t hsize;    /* size of the .header file */
    apr_off_t dsize;    /* size of the .data file */
} DIRENTRY;

/*
 * One accepted cache entry (header and data file both present, header
 * readable and of the expected format). Entries are chained into the
 * singly linked list headed by `anchor` and consumed by purge().
 *
 * The entry name is stored inline after the fixed fields; allocate with
 * sizeof(ENTRY) + strlen(name) + 1.
 */
typedef struct _entry
{
    struct _entry *next;        /* next entry in the list, NULL at the end */
    apr_time_t expire;          /* expiry time read from the header file */
    apr_time_t response_time;   /* response time read from the header file */
    apr_time_t htime;           /* mtime of the .header file */
    apr_time_t dtime;           /* mtime of the .data file */
    apr_off_t hsize;            /* size of the .header file */
    apr_off_t dsize;            /* size of the .data file */
    char basename[];            /* name relative to cache root; "" once deleted */
} ENTRY;

static int realclean;       /* -r given: also delete invalid/orphaned files */
static int verbose;         /* -v given: printstats() prints a summary */
static ENTRY *anchor;       /* head of the list of accepted cache entries */
static apr_time_t now;      /* time of program start, set once in main() */
static apr_file_t *errfile; /* APR handle for stderr, opened in main() */

/*
 * Remove the single file "<path>/<basename>".
 *
 * path:     directory prefix (the cache root)
 * basename: file name relative to path
 * p:        pool used for the temporary path string
 *
 * When DEBUG is defined nothing is removed; the path that would have been
 * deleted is printed to errfile instead. The result of the removal is
 * deliberately not checked (best effort).
 */
static void delete_file(char *path, char *basename, apr_pool_t *p)
{
    char *nextpath;

    nextpath = apr_pstrcat(p, path, "/", basename, NULL);
#ifndef DEBUG
    apr_file_remove(nextpath, p);
#else
    apr_file_printf(errfile, "would delete %s\n", nextpath);
#endif
}

/*
 * Remove both files of a cache entry: "<path>/<basename>.header" and
 * "<path>/<basename>.data".
 *
 * path:     directory prefix (the cache root)
 * basename: entry name relative to path, without extension
 * p:        pool used for the temporary path strings
 *
 * When DEBUG is defined nothing is removed; the paths that would have been
 * deleted are printed to errfile instead. Removal results are deliberately
 * not checked, so a missing half of the pair is not an error.
 */
static void delete_entry(char *path, char *basename, apr_pool_t *p)
{
    char *nextpath;

    nextpath = apr_pstrcat(p, path, "/", basename, ".header", NULL);
#ifndef DEBUG
    apr_file_remove(nextpath, p);
#else
    apr_file_printf(errfile, "would delete %s\n", nextpath);
#endif

    nextpath = apr_pstrcat(p, path, "/", basename, ".data", NULL);
#ifndef DEBUG
    apr_file_remove(nextpath, p);
#else
    apr_file_printf(errfile, "would delete %s\n", nextpath);
#endif
}

/*
 * Recursively scan one cache directory.
 *
 * path:    directory to scan; its first `baselen` characters are the cache
 *          root. The string is truncated to the cache root (path[baselen]
 *          is set to '\0') once the directory has been read.
 * baselen: length of the cache root prefix inside `path`
 * pool:    long-lived pool; accepted ENTRY records are allocated from it,
 *          all scratch data lives in a sub-pool destroyed before returning
 *
 * Files are grouped by name (relative to the cache root, extension
 * stripped). A name that has both a ".header" and a ".data" file and whose
 * header can be read and carries DISK_FORMAT_VERSION is pushed onto the
 * `anchor` list. Everything else (unreadable or mismatching headers,
 * lone header or data files, "aptmp*" temp files) is deleted only when
 * `realclean` is set.
 *
 * Returns 0 on success, 1 if this directory or any subdirectory could not
 * be opened.
 */
static int process_dir(char *path, int baselen, apr_pool_t *pool)
{
    apr_dir_t *dir;
    apr_pool_t *p;
    apr_hash_t *h;
    apr_hash_index_t *i;
    apr_file_t *fd;
    apr_finfo_t info;
    apr_size_t len;
    apr_status_t rv;
    char *nextpath;
    char *name;
    char *key;
    const char *ext;
    void *val;
    DIRENTRY *d;
    ENTRY *e;
    int skip;
    int flag;
    disk_cache_info_t disk_info;

    /* Scratch pool first, so the directory handle does not pile up in the
     * long-lived pool for every directory visited. */
    apr_pool_create(&p, pool);

    if (apr_dir_open(&dir, path, p) != APR_SUCCESS) {
        apr_pool_destroy(p);
        return 1;
    }

    h = apr_hash_make(p);
    fd = NULL;

    /* Number of leading characters to drop from a full path to get the
     * name relative to the cache root: for a subdirectory the separating
     * '/' after the root is skipped as well. */
    skip = baselen;
    if (path[baselen])
        skip++;

    while (apr_dir_read(&info, DIRINFO, dir) == APR_SUCCESS) {
        if (info.filetype == APR_DIR) {
            /* skips ".", ".." and any other dot-directory */
            if (info.name[0] == '.')
                continue;
            nextpath = apr_pstrcat(p, path, "/", info.name, NULL);
            if (process_dir(nextpath, baselen, pool)) {
                apr_dir_close(dir);
                apr_pool_destroy(p);
                return 1;
            }
            continue;
        }

        if (info.filetype != APR_REG)
            continue;

        ext = strrchr(info.name, '.');
        if (!ext) {
            /* extensionless files are only of interest if they are
             * "aptmp*" temp files */
            if (!strncasecmp(info.name, "aptmp", 5)) {
                d = apr_pcalloc(p, sizeof(DIRENTRY));
                d->basename = apr_pstrcat(p, path + skip, "/", info.name, NULL);
                d->type = TEMP;
                apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
            }
            continue;
        }

        if (!strcasecmp(ext, ".header"))
            flag = HEADER;
        else if (!strcasecmp(ext, ".data"))
            flag = DATA;
        else
            continue;

        /* Copy the name without its extension rather than writing a
         * terminator into the buffer owned by apr_dir_read(). */
        name = apr_pstrndup(p, info.name, (apr_size_t)(ext - info.name));
        key = apr_pstrcat(p, path + skip, "/", name, NULL);

        d = apr_hash_get(h, key, APR_HASH_KEY_STRING);
        if (!d) {
            d = apr_pcalloc(p, sizeof(DIRENTRY));
            d->basename = key;
            apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
        }

        d->type |= flag;
        if (flag == HEADER) {
            d->htime = info.mtime;
            d->hsize = info.size;
        }
        else {
            d->dtime = info.mtime;
            d->dsize = info.size;
        }
    }

    apr_dir_close(dir);

    /* From here on `path` is the cache root; DIRENTRY names are relative
     * to it. */
    path[baselen] = '\0';

    for (i = apr_hash_first(p, h); i; i = apr_hash_next(i)) {
        apr_hash_this(i, NULL, NULL, &val);
        d = val;

        if (d->type == HEADERDATA) {
            nextpath = apr_pstrcat(p, path, "/", d->basename, ".header", NULL);
            /* the header starts with a raw struct, so open it in binary mode */
            if (apr_file_open(&fd, nextpath, APR_READ | APR_BINARY,
                              APR_OS_DEFAULT, p) == APR_SUCCESS) {
                len = sizeof(disk_cache_info_t);
                rv = apr_file_read_full(fd, &disk_info, len, &len);
                apr_file_close(fd);

                if (rv == APR_SUCCESS
                    && disk_info.format == DISK_FORMAT_VERSION) {
                    /* accepted: remember it for purge() in the long-lived
                     * pool, the scratch pool is about to go away */
                    e = apr_palloc(pool, sizeof(ENTRY)
                                         + strlen(d->basename) + 1);
                    e->next = anchor;
                    anchor = e;
                    e->expire = disk_info.expire;
                    e->response_time = disk_info.response_time;
                    e->htime = d->htime;
                    e->dtime = d->dtime;
                    e->hsize = d->hsize;
                    e->dsize = d->dsize;
                    strcpy(e->basename, d->basename);
                    continue;
                }
            }
            /* header unreadable, too short or of an unknown format */
            if (realclean)
                delete_entry(path, d->basename, p);
        }
        else if (realclean) {
            if (d->type == HEADER || d->type == DATA)
                delete_entry(path, d->basename, p);
            else
                delete_file(path, d->basename, p);
        }
    }

    apr_pool_destroy(p);

    return 0;
}

/*
 * Print a one-line size summary to errfile when running verbosely.
 *
 * total: cache size in bytes before cleaning
 * sum:   cache size in bytes after cleaning
 * max:   size limit in bytes
 *
 * All values are shown in KBytes. They are printed as apr_off_t so that
 * large caches are not truncated to the range of int.
 */
static void printstats(apr_off_t total, apr_off_t sum, apr_off_t max)
{
    if (!verbose)
        return;
    apr_file_printf(errfile,
        "Statistics: total was %" APR_OFF_T_FMT "K, total now %"
        APR_OFF_T_FMT "K, limit %" APR_OFF_T_FMT "K\n",
        total / 1024, sum / 1024, max / 1024);
}

/*
 * Delete cache entries from the `anchor` list until the summed size of the
 * remaining header and data files is at most `max`.
 *
 * path: cache root directory
 * pool: parent pool for the scratch pool used while deleting
 * max:  size limit in bytes
 *
 * Nothing is deleted if the cache already fits. Otherwise entries are
 * removed in three passes:
 *   1. entries with a response time or file mtime later than `now`,
 *   2. (only if still over the limit) all entries whose expiry time lies
 *      before `now`,
 *   3. remaining entries, one at a time, smallest data file mtime first,
 *      until the limit is met.
 * A deleted entry is marked by setting its name to the empty string.
 */
static void purge(char *path, apr_pool_t *pool, apr_off_t max)
{
    apr_pool_t *p;
    apr_off_t sum;
    apr_off_t total;
    ENTRY *e;
    ENTRY *oldest;

    sum = 0;
    for (e = anchor; e; e = e->next) {
        sum += e->hsize;
        sum += e->dsize;
    }

    total = sum;

    if (sum <= max) {
        printstats(total, sum, max);
        return;
    }

    apr_pool_create(&p, pool);

    /* Pass 1: timestamps from the future */
    for (e = anchor; e; e = e->next) {
        if (e->response_time > now || e->htime > now || e->dtime > now) {
            delete_entry(path, e->basename, p);
            sum -= e->hsize;
            sum -= e->dsize;
            e->basename[0] = '\0';
        }
    }

    apr_pool_clear(p);

    if (sum <= max) {
        apr_pool_destroy(p);
        printstats(total, sum, max);
        return;
    }

    apr_file_printf(errfile,
        "sum %" APR_OFF_T_FMT ", max %" APR_OFF_T_FMT "\n", sum, max);

    /* Pass 2: expired entries that are still present */
    for (e = anchor; e; e = e->next) {
        if (e->expire < now && e->basename[0]) {
            delete_entry(path, e->basename, p);
            sum -= e->hsize;
            sum -= e->dsize;
            e->basename[0] = '\0';
        }
    }

    apr_pool_clear(p);

    /* Pass 3: evict by data file age until the limit is met */
    while (sum > max) {
        oldest = NULL;
        for (e = anchor; e; e = e->next) {
            if (e->basename[0] && (!oldest || e->dtime < oldest->dtime))
                oldest = e;
        }

        /* nothing left to delete; stop instead of dereferencing NULL */
        if (!oldest)
            break;

        delete_entry(path, oldest->basename, p);
        sum -= oldest->hsize;
        sum -= oldest->dsize;
        oldest->basename[0] = '\0';

        /* keep scratch memory bounded no matter how many entries go */
        apr_pool_clear(p);
    }

    apr_pool_destroy(p);

    printstats(total, sum, max);
}

/*
 * Write the command line help to stderr and terminate the program with
 * exit status 1. Does not return.
 */
static void usage(void)
{
    static const char *const help[] = {
        "htcacheclean -- program for cleaning the disk cache.\n",
        "Usage: htcacheclean [-vr] -pPATH -lLIMIT\n",
        "Options:\n",
        "   -v   Be verbose and print statistics.\n",
        "   -r   Clean thoroughly. This assumes that the Apache "
        "web server\n        is not running.\n",
        "   -p   Specify PATH as the root of the disk cache.\n",
        "   -l   Specify LIMIT as the total disk cache size "
        "limit in KBytes.\n",
        NULL
    };
    const char *const *line;

    for (line = help; *line != NULL; ++line)
        fputs(*line, stderr);

    exit(1);
}

int main(int argc, const char * const argv[])
{
    apr_off_t max;
    int i;
    const char *arg;
    apr_pool_t *pool;
    char *proxypath;
    char *path;

    max = 0;
    verbose = 0;
    realclean = 0;
    proxypath = NULL;
    anchor = NULL;

    apr_app_initialize(&argc, &argv, NULL);
    apr_pool_create(&pool, NULL);
    apr_file_open_stderr(&errfile, pool);
    now = apr_time_now();

    for (i = 1; i < argc; i++) {
        arg = argv[i];
        if (*arg != '-')
            break;
        while (*++arg != '\0') {
            switch (*arg) {
            case 'v':
                verbose = 1;
                break;
            case 'r':
                realclean = 1;
                break;
            case 'l':
                max = apr_atoi64(++arg);
                while (*arg != '\0')
                    ++arg;
                --arg;
                break;
            case 'p':
                proxypath = apr_pstrdup(pool, ++arg);
                apr_filepath_set(proxypath, pool);
                while (*arg != '\0')
                    ++arg;
                --arg;
                break;
            }
        }
    }

    if (!proxypath || max<=0) {
        usage();
    }

    apr_filepath_get(&path, 0, pool);

    if (!process_dir(path, strlen(path), pool)) {
        purge(path, pool, max*1024);
    } else {
        apr_file_printf(errfile,
            "An error occurred, cache cleaning aborted.\n");
        apr_pool_destroy(pool);
        return 1;
    }

    apr_pool_destroy(pool);
    return 0;
}

Reply via email to