htcacheclean, take two:

Code cleanups, more APR-style coding, presumably feature complete, now
built against Apache 2.1 CVS. Needs further testing and especially
niceness tuning. See code below. Comments welcome.
--
Andreas Steinmetz                       SPAMmers use [EMAIL PROTECTED]


/* Copyright 2001-2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * htcacheclean.c: simple program for cleaning of
 * the disk cache of the Apache HTTP server
 *
 * Contributed by Andreas Steinmetz <[EMAIL PROTECTED]>
 * 8 Oct 2004
 */

#include "apr.h"
#include "apr_lib.h"
#include "apr_strings.h"
#include "apr_file_io.h"
#include "apr_file_info.h"
#include "apr_pools.h"
#include "apr_hash.h"
#include "apr_thread_proc.h"
#include "apr_signal.h"
#include "apr_getopt.h"
#include "apr_ring.h"


/*
 * Dry-run switch: while DEBUG is defined, the delete helpers only print
 * "would delete ..." instead of removing files, the signal handler logs
 * "interrupt", and main() does not detach into the background.
 */
#define DEBUG

/* mod_disk_cache.c extract start */

/*
 * On-disk format version this tool understands; header files whose
 * "format" field differs are not added to the list of purge candidates.
 */
#define DISK_FORMAT_VERSION 0
/*
 * Fixed-size record read verbatim from the start of each cache header
 * file. It is an extract from mod_disk_cache.c, so field order and types
 * have to match what that module writes.
 */
typedef struct {
    /* Indicates the format of the header struct stored on-disk. */
    int format;
    /* The HTTP status code returned for this response.  */
    int status;
    /* The size of the entity name that follows. */
    apr_size_t name_len;
    /* The number of times we've cached this entity. */
    apr_size_t entity_version;
    /* Miscellaneous time values. */
    apr_time_t date;
    apr_time_t expire;
    apr_time_t request_time;
    apr_time_t response_time;
} disk_cache_info_t;

/* mod_disk_cache.c extract end */

/* mod_disk_cache.c related definitions start */

/* Name prefix (matched case-insensitively) of extension-less temp files. */
#define TEMPFILE    "aptmp"
/* Suffix of the file holding a cache entry's header record. */
#define HEADEREXT   ".header"
/* Suffix of the file holding a cache entry's body. */
#define DATAEXT     ".data"

/* mod_disk_cache.c related definitions end */

#define NICE_DELAY  100000
#define DELETE_NICE 250
#define HEADER      1
#define DATA        2
#define TEMP        4
#define HEADERDATA  (HEADER|DATA)

#define DIRINFO (APR_FINFO_MTIME|APR_FINFO_SIZE|APR_FINFO_TYPE|APR_FINFO_NAME)

/*
 * Per-directory bookkeeping record: collects what process_dir() found for
 * one base name (header file, data file and/or temp file) while a single
 * directory is being read.
 */
typedef struct
{
    char *basename;     /* path relative to the cache root, extension removed */
    int type;           /* combination of HEADER, DATA and TEMP flags */
    apr_time_t htime;   /* mtime of the header file */
    apr_time_t dtime;   /* mtime of the data file */
    apr_off_t hsize;    /* size of the header file */
    apr_off_t dsize;    /* size of the data file */
} DIRENTRY;

/*
 * One complete cache entry (header + data file) that is a candidate for
 * purging. Entries are linked into the global ring "root".
 *
 * The base name is stored inline after the fixed fields, so an ENTRY must
 * be allocated with sizeof(ENTRY) + strlen(name) + 1 bytes. A C99 flexible
 * array member is used instead of the non-standard zero-length array.
 */
typedef struct _entry
{
    APR_RING_ENTRY(_entry) link;    /* ring linkage */
    apr_time_t expire;              /* expiry time from the header record */
    apr_time_t response_time;       /* response time from the header record */
    apr_time_t htime;               /* mtime of the header file */
    apr_time_t dtime;               /* mtime of the data file */
    apr_off_t hsize;                /* size of the header file */
    apr_off_t dsize;                /* size of the data file */
    char basename[];                /* path relative to cache root, no extension */
} ENTRY;

/* Deletions performed since the last "nice" pause. */
static int delcount;
/*
 * Set by the signal handler and polled by the main loops; it must be
 * volatile sig_atomic_t so that the asynchronous write is well defined
 * and the polling reads are not optimised away.
 */
static volatile sig_atomic_t interrupted;
/* Command line flag -r: also remove incomplete/unknown files. */
static int realclean;
/* Command line flag -v: print statistics. */
static int verbose;
/* Command line flag -n: insert pauses to be nice to other processes. */
static int benice;
/* Timestamp taken at the start of the current cleaning run. */
static apr_time_t now;
/* stderr handle used for all diagnostics. */
static apr_file_t *errfile;
/* Head of the ring of ENTRY records collected by process_dir(). */
static APR_RING_ENTRY(_entry) root;

/*
 * called on SIGINT or SIGTERM
 */

/*
 * Signal handler installed for SIGINT and SIGTERM: flags the run as
 * interrupted so the scanning and purging loops wind down. In DEBUG
 * builds it additionally logs the event to errfile.
 */
void setterm(int unused)
{
    /* the signal number is irrelevant, both signals are treated alike */
    (void)unused;

#ifdef DEBUG
    apr_file_printf(errfile, "interrupt\n");
#endif

    interrupted = 1;
}

/*
 * print purge statistics
 */

/*
 * Print the cache size before and after purging together with the
 * configured limit, all in KBytes. Does nothing unless -v was given.
 *
 * total  cache size in bytes before purging
 * sum    cache size in bytes after purging
 * max    size limit in bytes
 *
 * The values are printed as apr_off_t rather than being cast to int, so
 * caches of 2 TB and more are reported correctly.
 */
static void printstats(apr_off_t total, apr_off_t sum, apr_off_t max)
{
    if (!verbose)
        return;

    apr_file_printf(errfile,
        "Statistics: total was %" APR_OFF_T_FMT "K, total now %"
        APR_OFF_T_FMT "K, limit %" APR_OFF_T_FMT "K\n",
        total / 1024, sum / 1024, max / 1024);
}

/*
 * delete a single file
 */

/*
 * Remove the single file "path/basename" (or, in DEBUG builds, only
 * report that it would be removed).
 *
 * path      cache root directory
 * basename  file name relative to path
 * p         pool used for the temporary path string and the removal
 *
 * In nice mode a pause of NICE_DELAY is inserted after every
 * DELETE_NICE deletions.
 */
static void delete_file(char *path, char *basename, apr_pool_t *p)
{
    char *nextpath;

    nextpath = apr_pstrcat(p, path, "/", basename, NULL);
    /* preprocessor directives have to start on their own line */
#ifndef DEBUG
    apr_file_remove(nextpath, p);
#else
    apr_file_printf(errfile, "would delete %s\n", nextpath);
#endif

    if (benice) {
        if (++delcount >= DELETE_NICE) {
            apr_sleep(NICE_DELAY);
            delcount = 0;
        }
    }
}

/*
 * delete cache file set
 */

/*
 * Remove both files of a cache entry, "path/basename" HEADEREXT and
 * "path/basename" DATAEXT (or, in DEBUG builds, only report that they
 * would be removed).
 *
 * path      cache root directory
 * basename  entry name relative to path, without extension
 * p         pool used for the temporary path strings and the removal
 *
 * In nice mode the two deletions are added to the deletion counter and a
 * pause of NICE_DELAY is inserted once DELETE_NICE is reached.
 */
static void delete_entry(char *path, char *basename, apr_pool_t *p)
{
    char *nextpath;

    /* preprocessor directives have to start on their own line */
    nextpath = apr_pstrcat(p, path, "/", basename, HEADEREXT, NULL);
#ifndef DEBUG
    apr_file_remove(nextpath, p);
#else
    apr_file_printf(errfile, "would delete %s\n", nextpath);
#endif

    nextpath = apr_pstrcat(p, path, "/", basename, DATAEXT, NULL);
#ifndef DEBUG
    apr_file_remove(nextpath, p);
#else
    apr_file_printf(errfile, "would delete %s\n", nextpath);
#endif

    if (benice) {
        delcount += 2;
        if (delcount >= DELETE_NICE) {
            apr_sleep(NICE_DELAY);
            delcount = 0;
        }
    }
}

/*
 * walk the cache directory tree
 */

/*
 * Recursively scan one directory of the cache tree.
 *
 * path     directory to scan; for the top-level call this is the cache
 *          root. NOTE: the string is truncated in place to the cache root
 *          (path[baselen] = '\0') once the directory has been read.
 * baselen  length of the cache root path
 * pool     pool that outlives the scan; ENTRY records are allocated here
 *
 * Every regular file is classified by name as header, data or temp file
 * and collected per base name in a hash. Afterwards each base name that
 * has both a header and a data file and whose header record carries
 * DISK_FORMAT_VERSION is appended to the global ring "root". Everything
 * else is deleted, but only when -r (realclean) was given.
 *
 * Returns 0 on success and 1 if a directory could not be opened or the
 * run was interrupted.
 */
static int process_dir(char *path, int baselen, apr_pool_t *pool)
{
    apr_dir_t *dir;
    apr_pool_t *p;
    apr_hash_t *h;
    apr_hash_index_t *i;
    apr_file_t *fd;
    apr_finfo_t info;
    apr_size_t len;
    char *nextpath;
    char *ext;
    DIRENTRY *d;
    ENTRY *e;
    int skip;
    disk_cache_info_t disk_info;

    if (apr_dir_open(&dir, path, pool) != APR_SUCCESS)
        return 1;

    /* scratch pool for everything that is only needed for this directory */
    apr_pool_create(&p, pool);
    h = apr_hash_make(p);
    fd = NULL;

    /* offset of the part of a path that is relative to the cache root;
     * below the root the separating '/' is skipped as well */
    skip = baselen;
    if (path[baselen])
        skip++;

    while (apr_dir_read(&info, DIRINFO, dir) == APR_SUCCESS && !interrupted) {
        if (info.filetype == APR_DIR) {
            /* skips ".", ".." and any other dot directory */
            if (info.name[0] == '.')
                continue;
            nextpath = apr_pstrcat(p, path, "/", info.name, NULL);
            if (process_dir(nextpath, baselen, pool)) {
                /* release this level's resources before bailing out */
                apr_dir_close(dir);
                apr_pool_destroy(p);
                return 1;
            }
            continue;
        }

        if (info.filetype != APR_REG)
            continue;

        ext = strrchr(info.name, '.');
        if (!ext) {
            if (!strncasecmp(info.name, TEMPFILE, strlen(TEMPFILE))) {
                d = apr_pcalloc(p, sizeof(DIRENTRY));
                d->basename = apr_pstrcat(p, path + skip, "/", info.name, NULL);
                d->type = TEMP;
                /* key on the entry's own name; nextpath is not valid here
                 * (it is either unset or left over from another file) */
                apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
            }
            continue;
        }

        if (!strcasecmp(ext, HEADEREXT)) {
            /* cut off the extension so header and data share one key */
            *ext = '\0';
            nextpath = apr_pstrcat(p, path, "/", info.name, NULL);
            d = apr_hash_get(h, nextpath + skip, APR_HASH_KEY_STRING);
            if (!d) {
                d = apr_pcalloc(p, sizeof(DIRENTRY));
                d->basename = apr_pstrcat(p, path + skip, "/", info.name, NULL);
                d->type = HEADER;
            }
            else
                d->type |= HEADER;
            d->htime = info.mtime;
            d->hsize = info.size;
            apr_hash_set(h, nextpath + skip, APR_HASH_KEY_STRING, d);
            continue;
        }

        if (!strcasecmp(ext, DATAEXT)) {
            *ext = '\0';
            nextpath = apr_pstrcat(p, path, "/", info.name, NULL);
            d = apr_hash_get(h, nextpath + skip, APR_HASH_KEY_STRING);
            if (!d) {
                d = apr_pcalloc(p, sizeof(DIRENTRY));
                d->basename = apr_pstrcat(p, path + skip, "/", info.name, NULL);
                d->type = DATA;
            }
            else
                d->type |= DATA;
            d->dtime = info.mtime;
            d->dsize = info.size;
            apr_hash_set(h, nextpath + skip, APR_HASH_KEY_STRING, d);
            continue;
        }
    }

    apr_dir_close(dir);

    if (interrupted) {
        apr_pool_destroy(p);
        return 1;
    }

    /* from here on "path" is the cache root; the collected base names are
     * relative to it */
    path[baselen] = '\0';

    for (i = apr_hash_first(p, h); i && !interrupted; i = apr_hash_next(i)) {
        apr_hash_this(i, NULL, NULL, (void **)(&d));
        if (d->type == HEADERDATA) {
            nextpath = apr_pstrcat(p, path, "/", d->basename, HEADEREXT, NULL);
            if (apr_file_open(&fd, nextpath, APR_READ, APR_OS_DEFAULT, p)
              == APR_SUCCESS) {
                len = sizeof(disk_cache_info_t);
                if (apr_file_read_full(fd, &disk_info, len, &len)
                  == APR_SUCCESS) {
                    apr_file_close(fd);
                    if (disk_info.format == DISK_FORMAT_VERSION) {
                        /* the name is stored inline behind the struct */
                        e = apr_palloc(pool, sizeof(ENTRY) +
                            strlen(d->basename) + 1);
                        APR_RING_INSERT_TAIL(&root, e, _entry, link);
                        e->expire = disk_info.expire;
                        e->response_time = disk_info.response_time;
                        e->htime = d->htime;
                        e->dtime = d->dtime;
                        e->hsize = d->hsize;
                        e->dsize = d->dsize;
                        strcpy(e->basename, d->basename);
                        continue;
                    }
                }
                else
                    apr_file_close(fd);
            }
            /* unreadable header or unknown format */
            if (realclean)
                delete_entry(path, d->basename, p);
        } else if (realclean) {
            /* incomplete entry (only one of the two files) or temp file */
            if (d->type == HEADER || d->type == DATA)
                delete_entry(path, d->basename, p);
            else
                delete_file(path, d->basename, p);
        }
    }

    apr_pool_destroy(p);

    if (interrupted)
        return 1;

    if (benice)
        apr_sleep(NICE_DELAY);

    /* a signal may have arrived while sleeping */
    if (interrupted)
        return 1;

    return 0;
}

/*
 * purge cache entries
 */

/*
 * Delete entries from the global ring "root" until the total size of the
 * remaining entries no longer exceeds the limit.
 *
 * path  cache root directory
 * pool  parent pool for the short-lived pools used while deleting
 * max   size limit in bytes
 *
 * Entries are removed in three passes:
 *   1. entries whose response time or file mtimes lie in the future,
 *   2. entries whose expiry time has passed,
 *   3. remaining entries, oldest data file first, until the limit is met.
 * Statistics are printed (if verbose) when the function finishes without
 * having been interrupted.
 */
static void purge(char *path, apr_pool_t *pool, apr_off_t max)
{
    apr_pool_t *p;
    apr_off_t sum;
    apr_off_t total;
    ENTRY *e;
    ENTRY *n;
    ENTRY *oldest;

    sum = 0;

    for (e = APR_RING_FIRST(&root);
         e != APR_RING_SENTINEL(&root, _entry, link);
         e = APR_RING_NEXT(e, link)) {
        sum += e->hsize;
        sum += e->dsize;
    }

    total = sum;

    if (sum <= max) {
        printstats(total, sum, max);
        return;
    }

    /* pass 1: entries with timestamps in the future */
    apr_pool_create(&p, pool);

    for (e = APR_RING_FIRST(&root);
         e != APR_RING_SENTINEL(&root, _entry, link);) {
        /* fetch the successor first, e may be unlinked below */
        n = APR_RING_NEXT(e, link);
        if (e->response_time > now || e->htime > now || e->dtime > now) {
            delete_entry(path, e->basename, p);
            sum -= e->hsize;
            sum -= e->dsize;
            APR_RING_REMOVE(e, link);
        }
        e = n;
    }

    apr_pool_destroy(p);

    if (sum <= max) {
        printstats(total, sum, max);
        return;
    }

    if (interrupted)
        return;

    /* pass 2: expired entries */
    apr_pool_create(&p, pool);

    for (e = APR_RING_FIRST(&root);
         e != APR_RING_SENTINEL(&root, _entry, link);) {
        n = APR_RING_NEXT(e, link);
        if (e->expire < now) {
            delete_entry(path, e->basename, p);
            sum -= e->hsize;
            sum -= e->dsize;
            APR_RING_REMOVE(e, link);
        }
        e = n;
    }

    apr_pool_destroy(p);

    if (interrupted)
        return;

    /* pass 3: oldest data file first until the limit is met */
    apr_pool_create(&p, pool);

    while (sum > max && !interrupted) {
        oldest = NULL;
        for (e = APR_RING_FIRST(&root);
             e != APR_RING_SENTINEL(&root, _entry, link);
             e = APR_RING_NEXT(e, link)) {
            if (!oldest || e->dtime < oldest->dtime)
                oldest = e;
        }

        /* nothing left to delete; never dereference a NULL candidate */
        if (!oldest)
            break;

        delete_entry(path, oldest->basename, p);
        sum -= oldest->hsize;
        sum -= oldest->dsize;
        APR_RING_REMOVE(oldest, link);
    }

    apr_pool_destroy(p);

    printstats(total, sum, max);
}

/*
 * usage info
 */

static void usage(void)
{
    apr_file_printf(errfile, "htcacheclean -- program for cleaning the "
                             "disk cache.\n");
    apr_file_printf(errfile, "Usage: htcacheclean [-vrn] -pPATH -lLIMIT\n");
    apr_file_printf(errfile, "Usage: htcacheclean [-n] -dINTERVAL -pPATH "
                             "-lLIMIT\n");
    apr_file_printf(errfile, "Options:\n");
    apr_file_printf(errfile, "   -d   Daemonize and repeat cache cleaning "
                             "every INTERVAL minutes. This\n"
                             "        option is mutually exclusive with "
                             "the -v and the -r options.\n");
    apr_file_printf(errfile, "   -v   Be verbose and print statistics. "
                             "This option is mutually exclusive\n"
                             "        with the -d option.\n");
    apr_file_printf(errfile, "   -r   Clean thoroughly. This assumes that "
                             "the Apache web server\n"
                             "        is not running. This option is "
                             "mutually exclusive with the -d option.\n");
    apr_file_printf(errfile, "   -n   Be nice. This causes longer processing "
                             "in favour of other processes.\n");
    apr_file_printf(errfile, "   -p   Specify PATH as the root of the "
                             "disk cache.\n");
    apr_file_printf(errfile, "   -l   Specify LIMIT as the total disk cache "
                             "size limit in KBytes.\n");
    exit(1);
}

/*
 * main
 */

/*
 * Program entry point.
 *
 * Parses the options (-n, -v, -r, -dINTERVAL, -lLIMIT, -pPATH), changes
 * into the cache root and then runs one cleaning cycle (scan with
 * process_dir(), then purge()). With -d the cycle is repeated every
 * INTERVAL minutes until SIGINT or SIGTERM is received; outside DEBUG
 * builds the process detaches first.
 *
 * Returns 0 on success and 1 if the cache root cannot be entered or a
 * single-shot run fails; invalid usage exits via usage().
 */
int main(int argc, const char * const argv[])
{
    apr_off_t max;
    apr_time_t current;
    apr_time_t repeat;
    apr_time_t delay;
    apr_status_t status;
    int repeat_found;
    apr_pool_t *pool;
    apr_pool_t *instance;
    apr_getopt_t *o;
    char opt;
    const char *arg;
    char *proxypath;
    char *path;

    interrupted = 0;
    repeat = 0;
    repeat_found = 0;
    max = 0;
    verbose = 0;
    realclean = 0;
    benice = 0;
    proxypath = NULL;

    apr_app_initialize(&argc, &argv, NULL);
    atexit(apr_terminate);

    apr_pool_create(&pool, NULL);
    apr_file_open_stderr(&errfile, pool);
    apr_signal(SIGINT, setterm);
    apr_signal(SIGTERM, setterm);

    apr_getopt_init(&o, pool, argc, argv);

    while (1) {
        status = apr_getopt(o, "nvrd:l:p:", &opt, &arg);
        if (status == APR_EOF) {
            break;
        }
        else if (status == APR_SUCCESS) {
            switch (opt) {
            case 'n':
                benice = 1;
                break;
            case 'v':
                verbose = 1;
                break;
            case 'r':
                realclean = 1;
                break;
            case 'd':
                repeat_found = 1;
                /* INTERVAL is given in minutes, apr_sleep() wants usec */
                repeat = apr_atoi64(arg);
                repeat *= 60000000;
                break;
            case 'l':
                max = apr_atoi64(arg);
                break;
            case 'p':
                proxypath = apr_pstrdup(pool, arg);
                /* never continue in the wrong directory: everything
                 * below operates on the current working directory */
                if (apr_filepath_set(proxypath, pool) != APR_SUCCESS) {
                    apr_file_printf(errfile,
                        "Could not change to cache directory %s\n",
                        proxypath);
                    return 1;
                }
                break;
            }
        }
        else {
            usage();
        }
    }

    /* -d cannot be combined with -v or -r and needs a positive interval */
    if (repeat_found && (repeat <= 0 || verbose || realclean))
        usage();

    if (!proxypath || max <= 0)
        usage();

    if (apr_filepath_get(&path, 0, pool) != APR_SUCCESS) {
        apr_file_printf(errfile,
            "Could not determine the cache directory path.\n");
        return 1;
    }

#ifndef DEBUG
    if (repeat_found) {
        apr_file_close(errfile);
        apr_proc_detach(APR_PROC_DETACH_DAEMONIZE);
    }
#endif

    do
    {
        now = apr_time_now();
        /* everything belonging to one cycle lives in its own pool */
        apr_pool_create(&instance, pool);
        APR_RING_INIT(&root, _entry, link);
        delcount = 0;

        if (!process_dir(path, strlen(path), instance)) {
            /* LIMIT is given in KBytes */
            purge(path, instance, max*1024);
        } else if (!repeat_found) {
            apr_file_printf(errfile,
                "An error occurred, cache cleaning aborted.\n");
            return 1;
        }

        if (repeat_found && !interrupted) {
            /* sleep for the remainder of the interval; fall back to a
             * full interval if the clock went backwards or the cycle
             * itself already took longer than one interval */
            current = apr_time_now();
            if (current < now)
                delay = repeat;
            else if (current - now >= repeat)
                delay = repeat;
            else
                delay = now + repeat - current;
            apr_sleep(delay);
        }

        apr_pool_destroy(instance);
    } while (repeat_found && !interrupted);

    return 0;
}

Reply via email to