htcacheclean, take two:
Code cleanups, more APR-style coding, presumably feature-complete, and now
built against Apache 2.1 CVS. It needs further testing and, especially,
niceness tuning. See the code below. Comments are welcome.
--
Andreas Steinmetz SPAMmers use [EMAIL PROTECTED]
/* Copyright 2001-2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* htcacheclean.c: simple program for cleaning of
* the disk cache of the Apache HTTP server
*
* Contributed by Andreas Steinmetz <[EMAIL PROTECTED]>
* 8 Oct 2004
*/
#include "apr.h"
#include "apr_lib.h"
#include "apr_strings.h"
#include "apr_file_io.h"
#include "apr_file_info.h"
#include "apr_pools.h"
#include "apr_hash.h"
#include "apr_thread_proc.h"
#include "apr_signal.h"
#include "apr_getopt.h"
#include "apr_ring.h"
/*
 * DEBUG: dry-run switch. While this macro is defined, delete_file() and
 * delete_entry() only print "would delete <path>" to errfile instead of
 * removing anything, and main() never detaches, even when -d is given.
 */
#define DEBUG
/* mod_disk_cache.c extract start */
/* Header format this tool accepts; process_dir() compares it with 'format'. */
#define DISK_FORMAT_VERSION 0
/*
 * Fixed-size record read from the start of every ".header" file.
 * process_dir() reads sizeof(disk_cache_info_t) bytes straight into this
 * struct, so field order and types must not be changed here.
 */
typedef struct {
/* Indicates the format of the header struct stored on-disk. */
int format;
/* The HTTP status code returned for this response. */
int status;
/* The size of the entity name that follows. */
apr_size_t name_len;
/* The number of times we've cached this entity. */
apr_size_t entity_version;
/* Miscellaneous time values. */
apr_time_t date;
apr_time_t expire;
apr_time_t request_time;
apr_time_t response_time;
} disk_cache_info_t;
/* mod_disk_cache.c extract end */
/* mod_disk_cache.c related definitions start */
/* Name prefix of temporary files (matched case-insensitively, no extension). */
#define TEMPFILE "aptmp"
/* Extension of the file holding the disk_cache_info_t header. */
#define HEADEREXT ".header"
/* Extension of the file holding the cached body. */
#define DATAEXT ".data"
/* mod_disk_cache.c related definitions end */
/* Interval handed to apr_sleep() when running with -n (APR intervals are
 * expressed in microseconds). */
#define NICE_DELAY 100000
/* Number of file deletions after which a NICE_DELAY pause is inserted (-n). */
#define DELETE_NICE 250
/* Bit flags stored in DIRENTRY.type describing which files were seen. */
#define HEADER 1
#define DATA 2
#define TEMP 4
/* Both halves of a cache entry are present. */
#define HEADERDATA (HEADER|DATA)
/* File attributes requested from apr_dir_read() for every directory entry. */
#define DIRINFO (APR_FINFO_MTIME|APR_FINFO_SIZE|APR_FINFO_TYPE|APR_FINFO_NAME)
/*
 * Per-directory bookkeeping record built by process_dir() while scanning.
 * One record exists per base name; the header and data files sharing that
 * base name are merged into the same record.
 */
typedef struct
{
/* Path of the entry relative to the cache root, without extension. */
char *basename;
/* Combination of HEADER, DATA and TEMP flags. */
int type;
/* Modification time of the ".header" file (0 if not seen). */
apr_time_t htime;
/* Modification time of the ".data" file (0 if not seen). */
apr_time_t dtime;
/* Size of the ".header" file. */
apr_off_t hsize;
/* Size of the ".data" file. */
apr_off_t dsize;
} DIRENTRY;
/*
 * One complete, readable cache entry (header + data) queued for purge().
 * Entries are linked into the ring anchored at 'root'. The record is
 * allocated with room for the NUL-terminated relative name directly
 * behind the fixed fields (sizeof(ENTRY) + strlen(name) + 1).
 */
typedef struct _entry
{
    APR_RING_ENTRY(_entry) link;    /* ring linkage */
    apr_time_t expire;              /* expiry time from the on-disk header */
    apr_time_t response_time;       /* response time from the on-disk header */
    apr_time_t htime;               /* mtime of the ".header" file */
    apr_time_t dtime;               /* mtime of the ".data" file */
    apr_off_t hsize;                /* size of the ".header" file */
    apr_off_t dsize;                /* size of the ".data" file */
    char basename[];                /* relative name; flexible array member */
} ENTRY;
/* Deletions performed since the last "nice" pause. */
static int delcount;
/*
 * Set to 1 by the SIGINT/SIGTERM handler and polled by the work loops.
 * It is written from a signal handler, hence volatile sig_atomic_t.
 */
static volatile sig_atomic_t interrupted;
/* -r: also remove incomplete or unreadable entries and temp files. */
static int realclean;
/* -v: print statistics after purging. */
static int verbose;
/* -n: insert pauses to be gentle on the system. */
static int benice;
/* Timestamp taken at the start of the current cleaning pass. */
static apr_time_t now;
/* Destination for all messages (opened on stderr in main()). */
static apr_file_t *errfile;
/* Anchor of the ring of ENTRY records collected by process_dir(). */
static APR_RING_ENTRY(_entry) root;
/*
 * Signal handler installed for SIGINT and SIGTERM.
 *
 * It only records that termination was requested; the scanning, purging
 * and daemon loops poll 'interrupted' and wind down on their own.
 * Nothing is printed here because formatted file output is not
 * async-signal-safe.
 */
void setterm(int unused)
{
    (void)unused;
    interrupted = 1;
}
/*
 * Print purge statistics (only when running verbosely).
 *
 * total: cache size in bytes before purging
 * sum:   cache size in bytes after purging
 * max:   configured size limit in bytes
 *
 * Values are reported in KBytes. They are printed with APR_OFF_T_FMT so
 * that caches larger than what fits in an int are reported correctly.
 */
static void printstats(apr_off_t total, apr_off_t sum, apr_off_t max)
{
    if (!verbose)
        return;

    apr_file_printf(errfile,
                    "Statistics: total was %" APR_OFF_T_FMT "K, total now %"
                    APR_OFF_T_FMT "K, limit %" APR_OFF_T_FMT "K\n",
                    total / 1024, sum / 1024, max / 1024);
}
/*
 * Remove the single file <path>/<basename>.
 *
 * In a DEBUG build the file is left alone and only a "would delete"
 * line is written. With -n, every DELETE_NICE-th deletion is followed
 * by a NICE_DELAY pause.
 */
static void delete_file(char *path, char *basename, apr_pool_t *p)
{
    char *victim = apr_pstrcat(p, path, "/", basename, NULL);

#ifdef DEBUG
    apr_file_printf(errfile, "would delete %s\n", victim);
#else
    apr_file_remove(victim, p);
#endif

    if (!benice)
        return;

    delcount++;
    if (delcount < DELETE_NICE)
        return;

    apr_sleep(NICE_DELAY);
    delcount = 0;
}
/*
 * Remove both files of a cache entry: <path>/<basename>.header first,
 * then <path>/<basename>.data.
 *
 * In a DEBUG build nothing is removed and a "would delete" line is
 * written per file instead. With -n the pair counts as two deletions
 * towards the DELETE_NICE pause threshold.
 */
static void delete_entry(char *path, char *basename, apr_pool_t *p)
{
    static const char *const suffixes[] = { HEADEREXT, DATAEXT };
    int k;

    for (k = 0; k < 2; k++) {
        char *victim = apr_pstrcat(p, path, "/", basename, suffixes[k], NULL);

#ifdef DEBUG
        apr_file_printf(errfile, "would delete %s\n", victim);
#else
        apr_file_remove(victim, p);
#endif
    }

    if (!benice)
        return;

    delcount += 2;
    if (delcount >= DELETE_NICE) {
        apr_sleep(NICE_DELAY);
        delcount = 0;
    }
}
/*
* walk the cache directory tree
*/
static int process_dir(char *path, int baselen, apr_pool_t *pool)
{
apr_dir_t *dir;
apr_pool_t *p;
apr_hash_t *h;
apr_hash_index_t *i;
apr_file_t *fd;
apr_finfo_t info;
apr_size_t len;
char *nextpath;
char *ext;
DIRENTRY *d;
ENTRY *e;
int skip;
disk_cache_info_t disk_info;
if (apr_dir_open(&dir, path, pool) != APR_SUCCESS)
return 1;
apr_pool_create(&p, pool);
h = apr_hash_make(p);
fd = NULL;
skip = baselen;
if (path[baselen])
skip++;
while (apr_dir_read(&info, DIRINFO, dir) == APR_SUCCESS && !interrupted) {
if (info.filetype == APR_DIR) {
if (info.name[0] == '.')
continue;
nextpath = apr_pstrcat(p, path, "/", info.name, NULL);
if (process_dir(nextpath, baselen, pool))
return 1;
continue;
}
if (info.filetype != APR_REG)
continue;
ext = strrchr(info.name, '.');
if (!ext) {
if (!strncasecmp(info.name, TEMPFILE, strlen(TEMPFILE))) {
d = apr_pcalloc(p, sizeof(DIRENTRY));
d->basename = apr_pstrcat(p, path + skip, "/", info.name, NULL);
d->type = TEMP;
apr_hash_set(h, nextpath + skip, APR_HASH_KEY_STRING, d);
}
continue;
}
if (!strcasecmp(ext, HEADEREXT)) {
*ext = '\0';
nextpath = apr_pstrcat(p, path, "/", info.name, NULL);
d = apr_hash_get(h, nextpath + skip, APR_HASH_KEY_STRING);
if (!d) {
d = apr_pcalloc(p, sizeof(DIRENTRY));
d->basename = apr_pstrcat(p, path + skip, "/", info.name, NULL);
d->type = HEADER;
}
else
d->type |= HEADER;
d->htime = info.mtime;
d->hsize = info.size;
apr_hash_set(h, nextpath + skip, APR_HASH_KEY_STRING, d);
continue;
}
if (!strcasecmp(ext, DATAEXT)) {
*ext = '\0';
nextpath = apr_pstrcat(p, path, "/", info.name, NULL);
d = apr_hash_get(h, nextpath + skip, APR_HASH_KEY_STRING);
if (!d) {
d = apr_pcalloc(p, sizeof(DIRENTRY));
d->basename = apr_pstrcat(p, path + skip, "/", info.name, NULL);
d->type = DATA;
}
else
d->type |= DATA;
d->dtime = info.mtime;
d->dsize = info.size;
apr_hash_set(h, nextpath + skip, APR_HASH_KEY_STRING, d);
continue;
}
}
apr_dir_close(dir);
if (interrupted)
return 1;
path[baselen] = '\0';
for (i = apr_hash_first(p, h); i && !interrupted; i = apr_hash_next(i)) {
apr_hash_this(i, NULL, NULL, (void **)(&d));
if (d->type == HEADERDATA) {
nextpath = apr_pstrcat(p, path, "/", d->basename, HEADEREXT, NULL);
if (apr_file_open(&fd, nextpath, APR_READ, APR_OS_DEFAULT, p)
== APR_SUCCESS) {
len = sizeof(disk_cache_info_t);
if (apr_file_read_full(fd, &disk_info, len, &len)
== APR_SUCCESS) {
apr_file_close(fd);
if (disk_info.format == DISK_FORMAT_VERSION ) {
e = apr_palloc(pool, sizeof(ENTRY) +
strlen(d->basename) + 1);
APR_RING_INSERT_TAIL(&root, e, _entry, link);
e->expire = disk_info.expire;
e->response_time = disk_info.response_time;
e->htime = d->htime;
e->dtime = d->dtime;
e->hsize = d->hsize;
e->dsize = d->dsize;
strcpy(e->basename, d->basename);
continue;
}
}
else
apr_file_close(fd);
}
if(realclean)
delete_entry(path, d->basename, p);
} else if (realclean) {
if (d->type == HEADER || d->type == DATA)
delete_entry(path, d->basename, p);
else
delete_file(path, d->basename, p);
}
}
apr_pool_destroy(p);
if (interrupted)
return 1;
if (benice)
apr_sleep(NICE_DELAY);
if (interrupted)
return 1;
return 0;
}
/*
 * Purge cache entries until the cache fits into the size limit.
 *
 * path: cache root; entry names in the ring are relative to it
 * pool: parent for the short-lived pools used while deleting
 * max:  size limit in bytes
 *
 * Works on the ring of entries collected by process_dir(), in stages:
 *   1. nothing is done if the cache is already within the limit;
 *   2. entries with a timestamp in the future are removed;
 *   3. entries whose expiry time has passed are removed;
 *   4. the entries with the oldest data file are removed one by one
 *      until the limit is met.
 * Statistics are printed (if verbose) unless the run is interrupted
 * between stages.
 */
static void purge(char *path, apr_pool_t *pool, apr_off_t max)
{
    apr_pool_t *p;
    apr_off_t sum;
    apr_off_t total;
    ENTRY *e;
    ENTRY *n;
    ENTRY *oldest;

    sum = 0;
    for (e = APR_RING_FIRST(&root);
         e != APR_RING_SENTINEL(&root, _entry, link);
         e = APR_RING_NEXT(e, link)) {
        sum += e->hsize;
        sum += e->dsize;
    }

    total = sum;

    if (sum <= max) {
        printstats(total, sum, max);
        return;
    }

    /* stage 2: entries claiming to come from the future are bogus */
    apr_pool_create(&p, pool);
    for (e = APR_RING_FIRST(&root);
         e != APR_RING_SENTINEL(&root, _entry, link);) {
        /* fetch the successor first, 'e' may be unlinked below */
        n = APR_RING_NEXT(e, link);
        if (e->response_time > now || e->htime > now || e->dtime > now) {
            delete_entry(path, e->basename, p);
            sum -= e->hsize;
            sum -= e->dsize;
            APR_RING_REMOVE(e, link);
        }
        e = n;
    }
    apr_pool_destroy(p);

    if (sum <= max) {
        printstats(total, sum, max);
        return;
    }

    if (interrupted)
        return;

    /* stage 3: expired entries */
    apr_pool_create(&p, pool);
    for (e = APR_RING_FIRST(&root);
         e != APR_RING_SENTINEL(&root, _entry, link);) {
        n = APR_RING_NEXT(e, link);
        if (e->expire < now) {
            delete_entry(path, e->basename, p);
            sum -= e->hsize;
            sum -= e->dsize;
            APR_RING_REMOVE(e, link);
        }
        e = n;
    }
    apr_pool_destroy(p);

    if (interrupted)
        return;

    /* stage 4: oldest data file first */
    apr_pool_create(&p, pool);
    while (sum > max && !interrupted) {
        oldest = NULL;
        for (e = APR_RING_FIRST(&root);
             e != APR_RING_SENTINEL(&root, _entry, link);
             e = APR_RING_NEXT(e, link)) {
            if (!oldest || e->dtime < oldest->dtime)
                oldest = e;
        }

        /* nothing left to delete; never dereference an empty result */
        if (!oldest)
            break;

        delete_entry(path, oldest->basename, p);
        sum -= oldest->hsize;
        sum -= oldest->dsize;
        APR_RING_REMOVE(oldest, link);
    }
    apr_pool_destroy(p);

    printstats(total, sum, max);
}
/*
 * Write the usage text to errfile and terminate the program with
 * exit status 1.
 */
static void usage(void)
{
    static const char *const text[] = {
        "htcacheclean -- program for cleaning the disk cache.\n",
        "Usage: htcacheclean [-vrn] -pPATH -lLIMIT\n",
        "Usage: htcacheclean [-n] -dINTERVAL -pPATH -lLIMIT\n",
        "Options:\n",
        " -d Daemonize and repeat cache cleaning every INTERVAL minutes. This\n"
        " option is mutually exclusive with the -v and the -r options.\n",
        " -v Be verbose and print statistics. This option is mutually exclusive\n"
        " with the -d option.\n",
        " -r Clean thoroughly. This assumes that the Apache web server\n"
        " is not running. This option is mutually exclusive with the -d option.\n",
        " -n Be nice. This causes longer processing in favour of other processes.\n",
        " -p Specify PATH as the root of the disk cache.\n",
        " -l Specify LIMIT as the total disk cache size limit in KBytes.\n"
    };
    unsigned int k;

    for (k = 0; k < sizeof(text) / sizeof(text[0]); k++)
        apr_file_printf(errfile, "%s", text[k]);

    exit(1);
}
/*
 * main
 *
 * Command line:
 *   -n           be nice (pause between bursts of work)
 *   -v           verbose, print statistics
 *   -r           thorough cleaning (realclean)
 *   -dINTERVAL   repeat every INTERVAL minutes; excludes -v and -r
 *   -lLIMIT      cache size limit in KBytes (required, > 0)
 *   -pPATH       cache root (required)
 *
 * The process changes its working directory to PATH and scans from
 * there. Returns 0 on success and 1 if the cache root is unusable or a
 * one-shot run failed; invalid arguments end in usage().
 */
int main(int argc, const char * const argv[])
{
    apr_off_t max;
    apr_time_t current;
    apr_time_t repeat;
    apr_time_t delay;
    apr_status_t status;
    int repeat_found;
    apr_pool_t *pool;
    apr_pool_t *instance;
    apr_getopt_t *o;
    char opt;
    const char *arg;
    char *proxypath;
    char *path;

    interrupted = 0;
    repeat = 0;
    repeat_found = 0;
    max = 0;
    verbose = 0;
    realclean = 0;
    benice = 0;
    proxypath = NULL;

    apr_app_initialize(&argc, &argv, NULL);
    atexit(apr_terminate);

    apr_pool_create(&pool, NULL);
    apr_file_open_stderr(&errfile, pool);
    apr_signal(SIGINT, setterm);
    apr_signal(SIGTERM, setterm);

    apr_getopt_init(&o, pool, argc, argv);

    while (1) {
        status = apr_getopt(o, "nvrd:l:p:", &opt, &arg);
        if (status == APR_EOF)
            break;
        else if (status != APR_SUCCESS)
            usage();

        switch (opt) {
        case 'n':
            benice = 1;
            break;

        case 'v':
            verbose = 1;
            break;

        case 'r':
            realclean = 1;
            break;

        case 'd':
            repeat_found = 1;
            repeat = apr_atoi64(arg);
            repeat *= 60000000;     /* minutes -> microseconds */
            break;

        case 'l':
            max = apr_atoi64(arg);
            break;

        case 'p':
            proxypath = apr_pstrdup(pool, arg);
            /* Scanning starts at the working directory, so a failed
             * directory change must never go unnoticed: otherwise the
             * wrong directory would be cleaned. */
            if (apr_filepath_set(proxypath, pool) != APR_SUCCESS) {
                apr_file_printf(errfile, "Could not set filepath to '%s'\n",
                                proxypath);
                return 1;
            }
            break;

        default:
            break;
        }
    }

    if (repeat_found && (repeat <= 0 || verbose || realclean))
        usage();

    if (!proxypath || max <= 0)
        usage();

    if (apr_filepath_get(&path, 0, pool) != APR_SUCCESS) {
        apr_file_printf(errfile, "Could not get the filepath\n");
        return 1;
    }

#ifndef DEBUG
    if (repeat_found) {
        apr_file_close(errfile);
        apr_proc_detach(APR_PROC_DETACH_DAEMONIZE);
    }
#endif

    do {
        now = apr_time_now();
        apr_pool_create(&instance, pool);
        APR_RING_INIT(&root, _entry, link);
        delcount = 0;

        if (!process_dir(path, strlen(path), instance)) {
            purge(path, instance, max * 1024);
        }
        else if (!repeat_found) {
            apr_file_printf(errfile,
                            "An error occurred, cache cleaning aborted.\n");
            return 1;
        }

        if (repeat_found && !interrupted) {
            current = apr_time_now();
            /* Sleep for the remainder of the interval. If the clock
             * went backwards or the pass took longer than the interval,
             * sleep for a full interval. */
            if (current < now || now + repeat <= current)
                delay = repeat;
            else
                delay = now + repeat - current;
            apr_sleep(delay);
        }

        apr_pool_destroy(instance);
    } while (repeat_found && !interrupted);

    return 0;
}