On Fri, May 30, 2008 at 1:59 PM, Akins, Brian <[EMAIL PROTECTED]> wrote:
> How we handle purge:

Oh that reminds me, a long time ago, I wrote htcacheadmin - a generic
command line utility for administering mod_disk_cache caches. Which is
how I /used/ to handle this situation. (I've attached the source, but
it's *very* stale). If I find the time next week, I'll get this
working again, I think it might be useful to others!

-- 
Colm
/* Copyright 2001-2005 The Apache Software Foundation or its licensors, as
 * applicable.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * htcacheadmin.c: a utility to allow administrators to track down urls
 * in their caches, and perform actions on that basis.
 *
 * Contributed by Colm MacCarthaigh  <colm stdlib.net>
 * 11 Aug 2005
 */

#include "apr.h"
#include "apr_date.h"
#include "apr_errno.h"
#include "apr_file_info.h"
#include "apr_file_io.h"
#include "apr_general.h"
#include "apr_getopt.h"
#include "apr_lib.h"
#include "apr_md5.h"
#include "apr_pools.h"
#include "apr_strings.h"
#include "apr_time.h"
#include "apr_uri.h"

#if APR_HAVE_STDLIB_H
#include <stdlib.h>
#endif
#if APR_HAVE_STRING_H
#include <string.h>
#endif
#if APR_HAVE_STRINGS_H
#include <strings.h>
#endif
#if APR_HAVE_UNISTD_H
#include <unistd.h>
#endif

/* mod_disk_cache.c extract start */

/*
 * Values of the leading "format" word of a header file. main() reads that
 * word first and only decodes the rest of the file when it equals
 * DISK_FORMAT_VERSION; VARY_FORMAT_VERSION is recognised but reported as
 * unsupported.
 */
#define VARY_FORMAT_VERSION 3
#define DISK_FORMAT_VERSION 4

/*
 * Fixed-size record at the start of a DISK_FORMAT_VERSION header file.
 * main() reads it straight from the file into this struct, so the member
 * order and types must not be changed independently of the code that wrote
 * the file (extracted from mod_disk_cache.c, per the markers around it).
 */
typedef struct
{
    /* Indicates the format of the header struct stored on-disk. */
    apr_uint32_t format;
    /* The HTTP status code returned for this response.  */
    int status;
    /* The size of the entity name that follows; the name is stored in the
     * file immediately after this struct, without a terminating NUL. */
    apr_size_t name_len;
    /* The number of times we've cached this entity. */
    apr_size_t entity_version;
    /* Miscellaneous time values (apr_time_t; main() prints them with
     * apr_rfc822_date()). */
    apr_time_t date;
    apr_time_t expire;
    apr_time_t request_time;
    apr_time_t response_time;
} disk_cache_info_t;

/* mod_disk_cache.c extract end */

/* cache_util.c extract started */

/*
 * Translate the cache key `it` into the relative file name stem used for it
 * on disk and store the NUL-terminated result in `val`.
 *
 * The key is hashed with MD5 and the 128-bit digest is written as 22
 * characters from a 64-symbol alphabet. The first ndepth * nlength of those
 * characters become ndepth directory components of nlength characters each,
 * every component followed by '/'; whatever is left of the 22 characters
 * forms the final component.
 *
 * No bounds checking is done here: the caller must keep ndepth * nlength
 * within the 22 available characters and make `val` large enough for
 * 22 + ndepth + 1 bytes.
 */
static void cache_hash(const char *it, char *val, int ndepth, int nlength)
{
    static const char enc_table[64] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_@";
    apr_md5_ctx_t md5;
    unsigned char digest[16];
    char encoded[22];
    unsigned int bits;
    int in, out, used, level;

    apr_md5_init(&md5);
    apr_md5_update(&md5, (const unsigned char *) it, strlen(it));
    apr_md5_final(digest, &md5);

    /* Five groups of 3 digest bytes each yield 4 output characters. */
    out = 0;
    for (in = 0; in < 15; in += 3) {
        bits = (digest[in] << 16) | (digest[in + 1] << 8) | digest[in + 2];
        encoded[out] = enc_table[bits >> 18];
        encoded[out + 1] = enc_table[(bits >> 12) & 0x3f];
        encoded[out + 2] = enc_table[(bits >> 6) & 0x3f];
        encoded[out + 3] = enc_table[bits & 0x3f];
        out += 4;
    }

    /* The 16th byte supplies the last two characters: 6 bits, then 2. */
    bits = digest[15];
    encoded[out++] = enc_table[bits >> 2];
    encoded[out++] = enc_table[(bits << 4) & 0x3f];

    /* Peel off the directory levels, each terminated by a slash. */
    out = 0;
    used = 0;
    level = 0;
    while (level < ndepth) {
        memcpy(val + out, encoded + used, nlength);
        used += nlength;
        out += nlength;
        val[out++] = '/';
        level++;
    }

    /* The unused remainder of the hash is the file name stem. */
    memcpy(val + out, encoded + used, 22 - used);
    val[out + 22 - used] = '\0';
}

/* cache_util.c extract end */

/* File handle that usage text and diagnostics are written to; main() opens
 * it on stderr before any message is produced. */
static apr_file_t *errfile;

/* Program name used as the prefix of messages. Starts out as the fixed
 * default below; main() replaces it with the file-name part of argv[0]. */
static const char *shortname = "htcacheadmin";

/*
 * usage info
 */
#define NL APR_EOL_STR
static void usage(void)
{
    apr_file_printf(errfile,
    "%s -- utility for administration of the disk cache."                    NL
    "Usage: %s [-gq] [-h HOSTNAME] -c ROOT -d DIRLEVEL -l DIRLENGTH URL"     NL
    "       %s [-emHRbD] [-E DATE] HEADERFILE"                               NL
                                                                             NL
    "Options:"                                                               NL
                                                                             NL
    "  Locate mode:"                                                         NL
                                                                             NL
    "  -c   Use ROOT as the root directory for the cache. Must be identical" NL
    "       to the CacheRoot setting for Apache."                            NL
                                                                             NL
    "  -d   Use DIRLEVEL as the number of subdirectories mod_disk_cache"     NL
    "       creates for cached files. Must be identical to the"              NL
    "       CacheDirLevels setting for Apache."                              NL 
  
                                                                             NL
    "  -l   Use DIRLENGTH as the length of the subdirectories"               NL
    "       mod_disk_cache creates for cached files. Must be identical to"   NL
    "       the CacheDirLength setting for Apache."                          NL
                                                                             NL
    "  -h   Use HOSTNAME as the value of the HTTP \"Host:\" header for the"  NL
    "       cached URL."                                                     NL
                                                                             NL
    "  -g   Use some educated guesses to locate alternative locations for"   NL
    "       the cached entity."                                              NL
                                                                             NL
    "  -q   Do not output translated headername(s). 0 will be returned if"   NL
    "       a cached entity is found, 1 otherwise."                          NL
                                                                             NL
    "  Operate mode:"                                                        NL
                                                                             NL
    "  -e   Return 0 if the entity described by HEADERFILE has not expired." NL
    "       Return 1 otherwise."                                             NL
                                                                             NL
    "  -E   Change the expires time for HEADERFILE to a time indicated by"   NL
    "       the string DATE."                                                NL
                                                                             NL
    "  -m   Output mod_disk_cache meta information for HEADERFILE."          NL
                                                                             NL
    "  -H   Output the cached HTTP response header information for"          NL
    "       HEADERFILE"                                                      NL
                                                                             NL
    "  -R   Output the cached HTTP request header information for"           NL
    "       HEADERFILE"                                                      NL
                                                                             NL
    "  -b   Output the cached body for HEADERFILE."                          NL
                                                                             NL
    "  -D   Delete the cached header and any data files for HEADERFILE."     NL,
    shortname,
    shortname,
    shortname
    );
    exit(1);
}
#undef NL

/*
 * main
 */
int main(int argc, const char *const argv[])
{
    apr_status_t status;
    apr_pool_t *pool, *instance;
    apr_getopt_t *o;
    int quiet, delete, showheaders, showbody, showrequest, checkexpires,
        showmetadata, dirlevels, dirlength, guess;
    char opt;
    const char *arg;
    char *proxyroot, *root, *host, *headerpath, *datapath;
    char prefix[66];
    apr_uri_t inspect_uri;
    apr_file_t *header, *data, *out;
    enum { LOCATE, OPERATE } mode = OPERATE;

    quiet = 0;
    delete = 0;
    showheaders = 0;
    showbody = 0;
    showrequest = 0;
    showmetadata = 0;
    dirlevels = 0;
    dirlength = 0;
    checkexpires = 0;
    guess = 0;
    proxyroot = NULL;
    host = NULL;

    if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
        return 1;
    }
    atexit(apr_terminate);

    if (argc) {
        shortname = apr_filepath_name_get(argv[0]);
    }

    if (apr_pool_create(&pool, NULL) != APR_SUCCESS)
    {
        return 1;
    }
    apr_file_open_stderr(&errfile, pool);
    apr_file_open_stderr(&out, pool);

    apr_getopt_init(&o, pool, argc, argv);

    while (1) {
        status = apr_getopt(o, "tghmeHRbDc:d:l:", &opt, &arg);
        if (status == APR_EOF) {
            break;
        }
        else if (status != APR_SUCCESS) {
            usage();
        }
        else {
            switch (opt) {
            case 'q':
                if (quiet) {
                    usage();
                }
                mode = LOCATE;
                quiet = 1;
                break;
                
             case 'g':
                if (guess) {
                    usage();
                }
                mode = LOCATE;
                guess = 1;
                break;
 
             case 'h':
                if (host) {
                    usage();
                }
                mode = LOCATE;
                host = apr_pstrdup(pool, arg);
                break;
                
            case 'c':
                if (proxyroot) {
                    usage();
                }
                mode = LOCATE;
                proxyroot = apr_pstrdup(pool, arg);
                if (apr_filepath_set(proxyroot, pool) != APR_SUCCESS)
                {
                    usage();
                }
                break;

            case 'd':
                if (dirlevels) {
                    usage();
                }
                mode = LOCATE;
                dirlevels = apr_atoi64(arg);
                break;

            case 'l':
                if (dirlength) {
                    usage();
                }
                mode = LOCATE;
                dirlength = apr_atoi64(arg);
                break;

            case 'e':
                if (checkexpires || mode == LOCATE) {
                    usage();
                }
                checkexpires = 1;
                break;

            case 'm':
                if (showmetadata || mode == LOCATE) {
                    usage();
                }
                showmetadata = 1;
                break;

            case 'H':
                if (showheaders || mode == LOCATE) {
                    usage();
                }
                showheaders = 1;
                break;

            case 'R':
                if (showrequest || mode == LOCATE) {
                    usage();
                }
                showrequest = 1;
                break;

            case 'b':
                if (showbody || mode == LOCATE) {
                    usage();
                }
                showbody = 1;
                break;

            case 'D':
                if (delete || mode == LOCATE) {
                    usage();
                }
                delete = 1;
                break;
            }                   /* switch */
        }                       /* else */
    }                           /* while */

    /* We should be given one more argument  */
    if (o->ind != (argc - 1)) {
        usage();
    }

    /* XXX: We need to do some more argument validation to make sure that
     *      arguments for both modes have not been given
     */

    if (mode == LOCATE) {
        char *keys[7];
        int  numkeys = 0;
        
        /* In this mode, the spare argument is a url */
        if (apr_uri_parse(pool, argv[o->ind], &inspect_uri) != APR_SUCCESS)
        {
            usage();
        }

        /* Right now, we only handle HTTP */
        if (inspect_uri.scheme && strcasecmp(inspect_uri.scheme, "http")) {
            usage();
        } 

        /* We need at least valid dirlength, dirlevel and root arguments */
        if (proxyroot == NULL || dirlevels <= 0 || dirlength <= 0) {
            usage();
        }

        if (apr_filepath_get(&root, 0, pool) != APR_SUCCESS)
        {
            usage();
        }

        /* Generate a key from the url given */
        if (!inspect_uri.path) {
            inspect_uri.path = "/";
        }
        
        if (guess) {
            /* Depending on whether the content is locally generated, proxied
             * , transparently proxied or requested with a port specified the
             * same url can be cached in different ways. If the user has asked
             * us to, we try some educated guesses;
             *
             *      /path/?
             *      hostname/path/?
             *      http://hostname/path/?
             *      hostnamehttp://hostname/path/?
             *      http://hostname:80/path/? 
             *      hostnamehttp://hostname:80/path/?
             *
             * If no hostname was supplied with the -h argument, extract it
             * from the parsed uri, and vice versa. If neither was supplied,
             * only the first key can be checked.
             */
            if(!host) {
                host = inspect_uri.hostname;
            }
            if (!inspect_uri.hostname) {
                inspect_uri.hostname = host;
            }
                
            keys[numkeys++] = apr_pstrcat(pool, inspect_uri.path, "?", 
                                          inspect_uri.query, NULL);
            
            if (host) {
                keys[numkeys++] = apr_pstrcat(pool, host, inspect_uri.path, 
                                              "?", inspect_uri.query, NULL);
                keys[numkeys++] = apr_pstrcat(pool, 
                 apr_uri_unparse(pool, &inspect_uri, APR_URI_UNP_OMITUSERINFO),
                                        inspect_uri.query ? NULL : "?", NULL );
                keys[numkeys++] = apr_pstrcat(pool, host,
                 apr_uri_unparse(pool, &inspect_uri, APR_URI_UNP_OMITUSERINFO),
                                        inspect_uri.query ? NULL : "?", NULL );

                /* If the user specified a port, then leave this alone */
                if (!inspect_uri.port_str) {
                    inspect_uri.port_str = ":80";
                    inspect_uri.port = 80;

                    keys[numkeys++] = apr_pstrcat(pool, 
                 apr_uri_unparse(pool, &inspect_uri, APR_URI_UNP_OMITUSERINFO),
                                        inspect_uri.query ? NULL : "?", NULL );
                    keys[numkeys++] = apr_pstrcat(pool, host,
                 apr_uri_unparse(pool, &inspect_uri, APR_URI_UNP_OMITUSERINFO),
                                        inspect_uri.query ? NULL : "?", NULL );
                }       
            }
            keys[numkeys] = NULL;
        }
        else {
            /* Set the key as exactly as the user supplied it to us,
             * adding only the "?" if neccessary 
             */
            if(host) {
                keys[0] = apr_pstrcat(pool, host, argv[o->ind], 
                                      inspect_uri.query ? NULL : "?", NULL );
            } 
            else {
                keys[0] = apr_pstrcat(pool, argv[o->ind],
                                      inspect_uri.query ? NULL : "?", NULL );
            }
            keys[1] = NULL;
        }

        /* Run through whatever keys we have been asked to check */ 
        for(numkeys = 0; keys[numkeys]; numkeys++) {
            /* Calculate the hash */
            cache_hash(keys[numkeys], prefix, dirlevels, dirlength);

            printf("key: %s\n", keys[numkeys]);
#if 0
            /* Open the header file */
            status = apr_file_open(&header, headerpath,
                                   APR_FOPEN_READ | APR_FOPEN_BINARY, 
                                   APR_OS_DEFAULT, pool);
            if (status != APR_SUCCESS) {
                if (status != APR_ENOENT) {
                    apr_file_printf(errfile, "%s: could not determine if %s "
                                    "exists. Possible perrmissions problem." 
                                    APR_EOL_STR, shortname, headerpath);
                }
            }


            /* Close the header file */
            apr_file_close(header);
#endif 
        }
        
        /* File is not cached */
        return 1;
    }

    /* We are in operate mode */

    headerpath = apr_pstrcat(pool, root, "/", prefix, ".header", NULL);
    datapath = apr_pstrcat(pool, root, "/", prefix, ".data", NULL);

    /* Output the filenames */
    if (!quiet) {
        apr_file_printf(out, "header: %s" APR_EOL_STR "data: %s" APR_EOL_STR,
                        headerpath, datapath);
    }

    /* Open the header file */
    status = apr_file_open(&header, headerpath,
                           APR_FOPEN_READ | APR_FOPEN_BINARY, APR_OS_DEFAULT,
                           pool);
    if (status != APR_SUCCESS) {
        if (status != APR_ENOENT) {
            apr_file_printf(errfile, "%s: could not determine if %s exists. "
                            "Possible perrmissions problem." APR_EOL_STR,
                            shortname, datapath);
        }
        /* File is not cached */
        return 1;
    }

    /* Open the data file */
    status = apr_file_open(&data, datapath,
                           APR_FOPEN_READ | APR_FOPEN_BINARY, APR_OS_DEFAULT,
                           pool);
    if (status != APR_SUCCESS) {
        apr_file_printf(errfile,
                        "%s: header exists, but data file does not."
                        APR_EOL_STR, shortname);
        if (status != APR_ENOENT) {
            apr_file_printf(errfile, "%s: could not determine if %s exists. "
                            "Possible perrmissions problem." APR_EOL_STR,
                            shortname, datapath);
        }
        /* File is not cached */
        return 1;
    }

    /* Should the header file be investigated? */
    if (showmetadata || showheaders || showrequest || checkexpires) {
        apr_uint32_t format;
        apr_size_t len = sizeof(format);
        apr_off_t offset = 0;
        disk_cache_info_t disk_info;
        char *entity;

        /* Check for the format of the header file */
        if (apr_file_read_full(header, &format, len, &len) != APR_SUCCESS) {
            apr_file_printf(errfile, "%s: error reading from %s" APR_EOL_STR,
                            shortname, headerpath);
            return 1;
        }

        /* Rewind to the start of the file */
        apr_file_seek(header, APR_SET, &offset);

        if (format == DISK_FORMAT_VERSION) {
            len = sizeof(disk_cache_info_t);

            /* Read in the meta information */
            if (apr_file_read_full(header, &disk_info, len, &len) !=
                APR_SUCCESS) {
                apr_file_printf(errfile,
                                "%s: error reading from %s" APR_EOL_STR,
                                shortname, headerpath);
                return 1;
            }
        }
        else if (format == VARY_FORMAT_VERSION) {
            apr_file_printf(errfile,
                            "%s: header file in unsupported vary format"
                            APR_EOL_STR, shortname);
            return 0;
        }
        else {
            apr_file_printf(errfile, "%s: header file in unrecognised format %d"
                            APR_EOL_STR, shortname, format);
            return 1;
        }

        /* Read in the entity name */
        len = disk_info.name_len + 1;
        if (!(entity = (char *)apr_pcalloc(pool, len))) {
             apr_file_printf(errfile,
                             "%s: Cannot assign memory" APR_EOL_STR,
                             shortname);
            return 1;
        }
        if (apr_file_read_full(header, entity, len - 1, &len) != APR_SUCCESS) {
                apr_file_printf(errfile,
                                "%s: error reading from %s" APR_EOL_STR,
                                shortname, headerpath);
                return 1;
        }

        /* At this point, we now have the meta-information */
        if (showmetadata) {
            char sdate[APR_RFC822_DATE_LEN], sexpire[APR_RFC822_DATE_LEN],
                 sreq[APR_RFC822_DATE_LEN], sresp[APR_RFC822_DATE_LEN];
            
            apr_rfc822_date(sdate, disk_info.date);
            apr_rfc822_date(sexpire, disk_info.expire);
            apr_rfc822_date(sreq, disk_info.request_time);
            apr_rfc822_date(sresp, disk_info.response_time);
            
            apr_file_printf(out, "Entity Name:      %s" APR_EOL_STR, entity);
            apr_file_printf(out, "HTTP Status code: %d" APR_EOL_STR, 
                            disk_info.status);
            apr_file_printf(out, "Entity version:   %lu" APR_EOL_STR, 
                            disk_info.entity_version);
            apr_file_printf(out, "Date:             %s" APR_EOL_STR, sdate);
            apr_file_printf(out, "Expire:           %s" APR_EOL_STR, sexpire);
            apr_file_printf(out, "Request time:     %s" APR_EOL_STR, sreq);
            apr_file_printf(out, "Response time:    %s" APR_EOL_STR, sresp);
        }

        /* Output the cached headers */
        if (showheaders) {
            apr_table_t * req_hdrs = apr_table_make(pool, 20);
                                        
                
        }

        /* Output the cached request headers */
        if (showrequest) {
            apr_table_t * resp_hdrs = apr_table_make(pool, 20);

        }       
    }

    /* If the user wants to see the body, write it out */
    if (showbody) {
        char buffer[4096];
        apr_size_t len = sizeof(buffer);
        
        /* Output the body */
        while ((status = apr_file_read(data, buffer, &len)) == APR_SUCCESS) {
                apr_file_write(out, buffer, &len);
        }
        if (status != APR_EOF) {
            apr_file_printf(errfile, "%s: error reading all of %s" APR_EOL_STR, 
                            shortname, datapath);
            return 1;
        }
    }

    /* Close the files */
    apr_file_close(header);
    apr_file_close(data);

    /* Try and delete the files */
    if (delete) {
        if (apr_file_remove(headerpath, pool) != APR_SUCCESS) {
            apr_file_printf(errfile, "%s: error deleting %s", shortname, 
                            headerpath);
            return 0;
        }
        else if (apr_file_remove(datapath, pool) != APR_SUCCESS) {
            apr_file_printf(errfile, "%s: error deleting %s", shortname, 
                            datapath);
        }
    }

    return 0;
}

Reply via email to