On Mon, Aug 08, 2005 at 10:33:47AM +0200, [EMAIL PROTECTED] wrote:
> >>Is a traversal really needed? What about going back the full path of the
> >>header / data file to the cache root and removing each component on the
> >>way by calling apr_dir_remove on each component until it fails?
> >
> > I'm not sure if apr_dir_remove guarantees failure when operated on
> > non-empty directories. If it does then that's an easy enough way.
>
> I should be at least on Unix because it simply calls rmdir and this returns
> ENOTEMPTY on the removal of non empty directories. This results in
> apr_dir_remove returning APR_ENOTEMPTY.
I'm just being paranoid, but I'm not certain of the behaviour of the
win32, beos or other future implementations, and the APR itself makes no
claims about apr_dir_remove's behaviour - it just says "Remove directory
from the file system" which is pretty ambiguous. I've now checked the
win32 and beos implementations, and they seem safe, so I've submitted a
silly doxygen comment patch to [EMAIL PROTECTED] to try and get rid of the
problem
for good.
> > In cache_remove_url, you have code which tries to determine if
> > the cache->handle or cache->stale_handle should be removed, but
> > shouldn't it always be the stale_handle? You only add the
> > remove_url filter if cache_select_url() != OK, which means
> > cache->handle will always be NULL.
>
> You are right. In the current usage of cache_remove_url this
> check does not make sense, but I wanted to keep the scope of
> this function somewhat broder to allow to call this function
> in the case the caller wants to remove the entity behind
> cache->handle.
x. At the very least, I'd definitely make stale_handle the default if
both pointers are non-NULL :)
O.k., I've merged our two patches, but I've changed a few things, tell
me if there's anothing you think is wrong;
1. Did some minor whitespace/style cleanups, nothing
big. also changed the instances of CACHE_REMOVAL_URL
in comments to CACHE_REMOVE_URL, and a few small things like
that cought. Also made the log messages consistent
with the rest of mod_cache, and added some to aid
testing.
2. modified the mod_disk_cache code to delete the data only
if the header could be deleted first.
3. renamed the cache_remove_url_filter pointer in the cache
struct to remove_url_filter, cache->cache_$something seemed
redundant and it makes the formatting neater :-)
4. Fixed a bug in the mod_disk_cache code. It was using
dobj->datafile in the dobj->hdrsfile section for log
output.
5. Swapped the order of the ternary check in cache_storage,
so that we'll remove the stale_handle if both the stale_hand
and handle are non-NULL.
The patch is attached, and I've been testing it for the last few hours
without problem. The code is now running on ftp.heanet.ie. (along
with htcacheclean -t).
--
Colm MacCárthaigh Public Key: [EMAIL PROTECTED]
Index: modules/cache/mod_mem_cache.c
===================================================================
--- modules/cache/mod_mem_cache.c (revision 230717)
+++ modules/cache/mod_mem_cache.c (working copy)
@@ -601,7 +601,7 @@
/* remove_url()
* Notes:
*/
-static int remove_url(const char *key)
+static int remove_url(cache_handle_t *h, apr_pool_t *p)
{
cache_object_t *obj;
int cleanup = 0;
@@ -609,8 +609,8 @@
if (sconf->lock) {
apr_thread_mutex_lock(sconf->lock);
}
-
- obj = cache_find(sconf->cache_cache, key);
+
+ obj = h->cache_obj;
if (obj) {
cache_remove(sconf->cache_cache, obj);
/* For performance, cleanup cache object after releasing the lock */
Index: modules/cache/mod_cache.c
===================================================================
--- modules/cache/mod_cache.c (revision 230717)
+++ modules/cache/mod_cache.c (working copy)
@@ -29,6 +29,7 @@
*/
static ap_filter_rec_t *cache_save_filter_handle;
static ap_filter_rec_t *cache_out_filter_handle;
+static ap_filter_rec_t *cache_remove_url_filter_handle;
/*
* CACHE handler
@@ -123,6 +124,19 @@
/* add cache_save filter to cache this request */
ap_add_output_filter_handle(cache_save_filter_handle, NULL, r,
r->connection);
+
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
+ "Adding CACHE_REMOVE_URL filter.");
+
+ /* Add cache_remove_url filter to this request to remove a
+ * stale cache entry if needed. Also put the current cache
+ * request rec in the filter context, as the request that
+ * is available later during running the filter maybe
+ * different due to an internal redirect.
+ */
+ cache->remove_url_filter =
+
ap_add_output_filter_handle(cache_remove_url_filter_handle,
+ cache, r, r->connection);
}
else if (cache->stale_headers) {
ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
@@ -441,11 +455,6 @@
if (reason) {
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
"cache: %s not cached. Reason: %s", url, reason);
- /* remove this object from the cache
- * BillS Asks.. Why do we need to make this call to remove_url?
- * leave it in for now..
- */
- cache_remove_url(r, url);
/* remove this filter from the chain */
ap_remove_output_filter(f);
@@ -546,6 +555,13 @@
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
"cache: Caching url: %s", url);
+ /* We are actually caching this response. So it does not
+ * make sense to remove this entity any more.
+ */
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
+ "cache: Removing CACHE_REMOVE_URL filter.");
+ ap_remove_output_filter(cache->remove_url_filter);
+
/*
* We now want to update the cache file header information with
* the new date, last modified, expire and content length and write
@@ -666,7 +682,13 @@
ap_cache_accept_headers(cache->handle, r, 1);
}
- /* Write away header information to cache. */
+ /* Write away header information to cache. It is possible that we are
+ * trying to update headers for an entity which has already been cached.
+ *
+ * This may fail, due to an unwritable cache area. E.g. filesystem full,
+ * permissions problems or a read-only (re)mount. This must be handled
+ * later.
+ */
rv = cache->provider->store_headers(cache->handle, r, info);
/* Did we just update the cached headers on a revalidated response?
@@ -675,7 +697,7 @@
* the same way as with a regular response, but conditions are now checked
* against the cached or merged response headers.
*/
- if (rv == APR_SUCCESS && cache->stale_handle) {
+ if (cache->stale_handle) {
apr_bucket_brigade *bb;
apr_bucket *bkt;
int status;
@@ -699,12 +721,39 @@
}
cache->block_response = 1;
+
+ /* Before returning we need to handle the possible case of an
+ * unwritable cache. Rather than leaving the entity in the cache
+ * and having it constantly re-validated, now that we have recalled
+ * the body it is safe to try and remove the url from the cache.
+ */
+ if (rv != APR_SUCCESS) {
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
+ "cache: updating headers with store_headers failed. "
+ "Removing cached url.");
+
+ rv = cache->provider->remove_url(cache->stale_handle, r->pool);
+ if (rv != OK) {
+ /* Probably a mod_disk_cache cache area has been (re)mounted
+ * read-only, or that there is a permissions problem.
+ */
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
+ "cache: attempt to remove url from cache unsuccessful.");
+ }
+ }
+
return ap_pass_brigade(f->next, bb);
}
+
+ if(rv != APR_SUCCESS) {
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
+ "cache: store_headers failed");
+ ap_remove_output_filter(f);
- if (rv == APR_SUCCESS) {
- rv = cache->provider->store_body(cache->handle, r, in);
+ return ap_pass_brigade(f->next, in);
}
+
+ rv = cache->provider->store_body(cache->handle, r, in);
if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
"cache: store_body failed");
@@ -714,6 +763,53 @@
return ap_pass_brigade(f->next, in);
}
+/*
+ * CACHE_REMOVE_URL filter
+ * ---------------
+ *
+ * This filter gets added in the quick handler every time the CACHE_SAVE filter
+ * gets inserted. Its purpose is to remove a confirmed stale cache entry from
+ * the cache.
+ *
+ * CACHE_REMOVE_URL has to be a protocol filter to ensure that is run even if
+ * the response is a canned error message, which removes the content filters
+ * and thus the CACHE_SAVE filter from the chain.
+ *
+ * CACHE_REMOVE_URL expects cache request rec within its context because the
+ * request this filter runs on can be different from the one whose cache entry
+ * should be removed, due to internal redirects.
+ */
+
+static int cache_remove_url_filter(ap_filter_t *f, apr_bucket_brigade *in)
+{
+ request_rec *r = f->r;
+ cache_request_rec *cache;
+
+ /* Setup cache_request_rec */
+ cache = (cache_request_rec *) f->ctx;
+
+ if (!cache) {
+ /* user likely configured CACHE_REMOVE_URL manually; they should
really
+ * use mod_cache configuration to do that. So:
+ * 1. Remove ourselves
+ * 2. Do nothing and bail out
+ */
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
+ "cache: CACHE_REMOVE_URL enabled unexpectedly");
+ ap_remove_output_filter(f);
+ return ap_pass_brigade(f->next, in);
+ }
+ /*
+ * Now remove this cache entry from the cache
+ */
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
+ "cache: Removing url %s from the cache", f->r->unparsed_uri);
+ cache_remove_url(cache, r->pool);
+ /* remove ourselves */
+ ap_remove_output_filter(f);
+ return ap_pass_brigade(f->next, in);
+}
+
/* -------------------------------------------------------------- */
/* Setup configurable data */
@@ -967,6 +1063,7 @@
return OK;
}
+
static const command_rec cache_cmds[] =
{
/* XXX
@@ -1033,6 +1130,15 @@
cache_out_filter,
NULL,
AP_FTYPE_CONTENT_SET+1);
+ /* CACHE_REMOVE_URL has to be a protocol filter to ensure that is
+ * run even if the response is a canned error message, which
+ * removes the content filters.
+ */
+ cache_remove_url_filter_handle =
+ ap_register_output_filter("CACHE_REMOVE_URL",
+ cache_remove_url_filter,
+ NULL,
+ AP_FTYPE_PROTOCOL);
ap_hook_post_config(cache_post_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
}
Index: modules/cache/mod_cache.h
===================================================================
--- modules/cache/mod_cache.h (revision 230717)
+++ modules/cache/mod_cache.h (working copy)
@@ -197,7 +197,7 @@
const char *urlkey, apr_off_t len);
int (*open_entity) (cache_handle_t *h, request_rec *r,
const char *urlkey);
- int (*remove_url) (const char *urlkey);
+ int (*remove_url) (cache_handle_t *h, apr_pool_t *p);
} cache_provider;
/* A linked-list of authn providers. */
@@ -225,6 +225,7 @@
apr_time_t exp; /* expiration */
apr_time_t lastmod; /* last-modified time */
cache_info *info; /* current cache info */
+ ap_filter_t *remove_url_filter; /* Enable us to remove the filter */
} cache_request_rec;
@@ -271,7 +272,7 @@
/**
* cache_storage.c
*/
-int cache_remove_url(request_rec *r, char *url);
+int cache_remove_url(cache_request_rec *cache, apr_pool_t *p);
int cache_create_entity(request_rec *r, char *url, apr_off_t size);
int cache_select_url(request_rec *r, char *url);
apr_status_t cache_generate_key_default( request_rec *r, apr_pool_t*p,
char**key );
Index: modules/cache/mod_disk_cache.c
===================================================================
--- modules/cache/mod_disk_cache.c (revision 230717)
+++ modules/cache/mod_disk_cache.c (working copy)
@@ -514,9 +514,51 @@
return OK;
}
-static int remove_url(const char *key)
+static int remove_url(cache_handle_t *h, apr_pool_t *p)
{
- /* XXX: Delete file from cache! */
+ apr_status_t rc;
+ disk_cache_object_t *dobj;
+
+ /* Get disk cache object from cache handle */
+ dobj = (disk_cache_object_t *) h->cache_obj->vobj;
+ if (!dobj) {
+ return DECLINED;
+ }
+
+ /* Delete headers file */
+ if (dobj->hdrsfile) {
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
+ "disk_cache: Deleting %s from cache.", dobj->hdrsfile);
+
+ rc = apr_file_remove(dobj->hdrsfile, p);
+ if ((rc != APR_SUCCESS) && (rc != APR_ENOENT)) {
+ /* Will only result in an output if httpd is started with -e debug.
+ * For reason see log_error_core for the case s == NULL.
+ */
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, NULL,
+ "disk_cache: Failed to delete headers file %s from cache.",
+ dobj->hdrsfile);
+ return DECLINED;
+ }
+ }
+
+ /* Delete data file */
+ if (dobj->datafile) {
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
+ "disk_cache: Deleting %s from cache.", dobj->datafile);
+
+ rc = apr_file_remove(dobj->datafile, p);
+ if ((rc != APR_SUCCESS) && (rc != APR_ENOENT)) {
+ /* Will only result in an output if httpd is started with -e debug.
+ * For reason see log_error_core for the case s == NULL.
+ */
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, NULL,
+ "disk_cache: Failed to delete data file %s from cache.",
+ dobj->datafile);
+ return DECLINED;
+ }
+ }
+
return OK;
}
Index: modules/cache/cache_storage.c
===================================================================
--- modules/cache/cache_storage.c (revision 230717)
+++ modules/cache/cache_storage.c (working copy)
@@ -28,24 +28,28 @@
* delete all URL entities from the cache
*
*/
-int cache_remove_url(request_rec *r, char *url)
+int cache_remove_url(cache_request_rec *cache, apr_pool_t *p)
{
cache_provider_list *list;
apr_status_t rv;
+ cache_handle_t *h;
+
char *key;
- cache_request_rec *cache = (cache_request_rec *)
- ap_get_module_config(r->request_config,
&cache_module);
- rv = cache_generate_key(r,r->pool,&key);
- if (rv != APR_SUCCESS) {
- return rv;
+ list = cache->providers;
+
+ /* Remove the stale cache entry if present. If not, we're
+ * being called from outside of a request; remove the
+ * non-stalle handle.
+ */
+ h = cache->stale_handle ? cache->stale_handle : cache->handle;
+ if (!h) {
+ return OK;
}
- list = cache->providers;
-
/* for each specified cache type, delete the URL */
- while(list) {
- list->provider->remove_url(key);
+ while(list) {
+ list->provider->remove_url(h, p);
list = list->next;
}
return OK;