[EMAIL PROTECTED] wrote: > Author: minfrin > Date: Fri Oct 27 06:28:56 2006 > New Revision: 468373 > > URL: http://svn.apache.org/viewvc?view=rev&rev=468373 > Log: > mod_cache: Pass the output filter stack through the store_body() > hook, giving each cache backend the ability to make a better > decision as to how it will allocate the tasks of writing to the > cache and writing to the network. Previously the write to the > cache task needed to be complete before the same brigade was > written to the network, and this caused timing and memory issues > on large cached files. This fix replaces the previous fix for > PR39380. > > Modified: > httpd/httpd/trunk/CHANGES > httpd/httpd/trunk/modules/cache/mod_cache.c > httpd/httpd/trunk/modules/cache/mod_cache.h > httpd/httpd/trunk/modules/cache/mod_disk_cache.c > httpd/httpd/trunk/modules/cache/mod_disk_cache.h > httpd/httpd/trunk/modules/cache/mod_mem_cache.c >
.. > @@ -1286,9 +1292,15 @@ > static apr_status_t open_new_file(request_rec *r, const char *filename, > apr_file_t **fd, disk_cache_conf *conf) > { > - int flags = APR_CREATE | APR_WRITE | APR_BINARY | APR_BUFFERED | > APR_EXCL; > + int flags; > apr_status_t rv; > > + flags = APR_CREATE | APR_WRITE | APR_READ | APR_BINARY | APR_BUFFERED | > APR_EXCL | APR_TRUNCATE; Nitpick: APR_TRUNCATE here is useless. > +#if APR_HAS_SENDFILE > + flags |= ((pdconf->enable_sendfile == ENABLE_SENDFILE_OFF) > + ? 0 : APR_SENDFILE_ENABLED); > +#endif > + Where is pdconf ? Check out all those APR_HAS_SENDFILE. > while(1) { > rv = apr_file_open(fd, filename, flags, > APR_FPROT_UREAD | APR_FPROT_UWRITE, r->pool); > @@ -1611,150 +1623,50 @@ > return APR_SUCCESS; > } > Looking at open_new_file, it is somewhat unreliable: if(finfo.mtime < (apr_time_now() - conf->updtimeout) ) { We use APR_BUFFERED and under various circumstances files may get modified without having the mtime updated. .. > +/** > + * Store the body of the response in the disk cache. > + * > + * As the data is written to the cache, it is also written to > + * the filter provided. On network write failure, the full body > + * will still be cached. > + */ > +static apr_status_t store_body(cache_handle_t *h, ap_filter_t *f, > apr_bucket_brigade *bb) > { > - apr_bucket *e; > + apr_bucket *e, *b; > + request_rec *r = f->r; > apr_status_t rv; > - int copy_file = FALSE; > disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj; > disk_cache_conf *conf = ap_get_module_config(r->server->module_config, > &disk_cache_module); > > dobj->store_body_called++; > - > + White space trash. 
> if(r->no_cache) { > ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, > "disk_cache: store_body called for URL %s even though" > "no_cache is set", dobj->name); > file_cache_errorcleanup(dobj, r); > - return APR_EGENERAL; > + ap_remove_output_filter(f); > + return ap_pass_brigade(f->next, bb); > } > > if(dobj->initial_size == 0) { > /* Don't waste a body cachefile on a 0 length body */ > - return APR_SUCCESS; > + return ap_pass_brigade(f->next, bb); > } > > if(!dobj->skipstore && dobj->fd == NULL) { > rv = open_new_file(r, dobj->datafile, &(dobj->fd), conf); > - if(rv == CACHE_EEXIST) { > + if (rv == CACHE_EEXIST) { > /* Someone else beat us to storing this */ > dobj->skipstore = TRUE; > } > - else if(rv != APR_SUCCESS) { > - return rv; > + else if (rv != APR_SUCCESS) { > + ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, > + "disk_cache: store_body tried to open cached file " > + "for URL %s and this failed", dobj->name); > + ap_remove_output_filter(f); > + return ap_pass_brigade(f->next, bb); > } > else { > dobj->file_size = 0; > @@ -1762,194 +1674,227 @@ > } > > if(dobj->skipstore) { > - /* Someone else beat us to storing this object */ > - if(dobj->store_body_called == 1 && > - dobj->initial_size > 0 && > - APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb)) ) > - { > - /* Yay, we can replace the body with the cached instance */ > - return replace_brigade_with_cache(h, r, bb); > - } > - > - return APR_SUCCESS; > + /* Someone else beat us to storing this object. > + * We are too late to take advantage of this storage :( */ > + ap_remove_output_filter(f); > + return ap_pass_brigade(f->next, bb); > } Those "if(" and "if (" are getting on my nerves! :) .. 
> + /* start caching the brigade */ > + ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, > + "disk_cache: Caching body for URL %s", dobj->name); > > - } > - else { > - ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, > - "disk_cache: Caching body for URL %s", dobj->name); > + e = APR_BRIGADE_FIRST(bb); > + while (e != APR_BRIGADE_SENTINEL(bb)) { > > - for (e = APR_BRIGADE_FIRST(bb); > - e != APR_BRIGADE_SENTINEL(bb); > - e = APR_BUCKET_NEXT(e)) > - { > - const char *str; > - apr_size_t length, written; > + const char *str; > + apr_size_t length, written; > + apr_off_t offset = 0; > > - /* Ignore the non-data-buckets */ > - if(APR_BUCKET_IS_METADATA(e)) { > - continue; > - } > + /* try write all data buckets to the cache, except for metadata > buckets */ > + if(!APR_BUCKET_IS_METADATA(e)) { Swapping the metadata check would make the code much more readable: if (is_metadata) deal with it continue handle normal bucket > + /* read in a bucket fragment */ > rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ); > if (rv != APR_SUCCESS) { > ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, > - "disk_cache: Error when reading bucket for URL > %s", > + "disk_cache: Error when reading bucket for URL > %s, aborting request", > dobj->name); > file_cache_errorcleanup(dobj, r); > + /* not being able to read the bucket is fatal, > + * return this up the filter stack > + */ > return rv; > } > + > + /* try write the bucket fragment to the cache */ > + apr_file_seek(dobj->fd, APR_END, &offset); > rv = apr_file_write_full(dobj->fd, str, length, &written); > - if (rv != APR_SUCCESS) { > + offset = - (apr_off_t)written; > + apr_file_seek(dobj->fd, APR_END, &offset); > + /* if the cache write was successful, swap the original bucket > + * with a file bucket pointing to the same data in the cache. > + * > + * This is done because: > + * > + * - The ap_core_output_filter can take advantage of its ability > + * to do non blocking writes on file buckets. 
> + * > + * - We are prevented from the need to read the original bucket > + * a second time inside ap_core_output_filter, which could be > + * expensive or memory consuming. > + * > + * - The cache, in theory, should be faster than the backend, > + * otherwise there would be little point in caching in the first > + * place. > + */ > + if (APR_SUCCESS == rv) { > + > + /* remove and destroy the original bucket from the brigade */ > + b = e; > + e = APR_BUCKET_NEXT(e); > + APR_BUCKET_REMOVE(b); > + apr_bucket_destroy(b); aka apr_bucket_delete > + /* Is our network connection still alive? > + * If not, we must continue caching the file, so keep > looping. > + * We will return the error at the end when caching is done. > + */ > + if (APR_SUCCESS == dobj->frv) { > + > + /* insert a file bucket pointing to the cache into out > temporary brigade */ > + if (diskcache_brigade_insert(dobj->tmpbb, dobj->fd, > dobj->file_size, > + written, > + dobj->updtimeout, r->pool) > == NULL) { > + return APR_ENOMEM; > + } Err. We had the data in memory, we are going to read it back from disk again just in order to not block? That's nonsense. We don't need a special bucket type! Also, are you really sure you are tracking correctly all those buckets with the correct offset? The file pointer offset is unique for a single fd. I will stop the review here, I have to calm down to really digest what's happening. -- Davi Arnaut