Here is what I ended up with. diff --git a/modules/filters/mod_deflate.c b/modules/filters/mod_deflate.c index 605c158..fd3662a 100644 --- a/modules/filters/mod_deflate.c +++ b/modules/filters/mod_deflate.c @@ -450,6 +450,12 @@ static apr_status_t deflate_out_filter(ap_filter_t *f, return APR_SUCCESS; }
+ if (!strncasecmp(f->r->content_type, "application/x-gzip", 18)) { + ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, "not going to compress application/x-gzip content"); + ap_remove_output_filter(f); + return ap_pass_brigade(f->next, bb); + } + c = ap_get_module_config(r->server->module_config, &deflate_module); @@ -1162,7 +1168,6 @@ static apr_status_t deflate_in_filter(ap_filter_t *f, return APR_SUCCESS; } - /* Filter to inflate for a content-transforming proxy. */ static apr_status_t inflate_out_filter(ap_filter_t *f, apr_bucket_brigade *bb) @@ -1181,6 +1186,12 @@ static apr_status_t inflate_out_filter(ap_filter_t *f, return APR_SUCCESS; } + if (!strncasecmp(f->r->content_type, "application/x-gzip", 18)) { + ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, "not going to decompress application/x-gzip content"); + ap_remove_output_filter(f); + return ap_pass_brigade(f->next, bb); + } + c = ap_get_module_config(r->server->module_config, &deflate_module); if (!ctx) { diff --git a/modules/filters/mod_proxy_html.c b/modules/filters/mod_proxy_html.c index b964fec..61834ff 100644 --- a/modules/filters/mod_proxy_html.c +++ b/modules/filters/mod_proxy_html.c @@ -107,6 +107,8 @@ typedef struct { int strip_comments; int interp; int enabled; + int inflate; + int deflate; } proxy_html_conf; typedef struct { ap_filter_t *f; @@ -1322,6 +1324,8 @@ static void *proxy_html_merge(apr_pool_t *pool, void *BASE, void *ADD) conf->interp = add->interp; conf->strip_comments = add->strip_comments; conf->enabled = add->enabled; + conf->inflate = add->inflate; + conf->deflate = add->deflate; } else { conf->flags = base->flags | add->flags; @@ -1330,6 +1334,8 @@ static void *proxy_html_merge(apr_pool_t *pool, void *BASE, void *ADD) conf->interp = base->interp | add->interp; conf->strip_comments = base->strip_comments | add->strip_comments; conf->enabled = add->enabled | base->enabled; + conf->inflate = add->inflate | base->inflate; + conf->deflate = add->deflate | base->deflate; } return conf; 
} @@ -1537,6 +1543,14 @@ static const command_rec proxy_html_cmds[] = { (void*)APR_OFFSETOF(proxy_html_conf, enabled), RSRC_CONF|ACCESS_CONF, "Enable proxy-html and xml2enc filters"), + AP_INIT_FLAG("ProxyHTMLInflate", ap_set_flag_slot, + (void*)APR_OFFSETOF(proxy_html_conf, inflate), + RSRC_CONF|ACCESS_CONF, + "Will inflate compressed content before rewriting"), + AP_INIT_FLAG("ProxyHTMLDeflate", ap_set_flag_slot, + (void*)APR_OFFSETOF(proxy_html_conf, deflate), + RSRC_CONF|ACCESS_CONF, + "Will deflate content after rewriting"), { NULL } }; static int mod_proxy_html(apr_pool_t *p, apr_pool_t *p1, apr_pool_t *p2) @@ -1569,10 +1583,16 @@ static void proxy_html_insert(request_rec *r) proxy_html_conf *cfg; cfg = ap_get_module_config(r->per_dir_config, &proxy_html_module); if (cfg->enabled) { + if (cfg->inflate) { + ap_add_output_filter("inflate", NULL, r, r->connection); + } if (xml2enc_filter) xml2enc_filter(r, NULL, ENCIO_INPUT_CHECKS); ap_add_output_filter("proxy-html", NULL, r, r->connection); ap_add_output_filter("proxy-css", NULL, r, r->connection); + if (cfg->deflate) { + ap_add_output_filter("deflate", NULL, r, r->connection); + } } } static void proxy_html_hooks(apr_pool_t *p) The diffs are obviously not against trunk/2.4.x since they are just meant to show what I have in mind. I'm still worried about the mod_xml2enc though. Seeing how it inserts itself into the output filter chain, above mod_proxy_html patch might actually result in xml2enc attaching itself *behind* deflate - which is bad. I haven't figured out how to work around this yet. Any suggestions on how to do this ? In general, is this a sensible way to approach the proxy-html/compression issue in your opinion ? On Tue, Jan 14, 2014 at 2:08 PM, Thomas Eckert <thomas.r.w.eck...@gmail.com>wrote: > > IIRC the OP wants to decompress such contents and run them > > through mod_proxy_html. 
I don't think that works with any sane > > setup: running non-HTML content-types through proxy_html > > will always be an at-your-own-risk hack. > > What I want is a (preferably as simple as possible) method of configuring > mod_proxy_html in such a way that it will attempt to rewrite html(/css/js) > content even if the content was delivered in a compressed format by the > backend server. In my opinion the part about compression should actually be > done transparently (to the user) by mod_proxy_html/mod_deflate. > > The reason I brought the .gz files up as an example is because they were > handled slightly incorrectly (unnecessary overhead + unpleasant side effect > on client side). > > > > > Gzip compressed content sometimes gets served with no declared encoding > and a media type of, e.g., “application/x-gzip”. I reckon that's more > common than serving it as > > application/octet-stream or with no Content-Type: declared. > > > mod_deflate could use this information to avoid compressing the > response, and without sniffing the content. > > Exactly what I'm aiming for. I think that's the way to go here, see '1)' > in my previous reply. In this case we should also make mod_xml2enc bail out > with a corresponding log message when it gets to see compressed content, e.g. > either via env variable set by inflate filter or read Content-Type header, > so all of the involved modules act consistently and their log output will > not be misunderstood as errors. > > > > > This more limited approach is already available through configuration, > so maybe the way to handle this is via a change to documentation / default > configuration, rather than code. > > In order to make mod_proxy_html work with possibly compressed contents you > cannot simply do a > ProxyHTMLEnable On > and what I have been using since the last discussion which I mentioned > before is > SetOutputFilter inflate;xml2enc;proxy-html;deflate > with no other explicit configuration of mod_deflate. 
I'm aware of > > AddOutputFilterByType DEFLATE text/html text/plain text/xml > AddOutputFilterByType DEFLATE text/css > AddOutputFilterByType DEFLATE application/x-javascript > application/javascript application/ecmascript > AddOutputFilterByType DEFLATE application/rss+xml > but this is not compatible with the above output filter chain (see my > previous reply). > > Maybe one is able to disable output compression on already-compressed > content with a smart <If> like block but do we really want this as default > configuration ? Is there ever a case where someone does *NOT* want > mod_proxy_html and friends to handle compression transparently ? > > > > On Sun, Jan 5, 2014 at 2:57 PM, Tim Bannister <is...@jellybaby.net> wrote: > >> On 5 Jan 2014, at 02:21, Nick Kew wrote: >> >> > IIRC the OP wants to decompress such contents and run them through >> mod_proxy_html. I don't think that works with any sane setup: running >> non-HTML content-types through proxy_html will always be an >> at-your-own-risk hack. >> >> I've believed for a while that the right way to address this is for httpd >> to support gzip Transfer-Encoding which is always hop-by-hop and applies to >> the transfer rather than the entity being transferred. For this scenario, >> it could look like this: >> >> [Client] ⇦ gzip content-encoding ⇦ [transforming reverse proxy] ⇦ >> gzip,chunked transfer-encodings ⇦ [origin server] >> >> (I'm assuming that the client doesn't negotiate gzip transfer encoding) >> >> >> Of course, this still won't help with a badly-configured origin server. >> >> -- >> Tim Bannister – is...@jellybaby.net >> >> >