On Tue, 24 Aug 2004, Nick Kew wrote:

> I actually have an implementation based on the discussion document and
> addressing the concerns people raised in the thread.  I hope to find
> time to finish the accompanying documentation and post it here round
> about this coming weekend.

OK, since you seem to have a real-life use for it, here goes.  As I
said before, I wasn't planning to post without a little more testing
and accompanying documents and discussion, but what the ****?
I'm sure I'll regret this premature posting ....


# 1. Declare a smart filter that dispatches on Content-Type
FilterDeclare   myfilter        Content-Type

# 2. Declare your filter as a Provider, to run whenever Content-Type
#    includes the string "text/html"
FilterProvider  myfilter        STRIP   $text/html

# 3. Set the smart filter chain to this filter where you want to apply it
<Location scope-of-your-proxy>
        FilterChain     =myfilter
</Location>

Nick Kew
/*      Copyright (C) 2004 Nick Kew

        This is experimental code.  It may be copied and used only for
        evaluation and testing purposes.

        The copyright holder offers to the Apache Software Foundation
        permission to re-license this code under the ASF license.
        This offer applies if and when the ASF accepts this code or
        any derived work for inclusion in a future release of HTTPD.

        Regardless of the above, the author undertakes to release the
        work under a recognised open-source license in due course.
        Information will be available at http://apache.webthing.com/
        and/or http://dev.apache.org/~niq/
*/
#include <ctype.h>
#include <string.h>

/* apache */
#include <httpd.h>
#include <http_config.h>
#include <http_log.h>
#include <apr_strings.h>
#include <util_filter.h>
#include <apr_hash.h>

/* Forward declaration: the handlers below pass &filter_module to
   ap_get_module_config() before the module record itself is defined
   at the end of the file. */
module AP_MODULE_DECLARE_DATA filter_module ;

/* Protocol-handling flags.  They are OR-ed together in the proto_flags
   fields of a smart filter and of its providers, and are set from the
   arguments of the FilterProtocol directive.  Each flag is one bit. */
#define PROTO_CHANGE 0x1          /* "change=yes" / "change=1:1" */
#define PROTO_CHANGE_LENGTH 0x2   /* "change=yes": length may change too */
#define PROTO_NO_BYTERANGE 0x4    /* "byteranges=no" */
#define PROTO_NO_PROXY 0x8        /* "proxy=no" */
#define PROTO_NO_CACHE 0x10       /* "cache=no" */
#define PROTO_TRANSFORM 0x20      /* "proxy=transform" */

/* Signature of a filter function as dispatched by the harness: it takes
   the filter and a bucket brigade and returns an APR status. */
typedef apr_status_t (*filter_func_t)(ap_filter_t*, apr_bucket_brigade*) ;

/* Context attached to each inserted harness filter (allocated per request
   in filter_insert). */
typedef struct {
  const char *name;     /* smart filter name; key into cfg->live_filters */
  filter_func_t func;   /* provider function picked by filter_lookup(),
                           NULL until the first brigade arrives */
  void *fctx;           /* the provider's own f->ctx, swapped in around
                           each call to func */
} harness_ctx;

typedef struct mod_filter_provider {
  enum {
  } match_type ;
  union {
    const char* c ;
    regex_t* r ;
    int i ;
  } match ;
  ap_filter_rec_t* frec ;
  struct mod_filter_provider* next ;
  unsigned int proto_flags ;
} mod_filter_provider ;

typedef struct {
  ap_filter_rec_t frec ;
  enum {
  } dispatch ;
  const char* value ;
  mod_filter_provider* providers ;
  unsigned int proto_flags ;
  const char* range ;
} mod_filter_rec ;

/* One link of the singly-linked list of smart filter names built up by
   the FilterChain directive. */
typedef struct mod_filter_chain mod_filter_chain;
struct mod_filter_chain {
  const char *fname;            /* name of the smart filter */
  mod_filter_chain *next;       /* following link, NULL at the tail */
};

/* Per-directory configuration of the module. */
typedef struct {
  apr_hash_t *live_filters;     /* filter name -> mod_filter_rec* */
  mod_filter_chain *chain;      /* filters to insert, in order */
} mod_filter_cfg;

/* Init function of the harness filter: runs the init function of every
   provider registered for this smart filter, stopping at the first
   failure.

   Returns OK when every init function succeeded (or there was nothing to
   run), otherwise the first non-OK value returned by a provider. */
static int filter_init(ap_filter_t* f) {
  mod_filter_provider* p ;
  int err = OK ;        /* nothing to initialise is not an error */
  harness_ctx* ctx = f->ctx ;
  mod_filter_cfg* cfg
        = ap_get_module_config(f->r->per_dir_config, &filter_module);
  mod_filter_rec* filter
        = apr_hash_get(cfg->live_filters, ctx->name, APR_HASH_KEY_STRING) ;

  if ( !filter ) {
    return OK ;         /* unknown name: no providers to initialise */
  }
  for ( p = filter->providers ; p ; p = p->next ) {
    if ( p->frec->filter_init_func ) {
      err = p->frec->filter_init_func(f) ;
      if ( err != OK ) {
        break ; /* if anyone errors out here, so do we */
      }
    }
  }
  return err ;
}
/* Select the provider to run for this request.

   Providers are tried in list order.  For each one the dispatch string
   (a request header, response header, subprocess_env variable or the
   content type, according to filter->dispatch) is compared with the
   provider's match condition.  The first provider that matches and is
   not ruled out by its protocol flags wins; the HTTP headers affected by
   those flags are fixed up before returning.

   r       the current request
   filter  the smart filter being dispatched

   Returns the provider's output filter function, or NULL if no provider
   applies. */
static filter_func_t filter_lookup(request_rec* r, mod_filter_rec* filter) {
  mod_filter_provider* provider ;
  const char* str ;
  const char* cachecontrol ;
  int match ;
  unsigned int proto_flags ;

  /* Check registered providers in order */
  for ( provider = filter->providers; provider; provider = provider->next) {
    match = 1 ;
    str = NULL ;
    switch ( filter->dispatch ) {
      case REQUEST_HEADERS:
        str = apr_table_get(r->headers_in, filter->value) ;
        break ;
      case RESPONSE_HEADERS:
        str = apr_table_get(r->headers_out, filter->value) ;
        break ;
      case SUBPROCESS_ENV:
        str = apr_table_get(r->subprocess_env, filter->value) ;
        break ;
      case CONTENT_TYPE:
        str = r->content_type ;
        break ;
    }

    if ( str == NULL ) {
      /* Nothing to compare against.  A "defined" test fails; every other
         match type is left as matching, as this code has always done.
         NOTE(review): not sure that is the best thing to do with them. */
      if ( provider->match_type == DEFINED ) {
        match = 0 ;
      }
    } else {
      /* We have a value, so we can do string, regexp and integer
         matching.  Only the union member that belongs to match_type is
         read: testing match.c for an integer match would wrongly reject
         a match value of 0. */
      switch ( provider->match_type ) {
        case STRING_MATCH:
          if ( !provider->match.c || strcasecmp(str, provider->match.c) ) {
            match = 0 ;
          }
          break ;
        case STRING_CONTAINS:
          if ( !provider->match.c || !strstr(str, provider->match.c) ) {
            match = 0 ;
          }
          break ;
        case REGEX_MATCH:
          if ( !provider->match.r
                || ap_regexec(provider->match.r, str, 0, NULL, 0)
                        == REG_NOMATCH ) {
            match = 0 ;
          }
          break ;
        case INT_EQ:
          if ( atoi(str) != provider->match.i ) {
            match = 0 ;
          }
          break ;
        case INT_LE:
          if ( atoi(str) > provider->match.i ) {
            match = 0 ;
          }
          break ;
        case INT_GE:
          if ( atoi(str) < provider->match.i ) {
            match = 0 ;
          }
          break ;
        case DEFINED:   /* the value exists, which is all we ask */
          break ;
      }
    }

    if ( match == 1 ) { /* all the conditions matched this provider */
      /* check protocol

        This is a quick hack and almost certainly buggy.
        The idea is that by putting this in mod_filter, we relieve
        filter implementations of the burden of fixing up HTTP headers
        for cases that are routinely affected by filters.

        Default is ALWAYS to do nothing, so as not to tread on the
        toes of filters which want to do it themselves.
      */
      proto_flags = filter->proto_flags | provider->proto_flags ;

      /* some specific things can't happen in a proxy */
      if ( r->proxyreq ) {
        if ( proto_flags & PROTO_NO_PROXY ) {
          /* can't use this provider; try next */
          continue ;
        }
        if ( proto_flags & PROTO_TRANSFORM ) {
          cachecontrol = apr_table_get(r->headers_out, "Cache-Control") ;
          if ( cachecontrol && strstr(cachecontrol, "no-transform") ) {
            /* can't use this provider; try next */
            continue ;
          }
          apr_table_addn(r->headers_out, "Warning", apr_psprintf(
                r->pool, "214 %s Transformation applied", r->hostname) ) ;
        }
      }

      /* things that are invalidated if the filter transforms content */
      if ( proto_flags & PROTO_CHANGE ) {
        apr_table_unset(r->headers_out, "Content-MD5") ;
        apr_table_unset(r->headers_out, "ETag") ;
        if ( proto_flags & PROTO_CHANGE_LENGTH ) {
          apr_table_unset(r->headers_out, "Content-Length") ;
        }
      }

      /* no-cache is for a filter that has different effect per-hit */
      if ( proto_flags & PROTO_NO_CACHE ) {
        apr_table_unset(r->headers_out, "Last-Modified") ;
        apr_table_addn(r->headers_out, "Cache-Control", "no-cache") ;
      }

      if ( proto_flags & PROTO_NO_BYTERANGE ) {
        apr_table_unset(r->headers_out, "Accept-Ranges") ;
      } else if ( filter->range ) {
        /* restore the Range header that filter_insert took away */
        apr_table_setn(r->headers_in, "Range", filter->range) ;
      }
      return provider->frec->filter_func.out_func ;
    }
  }

  /* No provider matched */
  return NULL ;
}
/* Output filter function registered for every smart filter.

   On the first call it picks a provider with filter_lookup(); afterwards
   it forwards each brigade to that provider, swapping the provider's own
   context in and out of f->ctx around the call.  The harness removes
   itself and passes the data through untouched when the response status
   is not 200, when the filter's protocol flags forbid running on a
   proxied request, or when no provider matches.

   Returns whatever the provider (or ap_pass_brigade) returns. */
static apr_status_t filter_harness(ap_filter_t* f, apr_bucket_brigade* bb) {

  apr_status_t ret ;
  const char* cachecontrol ;
  harness_ctx* ctx = f->ctx ;
  mod_filter_rec* filter = (mod_filter_rec*)f->frec ;

  if ( f->r->status != 200 ) {
    ap_remove_output_filter(f) ;
    return ap_pass_brigade(f->next, bb) ;
  }

  if ( f->r->proxyreq ) {
    if ( filter->proto_flags & PROTO_NO_PROXY ) {
      ap_remove_output_filter(f) ;
      return ap_pass_brigade(f->next, bb) ;
    }
    if ( filter->proto_flags & PROTO_TRANSFORM ) {
      cachecontrol = apr_table_get(f->r->headers_out, "Cache-Control") ;
      if ( cachecontrol && strstr(cachecontrol, "no-transform") ) {
        ap_remove_output_filter(f) ;
        return ap_pass_brigade(f->next, bb) ;
      }
    }
  }

/* look up a handler function if we haven't already set it */
  if ( ! ctx->func ) {
    ctx->func = filter_lookup(f->r, filter) ;
    if ( ! ctx->func ) {
      ap_remove_output_filter(f) ;
      return ap_pass_brigade(f->next, bb) ;
    }
  }

/* call the content filter with its own context, then restore our context */
  f->ctx = ctx->fctx ;
  ret = ctx->func(f, bb) ;
  ctx->fctx = f->ctx ;
  f->ctx = ctx ;
  return ret ;
}

/* Handler for the FilterProtocol directive:
       FilterProtocol filter-name [provider-name] protocol-args

   Sets the PROTO_* flags of one provider of a smart filter or, when no
   provider is named, of the smart filter itself.  Because the optional
   argument is the middle one, a two-argument call arrives with the
   protocol args in pname and proto == NULL.  Unrecognised protocol
   arguments are ignored.

   Returns NULL on success or an error message for the config parser. */
static const char* filter_protocol(cmd_parms* cmd, void* CFG,
        const char* fname, const char* pname, const char* proto) {

  static const char* sep = " ;, " ;
  char* arg ;
  char* tok = 0 ;
  unsigned int flags = 0 ;
  mod_filter_cfg* cfg = CFG ;
  mod_filter_provider* provider = NULL ;
  mod_filter_rec* filter
        = apr_hash_get(cfg->live_filters, fname, APR_HASH_KEY_STRING) ;

  /* It is the filter lookup that can fail here; the provider has not
     been searched for yet. */
  if ( !filter ) {
    return "FilterProtocol: No such filter" ;
  }

  /* Fixup the args: it's really pname that's optional */
  if ( proto == NULL ) {
    proto = pname ;
    pname = NULL ;
  } else {
    /* Find provider */
    for ( provider = filter->providers; provider; provider = provider->next ) {
      if ( !strcasecmp(provider->frec->name, pname) ) {
        break ;
      }
    }
    if ( !provider ) {
      return "FilterProtocol: No such provider for this filter" ;
    }
  }

  /* Now set flags from our args */
  for ( arg = apr_strtok(apr_pstrdup(cmd->pool, proto), sep, &tok) ;
        arg ; arg = apr_strtok(NULL, sep, &tok) ) {
    if ( !strcasecmp(arg, "change=yes") ) {
      flags |= PROTO_CHANGE | PROTO_CHANGE_LENGTH ;
    } else if ( !strcasecmp(arg, "change=1:1") ) {
      flags |= PROTO_CHANGE ;
    } else if ( !strcasecmp(arg, "byteranges=no") ) {
      flags |= PROTO_NO_BYTERANGE ;
    } else if ( !strcasecmp(arg, "proxy=no") ) {
      flags |= PROTO_NO_PROXY ;
    } else if ( !strcasecmp(arg, "proxy=transform") ) {
      flags |= PROTO_TRANSFORM ;
    } else if ( !strcasecmp(arg, "cache=no") ) {
      flags |= PROTO_NO_CACHE ;
    }
  }

  if ( pname ) {
    provider->proto_flags = flags ;
  } else {
    filter->proto_flags = flags ;
  }
  return NULL ;
}

/* Handler for the FilterDeclare directive:
       FilterDeclare filter-name dispatch-criterion

   Creates a smart filter whose filter function is the harness and records
   what it dispatches on.  The criterion is "env=NAME" (subprocess_env),
   "req=NAME" (request header), "resp=NAME" (response header) or a bare
   NAME, which is taken as a response header.  A response header called
   Content-Type, in any letter case, is dispatched on r->content_type
   instead.  The filter is registered only once the criterion has been
   accepted.

   Returns NULL on success or an error message for the config parser. */
static const char* filter_declare(cmd_parms* cmd, void* CFG,
        const char* fname, const char* condition) {

  const char* eq ;
  char* tmpname ;

  mod_filter_cfg* cfg = (mod_filter_cfg*)CFG ;
  mod_filter_rec* filter ;

  filter = apr_pcalloc(cmd->pool, sizeof(mod_filter_rec)) ;

  filter->frec.name = fname ;
  filter->frec.filter_init_func = filter_init ;
  filter->frec.filter_func.out_func = filter_harness ;
  filter->frec.ftype = AP_FTYPE_RESOURCE ;
  filter->frec.next = NULL ;

/* determine what this filter will dispatch on */
  eq = strchr(condition, '=') ;
  if ( eq ) {
    tmpname = apr_pstrdup(cmd->pool, eq+1) ;
    if ( !strncasecmp(condition, "env=", 4) ) {
      filter->dispatch = SUBPROCESS_ENV ;
    } else if ( !strncasecmp(condition, "req=", 4) ) {
      filter->dispatch = REQUEST_HEADERS ;
    } else if ( !strncasecmp(condition, "resp=", 5) ) {
      filter->dispatch = RESPONSE_HEADERS ;
    } else {
      return "FilterCondition: unrecognised dispatch table" ;
    }
  } else {
    filter->dispatch = RESPONSE_HEADERS ;
    tmpname = apr_pstrdup(cmd->pool, condition) ;
    ap_str_tolower(tmpname) ;
  }

  /* Only the bare-name form is lower-cased above, so compare without
     regard to case or "resp=Content-Type" would be missed. */
  if ( ( filter->dispatch == RESPONSE_HEADERS )
        && !strcasecmp(tmpname, "content-type") ) {
    filter->dispatch = CONTENT_TYPE ;
  }
  filter->value = tmpname ;

  apr_hash_set(cfg->live_filters, fname, APR_HASH_KEY_STRING, filter) ;

  return NULL ;
}

/* Handler for the FilterProvider directive:
       FilterProvider filter-name provider-name dispatch-match

   Registers an existing output filter (provider-name) as a provider of a
   smart filter previously declared with FilterDeclare.  The first
   character of dispatch-match selects the kind of test:
       <N        integer value <= N
       >N        integer value >= N
       =N        integer value == N
       /re/flags regular expression; flags: i (ignore case), x (extended)
       '*'       the value is defined
       $str      the value contains str
       other     case-insensitive equality with the whole argument
   The new provider is put at the head of the filter's provider list.

   Returns NULL on success or an error message for the config parser. */
static const char* filter_provider(cmd_parms* cmd, void* CFG,
        const char* fname, const char* pname, const char* match) {
  int flags ;
  mod_filter_provider* provider ;
  const char* rxend ;
  const char* c ;

  /* fname has been declared with FilterDeclare, so we can look it up */
  mod_filter_cfg* cfg = CFG ;
  mod_filter_rec* frec = apr_hash_get(cfg->live_filters, fname, APR_HASH_KEY_STRING) ;
  /* provider has been registered, so we can look it up */
  ap_filter_rec_t* provider_frec = ap_get_output_filter_handle(pname) ;

  if ( ! frec ) {
    return apr_psprintf(cmd->pool, "Undeclared smart filter %s", fname) ;
  }
  if ( !provider_frec ) {
    return apr_psprintf(cmd->pool, "Unknown filter provider %s", pname) ;
  }

  /* Zero-filled so that proto_flags, which filter_lookup reads, starts
     out empty even if FilterProtocol never names this provider. */
  provider = apr_pcalloc(cmd->pool, sizeof(mod_filter_provider) ) ;

  switch ( match[0] ) {
    case '<':
      provider->match_type = INT_LE ;
      provider->match.i = atoi(match+1) ;
      break ;
    case '>':
      provider->match_type = INT_GE ;
      provider->match.i = atoi(match+1) ;
      break ;
    case '=':
      provider->match_type = INT_EQ ;
      provider->match.i = atoi(match+1) ;
      break ;
    case '/':
      provider->match_type = REGEX_MATCH ;
      /* The LAST slash closes the expression, so that a pattern may
         itself contain slashes (think of content types). */
      rxend = strrchr(match+1, '/') ;
      if ( !rxend ) {
        return "Bad regexp syntax" ;
      }
      flags = REG_NOSUB ;     /* we're not mod_rewrite:-) */
      for ( c = rxend+1; *c; ++c ) {
        switch (*c) {
          case 'i': flags |= REG_ICASE ; break ;
          case 'x': flags |= REG_EXTENDED ; break ;
        }
      }
      provider->match.r = ap_pregcomp(cmd->pool,
        apr_pstrndup(cmd->pool, match+1, rxend-match-1), flags) ;
      if ( !provider->match.r ) {
        return "Bad regexp syntax" ;
      }
      break ;
    case '*':
      provider->match_type = DEFINED ;
      provider->match.i = -1 ;
      break ;
    case '$':
      provider->match_type = STRING_CONTAINS ;
      provider->match.c = match+1 ;
      break ;
    default:
      provider->match_type = STRING_MATCH ;
      provider->match.c = match ;
      break ;
  }

  provider->frec = provider_frec ;
  provider->next = frec->providers ;
  frec->providers = provider ;
  return NULL ;
}
static const char* filter_chain(cmd_parms* cmd, void* CFG, const char* arg){
  mod_filter_chain* p ;
  mod_filter_chain* q ;
  mod_filter_cfg* cfg = CFG ;

  switch (arg[0]) {
    case '+':   /* add to end of chain */
      p = apr_pcalloc(cmd->pool, sizeof(mod_filter_chain)) ;
      p->fname = arg+1 ;
      if ( cfg->chain ) {
        for ( q = cfg->chain ; q->next ; q = q->next ) ;
        q->next = p ;
      } else {
        cfg->chain = p ;
      break ;
    case '@':   /* add to start of chain */
      p = apr_palloc(cmd->pool, sizeof(mod_filter_chain)) ;
      p->fname = arg+1 ;
      p->next = cfg->chain ;
      cfg->chain = p ;
      break ;
    case '-':   /* remove from chain */
      if ( cfg->chain ) {
        if ( strcasecmp(cfg->chain->fname, arg+1) ) {
          for ( p = cfg->chain ; p->next ; p = p->next ) {
            if ( !strcasecmp(p->next->fname, arg+1) ) {
              p->next = p->next->next ;
        } else {
          cfg->chain = cfg->chain->next ;
      break ;
    case '!':   /* Empty the chain */
      cfg->chain = NULL ;
      break ;
    case '=':   /* initialise chain with this arg */
      p = apr_pcalloc(cmd->pool, sizeof(mod_filter_chain)) ;
      p->fname = arg+1 ;
      cfg->chain = p ;
      break ;
    default:    /* add to end */
      p = apr_pcalloc(cmd->pool, sizeof(mod_filter_chain)) ;
      p->fname = arg ;
      if ( cfg->chain ) {
        for ( q = cfg->chain ; q->next ; q = q->next ) ;
        q->next = p ;
      } else {
        cfg->chain = p ;
      break ;
  return NULL ;

/* Configuration directives provided by this module.  All of them are
   registered with OR_ALL. */
static const command_rec filter_cmds[] = {
  AP_INIT_TAKE2("FilterDeclare", filter_declare, NULL, OR_ALL,
        "filter-name, dispatch-criterion") ,
  AP_INIT_TAKE3("FilterProvider", filter_provider, NULL, OR_ALL,
        "filter-name, provider-name, dispatch-match") ,
  /* The bracketed list is the set of prefix characters that
     filter_chain understands. */
  AP_INIT_ITERATE("FilterChain", filter_chain, NULL, OR_ALL,
        "list of filter names with optional [+-=!@]") ,
  AP_INIT_TAKE23("FilterProtocol", filter_protocol, NULL, OR_ALL,
        "filter-name [provider-name] protocol-args") ,
  { NULL }
} ;

/* insert_filter hook: adds one harness filter to the request's output
   chain for every name in the configured FilterChain, each with a fresh
   harness_ctx.

   The first filter in the chain that cannot cope with byte ranges or
   that changes the content length makes us take the Range header off
   the request; it is kept in filter->range so that filter_lookup can
   put it back if the provider finally chosen is range-safe.
   NOTE(review): filter->range lives in the configuration record, not in
   per-request storage - confirm this is safe with concurrent requests.

   Chain entries that name no declared filter are skipped.

   Always returns OK. */
static int filter_insert(request_rec* r) {
  mod_filter_chain* p ;
  mod_filter_rec* filter ;
  harness_ctx* fctx ;
  mod_filter_cfg* cfg = ap_get_module_config(r->per_dir_config, &filter_module) ;
  int ranges = 1 ;

  for ( p = cfg->chain ; p ; p = p->next ) {
    filter = apr_hash_get(cfg->live_filters, p->fname, APR_HASH_KEY_STRING) ;
    if ( !filter ) {
      continue ;        /* FilterChain named an undeclared filter */
    }
    fctx = apr_pcalloc(r->pool, sizeof(harness_ctx)) ;
    fctx->name = p->fname ;
    ap_add_output_filter_handle(&filter->frec, fctx, r, r->connection) ;
    if ( ranges && (filter->proto_flags & (PROTO_NO_BYTERANGE|PROTO_CHANGE_LENGTH)) ) {
      filter->range = apr_table_get(r->headers_in, "Range") ;
      apr_table_unset(r->headers_in, "Range") ;
      ranges = 0 ;
    }
  }
  return OK ;
}
/* Hook registration: asks for filter_insert to be run at the
   insert_filter stage.  The pool argument is not used. */
static void filter_hooks(apr_pool_t* pool) {
  ap_hook_insert_filter(filter_insert, NULL, NULL, APR_HOOK_MIDDLE) ;
}

/* Creates a configuration record with an empty table of declared filters
   and an empty filter chain.  The second argument is not used.

   Returns the new mod_filter_cfg, allocated from pool. */
static void* filter_config(apr_pool_t* pool, char* x) {
  mod_filter_cfg* cfg = apr_palloc(pool, sizeof(mod_filter_cfg) ) ;
  cfg->live_filters = apr_hash_make(pool) ;
  cfg->chain = NULL ;
  return cfg ;
}
/* Merges two configuration records into a new one allocated from pool.

   The declared filters of both records are overlaid with
   apr_hash_overlay(pool, add->live_filters, base->live_filters).  The
   chains are concatenated, BASE's links first and then ADD's; both are
   copied so that neither input chain is modified.  If only one record
   has a chain it is shared rather than copied.

   Returns the merged mod_filter_cfg. */
static void* filter_merge(apr_pool_t* pool, void* BASE, void* ADD) {
  mod_filter_cfg* base = BASE ;
  mod_filter_cfg* add = ADD ;
  mod_filter_chain* savelink = 0 ;
  mod_filter_chain* newlink ;
  mod_filter_chain* p ;
  mod_filter_cfg* conf = apr_palloc(pool, sizeof(mod_filter_cfg)) ;

  conf->live_filters
        = apr_hash_overlay(pool, add->live_filters, base->live_filters) ;

  if ( base->chain && add->chain ) {
    for ( p = base->chain ; p ; p = p->next ) {
      newlink = apr_pmemdup(pool, p, sizeof(mod_filter_chain)) ;
      if ( savelink ) {
        savelink->next = newlink ;
        savelink = newlink ;
      } else {
        conf->chain = savelink = newlink ;
      }
    }
    /* base->chain is non-empty, so savelink is set by now */
    for ( p = add->chain ; p ; p = p->next ) {
      newlink = apr_pmemdup(pool, p, sizeof(mod_filter_chain)) ;
      savelink->next = newlink ;
      savelink = newlink ;
    }
  } else if ( add->chain ) {
    conf->chain = add->chain ;
  } else {
    conf->chain = base->chain ;
  }
  return conf ;
}
/* The module record.  The configuration is per-directory (the handlers
   read it from r->per_dir_config), so the create/merge functions go in
   the per-directory slots and the per-server slots stay empty. */
module AP_MODULE_DECLARE_DATA filter_module = {
  STANDARD20_MODULE_STUFF,
  filter_config,        /* create per-directory config */
  filter_merge,         /* merge per-directory configs */
  NULL,                 /* create per-server config */
  NULL,                 /* merge per-server configs */
  filter_cmds,          /* configuration directives */
  filter_hooks          /* register hooks */
} ;

