I've just hacked up a module to perform simple security checks on
an incoming request.  Loosely inspired by Perl's untainting.

It implements untainting rules.  Each rule matches a request attribute
to a regexp, and can either:
  (a) enforce a match, and return an error (default: 400) if it doesn't match.
or
  (b) untaint a request attribute Perl-style

It supports untainting components of the request line, and any request header.
TODO: support untainting of parsed query args.
No plans for anything more ambitious like checking POST data (use mod_security).

Drop it in to trunk?

-- 
Nick Kew
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* mod_taint: simple security checks on the request
 * Default: check a request field against a regexp, return 400 on not-match
 * Alternative: return another error status on not-match
 * Alternative: untaint the offending field Perl-style and continue processing
 *
 * UNTESTED - do not use yet unless you're happy to debug me
 */

#include "httpd.h"
#include "http_config.h"
#include "http_log.h"

module AP_MODULE_DECLARE_DATA taint_module;

typedef struct taint_check {
    apr_pool_t *pool;
    enum {
        TAINT_HEADER,
        TAINT_REQUEST,
        TAINT_URI,
        TAINT_UNPARSED_URI,
        TAINT_FILE,
        TAINT_CANONICAL_FILE,
        TAINT_PATH_INFO,
        TAINT_QUERY_STRING,
    } what;
    int status;
    const char *name;
    ap_regex_t *match;
    const char *untaint;
/*  FIXME: support regexp flags int flags; */
} taint_check;

typedef struct taint_dir_cfg {
    apr_array_header_t *rules;
    int inherit;
} taint_dir_cfg;

static int taint_run(request_rec *r)
{
    int i;
    int nmatch;
    ap_regmatch_t pmatch[10];
    const char *val;
    taint_dir_cfg *cfg = ap_get_module_config(r->per_dir_config, &taint_module);
    taint_check **checks;
    int rv = DECLINED;
    int folded_headers = 0;

    if (!cfg->rules) {
        return rv;
    }
    /* FIXME: are there circumstances we should run in a
     * subrequest or internal redirect?  An errordocument could
     * need protecting, but then we could end up recursing!
     */
    if (r->main || r->prev) {
        return rv;
    }

    checks = (taint_check**) cfg->rules->elts;
    for (i = 0; i < cfg->rules->nelts && rv == DECLINED; ++i) {
        taint_check *check = checks[i];
        switch (check->what) {
        case TAINT_HEADER:
            /* fold headers, to kill off attack vectors based on
             * duplicated headers
             */
            if (!folded_headers) {
                folded_headers = 1;
                apr_table_compress(r->headers_in, APR_OVERLAP_TABLES_MERGE);
            }
            val = apr_table_get(r->headers_in, check->name);
            break;
        case TAINT_REQUEST:
            val = r->the_request;
            break;
        case TAINT_URI:
            val = r->uri;
            break;
        case TAINT_UNPARSED_URI:
            val = r->unparsed_uri;
            break;
        case TAINT_FILE:
            val = r->filename;
            break;
        case TAINT_CANONICAL_FILE:
            val = r->canonical_filename;
            break;
        case TAINT_PATH_INFO:
            val = r->path_info;
            break;
        case TAINT_QUERY_STRING:
            val = r->args;
            break;
        /* FIXME: add capability to check entries in parsed query string */
        }

        if (val == NULL) {
            val = "";
        }
        /* need two paths here: regexec match yes/no,
         * or an untainting without an error
         */
        if (check->status == 0) {
            /* untaint.  Failure to match the regexp is a server error */
            nmatch = 10;
            if (ap_regexec(check->match, val, nmatch, pmatch, /*check->flags*/ 0)
                == AP_REG_NOMATCH) {
                ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
                              "Untaint for %s failed on %s", check->name, val);
                rv = HTTP_INTERNAL_SERVER_ERROR;
            }
            else {
                char *untainted = ap_pregsub(r->pool, check->untaint, val,
                                             nmatch, pmatch);
                switch (check->what) {
                case TAINT_HEADER:
                    apr_table_setn(r->headers_in, check->name, untainted);
                    break;
                case TAINT_REQUEST:
                    r->the_request = untainted;
                    break;
                case TAINT_URI:
                    r->uri = untainted;
                    break;
                case TAINT_UNPARSED_URI:
                    r->unparsed_uri = untainted;
                    break;
                case TAINT_FILE:
                    r->filename = untainted;
                    break;
                case TAINT_CANONICAL_FILE:
                    r->canonical_filename = untainted;
                    break;
                case TAINT_PATH_INFO:
                    r->path_info = untainted;
                    break;
                case TAINT_QUERY_STRING:
                    r->args = untainted;
                    break;
                /* FIXME: add capability to check entries in parsed query string */
                }
            }
        }
        else {
            /* match, and abort request on match failure */
            nmatch = 0;
            if (ap_regexec(check->match, val, nmatch, pmatch, /*check->flags*/0)
                == AP_REG_NOMATCH) {
                ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
                              "Taint check for %s failed on %s",
                              check->name, val);
                rv = check->status;
            }
        }
    }
    return rv;
}
static void taint_hooks(apr_pool_t *p)
{
    ap_hook_header_parser(taint_run, NULL, NULL, APR_HOOK_MIDDLE);
}
static apr_status_t taint_cleanup(void *CHECK)
{
    taint_check *check = CHECK;
    ap_pregfree(check->pool, check->match);
    return APR_SUCCESS;
}

static const char *taint_item(cmd_parms *cmd, void *CFG, const char *what,
                              const char *match, const char *action)
{
    taint_dir_cfg *cfg = CFG;
    taint_check *check;

    if (cfg->rules == NULL) {
        cfg->rules = apr_array_make(cmd->pool, 10, sizeof(taint_check));
    }
    check = apr_palloc(cmd->pool, sizeof(taint_check));
    check->pool = cmd->pool; /* for regfree */

    check->name = what;
    if (!strncasecmp(what, "HTTP_", 5)) {
        check->what = TAINT_HEADER;
        check->name = what+5;
    }
    else if (!strcasecmp(what, "REQUEST")) {
        check->what = TAINT_REQUEST;
    }
    else if (!strcasecmp(what, "RAW_URI")) {
        check->what = TAINT_UNPARSED_URI;
    }
    else if (!strcasecmp(what, "URI")) {
        check->what = TAINT_URI;
    }
    else if (!strcasecmp(what, "FILE")) {
        check->what = TAINT_FILE;
    }
    else if (!strcasecmp(what, "CANONICAL")) {
        check->what = TAINT_CANONICAL_FILE;
    }
    else if (!strcasecmp(what, "PATH_INFO")) {
        check->what = TAINT_PATH_INFO;
    }
    else if (!strcasecmp(what, "QUERY_STRING")) {
        check->what = TAINT_QUERY_STRING;
    }
    else {
        return "Untaint: unrecognised argument";
    }

    check->match = ap_pregcomp(cmd->pool, match, 0);
    if (check->match == NULL) {
        return "Untaint: can't compile match regexp";
    }
    else {
        apr_pool_cleanup_register(cmd->pool, check, taint_cleanup,
                                  apr_pool_cleanup_null);
    }

    /* If action is null, default to return 400
     * If action is numeric, it becomes an alternative error on failed check
     * Else action is an untainting substitution string
     */
    if (!action || !*action) {
        check->status = HTTP_BAD_REQUEST;
    }
    else {
        char *endp = NULL;
        long int n = strtol(action, &endp, 10);
        if (*endp == 0) {
            /* It's numeric */
            check->status = n;
        }
        else {
            /* It's an untainting replacement string */
            check->status = 0;
            check->untaint = action;
            if (check->what == TAINT_UNPARSED_URI ||
                check->what == TAINT_REQUEST) {
                return "Cannot untaint unparsed request line or URI!";
            }
        }
    }
    APR_ARRAY_PUSH(cfg->rules, taint_check*) = check;
    return NULL;
}
static const command_rec taint_cmds[] = {
    AP_INIT_TAKE23("Untaint", taint_item, NULL, OR_ALL, "Define a taint check"),
    AP_INIT_FLAG("TaintInherit", ap_set_flag_slot,
                 (void*)APR_OFFSETOF(taint_dir_cfg, inherit), OR_ALL,
                 "Whether to inherit taint checks from previous config"),
    { NULL }
};

static void *taint_dir_create_cfg(apr_pool_t *pool, char *x)
{
    taint_dir_cfg *ret = apr_pcalloc(pool, sizeof(taint_dir_cfg));
    ret->inherit = 1;
    return ret;
}
static void *taint_dir_merge_cfg(apr_pool_t *pool, void *BASE, void *ADD)
{
    taint_dir_cfg *add = ADD;
    taint_dir_cfg *base = BASE;
    taint_dir_cfg *ret;
    if (add->inherit) {
        ret = apr_palloc(pool, sizeof(taint_dir_cfg));
        ret->inherit = 1;
        ret->rules = apr_array_append(pool, base->rules, add->rules);
    }
    else {
        ret = add;
    }
    return ret;
}

/* Eventually: let it run early in a server context, later in a directory
 * context.  Like mod_setenvif or mod_rewrite.
 * For now - just run after directory walk, so no svr_cfg
 */

#define taint_svr_create_cfg NULL
#define taint_svr_merge_cfg NULL
AP_DECLARE_MODULE(taint) = {
    STANDARD20_MODULE_STUFF,
    taint_dir_create_cfg,
    taint_dir_merge_cfg,
    taint_svr_create_cfg,
    taint_svr_merge_cfg,
    taint_cmds,
    taint_hooks
};

Reply via email to