On Thu, Jun 11, 2020 at 8:52 AM jean-frederic clere <jfcl...@gmail.com> wrote: > > Should I commit my first proposal (it is easily backportable to 2.4.x) > and later work on the next one?
How about something like the attached patch? It's a new single ap_normalize_path() helper with options (like AP_NORMALIZE_MERGE_SLASHES and AP_NORMALIZE_PATH_PARAMETERS), which we can use in multiple places to replace ap_getparents(). The patch is without the mod_rewrite bits (for now), and uses the "mapping=servlet" syntax for the new proxypass parameter (which I found more extensible). The issue with re-calling ap_getparents() in ap_proxy_trans_match() (or ap_normalize_path() in my patch still) is that it happens after %-decoding, so bets are off. For instance "/docs/..%3Bfoo=bar/%3Bfoo=bar/test/index.jsp" is not the same as "http://localhost:8000/docs/..;foo=bar/;foo=bar/test/index.jsp" and shouldn't be matched the same by mapping=servlet. We need a way to forward non %-decoded URLs upto mod_proxy (reverse) if we want to normalize a second time.. Regards; Yann.
Index: include/httpd.h =================================================================== --- include/httpd.h (revision 1878328) +++ include/httpd.h (working copy) @@ -1779,8 +1779,21 @@ AP_DECLARE(void) ap_no2slash(char *name) AP_DECLARE(void) ap_no2slash_ex(char *name, int is_fs_path) AP_FN_ATTR_NONNULL_ALL; +#define AP_NORMALIZE_NOT_ABOVE_ROOT (1u << 0) +#define AP_NORMALIZE_MERGE_SLASHES (1u << 1) +#define AP_NORMALIZE_PATH_PARAMETERS (1u << 2) /** + * Remove all ////, /./ and /xx/../ substrings from a path, and more + * depending on passed in flags. + * @param path The path to normalize + * @param flags bitmask of AP_NORMALIZE_* flags + * @return non-zero on success + */ +AP_DECLARE(int) ap_normalize_path(char *path, unsigned int flags) + AP_FN_ATTR_NONNULL((1)); + +/** * Remove all ./ and xx/../ substrings from a file name. Also remove * any leading ../ or /../ substrings. * @param name the file name to parse Index: modules/dav/main/util.c =================================================================== --- modules/dav/main/util.c (revision 1878328) +++ modules/dav/main/util.c (working copy) @@ -664,7 +664,11 @@ static dav_error * dav_process_if_header(request_r /* note that parsed_uri.path is allocated; we can trash it */ /* clean up the URI a bit */ - ap_getparents(parsed_uri.path); + if (!ap_normalize_path(parsed_uri.path, 0)) { + return dav_new_error(r->pool, HTTP_BAD_REQUEST, + DAV_ERR_IF_TAGGED, rv, + "Invalid URI path tagged If-header."); + } /* the resources we will compare to have unencoded paths */ if (ap_unescape_url(parsed_uri.path) != OK) { Index: modules/generators/mod_autoindex.c =================================================================== --- modules/generators/mod_autoindex.c (revision 1878328) +++ modules/generators/mod_autoindex.c (working copy) @@ -1266,8 +1266,7 @@ static struct ent *make_parent_entry(apr_int32_t a if (!(p->name = ap_make_full_path(r->pool, r->uri, "../"))) { return (NULL); } - ap_getparents(p->name); - if (!*p->name) { + if (!ap_normalize_path(p->name, 0)) { return (NULL); } Index: modules/proxy/mod_proxy.c =================================================================== --- modules/proxy/mod_proxy.c (revision 1878328) +++ modules/proxy/mod_proxy.c (working copy) @@ -753,6 +753,25 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_re mismatch = 1; use_uri = r->uri; } + else if (!nocanon && (ent->flags & PROXYPASS_MAPPING_SERVLET)) { + char *uri = apr_pstrdup(r->pool, r->uri); + + /* check against the backend servlet url */ + if (!ap_normalize_path(uri, AP_NORMALIZE_MERGE_SLASHES | + AP_NORMALIZE_PATH_PARAMETERS)) { + ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r, + "servlet normalization failed for '%s'", r->uri); + return HTTP_INTERNAL_SERVER_ERROR; + } + + if (!alias_match(uri, ent->fake)) { + ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO() + "servlet normalization for '%s' mismatch; " + "declining '%s'", r->uri, uri); + return DECLINED; + } + } + found = apr_pstrcat(r->pool, "proxy:", real, use_uri + len, NULL); } } @@ -1806,6 +1825,9 @@ static const char * else if (!strcasecmp(word,"noquery")) { flags |= PROXYPASS_NOQUERY; } + else if (!strcasecmp(word,"mapping=servlet")) { + flags |= PROXYPASS_MAPPING_SERVLET; + } else { char *val = strchr(word, '='); if (!val) { Index: modules/proxy/mod_proxy.h =================================================================== --- modules/proxy/mod_proxy.h (revision 1878328) +++ modules/proxy/mod_proxy.h (working copy) @@ -123,6 +123,7 @@ struct proxy_remote { #define PROXYPASS_NOCANON 0x01 #define PROXYPASS_INTERPOLATE 0x02 #define PROXYPASS_NOQUERY 0x04 +#define PROXYPASS_MAPPING_SERVLET 0x08 struct proxy_alias { const char *real; const char *fake; Index: server/protocol.c =================================================================== --- server/protocol.c (revision 1878328) +++ server/protocol.c (working copy) @@ -627,6 +627,14 @@ AP_CORE_DECLARE(void) ap_parse_uri(request_rec *r, } else { status = apr_uri_parse(r->pool, uri, &r->parsed_uri); + if (status == APR_SUCCESS + && (r->parsed_uri.path != NULL) + && (r->parsed_uri.path[0] != '/') + && (r->method_number != M_OPTIONS + || strcmp(r->parsed_uri.path, "*") != 0)) { + /* Invalid request-target per rfc7230#section-5.3 */ + status = APR_EINVAL; + } } if (status == APR_SUCCESS) { @@ -640,8 +648,15 @@ AP_CORE_DECLARE(void) ap_parse_uri(request_rec *r, } r->args = r->parsed_uri.query; - r->uri = r->parsed_uri.path ? r->parsed_uri.path - : apr_pstrdup(r->pool, "/"); + if (r->parsed_uri.path) { + r->uri = r->parsed_uri.path; + } + else if (r->method_number == M_OPTIONS) { + r->uri = apr_pstrdup(r->pool, "*"); + } + else { + r->uri = apr_pstrdup(r->pool, "/"); + } #if defined(OS2) || defined(WIN32) /* Handle path translations for OS/2 and plug security hole. Index: server/request.c =================================================================== --- server/request.c (revision 1878328) +++ server/request.c (working copy) @@ -169,12 +169,14 @@ AP_DECLARE(int) ap_process_request_internal(reques core_dir_config *d; core_server_config *sconf = ap_get_core_module_config(r->server->module_config); + unsigned int normalize_flags = 0; - /* Ignore embedded %2F's in path for proxy requests */ + /* Ignore URL unescaping for proxy requests */ if (!r->proxyreq && r->parsed_uri.path) { d = ap_get_core_module_config(r->per_dir_config); if (d->allow_encoded_slashes) { - access_status = ap_unescape_url_keep2f(r->parsed_uri.path, d->decode_encoded_slashes); + access_status = ap_unescape_url_keep2f(r->parsed_uri.path, + d->decode_encoded_slashes); } else { access_status = ap_unescape_url(r->parsed_uri.path); @@ -185,7 +187,7 @@ AP_DECLARE(int) ap_process_request_internal(reques ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(00026) "found %%2f (encoded '/') in URI " "(decoded='%s'), returning 404", - r->parsed_uri.path); + r->uri); } } return access_status; @@ -192,14 +194,21 @@ AP_DECLARE(int) ap_process_request_internal(reques } } - ap_getparents(r->uri); /* OK --- shrinking transformations... */ - if (sconf->merge_slashes != AP_CORE_CONFIG_OFF) { - ap_no2slash(r->uri); - if (r->parsed_uri.path) { - ap_no2slash(r->parsed_uri.path); + if (r->parsed_uri.path) { + /* OK --- shrinking transformations... */ + if (sconf->merge_slashes != AP_CORE_CONFIG_OFF) { + normalize_flags |= AP_NORMALIZE_MERGE_SLASHES; } - } + if (!ap_normalize_path(r->parsed_uri.path, normalize_flags)) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO() + "invalid URI path (%s)", r->uri); + return HTTP_BAD_REQUEST; + } + /* Apply transformations */ + r->uri = r->parsed_uri.path; + } + /* All file subrequests are a huge pain... they cannot bubble through the * next several steps. Only file subrequests are allowed an empty uri, * otherwise let translate_name kill the request. Index: server/util.c =================================================================== --- server/util.c (revision 1878328) +++ server/util.c (working copy) @@ -77,7 +77,7 @@ #include "test_char.h" /* Win32/NetWare/OS2 need to check for both forward and back slashes - * in ap_getparents() and ap_escape_url. + * in ap_normalize_path() and ap_escape_url(). */ #ifdef CASE_BLIND_FILESYSTEM #define IS_SLASH(s) ((s == '/') || (s == '\\')) @@ -492,75 +492,114 @@ AP_DECLARE(apr_status_t) ap_pregsub_ex(apr_pool_t return rc; } -/* - * Parse .. so we don't compromise security - */ -AP_DECLARE(void) ap_getparents(char *name) +/* Forward declare */ +static char x2c(const char *what); + +#define IS_SLASH_OR_NUL(s) (s == '\0' || IS_SLASH(s)) + +static int normalize_path(char *path, unsigned int flags) { - char *next; - int l, w, first_dot; + apr_size_t l, w; - /* Four paseses, as per RFC 1808 */ - /* a) remove ./ path segments */ - for (next = name; *next && (*next != '.'); next++) { + /* We assume path[0] == '/' below */ + if (!IS_SLASH(path[0])) { + /* Besides "OPTIONS *" requests, a request-target path should start + * with '/' (RFC-7230 section-5.3), so anything else is invalid. + */ + return (path[0] == '*' && path[1] == '\0'); } - l = w = first_dot = next - name; - while (name[l] != '\0') { - if (name[l] == '.' && IS_SLASH(name[l + 1]) - && (l == 0 || IS_SLASH(name[l - 1]))) - l += 2; - else - name[w++] = name[l++]; - } + for (l = w = 1; path[l] != '\0';) { + if ((flags & AP_NORMALIZE_PATH_PARAMETERS) && path[l] == ';') { + /* Remove path parameters ;foo=bar/ from any path segment */ + do { + l++; + } while (!IS_SLASH_OR_NUL(path[l])); + continue; + } - /* b) remove trailing . path, segment */ - if (w == 1 && name[0] == '.') - w--; - else if (w > 1 && name[w - 1] == '.' && IS_SLASH(name[w - 2])) - w--; - name[w] = '\0'; + /* RFC-3986 section 2.3: + * For consistency, percent-encoded octets in the ranges of + * ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), + * period (%2E), underscore (%5F), or tilde (%7E) should [...] + * be decoded to their corresponding unreserved characters by + * URI normalizers. + */ + if (path[l] == '%' && apr_isxdigit(path[l + 1]) + && apr_isxdigit(path[l + 2])) { + const char c = x2c(&path[l + 1]); + if (apr_isalnum(c) || (c && strchr("-._~", c))) { + /* Replace last char and fall through as the current + * read position */ + l += 2; + path[l] = c; + } + } - /* c) remove all xx/../ segments. (including leading ../ and /../) */ - l = first_dot; + if (IS_SLASH(path[w - 1])) { + if ((flags & AP_NORMALIZE_MERGE_SLASHES) && IS_SLASH(path[l])) { + /* Collapse ///// sequences to / */ + do { + l++; + } while (IS_SLASH(path[l])); + continue; + } - while (name[l] != '\0') { - if (name[l] == '.' && name[l + 1] == '.' && IS_SLASH(name[l + 2]) - && (l == 0 || IS_SLASH(name[l - 1]))) { - int m = l + 3, n; + if (path[l] == '.') { + if (IS_SLASH_OR_NUL(path[l + 1])) { + /* Remove /./ segments */ + l++; + if (path[l]) { + l++; + } + continue; + } - l = l - 2; - if (l >= 0) { - while (l >= 0 && !IS_SLASH(name[l])) - l--; - l++; + if (path[l + 1] == '.' && IS_SLASH_OR_NUL(path[l + 2])) { + /* Remove /xx/../ segments */ + if (w == 1) { + if (flags & AP_NORMALIZE_NOT_ABOVE_ROOT) { + return 0; + } + l += 2; + continue; + } + + /* Wind w back to remove the previous segment */ + do { + w--; + } while (!IS_SLASH(path[w - 1])); + + /* Move l forward to the next segment */ + l += 2; + if (path[l]) { + l++; + } + continue; + } } - else - l = 0; - n = l; - while ((name[n] = name[m])) - (++n, ++m); } - else - ++l; + + path[w++] = path[l++]; } + path[w] = '\0'; - /* d) remove trailing xx/.. segment. */ - if (l == 2 && name[0] == '.' && name[1] == '.') - name[0] = '\0'; - else if (l > 2 && name[l - 1] == '.' && name[l - 2] == '.' - && IS_SLASH(name[l - 3])) { - l = l - 4; - if (l >= 0) { - while (l >= 0 && !IS_SLASH(name[l])) - l--; - l++; - } - else - l = 0; - name[l] = '\0'; - } + return 1; } + +AP_DECLARE(int) ap_normalize_path(char *path, unsigned int flags) +{ + return normalize_path(path, flags); +} + +/* + * Normalize .. so we don't compromise security + */ +AP_DECLARE(void) ap_getparents(char *name) +{ + (void)normalize_path(name, 0); +} + AP_DECLARE(void) ap_no2slash_ex(char *name, int is_fs_path) {