On Thu, Jun 11, 2020 at 8:52 AM jean-frederic clere <jfcl...@gmail.com> wrote:
>
> Should I commit my first proposal (it is easily backportable to 2.4.x)
> and later work on the next one?

How about something like the attached patch?

It's a new single ap_normalize_path() helper with options (like
AP_NORMALIZE_MERGE_SLASHES and AP_NORMALIZE_PATH_PARAMETERS), which we
can use in multiple places to replace ap_getparents(). The patch is
without the mod_rewrite bits (for now), and uses the "mapping=servlet"
syntax for the new proxypass parameter (which I found more
extensible).

The issue with re-calling ap_getparents() in ap_proxy_trans_match()
(or ap_normalize_path() in my patch still) is that it happens after
%-decoding, so bets are off.
For instance "/docs/..%3Bfoo=bar/%3Bfoo=bar/test/index.jsp" is not the
same as "http://localhost:8000/docs/..;foo=bar/;foo=bar/test/index.jsp";
and shouldn't be matched the same by mapping=servlet.

We need a way to forward non %-decoded URLs upto mod_proxy (reverse)
if we want to normalize a second time..


Regards;
Yann.
Index: include/httpd.h
===================================================================
--- include/httpd.h	(revision 1878328)
+++ include/httpd.h	(working copy)
@@ -1779,8 +1779,21 @@ AP_DECLARE(void) ap_no2slash(char *name)
 AP_DECLARE(void) ap_no2slash_ex(char *name, int is_fs_path)
                  AP_FN_ATTR_NONNULL_ALL;
 
+#define AP_NORMALIZE_NOT_ABOVE_ROOT     (1u <<  0)
+#define AP_NORMALIZE_MERGE_SLASHES      (1u <<  1)
+#define AP_NORMALIZE_PATH_PARAMETERS    (1u <<  2)
 
 /**
+ * Remove all ////, /./ and /xx/../ substrings from a path, and more
+ * depending on passed in flags.
+ * @param path The path to normalize
+ * @param flags bitmask of AP_NORMALIZE_* flags
+ * @return non-zero on success
+ */
+AP_DECLARE(int) ap_normalize_path(char *path, unsigned int flags)
+                AP_FN_ATTR_NONNULL((1));
+
+/**
  * Remove all ./ and xx/../ substrings from a file name. Also remove
  * any leading ../ or /../ substrings.
  * @param name the file name to parse
Index: modules/dav/main/util.c
===================================================================
--- modules/dav/main/util.c	(revision 1878328)
+++ modules/dav/main/util.c	(working copy)
@@ -664,7 +664,11 @@ static dav_error * dav_process_if_header(request_r
             /* note that parsed_uri.path is allocated; we can trash it */
 
             /* clean up the URI a bit */
-            ap_getparents(parsed_uri.path);
+            if (!ap_normalize_path(parsed_uri.path, 0)) {
+                return dav_new_error(r->pool, HTTP_BAD_REQUEST,
+                                     DAV_ERR_IF_TAGGED, rv,
+                                     "Invalid URI path tagged If-header.");
+            }
 
             /* the resources we will compare to have unencoded paths */
             if (ap_unescape_url(parsed_uri.path) != OK) {
Index: modules/generators/mod_autoindex.c
===================================================================
--- modules/generators/mod_autoindex.c	(revision 1878328)
+++ modules/generators/mod_autoindex.c	(working copy)
@@ -1266,8 +1266,7 @@ static struct ent *make_parent_entry(apr_int32_t a
     if (!(p->name = ap_make_full_path(r->pool, r->uri, "../"))) {
         return (NULL);
     }
-    ap_getparents(p->name);
-    if (!*p->name) {
+    if (!ap_normalize_path(p->name, 0)) {
         return (NULL);
     }
 
Index: modules/proxy/mod_proxy.c
===================================================================
--- modules/proxy/mod_proxy.c	(revision 1878328)
+++ modules/proxy/mod_proxy.c	(working copy)
@@ -753,6 +753,25 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_re
                 mismatch = 1;
                 use_uri = r->uri;
             }
+            else if (!nocanon && (ent->flags & PROXYPASS_MAPPING_SERVLET)) {
+                char *uri = apr_pstrdup(r->pool, r->uri);
+
+                /* check against the backend servlet url */
+                if (!ap_normalize_path(uri, AP_NORMALIZE_MERGE_SLASHES |
+                                            AP_NORMALIZE_PATH_PARAMETERS)) {
+                    ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
+                                  "servlet normalization failed for '%s'", r->uri);
+                    return HTTP_INTERNAL_SERVER_ERROR;
+                }
+
+                if (!alias_match(uri, ent->fake)) {
+                    ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO()
+                                  "servlet normalization for '%s' mismatch; "
+                                  "declining '%s'", r->uri, uri);
+                    return DECLINED;
+                }
+            }
+
             found = apr_pstrcat(r->pool, "proxy:", real, use_uri + len, NULL);
         }
     }
@@ -1806,6 +1825,9 @@ static const char *
         else if (!strcasecmp(word,"noquery")) {
             flags |= PROXYPASS_NOQUERY;
         }
+        else if (!strcasecmp(word,"mapping=servlet")) {
+            flags |= PROXYPASS_MAPPING_SERVLET;
+        }
         else {
             char *val = strchr(word, '=');
             if (!val) {
Index: modules/proxy/mod_proxy.h
===================================================================
--- modules/proxy/mod_proxy.h	(revision 1878328)
+++ modules/proxy/mod_proxy.h	(working copy)
@@ -123,6 +123,7 @@ struct proxy_remote {
 #define PROXYPASS_NOCANON 0x01
 #define PROXYPASS_INTERPOLATE 0x02
 #define PROXYPASS_NOQUERY 0x04
+#define PROXYPASS_MAPPING_SERVLET 0x08
 struct proxy_alias {
     const char  *real;
     const char  *fake;
Index: server/protocol.c
===================================================================
--- server/protocol.c	(revision 1878328)
+++ server/protocol.c	(working copy)
@@ -627,6 +627,14 @@ AP_CORE_DECLARE(void) ap_parse_uri(request_rec *r,
     }
     else {
         status = apr_uri_parse(r->pool, uri, &r->parsed_uri);
+        if (status == APR_SUCCESS
+            && (r->parsed_uri.path != NULL)
+            && (r->parsed_uri.path[0] != '/')
+            && (r->method_number != M_OPTIONS
+                || strcmp(r->parsed_uri.path, "*") != 0)) {
+            /* Invalid request-target per rfc7230#section-5.3 */
+            status = APR_EINVAL;
+        }
     }
 
     if (status == APR_SUCCESS) {
@@ -640,8 +648,15 @@ AP_CORE_DECLARE(void) ap_parse_uri(request_rec *r,
         }
 
         r->args = r->parsed_uri.query;
-        r->uri = r->parsed_uri.path ? r->parsed_uri.path
-                 : apr_pstrdup(r->pool, "/");
+        if (r->parsed_uri.path) {
+            r->uri = r->parsed_uri.path;
+        }
+        else if (r->method_number == M_OPTIONS) {
+            r->uri = apr_pstrdup(r->pool, "*");
+        }
+        else {
+            r->uri = apr_pstrdup(r->pool, "/");
+        }
 
 #if defined(OS2) || defined(WIN32)
         /* Handle path translations for OS/2 and plug security hole.
Index: server/request.c
===================================================================
--- server/request.c	(revision 1878328)
+++ server/request.c	(working copy)
@@ -169,12 +169,14 @@ AP_DECLARE(int) ap_process_request_internal(reques
     core_dir_config *d;
     core_server_config *sconf =
         ap_get_core_module_config(r->server->module_config);
+    unsigned int normalize_flags = 0;
 
-    /* Ignore embedded %2F's in path for proxy requests */
+    /* Ignore URL unescaping for proxy requests */
     if (!r->proxyreq && r->parsed_uri.path) {
         d = ap_get_core_module_config(r->per_dir_config);
         if (d->allow_encoded_slashes) {
-            access_status = ap_unescape_url_keep2f(r->parsed_uri.path, d->decode_encoded_slashes);
+            access_status = ap_unescape_url_keep2f(r->parsed_uri.path,
+                                                   d->decode_encoded_slashes);
         }
         else {
             access_status = ap_unescape_url(r->parsed_uri.path);
@@ -185,7 +187,7 @@ AP_DECLARE(int) ap_process_request_internal(reques
                     ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(00026)
                                   "found %%2f (encoded '/') in URI "
                                   "(decoded='%s'), returning 404",
-                                  r->parsed_uri.path);
+                                  r->uri);
                 }
             }
             return access_status;
@@ -192,14 +194,21 @@ AP_DECLARE(int) ap_process_request_internal(reques
         }
     }
 
-    ap_getparents(r->uri);     /* OK --- shrinking transformations... */
-    if (sconf->merge_slashes != AP_CORE_CONFIG_OFF) { 
-        ap_no2slash(r->uri);
-        if (r->parsed_uri.path) {
-            ap_no2slash(r->parsed_uri.path);
+    if (r->parsed_uri.path) {
+        /* OK --- shrinking transformations... */
+        if (sconf->merge_slashes != AP_CORE_CONFIG_OFF) { 
+            normalize_flags |= AP_NORMALIZE_MERGE_SLASHES;
         }
-     }
+        if (!ap_normalize_path(r->parsed_uri.path, normalize_flags)) {
+            ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO()
+                          "invalid URI path (%s)", r->uri);
+            return HTTP_BAD_REQUEST;
+        }
 
+        /* Apply transformations */
+        r->uri = r->parsed_uri.path;
+    }
+
     /* All file subrequests are a huge pain... they cannot bubble through the
      * next several steps.  Only file subrequests are allowed an empty uri,
      * otherwise let translate_name kill the request.
Index: server/util.c
===================================================================
--- server/util.c	(revision 1878328)
+++ server/util.c	(working copy)
@@ -77,7 +77,7 @@
 #include "test_char.h"
 
 /* Win32/NetWare/OS2 need to check for both forward and back slashes
- * in ap_getparents() and ap_escape_url.
+ * in ap_normalize_path() and ap_escape_url().
  */
 #ifdef CASE_BLIND_FILESYSTEM
 #define IS_SLASH(s) ((s == '/') || (s == '\\'))
@@ -492,75 +492,114 @@ AP_DECLARE(apr_status_t) ap_pregsub_ex(apr_pool_t
     return rc;
 }
 
-/*
- * Parse .. so we don't compromise security
- */
-AP_DECLARE(void) ap_getparents(char *name)
+/* Forward declare */
+static char x2c(const char *what);
+
+#define IS_SLASH_OR_NUL(s) (s == '\0' || IS_SLASH(s))
+
+static int normalize_path(char *path, unsigned int flags)
 {
-    char *next;
-    int l, w, first_dot;
+    apr_size_t l, w;
 
-    /* Four paseses, as per RFC 1808 */
-    /* a) remove ./ path segments */
-    for (next = name; *next && (*next != '.'); next++) {
+    /* We assume path[0] == '/' below */
+    if (!IS_SLASH(path[0])) {
+        /* Besides "OPTIONS *" requests, a request-target path should start
+         * with '/' (RFC-7230 section-5.3), so anything else is invalid.
+         */
+        return (path[0] == '*' && path[1] == '\0');
     }
 
-    l = w = first_dot = next - name;
-    while (name[l] != '\0') {
-        if (name[l] == '.' && IS_SLASH(name[l + 1])
-            && (l == 0 || IS_SLASH(name[l - 1])))
-            l += 2;
-        else
-            name[w++] = name[l++];
-    }
+    for (l = w = 1; path[l] != '\0';) {
+        if ((flags & AP_NORMALIZE_PATH_PARAMETERS) && path[l] == ';') {
+            /* Remove path parameters ;foo=bar/ from any path segment */
+            do {
+                l++;
+            } while (!IS_SLASH_OR_NUL(path[l]));
+            continue;
+        }
 
-    /* b) remove trailing . path, segment */
-    if (w == 1 && name[0] == '.')
-        w--;
-    else if (w > 1 && name[w - 1] == '.' && IS_SLASH(name[w - 2]))
-        w--;
-    name[w] = '\0';
+        /* RFC-3986 section 2.3:
+         *  For consistency, percent-encoded octets in the ranges of
+         *  ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
+         *  period (%2E), underscore (%5F), or tilde (%7E) should [...]
+         *  be decoded to their corresponding unreserved characters by
+         *  URI normalizers.
+         */
+        if (path[l] == '%' && apr_isxdigit(path[l + 1])
+                           && apr_isxdigit(path[l + 2])) {
+            const char c = x2c(&path[l + 1]);
+            if (apr_isalnum(c) || (c && strchr("-._~", c))) {
+                /* Replace last char and fall through as the current
+                 * read position */
+                l += 2;
+                path[l] = c;
+            }
+        }
 
-    /* c) remove all xx/../ segments. (including leading ../ and /../) */
-    l = first_dot;
+        if (IS_SLASH(path[w - 1])) {
+            if ((flags & AP_NORMALIZE_MERGE_SLASHES) && IS_SLASH(path[l])) {
+                /* Collapse ///// sequences to / */
+                do {
+                    l++;
+                } while (IS_SLASH(path[l]));
+                continue;
+            }
 
-    while (name[l] != '\0') {
-        if (name[l] == '.' && name[l + 1] == '.' && IS_SLASH(name[l + 2])
-            && (l == 0 || IS_SLASH(name[l - 1]))) {
-            int m = l + 3, n;
+            if (path[l] == '.') {
+                if (IS_SLASH_OR_NUL(path[l + 1])) {
+                    /* Remove /./ segments */
+                    l++;
+                    if (path[l]) {
+                        l++;
+                    }
+                    continue;
+                }
 
-            l = l - 2;
-            if (l >= 0) {
-                while (l >= 0 && !IS_SLASH(name[l]))
-                    l--;
-                l++;
+                if (path[l + 1] == '.' && IS_SLASH_OR_NUL(path[l + 2])) {
+                    /* Remove /xx/../ segments */
+                    if (w == 1) {
+                        if (flags & AP_NORMALIZE_NOT_ABOVE_ROOT) {
+                            return 0;
+                        }
+                        l += 2;
+                        continue;
+                    }
+
+                    /* Wind w back to remove the previous segment */
+                    do {
+                        w--;
+                    } while (!IS_SLASH(path[w - 1]));
+
+                    /* Move l forward to the next segment */
+                    l += 2;
+                    if (path[l]) {
+                        l++;
+                    }
+                    continue;
+                }
             }
-            else
-                l = 0;
-            n = l;
-            while ((name[n] = name[m]))
-                (++n, ++m);
         }
-        else
-            ++l;
+
+        path[w++] = path[l++];
     }
+    path[w] = '\0';
 
-    /* d) remove trailing xx/.. segment. */
-    if (l == 2 && name[0] == '.' && name[1] == '.')
-        name[0] = '\0';
-    else if (l > 2 && name[l - 1] == '.' && name[l - 2] == '.'
-             && IS_SLASH(name[l - 3])) {
-        l = l - 4;
-        if (l >= 0) {
-            while (l >= 0 && !IS_SLASH(name[l]))
-                l--;
-            l++;
-        }
-        else
-            l = 0;
-        name[l] = '\0';
-    }
+    return 1;
 }
+
+AP_DECLARE(int) ap_normalize_path(char *path, unsigned int flags)
+{
+    return normalize_path(path, flags);
+}
+
+/*
+ * Normalize .. so we don't compromise security
+ */
+AP_DECLARE(void) ap_getparents(char *name)
+{
+    (void)normalize_path(name, 0);
+}
+
 AP_DECLARE(void) ap_no2slash_ex(char *name, int is_fs_path)
 {
 

Reply via email to