Christopher,

the general logic of the normalizer is unchanged, but the whole framework was
refactored.

Best regards
Tim Düsterhus

Apply with `git am --scissors` to automatically cut the commit message.

-- >8 --
This normalizer merges `../` path segments with the predecing segment, removing
both the preceding segment and the `../`.

Empty segments do not receive special treatment. The `merge-slashes` normalizer
should be executed first.

See GitHub Issue #714.
---
 doc/configuration.txt                  | 13 ++++
 include/haproxy/action-t.h             |  1 +
 include/haproxy/uri_normalizer.h       |  1 +
 reg-tests/http-rules/normalize_uri.vtc | 71 +++++++++++++++++++++-
 src/http_act.c                         | 22 +++++++
 src/uri_normalizer.c                   | 82 ++++++++++++++++++++++++++
 6 files changed, 189 insertions(+), 1 deletion(-)

diff --git a/doc/configuration.txt b/doc/configuration.txt
index 61cb0b5ad..a6110eb19 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -6012,11 +6012,24 @@ http-request early-hint <name> <fmt> [ { if | unless } 
<condition> ]
   See RFC 8297 for more information.
 
 http-request normalize-uri <normalizer> [ { if | unless } <condition> ]
+http-request normalize-uri dotdot [ { if | unless } <condition> ]
 http-request normalize-uri merge-slashes [ { if | unless } <condition> ]
 
   Performs normalization of the request's URI. The following normalizers are
   available:
 
+  - dotdot: Normalizes "/../" segments within the "path" component. This merges
+      segments that attempt to access the parent directory with their preceding
+      segment. Empty segments do not receive special treatment. Use the
+      "merge-slashes" normalizer first if this is undesired.
+
+      Example:
+      - /foo/../     -> /
+      - /foo/../bar/ -> /bar/
+      - /foo/bar/../ -> /foo/
+      - /../bar/     -> /../bar/
+      - /foo//../    -> /foo/
+
   - merge-slashes: Merges adjacent slashes within the "path" component into a
       single slash.
 
diff --git a/include/haproxy/action-t.h b/include/haproxy/action-t.h
index 4a3e3f8bd..ac9399a6b 100644
--- a/include/haproxy/action-t.h
+++ b/include/haproxy/action-t.h
@@ -103,6 +103,7 @@ enum act_timeout_name {
 
 enum act_normalize_uri {
        ACT_NORMALIZE_URI_MERGE_SLASHES,
+       ACT_NORMALIZE_URI_DOTDOT,
 };
 
 /* NOTE: if <.action_ptr> is defined, the referenced function will always be
diff --git a/include/haproxy/uri_normalizer.h b/include/haproxy/uri_normalizer.h
index 416f5b7c5..9dbbe5826 100644
--- a/include/haproxy/uri_normalizer.h
+++ b/include/haproxy/uri_normalizer.h
@@ -18,6 +18,7 @@
 
 #include <haproxy/uri_normalizer-t.h>
 
+enum uri_normalizer_err uri_normalizer_path_dotdot(const struct ist path, 
struct ist *dst);
 enum uri_normalizer_err uri_normalizer_path_merge_slashes(const struct ist 
path, struct ist *dst);
 
 #endif /* _HAPROXY_URI_NORMALIZER_H */
diff --git a/reg-tests/http-rules/normalize_uri.vtc 
b/reg-tests/http-rules/normalize_uri.vtc
index 3303760d4..e66bdc47b 100644
--- a/reg-tests/http-rules/normalize_uri.vtc
+++ b/reg-tests/http-rules/normalize_uri.vtc
@@ -8,7 +8,7 @@ feature ignore_unknown_macro
 server s1 {
     rxreq
     txresp
-} -repeat 10 -start
+} -repeat 21 -start
 
 haproxy h1 -conf {
     defaults
@@ -29,6 +29,18 @@ haproxy h1 -conf {
 
         default_backend be
 
+    frontend fe_dotdot
+        bind "fd@${fe_dotdot}"
+
+        http-request set-var(txn.before) url
+        http-request normalize-uri dotdot
+        http-request set-var(txn.after) url
+
+        http-response add-header before  %[var(txn.before)]
+        http-response add-header after  %[var(txn.after)]
+
+        default_backend be
+
     backend be
         server s1 ${s1_addr}:${s1_port}
 
@@ -85,3 +97,60 @@ client c1 -connect ${h1_fe_merge_slashes_sock} {
     expect resp.http.before == "*"
     expect resp.http.after == "*"
 } -run
+
+client c2 -connect ${h1_fe_dotdot_sock} {
+    txreq -url "/foo/bar"
+    rxresp
+    expect resp.http.before == "/foo/bar"
+    expect resp.http.after == "/foo/bar"
+
+    txreq -url "/foo/.."
+    rxresp
+    expect resp.http.before == "/foo/.."
+    expect resp.http.after == "/"
+
+    txreq -url "/foo/../"
+    rxresp
+    expect resp.http.before == "/foo/../"
+    expect resp.http.after == "/"
+
+    txreq -url "/foo/bar/../"
+    rxresp
+    expect resp.http.before == "/foo/bar/../"
+    expect resp.http.after == "/foo/"
+
+    txreq -url "/foo/../bar"
+    rxresp
+    expect resp.http.before == "/foo/../bar"
+    expect resp.http.after == "/bar"
+
+    txreq -url "/foo/../bar/"
+    rxresp
+    expect resp.http.before == "/foo/../bar/"
+    expect resp.http.after == "/bar/"
+
+    txreq -url "/foo/../../bar/"
+    rxresp
+    expect resp.http.before == "/foo/../../bar/"
+    expect resp.http.after == "/../bar/"
+
+    txreq -url "/foo//../../bar/"
+    rxresp
+    expect resp.http.before == "/foo//../../bar/"
+    expect resp.http.after == "/bar/"
+
+    txreq -url "/foo/?bar=/foo/../"
+    rxresp
+    expect resp.http.before == "/foo/?bar=/foo/../"
+    expect resp.http.after == "/foo/?bar=/foo/../"
+
+    txreq -url "/foo/../?bar=/foo/../"
+    rxresp
+    expect resp.http.before == "/foo/../?bar=/foo/../"
+    expect resp.http.after == "/?bar=/foo/../"
+
+    txreq -req OPTIONS -url "*"
+    rxresp
+    expect resp.http.before == "*"
+    expect resp.http.after == "*"
+} -run
diff --git a/src/http_act.c b/src/http_act.c
index 2af4d471a..7e1829e9d 100644
--- a/src/http_act.c
+++ b/src/http_act.c
@@ -224,6 +224,23 @@ static enum act_return http_action_normalize_uri(struct 
act_rule *rule, struct p
 
                        err = uri_normalizer_path_merge_slashes(iststop(path, 
'?'), &newpath);
 
+                       if (err != URI_NORMALIZER_ERR_NONE)
+                               break;
+
+                       if (!http_replace_req_path(htx, newpath, 0))
+                               goto fail_rewrite;
+
+                       break;
+               }
+               case ACT_NORMALIZE_URI_DOTDOT: {
+                       const struct ist path = http_get_path(uri);
+                       struct ist newpath = ist2(replace->area, replace->size);
+
+                       if (!isttest(path))
+                               goto leave;
+
+                       err = uri_normalizer_path_dotdot(iststop(path, '?'), 
&newpath);
+
                        if (err != URI_NORMALIZER_ERR_NONE)
                                break;
 
@@ -297,6 +314,11 @@ static enum act_parse_ret parse_http_normalize_uri(const 
char **args, int *orig_
 
                rule->action = ACT_NORMALIZE_URI_MERGE_SLASHES;
        }
+       else if (strcmp(args[cur_arg], "dotdot") == 0) {
+               cur_arg++;
+
+               rule->action = ACT_NORMALIZE_URI_DOTDOT;
+       }
        else {
                memprintf(err, "unknown normalizer '%s'", args[cur_arg]);
                return ACT_RET_PRS_ERR;
diff --git a/src/uri_normalizer.c b/src/uri_normalizer.c
index abc029be5..05e8cd578 100644
--- a/src/uri_normalizer.c
+++ b/src/uri_normalizer.c
@@ -15,6 +15,88 @@
 #include <haproxy/api.h>
 #include <haproxy/uri_normalizer.h>
 
+/* Merges `/../` with preceding path segments. */
+enum uri_normalizer_err uri_normalizer_path_dotdot(const struct ist path, 
struct ist *dst)
+{
+       enum uri_normalizer_err err;
+
+       const size_t size = istclear(dst);
+       char * const tail = istptr(*dst) + size;
+       char *head = tail;
+
+       ssize_t offset = istlen(path) - 1;
+
+       int up = 0;
+
+       /* The path will either be shortened or have the same length. */
+       if (size < istlen(path)) {
+               err = URI_NORMALIZER_ERR_ALLOC;
+               goto fail;
+       }
+
+       /* Handle `/..` at the end of the path without a trailing slash. */
+       if (offset >= 2 && istmatch(istadv(path, offset - 2), ist("/.."))) {
+               up++;
+               offset -= 2;
+       }
+
+       while (offset >= 0) {
+               if (offset >= 3 && istmatch(istadv(path, offset - 3), 
ist("/../"))) {
+                       up++;
+                       offset -= 3;
+                       continue;
+               }
+
+               if (up > 0) {
+                       /* Skip the slash. */
+                       offset--;
+
+                       /* First check whether we already reached the start of 
the path,
+                        * before popping the current `/../`.
+                        */
+                       if (offset >= 0) {
+                               up--;
+
+                               /* Skip the current path segment. */
+                               while (offset >= 0 && istptr(path)[offset] != 
'/')
+                                       offset--;
+                       }
+               }
+               else {
+                       /* Prepend the slash. */
+                       *(--head) = istptr(path)[offset];
+                       offset--;
+
+                       /* Prepend the current path segment. */
+                       while (offset >= 0 && istptr(path)[offset] != '/') {
+                               *(--head) = istptr(path)[offset];
+                               offset--;
+                       }
+               }
+       }
+
+       if (up > 0) {
+               /* Prepend a trailing slash. */
+               *(--head) = '/';
+
+               /* Prepend unconsumed `/..`. */
+               do {
+                       *(--head) = '.';
+                       *(--head) = '.';
+                       *(--head) = '/';
+                       up--;
+               } while (up > 0);
+       }
+
+       *dst = ist2(head, tail - head);
+
+       return URI_NORMALIZER_ERR_NONE;
+
+  fail:
+
+       return err;
+}
+
 /* Merges adjacent slashes in the given path. */
 enum uri_normalizer_err uri_normalizer_path_merge_slashes(const struct ist 
path, struct ist *dst)
 {
-- 
2.31.1


Reply via email to