Christopher,

the general logic of the normalizer is unchanged, but the whole framework was
refactored.

Best regards
Tim Düsterhus

Apply with `git am --scissors` to automatically cut the commit message.

-- >8 --
This normalizer merges adjacent slashes into a single slash, thus removing
empty path segments.

See GitHub Issue #714.
---
 doc/configuration.txt                  | 13 ++++
 include/haproxy/action-t.h             |  2 +-
 include/haproxy/uri_normalizer.h       |  4 ++
 reg-tests/http-rules/normalize_uri.vtc | 87 ++++++++++++++++++++++++++
 src/http_act.c                         | 23 ++++++-
 src/uri_normalizer.c                   | 39 ++++++++++++
 6 files changed, 164 insertions(+), 4 deletions(-)
 create mode 100644 reg-tests/http-rules/normalize_uri.vtc

diff --git a/doc/configuration.txt b/doc/configuration.txt
index 61c2a6dd9..61cb0b5ad 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -6011,6 +6011,19 @@ http-request early-hint <name> <fmt> [ { if | unless } <condition> ]
 
   See RFC 8297 for more information.
 
+http-request normalize-uri <normalizer> [ { if | unless } <condition> ]
+http-request normalize-uri merge-slashes [ { if | unless } <condition> ]
+
+  Performs normalization of the request's URI. The following normalizers are
+  available:
+
+  - merge-slashes: Merges adjacent slashes within the "path" component into a
+      single slash.
+
+      Example:
+      - //        -> /
+      - /foo//bar -> /foo/bar
+
 http-request redirect <rule> [ { if | unless } <condition> ]
 
   This performs an HTTP redirection based on a redirect rule. This is exactly
diff --git a/include/haproxy/action-t.h b/include/haproxy/action-t.h
index 2909b0da2..4a3e3f8bd 100644
--- a/include/haproxy/action-t.h
+++ b/include/haproxy/action-t.h
@@ -102,7 +102,7 @@ enum act_timeout_name {
 };
 
 enum act_normalize_uri {
-       ACT_NORMALIZE_URI_PLACEHOLDER,
+       ACT_NORMALIZE_URI_MERGE_SLASHES,
 };
 
 /* NOTE: if <.action_ptr> is defined, the referenced function will always be
diff --git a/include/haproxy/uri_normalizer.h b/include/haproxy/uri_normalizer.h
index 20341a907..416f5b7c5 100644
--- a/include/haproxy/uri_normalizer.h
+++ b/include/haproxy/uri_normalizer.h
@@ -14,8 +14,12 @@
 #ifndef _HAPROXY_URI_NORMALIZER_H
 #define _HAPROXY_URI_NORMALIZER_H
 
+#include <import/ist.h>
+
 #include <haproxy/uri_normalizer-t.h>
 
+enum uri_normalizer_err uri_normalizer_path_merge_slashes(const struct ist path, struct ist *dst);
+
 #endif /* _HAPROXY_URI_NORMALIZER_H */
 
 /*
diff --git a/reg-tests/http-rules/normalize_uri.vtc b/reg-tests/http-rules/normalize_uri.vtc
new file mode 100644
index 000000000..3303760d4
--- /dev/null
+++ b/reg-tests/http-rules/normalize_uri.vtc
@@ -0,0 +1,87 @@
+varnishtest "normalize-uri tests"
+#REQUIRE_VERSION=2.4
+
+# This reg-test tests the http-request normalize-uri action.
+
+feature ignore_unknown_macro
+
+server s1 {
+    rxreq
+    txresp
+} -repeat 10 -start
+
+haproxy h1 -conf {
+    defaults
+        mode http
+        timeout connect 1s
+        timeout client  1s
+        timeout server  1s
+
+    frontend fe_merge_slashes
+        bind "fd@${fe_merge_slashes}"
+
+        http-request set-var(txn.before) url
+        http-request normalize-uri merge-slashes
+        http-request set-var(txn.after) url
+
+        http-response add-header before  %[var(txn.before)]
+        http-response add-header after  %[var(txn.after)]
+
+        default_backend be
+
+    backend be
+        server s1 ${s1_addr}:${s1_port}
+
+} -start
+
+client c1 -connect ${h1_fe_merge_slashes_sock} {
+    txreq -url "/foo/bar"
+    rxresp
+    expect resp.http.before == "/foo/bar"
+    expect resp.http.after == "/foo/bar"
+
+    txreq -url "/foo//bar"
+    rxresp
+    expect resp.http.before == "/foo//bar"
+    expect resp.http.after == "/foo/bar"
+
+    txreq -url "/foo///bar"
+    rxresp
+    expect resp.http.before == "/foo///bar"
+    expect resp.http.after == "/foo/bar"
+
+    txreq -url "///foo///bar"
+    rxresp
+    expect resp.http.before == "///foo///bar"
+    expect resp.http.after == "/foo/bar"
+
+    txreq -url "///foo/bar"
+    rxresp
+    expect resp.http.before == "///foo/bar"
+    expect resp.http.after == "/foo/bar"
+
+    txreq -url "///foo///bar///"
+    rxresp
+    expect resp.http.before == "///foo///bar///"
+    expect resp.http.after == "/foo/bar/"
+
+    txreq -url "///"
+    rxresp
+    expect resp.http.before == "///"
+    expect resp.http.after == "/"
+
+    txreq -url "/foo?bar=///"
+    rxresp
+    expect resp.http.before == "/foo?bar=///"
+    expect resp.http.after == "/foo?bar=///"
+
+    txreq -url "//foo?bar=///"
+    rxresp
+    expect resp.http.before == "//foo?bar=///"
+    expect resp.http.after == "/foo?bar=///"
+
+    txreq -req OPTIONS -url "*"
+    rxresp
+    expect resp.http.before == "*"
+    expect resp.http.after == "*"
+} -run
diff --git a/src/http_act.c b/src/http_act.c
index 134c9037b..2af4d471a 100644
--- a/src/http_act.c
+++ b/src/http_act.c
@@ -215,8 +215,23 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p
                goto fail_alloc;
 
        switch ((enum act_normalize_uri) rule->action) {
-               case ACT_NORMALIZE_URI_PLACEHOLDER:
-                       (void) uri;
+               case ACT_NORMALIZE_URI_MERGE_SLASHES: {
+                       const struct ist path = http_get_path(uri);
+                       struct ist newpath = ist2(replace->area, replace->size);
+
+                       if (!isttest(path))
+                               goto leave;
+
+                       err = uri_normalizer_path_merge_slashes(iststop(path, '?'), &newpath);
+
+                       if (err != URI_NORMALIZER_ERR_NONE)
+                               break;
+
+                       if (!http_replace_req_path(htx, newpath, 0))
+                               goto fail_rewrite;
+
+                       break;
+               }
        }
 
        switch (err) {
@@ -277,8 +292,10 @@ static enum act_parse_ret parse_http_normalize_uri(const char **args, int *orig_
                return ACT_RET_PRS_ERR;
        }
 
-       if (0) {
+       if (strcmp(args[cur_arg], "merge-slashes") == 0) {
+               cur_arg++;
 
+               rule->action = ACT_NORMALIZE_URI_MERGE_SLASHES;
        }
        else {
                memprintf(err, "unknown normalizer '%s'", args[cur_arg]);
diff --git a/src/uri_normalizer.c b/src/uri_normalizer.c
index 7db47d198..abc029be5 100644
--- a/src/uri_normalizer.c
+++ b/src/uri_normalizer.c
@@ -10,9 +10,48 @@
  *
  */
 
+#include <import/ist.h>
+
 #include <haproxy/api.h>
 #include <haproxy/uri_normalizer.h>
 
+/* Merges adjacent slashes in the given path. */
+enum uri_normalizer_err uri_normalizer_path_merge_slashes(const struct ist path, struct ist *dst)
+{
+       enum uri_normalizer_err err;
+
+       const size_t size = istclear(dst);
+       struct ist newpath = *dst;
+
+       struct ist scanner = path;
+
+       /* The path will either be shortened or have the same length. */
+       if (size < istlen(path)) {
+               err = URI_NORMALIZER_ERR_ALLOC;
+               goto fail;
+       }
+
+       while (istlen(scanner) > 0) {
+               const char current = istshift(&scanner);
+
+               if (current == '/') {
+                       while (istlen(scanner) > 0 && *istptr(scanner) == '/')
+                               scanner = istnext(scanner);
+               }
+
+               newpath = __istappend(newpath, current);
+       }
+
+       *dst = newpath;
+
+       return URI_NORMALIZER_ERR_NONE;
+
+  fail:
+
+       return err;
+}
+
+
 /*
  * Local variables:
  *  c-indent-level: 8
-- 
2.31.1


Reply via email to