This normalizer strips the URI's fragment component, which should never be
sent to the server.

See GitHub Issue #714.
---
 doc/configuration.txt                  | 12 ++++++++++
 include/haproxy/action-t.h             |  1 +
 include/haproxy/uri_normalizer.h       |  8 +++++++
 reg-tests/http-rules/normalize_uri.vtc | 31 +++++++++++++++++++++++++-
 src/http_act.c                         | 22 ++++++++++++++++++
 5 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/doc/configuration.txt b/doc/configuration.txt
index 964bc04ce..00749e5ee 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -6172,6 +6172,7 @@ http-request early-hint <name> <fmt> [ { if | unless } 
<condition> ]
   See RFC 8297 for more information.
 
 http-request normalize-uri <normalizer> [ { if | unless } <condition> ]
+http-request normalize-uri fragment-strip [ { if | unless } <condition> ]
 http-request normalize-uri path-merge-slashes [ { if | unless } <condition> ]
 http-request normalize-uri path-strip-dot [ { if | unless } <condition> ]
 http-request normalize-uri path-strip-dotdot [ full ] [ { if | unless } 
<condition> ]
@@ -6209,6 +6210,17 @@ http-request normalize-uri query-sort-by-name [ { if | 
unless } <condition> ]
 
   The following normalizers are available:
 
+  - fragment-strip: Removes the URI's "fragment" component.
+
+      According to RFC 3986#3.5 the "fragment" component of a URI should not
+      be sent, but handled by the User Agent after retrieving a resource.
+
+      This normalizer should be applied first to ensure that the fragment is
+      not interpreted as part of the request's path component.
+
+      Example:
+      - /#foo  -> /
+
   - path-strip-dot: Removes "/./" segments within the "path" component
       (RFC 3986#6.2.2.3).
 
diff --git a/include/haproxy/action-t.h b/include/haproxy/action-t.h
index 5b9f543ae..56ac32f7f 100644
--- a/include/haproxy/action-t.h
+++ b/include/haproxy/action-t.h
@@ -111,6 +111,7 @@ enum act_normalize_uri {
        ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT,
        ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED,
        ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT,
+       ACT_NORMALIZE_URI_FRAGMENT_STRIP,
 };
 
 /* NOTE: if <.action_ptr> is defined, the referenced function will always be
diff --git a/include/haproxy/uri_normalizer.h b/include/haproxy/uri_normalizer.h
index 06f237e45..fa5d89dd0 100644
--- a/include/haproxy/uri_normalizer.h
+++ b/include/haproxy/uri_normalizer.h
@@ -18,6 +18,14 @@
 
 #include <haproxy/uri_normalizer-t.h>
 
+/* Cuts <input> at the first '#' and stores the result in <dst>; always returns URI_NORMALIZER_ERR_NONE. */
+static inline enum uri_normalizer_err uri_normalizer_fragment_strip(const 
struct ist input, struct ist *dst)
+{
+       *dst = iststop(input, '#');
+
+       return URI_NORMALIZER_ERR_NONE;
+}
+
 enum uri_normalizer_err uri_normalizer_percent_decode_unreserved(const struct 
ist input, int strict, struct ist *dst);
 enum uri_normalizer_err uri_normalizer_percent_upper(const struct ist input, 
int strict, struct ist *dst);
 enum uri_normalizer_err uri_normalizer_path_dot(const struct ist path, struct 
ist *dst);
diff --git a/reg-tests/http-rules/normalize_uri.vtc 
b/reg-tests/http-rules/normalize_uri.vtc
index 42c4c428b..792bea5d4 100644
--- a/reg-tests/http-rules/normalize_uri.vtc
+++ b/reg-tests/http-rules/normalize_uri.vtc
@@ -8,7 +8,7 @@ feature ignore_unknown_macro
 server s1 {
     rxreq
     txresp
-} -repeat 63 -start
+} -repeat 66 -start
 
 haproxy h1 -conf {
     global
@@ -125,6 +125,18 @@ haproxy h1 -conf {
 
         default_backend be
 
+    frontend fe_fragment_strip
+        bind "fd@${fe_fragment_strip}"
+
+        http-request set-var(txn.before) url
+        http-request normalize-uri fragment-strip
+        http-request set-var(txn.after) url
+
+        http-response add-header before  %[var(txn.before)]
+        http-response add-header after  %[var(txn.after)]
+
+        default_backend be
+
     backend be
         server s1 ${s1_addr}:${s1_port}
 
@@ -471,3 +483,20 @@ client c8 -connect 
${h1_fe_percent_decode_unreserved_strict_sock} {
     rxresp
     expect resp.status == 400
 } -run
+
+client c9 -connect ${h1_fe_fragment_strip_sock} {
+    txreq -url "/#foo"
+    rxresp
+    expect resp.http.before == "/#foo"
+    expect resp.http.after == "/"
+
+    txreq -url "/%23foo"
+    rxresp
+    expect resp.http.before == "/%23foo"
+    expect resp.http.after == "/%23foo"
+
+    txreq -req OPTIONS -url "*"
+    rxresp
+    expect resp.http.before == "*"
+    expect resp.http.after == "*"
+} -run
diff --git a/src/http_act.c b/src/http_act.c
index b8413f331..5eeba631b 100644
--- a/src/http_act.c
+++ b/src/http_act.c
@@ -312,6 +312,23 @@ static enum act_return http_action_normalize_uri(struct 
act_rule *rule, struct p
 
                        err = uri_normalizer_percent_decode_unreserved(path, 
rule->action == ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT, &newpath);
 
+                       if (err != URI_NORMALIZER_ERR_NONE)
+                               break;
+
+                       if (!http_replace_req_path(htx, newpath, 1))
+                               goto fail_rewrite;
+
+                       break;
+               }
+               case ACT_NORMALIZE_URI_FRAGMENT_STRIP: {
+                       const struct ist path = http_get_path(uri);
+                       struct ist newpath = ist2(replace->area, replace->size);
+
+                       if (!isttest(path))
+                               goto leave;
+
+                       err = uri_normalizer_fragment_strip(path, &newpath);
+
                        if (err != URI_NORMALIZER_ERR_NONE)
                                break;
 
@@ -440,6 +457,11 @@ static enum act_parse_ret parse_http_normalize_uri(const 
char **args, int *orig_
                        return ACT_RET_PRS_ERR;
                }
        }
+       else if (strcmp(args[cur_arg], "fragment-strip") == 0) {
+               cur_arg++;
+
+               rule->action = ACT_NORMALIZE_URI_FRAGMENT_STRIP;
+       }
        else {
                memprintf(err, "unknown normalizer '%s'", args[cur_arg]);
                return ACT_RET_PRS_ERR;
-- 
2.31.1


Reply via email to