Christopher,

here I am using `istdiff` and a trash buffer instead of dynamic allocation.

Best regards
Tim Düsterhus

Apply with `git am --scissors` to automatically cut the commit message.

-- >8 --
This normalizer sorts the `&` delimited query parameters by parameter name.

See GitHub Issue #714.
---
 doc/configuration.txt                  | 10 +++
 include/haproxy/action-t.h             |  1 +
 include/haproxy/uri_normalizer.h       |  1 +
 reg-tests/http-rules/normalize_uri.vtc | 81 ++++++++++++++++++++++-
 src/http_act.c                         | 22 +++++++
 src/uri_normalizer.c                   | 89 ++++++++++++++++++++++++++
 6 files changed, 203 insertions(+), 1 deletion(-)

diff --git a/doc/configuration.txt b/doc/configuration.txt
index a073da5fe..862427e51 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -6014,6 +6014,7 @@ http-request early-hint <name> <fmt> [ { if | unless } 
<condition> ]
 http-request normalize-uri <normalizer> [ { if | unless } <condition> ]
 http-request normalize-uri dotdot [ full ] [ { if | unless } <condition> ]
 http-request normalize-uri merge-slashes [ { if | unless } <condition> ]
+http-request normalize-uri sort-query [ { if | unless } <condition> ]
 
   Performs normalization of the request's URI. The following normalizers are
   available:
@@ -6045,6 +6046,15 @@ http-request normalize-uri merge-slashes [ { if | unless 
} <condition> ]
       - //        -> /
       - /foo//bar -> /foo/bar
 
+  - sort-query: Sorts the query string parameters by parameter name.
+      Parameters are assumed to be delimited by '&'. Shorter names sort before
+      longer names and identical parameter names maintain their relative order.
+
+      Example:
+      - /?c=3&a=1&b=2         -> /?a=1&b=2&c=3
+      - /?aaa=3&a=1&aa=2      -> /?a=1&aa=2&aaa=3
+      - /?a=3&b=4&a=1&b=5&a=2 -> /?a=3&a=1&a=2&b=4&b=5
+
 http-request redirect <rule> [ { if | unless } <condition> ]
 
   This performs an HTTP redirection based on a redirect rule. This is exactly
diff --git a/include/haproxy/action-t.h b/include/haproxy/action-t.h
index 5a8155929..ae43a936d 100644
--- a/include/haproxy/action-t.h
+++ b/include/haproxy/action-t.h
@@ -105,6 +105,7 @@ enum act_normalize_uri {
        ACT_NORMALIZE_URI_MERGE_SLASHES,
        ACT_NORMALIZE_URI_DOTDOT,
        ACT_NORMALIZE_URI_DOTDOT_FULL,
+       ACT_NORMALIZE_URI_SORT_QUERY,
 };
 
 /* NOTE: if <.action_ptr> is defined, the referenced function will always be
diff --git a/include/haproxy/uri_normalizer.h b/include/haproxy/uri_normalizer.h
index 811a7ebb6..c16dd3ffa 100644
--- a/include/haproxy/uri_normalizer.h
+++ b/include/haproxy/uri_normalizer.h
@@ -20,6 +20,7 @@
 
 enum uri_normalizer_err uri_normalizer_path_dotdot(const struct ist path, int 
full, struct ist *dst);
 enum uri_normalizer_err uri_normalizer_path_merge_slashes(const struct ist 
path, struct ist *dst);
+enum uri_normalizer_err uri_normalizer_query_sort(const struct ist query, 
const char delim, struct ist *dst);
 
 #endif /* _HAPROXY_URI_NORMALIZER_H */
 
diff --git a/reg-tests/http-rules/normalize_uri.vtc 
b/reg-tests/http-rules/normalize_uri.vtc
index 5ee73a308..cb3fa2f63 100644
--- a/reg-tests/http-rules/normalize_uri.vtc
+++ b/reg-tests/http-rules/normalize_uri.vtc
@@ -8,7 +8,7 @@ feature ignore_unknown_macro
 server s1 {
     rxreq
     txresp
-} -repeat 21 -start
+} -repeat 34 -start
 
 haproxy h1 -conf {
     defaults
@@ -46,6 +46,18 @@ haproxy h1 -conf {
 
         default_backend be
 
+    frontend fe_sort_query
+        bind "fd@${fe_sort_query}"
+
+        http-request set-var(txn.before) url
+        http-request normalize-uri sort-query
+        http-request set-var(txn.after) url
+
+        http-response add-header before  %[var(txn.before)]
+        http-response add-header after  %[var(txn.after)]
+
+        default_backend be
+
     backend be
         server s1 ${s1_addr}:${s1_port}
 
@@ -170,3 +182,70 @@ client c2 -connect ${h1_fe_dotdot_sock} {
     expect resp.http.after == "*"
     expect resp.http.after-full == "*"
 } -run
+
+client c3 -connect ${h1_fe_sort_query_sock} {
+    txreq -url "/?a=a"
+    rxresp
+    expect resp.http.before == "/?a=a"
+    expect resp.http.after == "/?a=a"
+
+    txreq -url "/?a=a&z=z"
+    rxresp
+    expect resp.http.before == "/?a=a&z=z"
+    expect resp.http.after == "/?a=a&z=z"
+
+    txreq -url "/?z=z&a=a"
+    rxresp
+    expect resp.http.before == "/?z=z&a=a"
+    expect resp.http.after == "/?a=a&z=z"
+
+    txreq -url "/?a=z&z=a"
+    rxresp
+    expect resp.http.before == "/?a=z&z=a"
+    expect resp.http.after == "/?a=z&z=a"
+
+    txreq -url "/?z=a&a=z"
+    rxresp
+    expect resp.http.before == "/?z=a&a=z"
+    expect resp.http.after == "/?a=z&z=a"
+
+    txreq -url "/?c&b&a&z&x&y"
+    rxresp
+    expect resp.http.before == "/?c&b&a&z&x&y"
+    expect resp.http.after == "/?a&b&c&x&y&z"
+
+    txreq -url "/?a=&aa=&aaa=&aaaa="
+    rxresp
+    expect resp.http.before == "/?a=&aa=&aaa=&aaaa="
+    expect resp.http.after == "/?a=&aa=&aaa=&aaaa="
+
+    txreq -url "/?aaaa=&a=&aa=&aaa="
+    rxresp
+    expect resp.http.before == "/?aaaa=&a=&aa=&aaa="
+    expect resp.http.after == "/?a=&aa=&aaa=&aaaa="
+
+    txreq -url "/?a=5&a=3&a=1&a=2&a=4"
+    rxresp
+    expect resp.http.before == "/?a=5&a=3&a=1&a=2&a=4"
+    expect resp.http.after == "/?a=5&a=3&a=1&a=2&a=4"
+
+    txreq -url "/?a=5&b=3&a=1&a=2&b=4"
+    rxresp
+    expect resp.http.before == "/?a=5&b=3&a=1&a=2&b=4"
+    expect resp.http.after == "/?a=5&a=1&a=2&b=3&b=4"
+
+    txreq -url "/"
+    rxresp
+    expect resp.http.before == "/"
+    expect resp.http.after == "/"
+
+    txreq -url "/?"
+    rxresp
+    expect resp.http.before == "/?"
+    expect resp.http.after == "/?"
+
+    txreq -req OPTIONS -url "*"
+    rxresp
+    expect resp.http.before == "*"
+    expect resp.http.after == "*"
+} -run
diff --git a/src/http_act.c b/src/http_act.c
index 3751b002f..1480563c9 100644
--- a/src/http_act.c
+++ b/src/http_act.c
@@ -250,6 +250,23 @@ static enum act_return http_action_normalize_uri(struct 
act_rule *rule, struct p
 
                        break;
                }
+               case ACT_NORMALIZE_URI_SORT_QUERY: {
+                       const struct ist path = http_get_path(uri);
+                       struct ist newquery = ist2(replace->area, 
replace->size);
+
+                       if (!isttest(path))
+                               goto leave;
+
+                       err = uri_normalizer_query_sort(istfind(path, '?'), 
'&', &newquery);
+
+                       if (err != URI_NORMALIZER_ERR_NONE)
+                               break;
+
+                       if (!http_replace_req_query(htx, newquery))
+                               goto fail_rewrite;
+
+                       break;
+               }
        }
 
        switch (err) {
@@ -330,6 +347,11 @@ static enum act_parse_ret parse_http_normalize_uri(const 
char **args, int *orig_
                        return ACT_RET_PRS_ERR;
                }
        }
+       else if (strcmp(args[cur_arg], "sort-query") == 0) {
+               cur_arg++;
+
+               rule->action = ACT_NORMALIZE_URI_SORT_QUERY;
+       }
        else {
                memprintf(err, "unknown normalizer '%s'", args[cur_arg]);
                return ACT_RET_PRS_ERR;
diff --git a/src/uri_normalizer.c b/src/uri_normalizer.c
index 53a3321d4..bd67542e8 100644
--- a/src/uri_normalizer.c
+++ b/src/uri_normalizer.c
@@ -13,6 +13,8 @@
 #include <import/ist.h>
 
 #include <haproxy/api.h>
+#include <haproxy/buf.h>
+#include <haproxy/chunk.h>
 #include <haproxy/uri_normalizer.h>
 
 /* Merges `/../` with preceding path segments.
@@ -140,6 +142,93 @@ enum uri_normalizer_err 
uri_normalizer_path_merge_slashes(const struct ist path,
        return err;
 }
 
+/* Compares two query parameters by name. Query parameters are ordered
+ * as with memcmp. Shorter parameter names are ordered lower. Identical
+ * parameter names are compared by their pointer to maintain a stable
+ * sort.
+ */
+static int query_param_cmp(const void *a, const void *b)
+{
+       const struct ist param_a = *(struct ist*)a;
+       const struct ist param_b = *(struct ist*)b;
+       const struct ist param_a_name = iststop(param_a, '=');
+       const struct ist param_b_name = iststop(param_b, '=');
+
+       int cmp = istdiff(param_a_name, param_b_name);
+
+       if (cmp != 0)
+               return cmp;
+
+       /* The contents are identical: Compare the pointer. */
+       if (istptr(param_a) < istptr(param_b))
+               return -1;
+
+       if (istptr(param_a) > istptr(param_b))
+               return 1;
+
+       return 0;
+}
+
+/* Sorts the parameters within the given query string. */
+enum uri_normalizer_err uri_normalizer_query_sort(const struct ist query, 
const char delim, struct ist *dst)
+{
+       enum uri_normalizer_err err;
+
+       const size_t size = istclear(dst);
+       struct ist newquery = *dst;
+
+       struct ist scanner = query;
+
+       const struct buffer *trash = get_trash_chunk();
+       struct ist *params = (struct ist *)b_orig(trash);
+       const size_t max_param = b_size(trash) / sizeof(*params); 
+       size_t param_count = 0;
+
+       size_t i;
+
+       /* The query will have the same length. */
+       if (size < istlen(query)) {
+               err = URI_NORMALIZER_ERR_ALLOC;
+               goto fail;
+       }
+
+       /* Handle the leading '?'. */
+       newquery = __istappend(newquery, istshift(&scanner));
+
+       while (istlen(scanner) > 0) {
+               const struct ist param = istsplit(&scanner, delim);
+
+               if (param_count + 1 > max_param) {
+                       err = URI_NORMALIZER_ERR_ALLOC;
+                       goto fail;
+               }
+
+               params[param_count] = param;
+               param_count++;
+       }
+
+       qsort(params, param_count, sizeof(*params), query_param_cmp);
+
+       for (i = 0; i < param_count; i++) {
+               if (i > 0)
+                       newquery = __istappend(newquery, '&');
+
+               if (istcat(&newquery, params[i], size) < 0) {
+                       /* This is impossible, because we checked the size of 
the destination buffer. */
+                       my_unreachable();
+                       err = URI_NORMALIZER_ERR_INTERNAL_ERROR;
+                       goto fail;
+               }
+       }
+
+       *dst = newquery;
+
+       return URI_NORMALIZER_ERR_NONE;
+
+  fail:
+
+       return err;
+}
 
 /*
  * Local variables:
-- 
2.31.1


Reply via email to