Willy, Christopher, I'm not very happy with the normalization logic, because am processing the URI in reverse. This requires me to directly access offsets instead of using the `ist` API. However this way I don't need to backtrack once I encounter a `../` which I consider to be a win.
Best regards Tim Düsterhus Apply with `git am --scissors` to automatically cut the commit message. -- >8 -- This normalizer merges `../` path segments with the predecing segment, removing both the preceding segment and the `../`. Empty segments do not receive special treatment. The `merge-slashes` normalizer should be executed first. See GitHub Issue #714. --- doc/configuration.txt | 12 +++++ include/haproxy/action-t.h | 1 + include/haproxy/uri_normalizer.h | 1 + reg-tests/http-rules/normalize_uri.vtc | 71 +++++++++++++++++++++++++- src/http_act.c | 22 ++++++++ src/uri_normalizer.c | 71 ++++++++++++++++++++++++++ 6 files changed, 177 insertions(+), 1 deletion(-) diff --git a/doc/configuration.txt b/doc/configuration.txt index d3030b478..99c845ac7 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -6016,6 +6016,18 @@ http-request normalize-uri <normalizer> [ { if | unless } <condition> ] Performs normalization of the request's URI. The following normalizers are available: + - dotdot: Normalizes "/../" segments within the "path" component. This merges + segments that attempt to access the parent directory with their preceding + segment. Empty segments do not receive special treatment. Use the + "merge-slashes" normalizer first if this is undesired. + + Example: + - /foo/../ -> / + - /foo/../bar/ -> /bar/ + - /foo/bar/../ -> /foo/ + - /../bar/ -> /../bar/ + - /foo//../ -> /foo/ + - merge-slashes: Merges adjacent slashes within the "path" component into a single slash. diff --git a/include/haproxy/action-t.h b/include/haproxy/action-t.h index 4a3e3f8bd..ac9399a6b 100644 --- a/include/haproxy/action-t.h +++ b/include/haproxy/action-t.h @@ -103,6 +103,7 @@ enum act_timeout_name { enum act_normalize_uri { ACT_NORMALIZE_URI_MERGE_SLASHES, + ACT_NORMALIZE_URI_DOTDOT, }; /* NOTE: if <.action_ptr> is defined, the referenced function will always be diff --git a/include/haproxy/uri_normalizer.h b/include/haproxy/uri_normalizer.h index b6e15e281..99b27330e 100644 --- a/include/haproxy/uri_normalizer.h +++ b/include/haproxy/uri_normalizer.h @@ -16,6 +16,7 @@ #include <import/ist.h> +struct ist uri_normalizer_path_dotdot(const struct ist path, char *trash, size_t len); struct ist uri_normalizer_path_merge_slashes(const struct ist path, char *trash, size_t len); #endif /* _HAPROXY_URI_NORMALIZER_H */ diff --git a/reg-tests/http-rules/normalize_uri.vtc b/reg-tests/http-rules/normalize_uri.vtc index 3303760d4..e66bdc47b 100644 --- a/reg-tests/http-rules/normalize_uri.vtc +++ b/reg-tests/http-rules/normalize_uri.vtc @@ -8,7 +8,7 @@ feature ignore_unknown_macro server s1 { rxreq txresp -} -repeat 10 -start +} -repeat 21 -start haproxy h1 -conf { defaults @@ -29,6 +29,18 @@ haproxy h1 -conf { default_backend be + frontend fe_dotdot + bind "fd@${fe_dotdot}" + + http-request set-var(txn.before) url + http-request normalize-uri dotdot + http-request set-var(txn.after) url + + http-response add-header before %[var(txn.before)] + http-response add-header after %[var(txn.after)] + + default_backend be + backend be server s1 ${s1_addr}:${s1_port} @@ -85,3 +97,60 @@ client c1 -connect ${h1_fe_merge_slashes_sock} { expect resp.http.before == "*" expect resp.http.after == "*" } -run + +client c2 -connect ${h1_fe_dotdot_sock} { + txreq -url "/foo/bar" + rxresp + expect resp.http.before == "/foo/bar" + expect resp.http.after == "/foo/bar" + + txreq -url "/foo/.." + rxresp + expect resp.http.before == "/foo/.." + expect resp.http.after == "/" + + txreq -url "/foo/../" + rxresp + expect resp.http.before == "/foo/../" + expect resp.http.after == "/" + + txreq -url "/foo/bar/../" + rxresp + expect resp.http.before == "/foo/bar/../" + expect resp.http.after == "/foo/" + + txreq -url "/foo/../bar" + rxresp + expect resp.http.before == "/foo/../bar" + expect resp.http.after == "/bar" + + txreq -url "/foo/../bar/" + rxresp + expect resp.http.before == "/foo/../bar/" + expect resp.http.after == "/bar/" + + txreq -url "/foo/../../bar/" + rxresp + expect resp.http.before == "/foo/../../bar/" + expect resp.http.after == "/../bar/" + + txreq -url "/foo//../../bar/" + rxresp + expect resp.http.before == "/foo//../../bar/" + expect resp.http.after == "/bar/" + + txreq -url "/foo/?bar=/foo/../" + rxresp + expect resp.http.before == "/foo/?bar=/foo/../" + expect resp.http.after == "/foo/?bar=/foo/../" + + txreq -url "/foo/../?bar=/foo/../" + rxresp + expect resp.http.before == "/foo/../?bar=/foo/../" + expect resp.http.after == "/?bar=/foo/../" + + txreq -req OPTIONS -url "*" + rxresp + expect resp.http.before == "*" + expect resp.http.after == "*" +} -run diff --git a/src/http_act.c b/src/http_act.c index 347c9c731..b3b8fbab6 100644 --- a/src/http_act.c +++ b/src/http_act.c @@ -225,6 +225,25 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p newpath = uri_normalizer_path_merge_slashes(path, replace->area, replace->size); + if (!isttest(newpath)) + goto fail_rewrite; + + if (!http_replace_req_path(htx, newpath, 0)) + goto fail_rewrite; + + break; + } + case ACT_NORMALIZE_URI_DOTDOT: { + struct ist path = http_get_path(uri); + struct ist newpath; + + if (!isttest(path)) + goto leave; + + path = iststop(path, '?'); + + newpath = uri_normalizer_path_dotdot(path, replace->area, replace->size); + if (!isttest(newpath)) goto fail_rewrite; @@ -284,6 +303,9 @@ static enum act_parse_ret parse_http_normalize_uri(const char **args, int *orig_ if (strcmp(args[cur_arg], "merge-slashes") == 0) { rule->action = ACT_NORMALIZE_URI_MERGE_SLASHES; } + else if (strcmp(args[cur_arg], "dotdot") == 0) { + rule->action = ACT_NORMALIZE_URI_DOTDOT; + } else { memprintf(err, "unknown normalizer '%s'", args[cur_arg]); return ACT_RET_PRS_ERR; diff --git a/src/uri_normalizer.c b/src/uri_normalizer.c index 681a1b0cd..ab5443a5b 100644 --- a/src/uri_normalizer.c +++ b/src/uri_normalizer.c @@ -15,6 +15,77 @@ #include <haproxy/api.h> #include <haproxy/uri_normalizer.h> +/* Merges `/../` with preceding path segments. Returns an ist containing the new path + * and backed by `trash` or IST_NULL if the `len` not sufficiently large to store + * the resulting path. + */ +struct ist uri_normalizer_path_dotdot(const struct ist path, char *trash, size_t len) +{ + ssize_t offset = istlen(path) - 1; + char *tail = trash + len; + char *head = tail; + int up = 0; + + if (len < istlen(path)) + return IST_NULL; + + /* Handle `/..` at the end of the path without a trailing slash. */ + if (offset >= 2 && istmatch(istadv(path, offset - 2), ist("/.."))) { + up++; + offset -= 2; + } + + while (offset >= 0) { + if (offset >= 3 && istmatch(istadv(path, offset - 3), ist("/../"))) { + up++; + offset -= 3; + continue; + } + + if (up > 0) { + /* Skip the slash. */ + offset--; + + /* First check whether we already reached the start of the path, + * before popping the current `/../`. + */ + if (offset >= 0) { + up--; + + /* Skip the current path segment. */ + while (offset >= 0 && istptr(path)[offset] != '/') + offset--; + } + } + else { + /* Prepend the slash. */ + *(--head) = istptr(path)[offset]; + offset--; + + /* Prepend the current path segment. */ + while (offset >= 0 && istptr(path)[offset] != '/') { + *(--head) = istptr(path)[offset]; + offset--; + } + } + } + + if (up > 0) { + /* Prepend a trailing slash. */ + *(--head) = '/'; + + /* Prepend unconsumed `/..`. */ + do { + *(--head) = '.'; + *(--head) = '.'; + *(--head) = '/'; + up--; + } while (up > 0); + } + + return ist2(head, tail - head); +} + /* Merges adjacent slashes in the given path. Returns an ist containing the new path * and backed by `trash` or IST_NULL if the `len` not sufficiently large to store * the resulting path. -- 2.31.1