This normalizer decodes percent encoded characters within the RFC 3986 unreserved set.
See GitHub Issue #714. --- doc/configuration.txt | 44 +++++++++++- include/haproxy/action-t.h | 2 + include/haproxy/uri_normalizer.h | 1 + reg-tests/http-rules/normalize_uri.vtc | 75 +++++++++++++++++++- src/http_act.c | 33 +++++++++ src/uri_normalizer.c | 95 ++++++++++++++++++++++++++ 6 files changed, 247 insertions(+), 3 deletions(-) diff --git a/doc/configuration.txt b/doc/configuration.txt index 322932b5e..df01dd848 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -6015,6 +6015,7 @@ http-request normalize-uri <normalizer> [ { if | unless } <condition> ] http-request normalize-uri path-merge-slashes [ { if | unless } <condition> ] http-request normalize-uri path-strip-dot [ { if | unless } <condition> ] http-request normalize-uri path-strip-dotdot [ full ] [ { if | unless } <condition> ] +http-request normalize-uri percent-decode-unreserved [ strict ] [ { if | unless } <condition> ] http-request normalize-uri percent-to-uppercase [ strict ] [ { if | unless } <condition> ] http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ] @@ -6034,11 +6035,25 @@ http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ] filesystem. However it might break routing of an API that expects a specific number of segments in the path. + It is important to note that some normalizers might result in unsafe + transformations for broken URIs. It might also be possible that a combination + of normalizers that are safe by themselves results in unsafe transformations + when improperly combined. + + As an example the "percent-decode-unreserved" normalizer might result in + unexpected results when a broken URI includes bare percent characters. One + such a broken URI is "/%%36%36" which would be decoded to "/%66" which in + turn is equivalent to "/f". By specifying the "strict" option requests to + such a broken URI would safely be rejected. + The following normalizers are available: - path-strip-dot: Removes "/./" segments within the "path" component (RFC 3986#6.2.2.3). + Segments including percent encoded dots ("%2E") will not be detected. Use + the "percent-decode-unreserved" normalizer first if this is undesired. + Example: - /. -> / - /./bar/ -> /bar/ @@ -6049,8 +6064,13 @@ http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ] (RFC 3986#6.2.2.3). This merges segments that attempt to access the parent directory with - their preceding segment. Empty segments do not receive special treatment. - Use the "path-merge-slashes" normalizer first if this is undesired. + their preceding segment. + + Empty segments do not receive special treatment. Use the "merge-slashes" + normalizer first if this is undesired. + + Segments including percent encoded dots ("%2E") will not be detected. Use + the "percent-decode-unreserved" normalizer first if this is undesired. Example: - /foo/../ -> / @@ -6059,6 +6079,7 @@ http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ] - /../bar/ -> /../bar/ - /bar/../../ -> /../ - /foo//../ -> /foo/ + - /foo/%2E%2E/ -> /foo/%2E%2E/ If the "full" option is specified then "../" at the beginning will be removed as well: @@ -6074,6 +6095,25 @@ http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ] - // -> / - /foo//bar -> /foo/bar + - percent-decode-unreserved: Decodes unreserved percent encoded characters to + their representation as a regular character (RFC 3986#6.2.2.2). + + The set of unreserved characters includes all letters, all digits, "-", + ".", "_", and "~". + + Example: + - /%61dmin -> /admin + - /foo%3Fbar=baz -> /foo%3Fbar=baz (no change) + - /%%36%36 -> /%66 (unsafe) + - /%ZZ -> /%ZZ + + If the "strict" option is specified then invalid sequences will result + in a HTTP 400 Bad Request being returned. + + Example: + - /%%36%36 -> HTTP 400 + - /%ZZ -> HTTP 400 + - percent-to-uppercase: Uppercases letters within percent-encoded sequences (RFC 3986#6.2.2.1). diff --git a/include/haproxy/action-t.h b/include/haproxy/action-t.h index 2e3edeaf3..1d0b0d229 100644 --- a/include/haproxy/action-t.h +++ b/include/haproxy/action-t.h @@ -109,6 +109,8 @@ enum act_normalize_uri { ACT_NORMALIZE_URI_QUERY_SORT_BY_NAME, ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE, ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT, + ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED, + ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT, }; /* NOTE: if <.action_ptr> is defined, the referenced function will always be diff --git a/include/haproxy/uri_normalizer.h b/include/haproxy/uri_normalizer.h index 81c7e00ce..06f237e45 100644 --- a/include/haproxy/uri_normalizer.h +++ b/include/haproxy/uri_normalizer.h @@ -18,6 +18,7 @@ #include <haproxy/uri_normalizer-t.h> +enum uri_normalizer_err uri_normalizer_percent_decode_unreserved(const struct ist input, int strict, struct ist *dst); enum uri_normalizer_err uri_normalizer_percent_upper(const struct ist input, int strict, struct ist *dst); enum uri_normalizer_err uri_normalizer_path_dot(const struct ist path, struct ist *dst); enum uri_normalizer_err uri_normalizer_path_dotdot(const struct ist path, int full, struct ist *dst); diff --git a/reg-tests/http-rules/normalize_uri.vtc b/reg-tests/http-rules/normalize_uri.vtc index 9884b6c43..cc8806066 100644 --- a/reg-tests/http-rules/normalize_uri.vtc +++ b/reg-tests/http-rules/normalize_uri.vtc @@ -8,7 +8,7 @@ feature ignore_unknown_macro server s1 { rxreq txresp -} -repeat 54 -start +} -repeat 63 -start haproxy h1 -conf { defaults @@ -94,6 +94,30 @@ haproxy h1 -conf { default_backend be + frontend fe_percent_decode_unreserved + bind "fd@${fe_percent_decode_unreserved}" + + http-request set-var(txn.before) url + http-request normalize-uri percent-decode-unreserved + http-request set-var(txn.after) url + + http-response add-header before %[var(txn.before)] + http-response add-header after %[var(txn.after)] + + default_backend be + + frontend fe_percent_decode_unreserved_strict + bind "fd@${fe_percent_decode_unreserved_strict}" + + http-request set-var(txn.before) url + http-request normalize-uri percent-decode-unreserved strict + http-request set-var(txn.after) url + + http-response add-header before %[var(txn.before)] + http-response add-header after %[var(txn.after)] + + default_backend be + backend be server s1 ${s1_addr}:${s1_port} @@ -391,3 +415,52 @@ client c6 -connect ${h1_fe_dot_sock} { expect resp.http.before == "/?a=/./" expect resp.http.after == "/?a=/./" } -run + +client c7 -connect ${h1_fe_percent_decode_unreserved_sock} { + txreq -url "/a?a=a" + rxresp + expect resp.http.before == "/a?a=a" + expect resp.http.after == "/a?a=a" + + txreq -url "/%61?%61=%61" + rxresp + expect resp.http.before == "/%61?%61=%61" + expect resp.http.after == "/a?a=a" + + txreq -url "/%3F?foo=bar" + rxresp + expect resp.http.before == "/%3F?foo=bar" + expect resp.http.after == "/%3F?foo=bar" + + txreq -url "/%%36%36" + rxresp + expect resp.status == 200 + expect resp.http.before == "/%%36%36" + expect resp.http.after == "/%66" + + txreq -req OPTIONS -url "*" + rxresp + expect resp.http.before == "*" + expect resp.http.after == "*" +} -run + +client c8 -connect ${h1_fe_percent_decode_unreserved_strict_sock} { + txreq -url "/a?a=a" + rxresp + expect resp.http.before == "/a?a=a" + expect resp.http.after == "/a?a=a" + + txreq -url "/%61?%61=%61" + rxresp + expect resp.http.before == "/%61?%61=%61" + expect resp.http.after == "/a?a=a" + + txreq -url "/%3F?foo=bar" + rxresp + expect resp.http.before == "/%3F?foo=bar" + expect resp.http.after == "/%3F?foo=bar" + + txreq -url "/%%36%36" + rxresp + expect resp.status == 400 +} -run diff --git a/src/http_act.c b/src/http_act.c index df2bbe41b..58e1bb857 100644 --- a/src/http_act.c +++ b/src/http_act.c @@ -294,6 +294,24 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p err = uri_normalizer_percent_upper(path, rule->action == ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT, &newpath); + if (err != URI_NORMALIZER_ERR_NONE) + break; + + if (!http_replace_req_path(htx, newpath, 1)) + goto fail_rewrite; + + break; + } + case ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED: + case ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT: { + const struct ist path = http_get_path(uri); + struct ist newpath = ist2(replace->area, replace->size); + + if (!isttest(path)) + goto leave; + + err = uri_normalizer_percent_decode_unreserved(path, rule->action == ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT, &newpath); + if (err != URI_NORMALIZER_ERR_NONE) break; @@ -407,6 +425,21 @@ static enum act_parse_ret parse_http_normalize_uri(const char **args, int *orig_ return ACT_RET_PRS_ERR; } } + else if (strcmp(args[cur_arg], "percent-decode-unreserved") == 0) { + cur_arg++; + + if (strcmp(args[cur_arg], "strict") == 0) { + cur_arg++; + rule->action = ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT; + } + else if (!*args[cur_arg]) { + rule->action = ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED; + } + else if (strcmp(args[cur_arg], "if") != 0 && strcmp(args[cur_arg], "unless") != 0) { + memprintf(err, "unknown argument '%s' for 'percent-decode-unreserved' normalizer", args[cur_arg]); + return ACT_RET_PRS_ERR; + } + } else { memprintf(err, "unknown normalizer '%s'", args[cur_arg]); return ACT_RET_PRS_ERR; diff --git a/src/uri_normalizer.c b/src/uri_normalizer.c index 8d959365e..4fd783d4a 100644 --- a/src/uri_normalizer.c +++ b/src/uri_normalizer.c @@ -18,6 +18,101 @@ #include <haproxy/tools.h> #include <haproxy/uri_normalizer.h> +/* Returns 1 if the given character is part of the 'unreserved' set in the + * RFC 3986 ABNF. + * Returns 0 if not. + */ +static int is_unreserved_character(unsigned char c) +{ + switch (c) { + case 'A'...'Z': /* ALPHA */ + case 'a'...'z': /* ALPHA */ + case '0'...'9': /* DIGIT */ + case '-': + case '.': + case '_': + case '~': + return 1; + default: + return 0; + } +} + +/* Decodes percent encoded characters that are part of the 'unreserved' set. + * + * RFC 3986, section 2.3: + * > URIs that differ in the replacement of an unreserved character with + * > its corresponding percent-encoded US-ASCII octet are equivalent [...] + * > when found in a URI, should be decoded to their corresponding unreserved + * > characters by URI normalizers. + * + * If `strict` is set to 0 then percent characters that are not followed by a + * hexadecimal digit are returned as-is without performing any decoding. + * If `strict` is set to 1 then `URI_NORMALIZER_ERR_INVALID_INPUT` is returned + * for invalid sequences. + */ +enum uri_normalizer_err uri_normalizer_percent_decode_unreserved(const struct ist input, int strict, struct ist *dst) +{ + enum uri_normalizer_err err; + + const size_t size = istclear(dst); + struct ist output = *dst; + + struct ist scanner = input; + + /* The output will either be shortened or have the same length. */ + if (size < istlen(input)) { + err = URI_NORMALIZER_ERR_ALLOC; + goto fail; + } + + while (istlen(scanner)) { + const char current = istshift(&scanner); + + if (current == '%') { + if (istlen(scanner) >= 2) { + if (ishex(istptr(scanner)[0]) && ishex(istptr(scanner)[1])) { + char hex1, hex2, c; + + hex1 = istshift(&scanner); + hex2 = istshift(&scanner); + c = (hex2i(hex1) << 4) + hex2i(hex2); + + if (is_unreserved_character(c)) { + output = __istappend(output, c); + } + else { + output = __istappend(output, current); + output = __istappend(output, hex1); + output = __istappend(output, hex2); + } + + continue; + } + } + + if (strict) { + err = URI_NORMALIZER_ERR_INVALID_INPUT; + goto fail; + } + else { + output = __istappend(output, current); + } + } + else { + output = __istappend(output, current); + } + } + + *dst = output; + + return URI_NORMALIZER_ERR_NONE; + + fail: + + return err; +} + /* Uppercases letters used in percent encoding. * * If `strict` is set to 0 then percent characters that are not followed by a -- 2.31.1