This is an automated email from the ASF dual-hosted git repository.

bcall pushed a commit to branch 9.2.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/9.2.x by this push:
     new fbb6acb1ca Remove duplicate slashes at the beginning of the incoming URL (#10125)
fbb6acb1ca is described below

commit fbb6acb1caac752e6719b912e0de971691c1ad99
Author: Bryan Call <bc...@apache.org>
AuthorDate: Tue Aug 1 16:00:07 2023 -0700

    Remove duplicate slashes at the beginning of the incoming URL (#10125)

    (cherry picked from commit 479f62e719c65a3e41a2f03afd0d71ce1e2acab5)
---
 proxy/hdrs/URL.cc                 | 12 ++++++++++--
 proxy/hdrs/unit_tests/test_URL.cc | 24 ++++++++++++++++--------
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc
index c9b2f96d6d..a26f5d7911 100644
--- a/proxy/hdrs/URL.cc
+++ b/proxy/hdrs/URL.cc
@@ -1525,8 +1525,13 @@ done:
       // correcting this behavior, therefore, we maintain the current
       // functionality but add state to determine whether the path was
       // absolutely empty so we can reconstruct such URLs.
-      ++path_start;
+      //
+      // Remove all preceding slashes
+      while (path_start < path_end && *path_start == '/') {
+        ++path_start;
+      }
     }
+
     url->set_path(heap, path_start, path_end - path_start, copy_strings);
   } else if (!nothing_after_host) {
     // There was no path set via '/': it is absolutely empty. However, if there
@@ -1578,7 +1583,10 @@ url_parse_http_regex(HdrHeap *heap, URLImpl *url, const char **start, const char
   cur = static_cast<const char *>(memchr(cur, '/', end - cur));
   if (cur) {
     host_end = cur;
-    ++cur;
+    // Remove all preceding slashes
+    while (cur < end && *cur == '/') {
+      cur++;
+    }
   } else {
     host_end = cur = end;
   }
diff --git a/proxy/hdrs/unit_tests/test_URL.cc b/proxy/hdrs/unit_tests/test_URL.cc
index 115aeee5d8..a06d1d5d05 100644
--- a/proxy/hdrs/unit_tests/test_URL.cc
+++ b/proxy/hdrs/unit_tests/test_URL.cc
@@ -173,6 +173,14 @@ constexpr bool VERIFY_HOST_CHARACTERS = true;
 
 // clang-format off
 std::vector<url_parse_test_case> url_parse_test_cases = {
+  {
+    "///dir////index.html",
+    "/dir////index.html",
+    VERIFY_HOST_CHARACTERS,
+    "/dir////index.html",
+    IS_VALID,
+    IS_VALID
+  },
   {
     "/index.html",
     "/index.html",
@@ -183,9 +191,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
   },
   {
     "//index.html",
-    "//index.html",
+    "/index.html",
     VERIFY_HOST_CHARACTERS,
-    "//index.html",
+    "/index.html",
     IS_VALID,
     IS_VALID
   },
@@ -215,9 +223,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
     // with two slash characters ("//"). We have historically allowed this,
     // however, and will continue to do so.
     "https:////",
-    "https:////",
+    "https:///",
     VERIFY_HOST_CHARACTERS,
-    "https:////",
+    "https:///",
     IS_VALID,
     IS_VALID
   },
@@ -257,9 +265,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
   },
   {
     "https://www.example.com//",
-    "https://www.example.com//",
+    "https://www.example.com/",
     VERIFY_HOST_CHARACTERS,
-    "https://www.example.com//",
+    "https://www.example.com/",
     IS_VALID,
     IS_VALID
   },
@@ -313,9 +321,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
   },
   {
     "https://www.example.com//a/path",
-    "https://www.example.com//a/path",
+    "https://www.example.com/a/path",
     VERIFY_HOST_CHARACTERS,
-    "https://www.example.com//a/path",
+    "https://www.example.com/a/path",
     IS_VALID,
     IS_VALID
   },