This is an automated email from the ASF dual-hosted git repository.
bcall pushed a commit to branch 9.2.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/9.2.x by this push:
new fbb6acb1ca Remove duplicate slashes at the beginning of the incoming URL (#10125)
fbb6acb1ca is described below
commit fbb6acb1caac752e6719b912e0de971691c1ad99
Author: Bryan Call <[email protected]>
AuthorDate: Tue Aug 1 16:00:07 2023 -0700
Remove duplicate slashes at the beginning of the incoming URL (#10125)
(cherry picked from commit 479f62e719c65a3e41a2f03afd0d71ce1e2acab5)
---
proxy/hdrs/URL.cc | 12 ++++++++++--
proxy/hdrs/unit_tests/test_URL.cc | 24 ++++++++++++++++--------
2 files changed, 26 insertions(+), 10 deletions(-)
diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc
index c9b2f96d6d..a26f5d7911 100644
--- a/proxy/hdrs/URL.cc
+++ b/proxy/hdrs/URL.cc
@@ -1525,8 +1525,13 @@ done:
// correcting this behavior, therefore, we maintain the current
// functionality but add state to determine whether the path was
// absolutely empty so we can reconstruct such URLs.
- ++path_start;
+ //
+ // Remove all preceding slashes
+ while (path_start < path_end && *path_start == '/') {
+ ++path_start;
+ }
}
+
url->set_path(heap, path_start, path_end - path_start, copy_strings);
} else if (!nothing_after_host) {
// There was no path set via '/': it is absolutely empty. However, if there
@@ -1578,7 +1583,10 @@ url_parse_http_regex(HdrHeap *heap, URLImpl *url, const char **start, const char
cur = static_cast<const char *>(memchr(cur, '/', end - cur));
if (cur) {
host_end = cur;
- ++cur;
+ // Remove all preceding slashes
+ while (cur < end && *cur == '/') {
+ cur++;
+ }
} else {
host_end = cur = end;
}
diff --git a/proxy/hdrs/unit_tests/test_URL.cc b/proxy/hdrs/unit_tests/test_URL.cc
index 115aeee5d8..a06d1d5d05 100644
--- a/proxy/hdrs/unit_tests/test_URL.cc
+++ b/proxy/hdrs/unit_tests/test_URL.cc
@@ -173,6 +173,14 @@ constexpr bool VERIFY_HOST_CHARACTERS = true;
// clang-format off
std::vector<url_parse_test_case> url_parse_test_cases = {
+ {
+ "///dir////index.html",
+ "/dir////index.html",
+ VERIFY_HOST_CHARACTERS,
+ "/dir////index.html",
+ IS_VALID,
+ IS_VALID
+ },
{
"/index.html",
"/index.html",
@@ -183,9 +191,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
},
{
"//index.html",
- "//index.html",
+ "/index.html",
VERIFY_HOST_CHARACTERS,
- "//index.html",
+ "/index.html",
IS_VALID,
IS_VALID
},
@@ -215,9 +223,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
// with two slash characters ("//"). We have historically allowed this,
// however, and will continue to do so.
"https:////",
- "https:////",
+ "https:///",
VERIFY_HOST_CHARACTERS,
- "https:////",
+ "https:///",
IS_VALID,
IS_VALID
},
@@ -257,9 +265,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
},
{
"https://www.example.com//",
- "https://www.example.com//",
+ "https://www.example.com/",
VERIFY_HOST_CHARACTERS,
- "https://www.example.com//",
+ "https://www.example.com/",
IS_VALID,
IS_VALID
},
@@ -313,9 +321,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
},
{
"https://www.example.com//a/path",
- "https://www.example.com//a/path",
+ "https://www.example.com/a/path",
VERIFY_HOST_CHARACTERS,
- "https://www.example.com//a/path",
+ "https://www.example.com/a/path",
IS_VALID,
IS_VALID
},