This is an automated email from the ASF dual-hosted git repository.
bcall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/master by this push:
new 479f62e719 Remove duplicate slashes at the beginning of the incoming URL (#10125)
479f62e719 is described below
commit 479f62e719c65a3e41a2f03afd0d71ce1e2acab5
Author: Bryan Call <[email protected]>
AuthorDate: Tue Aug 1 16:00:07 2023 -0700
Remove duplicate slashes at the beginning of the incoming URL (#10125)
---
proxy/hdrs/URL.cc | 12 ++++++++++--
proxy/hdrs/unit_tests/test_URL.cc | 24 ++++++++++++++++--------
2 files changed, 26 insertions(+), 10 deletions(-)
diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc
index 63aedcccb8..e7536ba547 100644
--- a/proxy/hdrs/URL.cc
+++ b/proxy/hdrs/URL.cc
@@ -1524,8 +1524,13 @@ done:
// correcting this behavior, therefore, we maintain the current
// functionality but add state to determine whether the path was
// absolutely empty so we can reconstruct such URLs.
- ++path_start;
+ //
+ // Skip all leading slashes
+ while (path_start < path_end && *path_start == '/') {
+ ++path_start;
+ }
}
+
url->set_path(heap, path_start, path_end - path_start, copy_strings);
} else if (!nothing_after_host) {
// There was no path set via '/': it is absolutely empty. However, if there
@@ -1577,7 +1582,10 @@ url_parse_http_regex(HdrHeap *heap, URLImpl *url, const char **start, const char
cur = static_cast<const char *>(memchr(cur, '/', end - cur));
if (cur) {
host_end = cur;
- ++cur;
+ // Skip all leading slashes
+ while (cur < end && *cur == '/') {
+ cur++;
+ }
} else {
host_end = cur = end;
}
diff --git a/proxy/hdrs/unit_tests/test_URL.cc b/proxy/hdrs/unit_tests/test_URL.cc
index bb1886d580..3f11b98c6d 100644
--- a/proxy/hdrs/unit_tests/test_URL.cc
+++ b/proxy/hdrs/unit_tests/test_URL.cc
@@ -188,6 +188,14 @@ constexpr bool VERIFY_HOST_CHARACTERS = true;
// clang-format off
std::vector<url_parse_test_case> url_parse_test_cases = {
+ {
+ "///dir////index.html",
+ "/dir////index.html",
+ VERIFY_HOST_CHARACTERS,
+ "/dir////index.html",
+ IS_VALID,
+ IS_VALID
+ },
{
"/index.html",
"/index.html",
@@ -198,9 +206,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
},
{
"//index.html",
- "//index.html",
+ "/index.html",
VERIFY_HOST_CHARACTERS,
- "//index.html",
+ "/index.html",
IS_VALID,
IS_VALID
},
@@ -230,9 +238,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
// with two slash characters ("//"). We have historically allowed this,
// however, and will continue to do so.
"https:////",
- "https:////",
+ "https:///",
VERIFY_HOST_CHARACTERS,
- "https:////",
+ "https:///",
IS_VALID,
IS_VALID
},
@@ -272,9 +280,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
},
{
"https://www.example.com//",
- "https://www.example.com//",
+ "https://www.example.com/",
VERIFY_HOST_CHARACTERS,
- "https://www.example.com//",
+ "https://www.example.com/",
IS_VALID,
IS_VALID
},
@@ -328,9 +336,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
},
{
"https://www.example.com//a/path",
- "https://www.example.com//a/path",
+ "https://www.example.com/a/path",
VERIFY_HOST_CHARACTERS,
- "https://www.example.com//a/path",
+ "https://www.example.com/a/path",
IS_VALID,
IS_VALID
},