This is an automated email from the ASF dual-hosted git repository.

bcall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/master by this push:
     new 479f62e719 Remove duplicate slashes at the beginning of the incoming URL (#10125)
479f62e719 is described below

commit 479f62e719c65a3e41a2f03afd0d71ce1e2acab5
Author: Bryan Call <[email protected]>
AuthorDate: Tue Aug 1 16:00:07 2023 -0700

    Remove duplicate slashes at the beginning of the incoming URL (#10125)
---
 proxy/hdrs/URL.cc                 | 12 ++++++++++--
 proxy/hdrs/unit_tests/test_URL.cc | 24 ++++++++++++++++--------
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc
index 63aedcccb8..e7536ba547 100644
--- a/proxy/hdrs/URL.cc
+++ b/proxy/hdrs/URL.cc
@@ -1524,8 +1524,13 @@ done:
       // correcting this behavior, therefore, we maintain the current
       // functionality but add state to determine whether the path was
       // absolutely empty so we can reconstruct such URLs.
-      ++path_start;
+      //
+      // Remove all preceding slashes
+      while (path_start < path_end && *path_start == '/') {
+        ++path_start;
+      }
     }
+
     url->set_path(heap, path_start, path_end - path_start, copy_strings);
   } else if (!nothing_after_host) {
     // There was no path set via '/': it is absolutely empty. However, if there
@@ -1577,7 +1582,10 @@ url_parse_http_regex(HdrHeap *heap, URLImpl *url, const char **start, const char
   cur              = static_cast<const char *>(memchr(cur, '/', end - cur));
   if (cur) {
     host_end = cur;
-    ++cur;
+    // Remove all preceding slashes
+    while (cur < end && *cur == '/') {
+      cur++;
+    }
   } else {
     host_end = cur = end;
   }
diff --git a/proxy/hdrs/unit_tests/test_URL.cc b/proxy/hdrs/unit_tests/test_URL.cc
index bb1886d580..3f11b98c6d 100644
--- a/proxy/hdrs/unit_tests/test_URL.cc
+++ b/proxy/hdrs/unit_tests/test_URL.cc
@@ -188,6 +188,14 @@ constexpr bool VERIFY_HOST_CHARACTERS = true;
 
 // clang-format off
 std::vector<url_parse_test_case> url_parse_test_cases = {
+  {
+    "///dir////index.html",
+    "/dir////index.html",
+    VERIFY_HOST_CHARACTERS,
+    "/dir////index.html",
+    IS_VALID,
+    IS_VALID
+  },
   {
     "/index.html",
     "/index.html",
@@ -198,9 +206,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
   },
   {
     "//index.html",
-    "//index.html",
+    "/index.html",
     VERIFY_HOST_CHARACTERS,
-    "//index.html",
+    "/index.html",
     IS_VALID,
     IS_VALID
   },
@@ -230,9 +238,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
     // with two slash characters ("//"). We have historically allowed this,
     // however, and will continue to do so.
     "https:////",
-    "https:////",
+    "https:///",
     VERIFY_HOST_CHARACTERS,
-    "https:////",
+    "https:///",
     IS_VALID,
     IS_VALID
   },
@@ -272,9 +280,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
   },
   {
     "https://www.example.com//",
-    "https://www.example.com//",
+    "https://www.example.com/",
     VERIFY_HOST_CHARACTERS,
-    "https://www.example.com//",
+    "https://www.example.com/",
     IS_VALID,
     IS_VALID
   },
@@ -328,9 +336,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
   },
   {
     "https://www.example.com//a/path",
-    "https://www.example.com//a/path",
+    "https://www.example.com/a/path",
     VERIFY_HOST_CHARACTERS,
-    "https://www.example.com//a/path",
+    "https://www.example.com/a/path",
     IS_VALID,
     IS_VALID
   },

Reply via email to