This is an automated email from the ASF dual-hosted git repository.

bcall pushed a commit to branch 9.2.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/9.2.x by this push:
     new fbb6acb1ca Remove duplicate slashes at the beginning of the incoming URL (#10125)
fbb6acb1ca is described below

commit fbb6acb1caac752e6719b912e0de971691c1ad99
Author: Bryan Call <bc...@apache.org>
AuthorDate: Tue Aug 1 16:00:07 2023 -0700

    Remove duplicate slashes at the beginning of the incoming URL (#10125)
    
    (cherry picked from commit 479f62e719c65a3e41a2f03afd0d71ce1e2acab5)
---
 proxy/hdrs/URL.cc                 | 12 ++++++++++--
 proxy/hdrs/unit_tests/test_URL.cc | 24 ++++++++++++++++--------
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc
index c9b2f96d6d..a26f5d7911 100644
--- a/proxy/hdrs/URL.cc
+++ b/proxy/hdrs/URL.cc
@@ -1525,8 +1525,13 @@ done:
       // correcting this behavior, therefore, we maintain the current
       // functionality but add state to determine whether the path was
       // absolutely empty so we can reconstruct such URLs.
-      ++path_start;
+      //
+      // Remove all preceding slashes
+      while (path_start < path_end && *path_start == '/') {
+        ++path_start;
+      }
     }
+
     url->set_path(heap, path_start, path_end - path_start, copy_strings);
   } else if (!nothing_after_host) {
     // There was no path set via '/': it is absolutely empty. However, if there
@@ -1578,7 +1583,10 @@ url_parse_http_regex(HdrHeap *heap, URLImpl *url, const char **start, const char
   cur              = static_cast<const char *>(memchr(cur, '/', end - cur));
   if (cur) {
     host_end = cur;
-    ++cur;
+    // Remove all preceding slashes
+    while (cur < end && *cur == '/') {
+      cur++;
+    }
   } else {
     host_end = cur = end;
   }
diff --git a/proxy/hdrs/unit_tests/test_URL.cc b/proxy/hdrs/unit_tests/test_URL.cc
index 115aeee5d8..a06d1d5d05 100644
--- a/proxy/hdrs/unit_tests/test_URL.cc
+++ b/proxy/hdrs/unit_tests/test_URL.cc
@@ -173,6 +173,14 @@ constexpr bool VERIFY_HOST_CHARACTERS = true;
 
 // clang-format off
 std::vector<url_parse_test_case> url_parse_test_cases = {
+  {
+    "///dir////index.html",
+    "/dir////index.html",
+    VERIFY_HOST_CHARACTERS,
+    "/dir////index.html",
+    IS_VALID,
+    IS_VALID
+  },
   {
     "/index.html",
     "/index.html",
@@ -183,9 +191,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
   },
   {
     "//index.html",
-    "//index.html",
+    "/index.html",
     VERIFY_HOST_CHARACTERS,
-    "//index.html",
+    "/index.html",
     IS_VALID,
     IS_VALID
   },
@@ -215,9 +223,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
     // with two slash characters ("//"). We have historically allowed this,
     // however, and will continue to do so.
     "https:////",
-    "https:////",
+    "https:///",
     VERIFY_HOST_CHARACTERS,
-    "https:////",
+    "https:///",
     IS_VALID,
     IS_VALID
   },
@@ -257,9 +265,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
   },
   {
     "https://www.example.com//",
-    "https://www.example.com//",
+    "https://www.example.com/",
     VERIFY_HOST_CHARACTERS,
-    "https://www.example.com//",
+    "https://www.example.com/",
     IS_VALID,
     IS_VALID
   },
@@ -313,9 +321,9 @@ std::vector<url_parse_test_case> url_parse_test_cases = {
   },
   {
     "https://www.example.com//a/path",
-    "https://www.example.com//a/path",
+    "https://www.example.com/a/path",
     VERIFY_HOST_CHARACTERS,
-    "https://www.example.com//a/path",
+    "https://www.example.com/a/path",
     IS_VALID,
     IS_VALID
   },

Reply via email to