This is an automated email from the ASF dual-hosted git repository.

maskit pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/master by this push:
     new 5eb87bc8f2 Deprecate the support for URL param segment (#11519)
5eb87bc8f2 is described below

commit 5eb87bc8f264f638763fff93015652417a31ae3d
Author: Masakazu Kitajo <[email protected]>
AuthorDate: Thu Jul 18 14:22:40 2024 -0600

    Deprecate the support for URL param segment (#11519)
    
    * Make TSUrlHttpParamsGet include params segment
    
    * Add some unit tests
    
    * Update docs
    
    * Fix
---
 include/ts/ts.h                       |  6 +++
 src/proxy/hdrs/URL.cc                 | 76 ++++++++++----------------------
 src/proxy/hdrs/unit_tests/test_URL.cc | 82 +++++++++++++++++++++++++++++++++++
 src/proxy/http/HttpSM.cc              |  3 +-
 src/traffic_cache_tool/CacheScan.cc   |  8 ++--
 5 files changed, 115 insertions(+), 60 deletions(-)

diff --git a/include/ts/ts.h b/include/ts/ts.h
index 99e8c41ce7..bc829f1023 100644
--- a/include/ts/ts.h
+++ b/include/ts/ts.h
@@ -667,6 +667,9 @@ TSReturnCode TSUrlFtpTypeSet(TSMBuffer bufp, TSMLoc offset, 
int type);
     argument. Note: the returned string is not guaranteed to be
     null-terminated.
 
+    This function is deprecated and returns empty string.
+    Plugins that need "params" can call TSUrlPathGet to get a whole path 
string to parse it.
+
     @param bufp marshal buffer containing the URL.
     @param offset location of the URL.
     @param length of the returned string.
@@ -683,6 +686,9 @@ const char *TSUrlHttpParamsGet(TSMBuffer bufp, TSMLoc 
offset, int *length);
     copies the string to within bufp, so you can modify or delete
     value after calling TSUrlHttpParamsSet().
 
+    This function is deprecated. The value passed will be internally appended 
to the path portion.
+    Thus, TSUrlHttpParamsGet will not return the value set by this function.
+
     @param bufp marshal buffer containing the URL.
     @param offset location of the URL.
     @param value HTTP params string to set in the URL.
diff --git a/src/proxy/hdrs/URL.cc b/src/proxy/hdrs/URL.cc
index 191e644876..e56ab796c2 100644
--- a/src/proxy/hdrs/URL.cc
+++ b/src/proxy/hdrs/URL.cc
@@ -551,8 +551,22 @@ URLImpl::set_path(HdrHeap *heap, const char *value, int 
length, bool copy_string
 void
 URLImpl::set_params(HdrHeap *heap, const char *value, int length, bool 
copy_string)
 {
-  url_called_set(this);
-  mime_str_u16_set(heap, value, length, &(this->m_ptr_params), 
&(this->m_len_params), copy_string);
+  int path_len = this->m_len_path;
+
+  // Truncate the current param segment if it exists
+  if (auto p = strchr(this->m_ptr_path, ';'); p != nullptr) {
+    auto params_len  = path_len - (p - this->m_ptr_path);
+    path_len        -= params_len;
+  }
+
+  int   total_length    = path_len + 1 + length;
+  char *path_and_params = static_cast<char *>(alloca(total_length));
+
+  memcpy(path_and_params, this->m_ptr_path, path_len);
+  memcpy(path_and_params + path_len, ";", 1);
+  memcpy(path_and_params + path_len + 1, value, length);
+
+  this->set_path(heap, path_and_params, total_length, true);
 }
 
 /*-------------------------------------------------------------------------
@@ -887,10 +901,6 @@ url_length_get(URLImpl *url, unsigned normalization_flags)
     length += 1; // +1 for "/"
   }
 
-  if (url->m_ptr_params && url->m_len_params > 0) {
-    length += url->m_len_params + 1; // +1 for ";"
-  }
-
   if (url->m_ptr_query && url->m_len_query > 0) {
     length += url->m_len_query + 1; // +1 for "?"
   }
@@ -962,12 +972,6 @@ url_to_string(URLImpl *url, Arena *arena, int *length)
   memcpy(&str[idx], url->m_ptr_path, url->m_len_path);
   idx += url->m_len_path;
 
-  if (url->m_ptr_params && url->m_len_params > 0) {
-    str[idx++] = ';';
-    memcpy(&str[idx], url->m_ptr_params, url->m_len_params);
-    idx += url->m_len_params;
-  }
-
   if (url->m_ptr_query && url->m_len_query > 0) {
     str[idx++] = '?';
     memcpy(&str[idx], url->m_ptr_query, url->m_len_query);
@@ -1433,8 +1437,6 @@ url_parse_http(HdrHeap *heap, URLImpl *url, const char 
**start, const char *end,
   const char *cur;
   const char *path_start     = nullptr;
   const char *path_end       = nullptr;
-  const char *params_start   = nullptr;
-  const char *params_end     = nullptr;
   const char *query_start    = nullptr;
   const char *query_end      = nullptr;
   const char *fragment_start = nullptr;
@@ -1456,13 +1458,9 @@ url_parse_http(HdrHeap *heap, URLImpl *url, const char 
**start, const char *end,
   if (*cur == '/') {
     path_start = cur;
   }
-  mask = ';' & '?' & '#';
+  mask = '?' & '#';
 parse_path2:
   if ((*cur & mask) == mask) {
-    if (*cur == ';') {
-      path_end = cur;
-      goto parse_params1;
-    }
     if (*cur == '?') {
       path_end = cur;
       goto parse_query1;
@@ -1472,26 +1470,11 @@ parse_path2:
       goto parse_fragment1;
     }
   } else {
-    ink_assert((*cur != ';') && (*cur != '?') && (*cur != '#'));
+    ink_assert((*cur != '?') && (*cur != '#'));
   }
   GETNEXT(done);
   goto parse_path2;
 
-parse_params1:
-  params_start = cur + 1;
-  GETNEXT(done);
-parse_params2:
-  if (*cur == '?') {
-    params_end = cur;
-    goto parse_query1;
-  }
-  if (*cur == '#') {
-    params_end = cur;
-    goto parse_fragment1;
-  }
-  GETNEXT(done);
-  goto parse_params2;
-
 parse_query1:
   query_start = cur + 1;
   GETNEXT(done);
@@ -1552,12 +1535,6 @@ done:
     // nothing_after_host check.
     url->m_path_is_empty = true;
   }
-  if (params_start) {
-    if (!params_end) {
-      params_end = cur;
-    }
-    url->set_params(heap, params_start, params_end - params_start, 
copy_strings);
-  }
   if (query_start) {
     // There was a query string marked by '?'.
     if (!query_end) {
@@ -1715,11 +1692,6 @@ url_print(URLImpl *url, char *buf_start, int buf_length, 
int *buf_index_inout, i
     TRY(mime_mem_print(url->m_ptr_path, url->m_len_path, buf_start, 
buf_length, buf_index_inout, buf_chars_to_skip_inout));
   }
 
-  if (url->m_ptr_params && url->m_len_params > 0) {
-    TRY(mime_mem_print(";", 1, buf_start, buf_length, buf_index_inout, 
buf_chars_to_skip_inout));
-    TRY(mime_mem_print(url->m_ptr_params, url->m_len_params, buf_start, 
buf_length, buf_index_inout, buf_chars_to_skip_inout));
-  }
-
   if (url->m_ptr_query && url->m_len_query > 0) {
     TRY(mime_mem_print("?", 1, buf_start, buf_length, buf_index_inout, 
buf_chars_to_skip_inout));
     TRY(mime_mem_print(url->m_ptr_query, url->m_len_query, buf_start, 
buf_length, buf_index_inout, buf_chars_to_skip_inout));
@@ -1753,8 +1725,6 @@ url_describe(HdrHeapObjImpl *raw, bool /* recurse 
ATS_UNUSED */)
       obj->m_len_port, obj->m_port);
   Dbg(dbg_ctl_http, "\tPATH: \"%.*s\", PATH_LEN: %d,", obj->m_len_path, 
(obj->m_ptr_path ? obj->m_ptr_path : "NULL"),
       obj->m_len_path);
-  Dbg(dbg_ctl_http, "\tPARAMS: \"%.*s\", PARAMS_LEN: %d,", obj->m_len_params, 
(obj->m_ptr_params ? obj->m_ptr_params : "NULL"),
-      obj->m_len_params);
   Dbg(dbg_ctl_http, "\tQUERY: \"%.*s\", QUERY_LEN: %d,", obj->m_len_query, 
(obj->m_ptr_query ? obj->m_ptr_query : "NULL"),
       obj->m_len_query);
   Dbg(dbg_ctl_http, "\tFRAGMENT: \"%.*s\", FRAGMENT_LEN: %d]", 
obj->m_len_fragment,
@@ -1855,8 +1825,8 @@ url_CryptoHash_get_general(const URLImpl *url, 
CryptoContext &ctx, CryptoHash &h
   ends[7] = strs[7] + 1;
   ends[8] = strs[8] + url->m_len_path;
 
-  strs[9]  = ";";
-  strs[10] = url->m_ptr_params;
+  strs[9]  = "";
+  strs[10] = nullptr;
   strs[11] = "?";
 
   // Special case for the query paramters, allowing us to ignore them if 
requested
@@ -1868,8 +1838,8 @@ url_CryptoHash_get_general(const URLImpl *url, 
CryptoContext &ctx, CryptoHash &h
     ends[12] = nullptr;
   }
 
-  ends[9]  = strs[9] + 1;
-  ends[10] = strs[10] + url->m_len_params;
+  ends[9]  = strs[9] + 0;
+  ends[10] = strs[10] + 0;
   ends[11] = strs[11] + 1;
 
   p = buffer;
@@ -1927,7 +1897,7 @@ url_CryptoHash_get(const URLImpl *url, CryptoHash *hash, 
bool ignore_query, cach
 {
   URLHashContext ctx;
   if ((url_hash_method != 0) && (url->m_url_type == URL_TYPE_HTTP) &&
-      ((url->m_len_user + url->m_len_password + url->m_len_params + 
(ignore_query ? 0 : url->m_len_query)) == 0) &&
+      ((url->m_len_user + url->m_len_password + (ignore_query ? 0 : 
url->m_len_query)) == 0) &&
       (3 + 1 + 1 + 1 + 1 + 1 + 2 + url->m_len_scheme + url->m_len_host + 
url->m_len_path < BUFSIZE) &&
       (memchr(url->m_ptr_host, '%', url->m_len_host) == nullptr) && 
(memchr(url->m_ptr_path, '%', url->m_len_path) == nullptr)) {
     url_CryptoHash_get_fast(url, ctx, hash, generation);
diff --git a/src/proxy/hdrs/unit_tests/test_URL.cc 
b/src/proxy/hdrs/unit_tests/test_URL.cc
index 5c7cdf4f40..9c5b3c6413 100644
--- a/src/proxy/hdrs/unit_tests/test_URL.cc
+++ b/src/proxy/hdrs/unit_tests/test_URL.cc
@@ -697,3 +697,85 @@ TEST_CASE("UrlHashGet", "[url][hash_get]")
     }
   }
 }
+
+struct get_path_test_case {
+  const std::string description;
+  const std::string uri;
+  const std::string path;
+};
+
+// clang-format off
+std::vector<get_path_test_case> get_path_test_cases = {
+  {
+    "Semicolon in paths 1",
+    "http://foo.test/abc/xyz;p1=1,p2=2";,
+    "abc/xyz;p1=1,p2=2",
+  },
+  {
+    "Semicolon in paths 2",
+    "http://foo.test/abc;p1=1,p2=2/xyz";,
+    "abc;p1=1,p2=2/xyz",
+  },
+  {
+    "Semicolon in paths 3",
+    "http://foo.test/abc/xyz;p1=1,p2=2?q1=1";,
+    "abc/xyz;p1=1,p2=2",
+  },
+  {
+    "Semicolon in paths 4",
+    "http://foo.test/abc;p1=1,p2=2/xyz?q1=1";,
+    "abc;p1=1,p2=2/xyz",
+  },
+};
+
+/** Return the hash related to a URI.
+  *
+  * @param[in] uri The URI to hash.
+  * @return The hash of the URI.
+ */
+TEST_CASE("UrlPathGet", "[url][path_get]")
+{
+  for (auto const &test_case : get_path_test_cases) {
+    std::string description = test_case.description + ": " + test_case.uri + " 
-> " + test_case.path;
+    SECTION(description) {
+      URL url;
+      HdrHeap *heap = new_HdrHeap();
+      url.create(heap);
+      url.parse(test_case.uri);
+      const char *path;
+      int path_len;
+      path = url.path_get(&path_len);
+      CHECK(std::string_view(path, path_len) == test_case.path);
+      heap->destroy();
+    }
+  }
+}
+
+
+/**
+ * Tests for deprecated params_get/set
+ */
+TEST_CASE("UrlParamsGet", "[url][params_get]")
+{
+  // Expected behavior
+  // - ParamsGet always return empty string
+  // - ParamsSet appends the value to path
+  // - PathGet returns params appended by ParamsSet
+
+  const char *value;
+  int value_len;
+
+  URL url;
+  HdrHeap *heap = new_HdrHeap();
+  url.create(heap);
+  url.parse("https://foo.test/path;p=1";);
+  value = url.path_get(&value_len);
+  CHECK(std::string_view(value, value_len) == "path;p=1");
+  url.params_set("param=1", 7);
+  value = url.params_get(&value_len);
+  CHECK(value == nullptr);
+  CHECK(value_len == 0);
+  value = url.path_get(&value_len);
+  CHECK(std::string_view(value, value_len) == "path;param=1");
+  heap->destroy();
+}
diff --git a/src/proxy/http/HttpSM.cc b/src/proxy/http/HttpSM.cc
index 2ec59d8127..6dc467f52f 100644
--- a/src/proxy/http/HttpSM.cc
+++ b/src/proxy/http/HttpSM.cc
@@ -709,8 +709,7 @@ HttpSM::state_read_client_request_header(int event, void 
*data)
         call_transact_and_set_next_state(HttpTransact::TooEarly);
         return 0;
       } else if (!SSLConfigParams::server_allow_early_data_params &&
-                 
(t_state.hdr_info.client_request.m_http->u.req.m_url_impl->m_len_params > 0 ||
-                  
t_state.hdr_info.client_request.m_http->u.req.m_url_impl->m_len_query > 0)) {
+                 
t_state.hdr_info.client_request.m_http->u.req.m_url_impl->m_len_query > 0) {
         SMDbg(dbg_ctl_http, "client request was from early data but HAS 
parameters");
         call_transact_and_set_next_state(HttpTransact::TooEarly);
         return 0;
diff --git a/src/traffic_cache_tool/CacheScan.cc 
b/src/traffic_cache_tool/CacheScan.cc
index bac9ea3f75..26e92894ef 100644
--- a/src/traffic_cache_tool/CacheScan.cc
+++ b/src/traffic_cache_tool/CacheScan.cc
@@ -388,19 +388,17 @@ CacheScan::get_alternates(const char *buf, int length, 
bool search)
         std::string str;
 
         if (search) {
-          swoc::bwprint(str, "{}://{}:{}/{};{}?{}", 
std::string_view(url->m_ptr_scheme, url->m_len_scheme),
+          swoc::bwprint(str, "{}://{}:{}/{}?{}", 
std::string_view(url->m_ptr_scheme, url->m_len_scheme),
                         std::string_view(url->m_ptr_host, url->m_len_host), 
std::string_view(url->m_ptr_port, url->m_len_port),
-                        std::string_view(url->m_ptr_path, url->m_len_path), 
std::string_view(url->m_ptr_params, url->m_len_params),
-                        std::string_view(url->m_ptr_query, url->m_len_query));
+                        std::string_view(url->m_ptr_path, url->m_len_path), 
std::string_view(url->m_ptr_query, url->m_len_query));
           if (u_matcher->match(str.data())) {
             str = this->stripe->hashText + " " + str;
             std::cout << "match found " << str << std::endl;
           }
         } else {
-          swoc::bwprint(str, "stripe: {} : {}://{}:{}/{};{}?{}", 
std::string_view(this->stripe->hashText),
+          swoc::bwprint(str, "stripe: {} : {}://{}:{}/{}?{}", 
std::string_view(this->stripe->hashText),
                         std::string_view(url->m_ptr_scheme, 
url->m_len_scheme), std::string_view(url->m_ptr_host, url->m_len_host),
                         std::string_view(url->m_ptr_port, url->m_len_port), 
std::string_view(url->m_ptr_path, url->m_len_path),
-                        std::string_view(url->m_ptr_params, url->m_len_params),
                         std::string_view(url->m_ptr_query, url->m_len_query));
           std::cout << str << std::endl;
         }

Reply via email to