[trafficserver] branch 8.1.x updated: Correctly handle encoding for cache hash generation (#10128)

eze Tue, 01 Aug 2023 11:52:29 -0700

This is an automated email from the ASF dual-hosted git repository.

eze pushed a commit to branch 8.1.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git



The following commit(s) were added to refs/heads/8.1.x by this push:
     new 811f8dee8b Correctly handle encoding for cache hash generation (#10128)
811f8dee8b is described below

commit 811f8dee8b50816db9d05083771477ddd0cf1230
Author: Brian Neradt <brian.ner...@gmail.com>
AuthorDate: Tue Aug 1 13:51:44 2023 -0500

    Correctly handle encoding for cache hash generation (#10128)
    
    Since origins may treat URL encoded or unencoded paths, query
    parameters, or fragments differently, we should cache them separately.
    This updates our URL cache hashing logic to not unencode these
    components of a URI.
---
 proxy/hdrs/URL.cc                 |  10 ++++
 proxy/hdrs/unit_tests/test_URL.cc | 101 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 111 insertions(+)

diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc
index 9edf1a43cc..e38df3379b 100644
--- a/proxy/hdrs/URL.cc
+++ b/proxy/hdrs/URL.cc
@@ -1776,6 +1776,16 @@ url_CryptoHash_get_general(const URLImpl *url, 
CryptoContext &ctx, CryptoHash &h
       while (t < ends[i]) {
         if ((i == 0) || (i == 6)) { // scheme and host
           unescape_str_tolower(p, e, t, ends[i], s);
+        } else if (i == 8 || i == 10 || i == 12) { // path, params, query
+          // Don't unescape the parts of the URI that are processed by the
+          // origin since it may behave differently based upon whether these 
are
+          // escaped or not. Therefore differently encoded strings should be
+          // cached separately via differentiated hashes.
+          int path_len = ends[i] - t;
+          int min_len  = std::min(path_len, static_cast<int>(e - p));
+          memcpy(p, t, min_len);
+          p += min_len;
+          t += min_len;
         } else {
           unescape_str(p, e, t, ends[i], s);
         }
diff --git a/proxy/hdrs/unit_tests/test_URL.cc 
b/proxy/hdrs/unit_tests/test_URL.cc
index b022d068e4..44f621c76f 100644
--- a/proxy/hdrs/unit_tests/test_URL.cc
+++ b/proxy/hdrs/unit_tests/test_URL.cc
@@ -44,3 +44,104 @@ TEST_CASE("Validate Scheme", "[proxy][validscheme]")
     }
   }
 }
+
+struct get_hash_test_case {
+  const std::string description;
+  const std::string uri_1;
+  const std::string uri_2;
+  const bool has_equal_hash;
+};
+
+constexpr bool HAS_EQUAL_HASH = true;
+
+// clang-format off
+std::vector<get_hash_test_case> get_hash_test_cases = {
+  {
+    "No encoding: equal hashes",
+    
"http://one.example.com/a/path?name=value#some=value?with_question#fragment";,
+    
"http://one.example.com/a/path?name=value#some=value?with_question#fragment";,
+    HAS_EQUAL_HASH,
+  },
+  {
+    "Scheme encoded: equal hashes",
+    
"http%3C://one.example.com/a/path?name=value#some=value?with_question#fragment",
+    
"http<://one.example.com/a/path?name=value#some=value?with_question#fragment",
+    HAS_EQUAL_HASH,
+  },
+  {
+    "Host encoded: equal hashes",
+    
"http://one%2Eexample.com/a/path?name=value#some=value?with_question#fragment";,
+    
"http://one.example.com/a/path?name=value#some=value?with_question#fragment";,
+    HAS_EQUAL_HASH,
+  },
+  {
+    "Path encoded: differing hashes",
+    
"http://one.example.com/a%2Fpath?name=value#some=value?with_question#fragment";,
+    
"http://one.example.com/a/path?name=value#some=value?with_question#fragment";,
+    !HAS_EQUAL_HASH,
+  },
+  {
+    "Query = encoded: differing hashes",
+    
"http://one.example.com/a/path?name%3Dvalue#some=value?with_question#fragment";,
+    
"http://one.example.com/a/path?name=value#some=value?with_question#fragment";,
+    !HAS_EQUAL_HASH,
+  },
+  {
+    "Query internal encoded: differing hashes",
+    
"http://one.example.com/a/path?name=valu%5D#some=value?with_question#fragment";,
+    
"http://one.example.com/a/path?name=valu]#some=value?with_question#fragment";,
+    !HAS_EQUAL_HASH,
+  },
+  {
+    "Fragment encoded: fragment is not part of the hash",
+    
"http://one.example.com/a/path?name=value#some=value?with_question#frag%7Dent";,
+    
"http://one.example.com/a/path?name=value#some=value?with_question/frag}ent";,
+    HAS_EQUAL_HASH,
+  },
+  {
+    "Username encoded: equal hashes",
+    "mysql://my%7Eser:mypassword@localhost/mydatabase",
+    "mysql://my~ser:mypassword@localhost/mydatabase",
+    HAS_EQUAL_HASH,
+  },
+  {
+    "Password encoded: equal hashes",
+    "mysql://myuser:mypa%24sword@localhost/mydatabase",
+    "mysql://myuser:mypa$sword@localhost/mydatabase",
+    HAS_EQUAL_HASH,
+  },
+};
+
+/** Return the hash related to a URI.
+  *
+  * @param[in] uri The URI to hash.
+  * @return The hash of the URI.
+ */
+CryptoHash
+get_hash(const std::string &uri)
+{
+  URL url;
+  HdrHeap *heap = new_HdrHeap();
+  url.create(heap);
+  url.parse(uri.c_str(), uri.length());
+  CryptoHash hash;
+  url.hash_get(&hash);
+  heap->destroy();
+  return hash;
+}
+
+TEST_CASE("UrlHashGet", "[url][hash_get]")
+{
+  for (auto const &test_case : get_hash_test_cases) {
+    std::string description = test_case.description + ": " + test_case.uri_1 + 
" vs " + test_case.uri_2;
+    SECTION(description) {
+      CryptoHash hash1 = get_hash(test_case.uri_1);
+      CryptoHash hash2 = get_hash(test_case.uri_2);
+      if (test_case.has_equal_hash) {
+        CHECK(hash1 == hash2);
+      } else {
+        CHECK(hash1 != hash2);
+      }
+    }
+  }
+}

[trafficserver] branch 8.1.x updated: Correctly handle encoding for cache hash generation (#10128)

Reply via email to