This is an automated email from the ASF dual-hosted git repository.
bcall pushed a commit to branch 9.2.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/9.2.x by this push:
new 32d93ad084 9.2.x: Correctly handle encoding for cache hash generation
(#10127)
32d93ad084 is described below
commit 32d93ad084d18ee592754e3736d960a4fd8ddfd2
Author: Brian Neradt <[email protected]>
AuthorDate: Tue Aug 1 18:05:08 2023 -0500
9.2.x: Correctly handle encoding for cache hash generation (#10127)
Since origins may treat URL encoded or unencoded paths, query
parameters, or fragments differently, we should cache them separately.
This updates our URL cache hashing logic to not unencode these
components of a URI.
---
proxy/hdrs/URL.cc | 10 ++++
proxy/hdrs/unit_tests/test_URL.cc | 102 ++++++++++++++++++++++++++++++++
tests/gold_tests/url/uri.replay.yaml | 111 +++++++++++++++++++++++++++++++++--
tests/gold_tests/url/uri.test.py | 8 ++-
4 files changed, 223 insertions(+), 8 deletions(-)
diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc
index a26f5d7911..7fbae9ccb1 100644
--- a/proxy/hdrs/URL.cc
+++ b/proxy/hdrs/URL.cc
@@ -1859,6 +1859,16 @@ url_CryptoHash_get_general(const URLImpl *url,
CryptoContext &ctx, CryptoHash &h
while (t < ends[i]) {
if ((i == 0) || (i == 6)) { // scheme and host
unescape_str_tolower(p, e, t, ends[i], s);
+ } else if (i == 8 || i == 10 || i == 12) { // path, params, query
+ // Don't unescape the parts of the URI that are processed by the
+ // origin since it may behave differently based upon whether these
are
+ // escaped or not. Therefore differently encoded strings should be
+ // cached separately via differentiated hashes.
+ int path_len = ends[i] - t;
+ int min_len = std::min(path_len, static_cast<int>(e - p));
+ memcpy(p, t, min_len);
+ p += min_len;
+ t += min_len;
} else {
unescape_str(p, e, t, ends[i], s);
}
diff --git a/proxy/hdrs/unit_tests/test_URL.cc
b/proxy/hdrs/unit_tests/test_URL.cc
index a06d1d5d05..28c35c63f1 100644
--- a/proxy/hdrs/unit_tests/test_URL.cc
+++ b/proxy/hdrs/unit_tests/test_URL.cc
@@ -23,6 +23,7 @@
#include "catch.hpp"
#include "URL.h"
+#include "tscore/CryptoHash.h"
TEST_CASE("ValidateURL", "[proxy][validurl]")
{
@@ -555,3 +556,104 @@ TEST_CASE("UrlParse", "[proxy][parseurl]")
test_parse(test_case, URL_PARSE_REGEX);
}
}
+
+struct get_hash_test_case {
+ const std::string description;
+ const std::string uri_1;
+ const std::string uri_2;
+ const bool has_equal_hash;
+};
+
+constexpr bool HAS_EQUAL_HASH = true;
+
+// clang-format off
+std::vector<get_hash_test_case> get_hash_test_cases = {
+ {
+ "No encoding: equal hashes",
+
"http://one.example.com/a/path?name=value#some=value?with_question#fragment",
+
"http://one.example.com/a/path?name=value#some=value?with_question#fragment",
+ HAS_EQUAL_HASH,
+ },
+ {
+ "Scheme encoded: equal hashes",
+
"http%3C://one.example.com/a/path?name=value#some=value?with_question#fragment",
+
"http<://one.example.com/a/path?name=value#some=value?with_question#fragment",
+ HAS_EQUAL_HASH,
+ },
+ {
+ "Host encoded: equal hashes",
+
"http://one%2Eexample.com/a/path?name=value#some=value?with_question#fragment",
+
"http://one.example.com/a/path?name=value#some=value?with_question#fragment",
+ HAS_EQUAL_HASH,
+ },
+ {
+ "Path encoded: differing hashes",
+
"http://one.example.com/a%2Fpath?name=value#some=value?with_question#fragment",
+
"http://one.example.com/a/path?name=value#some=value?with_question#fragment",
+ !HAS_EQUAL_HASH,
+ },
+ {
+ "Query = encoded: differing hashes",
+
"http://one.example.com/a/path?name%3Dvalue#some=value?with_question#fragment",
+
"http://one.example.com/a/path?name=value#some=value?with_question#fragment",
+ !HAS_EQUAL_HASH,
+ },
+ {
+ "Query internal encoded: differing hashes",
+
"http://one.example.com/a/path?name=valu%5D#some=value?with_question#fragment",
+
"http://one.example.com/a/path?name=valu]#some=value?with_question#fragment",
+ !HAS_EQUAL_HASH,
+ },
+ {
+ "Fragment encoded: fragment is not part of the hash",
+
"http://one.example.com/a/path?name=value#some=value?with_question#frag%7Dent",
+
"http://one.example.com/a/path?name=value#some=value?with_question/frag}ent",
+ HAS_EQUAL_HASH,
+ },
+ {
+ "Username encoded: equal hashes",
+ "mysql://my%7Eser:mypassword@localhost/mydatabase",
+ "mysql://my~ser:mypassword@localhost/mydatabase",
+ HAS_EQUAL_HASH,
+ },
+ {
+ "Password encoded: equal hashes",
+ "mysql://myuser:mypa%24sword@localhost/mydatabase",
+ "mysql://myuser:mypa$sword@localhost/mydatabase",
+ HAS_EQUAL_HASH,
+ },
+};
+
+/** Return the hash related to a URI.
+ *
+ * @param[in] uri The URI to hash.
+ * @return The hash of the URI.
+ */
+CryptoHash
+get_hash(const std::string &uri)
+{
+ URL url;
+ HdrHeap *heap = new_HdrHeap();
+ url.create(heap);
+ url.parse(uri);
+ CryptoHash hash;
+ url.hash_get(&hash);
+ heap->destroy();
+ return hash;
+}
+
+TEST_CASE("UrlHashGet", "[url][hash_get]")
+{
+ for (auto const &test_case : get_hash_test_cases) {
+ std::string description = test_case.description + ": " + test_case.uri_1 +
" vs " + test_case.uri_2;
+ SECTION(description) {
+ CryptoHash hash1 = get_hash(test_case.uri_1);
+ CryptoHash hash2 = get_hash(test_case.uri_2);
+ if (test_case.has_equal_hash) {
+ CHECK(hash1 == hash2);
+ } else {
+ CHECK(hash1 != hash2);
+ }
+ }
+ }
+}
diff --git a/tests/gold_tests/url/uri.replay.yaml
b/tests/gold_tests/url/uri.replay.yaml
index 4935ff7951..d6689db11f 100644
--- a/tests/gold_tests/url/uri.replay.yaml
+++ b/tests/gold_tests/url/uri.replay.yaml
@@ -33,12 +33,20 @@ meta:
- [ Content-Length, 16 ]
- [ Cache-Control, max-age=300 ]
+ - 404_ok_response: &404_ok_response
+ server-response:
+ status: 404
+ reason: "Not Found"
+ headers:
+ fields:
+ - [ Content-Length, 16 ]
+ - [ Cache-Control, max-age=300 ]
+
sessions:
- transactions:
# A simply path
- - all: { headers: { fields: [[ uuid, 1 ]]}}
- client-request:
+ - client-request:
method: "GET"
version: "1.1"
scheme: "http"
@@ -46,6 +54,7 @@ sessions:
headers:
fields:
- [ Host, example.com ]
+ - [ uuid, 1 ]
<<: *200_ok_response
@@ -54,8 +63,7 @@ sessions:
# An initial // without an authority is not valid per RFC 3986 section-3.3,
# but we have historically accepted it and will continue to do so.
- - all: { headers: { fields: [[ uuid, 2 ]]}}
- client-request:
+ - client-request:
method: "GET"
version: "1.1"
scheme: "http"
@@ -63,6 +71,7 @@ sessions:
headers:
fields:
- [ Host, example.com ]
+ - [ uuid, 2 ]
<<: *200_ok_response
@@ -70,8 +79,7 @@ sessions:
status: 200
# A '^' is an invalid path.
- - all: { headers: { fields: [[ uuid, 3 ]]}}
- client-request:
+ - client-request:
method: "GET"
version: "1.1"
scheme: "http"
@@ -79,8 +87,99 @@ sessions:
headers:
fields:
- [ Host, example.com ]
+ - [ uuid, 3 ]
<<: *200_ok_response
proxy-response:
status: 400
+
+- transactions:
+ #
+ # Make sure that we consistently treat encoding of paths.
+ #
+ - client-request:
+ method: "GET"
+ version: "1.1"
+ url: /path%2ffoo
+ headers:
+ fields:
+ - [ Host, test.com ]
+ - [ Connection, keep-alive ]
+ - [ uuid, encoded_path ]
+
+ proxy-request:
+ # Verify that the origin gets the encoded path.
+ url: /path%2ffoo
+
+ <<: *404_ok_response
+
+ proxy-response:
+ status: 404
+
+ # Request with an unencoded path: go to origin.
+ - client-request:
+ # Give plenty of time to cache the response.
+ delay: 200ms
+
+ method: "GET"
+ version: "1.1"
+ # Note that the path is not encoded.
+ url: /path/foo
+ headers:
+ fields:
+ - [ Host, test.com ]
+ - [ Connection, keep-alive ]
+ - [ uuid, unencoded_path ]
+
+ proxy-request:
+ url: /path/foo
+
+ <<: *200_ok_response
+
+ proxy-response:
+ # ATS should treat the unencoded path as different and go to the origin,
+ # not serve the encoded path out of cache.
+ status: 200
+
+ # Request again with an encoded path, should be served from cache.
+ - client-request:
+ method: "GET"
+ version: "1.1"
+ url: /path%2ffoo
+ headers:
+ fields:
+ - [ Host, test.com ]
+ - [ Connection, keep-alive ]
+ - [ uuid, encoded_path_again ]
+
+ # The origin should not receive this request, but if it does, reply with a
+ # non-404 response so we can detect it.
+ <<: *200_ok_response
+
+ proxy-response:
+ # ATS should serve the encoded path response from cache.
+ status: 404
+
+ # Request again with an encoded path, should be served from cache.
+ - client-request:
+ # Give plenty of time to cache the unencoded response.
+ delay: 200ms
+
+ method: "GET"
+ version: "1.1"
+ # Note that the path is not encoded.
+ url: /path/foo
+ headers:
+ fields:
+ - [ Host, test.com ]
+ - [ Connection, keep-alive ]
+ - [ uuid, unencoded_path_again ]
+
+ # The origin should not receive this request, but if it does, reply with a
+ # non-200 response so we can detect it.
+ <<: *404_ok_response
+
+ proxy-response:
+ # Verify that ATS returns out of cache for the uneencoded path object.
+ status: 200
diff --git a/tests/gold_tests/url/uri.test.py b/tests/gold_tests/url/uri.test.py
index 8589d07f0f..f1ccd77e00 100644
--- a/tests/gold_tests/url/uri.test.py
+++ b/tests/gold_tests/url/uri.test.py
@@ -26,7 +26,7 @@ replay_file = "uri.replay.yaml"
server = Test.MakeVerifierServerProcess("server", replay_file)
ts.Disk.records_config.update({
'proxy.config.diags.debug.enabled': 1,
- 'proxy.config.diags.debug.tags': 'http',
+ 'proxy.config.diags.debug.tags': 'http|cache|url',
})
ts.Disk.remap_config.AddLine(
'map / http://127.0.0.1:{0}'.format(server.Variables.http_port)
@@ -34,4 +34,8 @@ ts.Disk.remap_config.AddLine(
tr = Test.AddTestRun("Verify correct URI parsing behavior.")
tr.Processes.Default.StartBefore(server)
tr.Processes.Default.StartBefore(ts)
-tr.AddVerifierClientProcess("client", replay_file,
http_ports=[ts.Variables.port])
+tr.AddVerifierClientProcess(
+ "client",
+ replay_file,
+ http_ports=[ts.Variables.port],
+ other_args='--thread-limit 1')