This is an automated email from the ASF dual-hosted git repository. sorber pushed a commit to branch parent_selection_hash_extension in repository https://gitbox.apache.org/repos/asf/trafficserver.git
commit 2c2958b9770a263435ccc0c55fe41b72687e7955 Author: Phil Sorber <[email protected]> AuthorDate: Wed Oct 15 16:12:08 2025 -0600 Make Parent Selection Hash configurable and add SipHash-1-3 and Wyhash v4.1. --- configs/records.yaml.default.in | 1 + doc/admin-guide/files/parent.config.en.rst | 7 + doc/admin-guide/files/records.yaml.en.rst | 22 ++ include/proxy/ParentConsistentHash.h | 7 +- include/proxy/ParentSelection.h | 3 + include/tscore/HashSip.h | 141 +++++++++++- include/tscore/{HashSip.h => HashWyhash.h} | 33 +-- src/proxy/CMakeLists.txt | 4 + src/proxy/ParentConsistentHash.cc | 59 +++-- src/proxy/ParentSelection.cc | 19 ++ src/proxy/{ => unit_tests}/CMakeLists.txt | 47 +--- src/proxy/unit_tests/main.cc | 51 +++++ src/proxy/unit_tests/test_ParentHashConfig.cc | 65 ++++++ src/records/RecordsConfig.cc | 2 + src/tscore/CMakeLists.txt | 3 +- src/tscore/HashSip.cc | 141 ------------ src/tscore/HashWyhash.cc | 154 +++++++++++++ src/tscore/unit_tests/test_HashAlgorithms.cc | 308 ++++++++++++++++++++++++++ 18 files changed, 835 insertions(+), 232 deletions(-) diff --git a/configs/records.yaml.default.in b/configs/records.yaml.default.in index f59a1ffbdc..225ae80239 100644 --- a/configs/records.yaml.default.in +++ b/configs/records.yaml.default.in @@ -130,6 +130,7 @@ records: ############################################################################## parent_proxy: retry_time: 300 + consistent_hash_algorithm: siphash24 ############################################################################## # Security. Docs: diff --git a/doc/admin-guide/files/parent.config.en.rst b/doc/admin-guide/files/parent.config.en.rst index 9fe59116ef..bcd75dd410 100644 --- a/doc/admin-guide/files/parent.config.en.rst +++ b/doc/admin-guide/files/parent.config.en.rst @@ -278,6 +278,13 @@ The following list shows the possible actions and their allowed values. The other traffic is unaffected. Once the downed parent becomes available, the traffic distribution returns to the pre-down state. + + The hash algorithm used for consistent hashing can be configured via + :ts:cv:`proxy.config.http.parent_proxy.consistent_hash_algorithm`. Available + algorithms are ``siphash24`` (default), ``siphash13`` (faster), and ``wyhash`` + (fastest). See the records.yaml documentation for performance characteristics + and migration considerations. + - ``latched`` - The first parent in the list is marked as primary and is always chosen until connection errors cause it to be marked down. When this occurs the next parent in the list then becomes primary. The primary diff --git a/doc/admin-guide/files/records.yaml.en.rst b/doc/admin-guide/files/records.yaml.en.rst index 920ec5d7d4..8025795f82 100644 --- a/doc/admin-guide/files/records.yaml.en.rst +++ b/doc/admin-guide/files/records.yaml.en.rst @@ -1471,6 +1471,28 @@ Parent Proxy Configuration ``2`` Mark the host down. This is the default. ===== ====================================================================== +.. ts:cv:: CONFIG proxy.config.http.parent_proxy.consistent_hash_algorithm STRING siphash24 + + Selects the hash algorithm used for consistent hash parent selection. This setting + only affects parent selection when ``round_robin=consistent_hash`` is configured in + :file:`parent.config`. The hash algorithm determines how requests are distributed + across parent proxies. + + ============== ================================================================================ + Value Description + ============== ================================================================================ + ``siphash24`` SipHash-2-4 (default). Cryptographically strong, DoS-resistant hash function. + ``siphash13`` SipHash-1-3. ~50% faster than SipHash-2-4, still DoS-resistant. + ``wyhash`` Wyhash v4.1. ~3-5x faster than SipHash-2-4, DoS-resistant. + ============== ================================================================================ + + .. warning:: + + Changing this setting will cause requests to be redistributed differently across + parent proxies. This can lead to cache churn and increased origin load during the + transition period. Plan the migration carefully and consider doing it during + low-traffic periods. + .. ts:cv:: CONFIG proxy.config.http.parent_proxy.enable_parent_timeout_markdowns INT 0 :reloadable: :overridable: diff --git a/include/proxy/ParentConsistentHash.h b/include/proxy/ParentConsistentHash.h index afcc387393..d4a4e8a29e 100644 --- a/include/proxy/ParentConsistentHash.h +++ b/include/proxy/ParentConsistentHash.h @@ -38,14 +38,15 @@ // class ParentConsistentHash : public ParentSelectionStrategy { - // there are two hashes PRIMARY parents - // and SECONDARY parents. - ATSHash64Sip24 hash[2]; + std::unique_ptr<ATSHash64> hash[2]; std::unique_ptr<ATSConsistentHash> chash[2]; pRecord *parents[2]; bool foundParents[2][MAX_PARENTS]; bool ignore_query; int secondary_mode; + ParentHashAlgorithm selected_algorithm; + + std::unique_ptr<ATSHash64> createHashInstance(ParentHashAlgorithm algo); public: static const int PRIMARY = 0; diff --git a/include/proxy/ParentSelection.h b/include/proxy/ParentSelection.h index 09726f5036..2794e27861 100644 --- a/include/proxy/ParentSelection.h +++ b/include/proxy/ParentSelection.h @@ -73,6 +73,8 @@ enum class ParentRetry_t { BOTH = 3 }; +enum class ParentHashAlgorithm { SIPHASH24 = 0, SIPHASH13, WYHASH }; + struct UnavailableServerResponseCodes { UnavailableServerResponseCodes(char *val); ~UnavailableServerResponseCodes(){}; @@ -163,6 +165,7 @@ public: int max_unavailable_server_retries = 1; int secondary_mode = 1; bool ignore_self_detect = false; + ParentHashAlgorithm consistent_hash_algorithm = ParentHashAlgorithm::SIPHASH24; }; // If the parent was set by the external customer api, diff --git a/include/tscore/HashSip.h b/include/tscore/HashSip.h index ce94404dc8..60630afc93 100644 --- a/include/tscore/HashSip.h +++ b/include/tscore/HashSip.h @@ -23,22 +23,139 @@ #include "tscore/Hash.h" #include <cstdint> +#include <cstring> /* - Siphash is a Hash Message Authentication Code and can take a key. + SipHash is a Hash Message Authentication Code and can take a key. If you don't care about MAC use the void constructor and it will use a zero key for you. - */ -struct ATSHash64Sip24 : ATSHash64 { - ATSHash64Sip24(); - ATSHash64Sip24(const unsigned char key[16]); - ATSHash64Sip24(std::uint64_t key0, std::uint64_t key1); - void update(const void *data, std::size_t len) override; - void final() override; - std::uint64_t get() const override; - void clear() override; + Template parameters: + - c_rounds: number of compression rounds per message block + - d_rounds: number of finalization rounds +*/ + +#define SIP_BLOCK_SIZE 8 + +#define ROTL64(a, b) (((a) << (b)) | ((a) >> (64 - b))) + +#define U8TO64_LE(p) *(const std::uint64_t *)(p) + +#define SIPCOMPRESS(x0, x1, x2, x3) \ + x0 += x1; \ + x2 += x3; \ + x1 = ROTL64(x1, 13); \ + x3 = ROTL64(x3, 16); \ + x1 ^= x0; \ + x3 ^= x2; \ + x0 = ROTL64(x0, 32); \ + x2 += x1; \ + x0 += x3; \ + x1 = ROTL64(x1, 17); \ + x3 = ROTL64(x3, 21); \ + x1 ^= x2; \ + x3 ^= x0; \ + x2 = ROTL64(x2, 32); + +template <int c_rounds, int d_rounds> struct ATSHashSip : ATSHash64 { + ATSHashSip() { this->clear(); } + + ATSHashSip(const unsigned char key[16]) : k0(U8TO64_LE(key)), k1(U8TO64_LE(key + sizeof(k0))) { this->clear(); } + + ATSHashSip(std::uint64_t key0, std::uint64_t key1) : k0(key0), k1(key1) { this->clear(); } + + void + update(const void *data, std::size_t len) override + { + std::size_t i, blocks; + unsigned char *m; + std::uint64_t mi; + std::uint8_t block_off = 0; + + if (!finalized) { + m = (unsigned char *)data; + total_len += len; + + if (len + block_buffer_len < SIP_BLOCK_SIZE) { + std::memcpy(block_buffer + block_buffer_len, m, len); + block_buffer_len += len; + } else { + if (block_buffer_len > 0) { + block_off = SIP_BLOCK_SIZE - block_buffer_len; + std::memcpy(block_buffer + block_buffer_len, m, block_off); + + mi = U8TO64_LE(block_buffer); + v3 ^= mi; + for (int r = 0; r < c_rounds; r++) { + SIPCOMPRESS(v0, v1, v2, v3); + } + v0 ^= mi; + } + + for (i = block_off, blocks = ((len - block_off) & ~(SIP_BLOCK_SIZE - 1)); i < blocks; i += SIP_BLOCK_SIZE) { + mi = U8TO64_LE(m + i); + v3 ^= mi; + for (int r = 0; r < c_rounds; r++) { + SIPCOMPRESS(v0, v1, v2, v3); + } + v0 ^= mi; + } + + block_buffer_len = (len - block_off) & (SIP_BLOCK_SIZE - 1); + std::memcpy(block_buffer, m + block_off + blocks, block_buffer_len); + } + } + } + + void + final() override + { + std::uint64_t last7; + int i; + + if (!finalized) { + last7 = static_cast<std::uint64_t>(total_len & 0xff) << 56; + + for (i = block_buffer_len - 1; i >= 0; i--) { + last7 |= static_cast<std::uint64_t>(block_buffer[i]) << (i * 8); + } + + v3 ^= last7; + for (int r = 0; r < c_rounds; r++) { + SIPCOMPRESS(v0, v1, v2, v3); + } + v0 ^= last7; + v2 ^= 0xff; + for (int r = 0; r < d_rounds; r++) { + SIPCOMPRESS(v0, v1, v2, v3); + } + hfinal = v0 ^ v1 ^ v2 ^ v3; + finalized = true; + } + } + + std::uint64_t + get() const override + { + if (finalized) { + return hfinal; + } else { + return 0; + } + } + + void + clear() override + { + v0 = k0 ^ 0x736f6d6570736575ull; + v1 = k1 ^ 0x646f72616e646f6dull; + v2 = k0 ^ 0x6c7967656e657261ull; + v3 = k1 ^ 0x7465646279746573ull; + finalized = false; + total_len = 0; + block_buffer_len = 0; + } private: unsigned char block_buffer[8] = {0}; @@ -53,3 +170,7 @@ private: std::size_t total_len = 0; bool finalized = false; }; + +// Standard SipHash variants +using ATSHash64Sip24 = ATSHashSip<2, 4>; // Original SipHash-2-4 +using ATSHash64Sip13 = ATSHashSip<1, 3>; // Faster SipHash-1-3 diff --git a/include/tscore/HashSip.h b/include/tscore/HashWyhash.h similarity index 58% copy from include/tscore/HashSip.h copy to include/tscore/HashWyhash.h index ce94404dc8..10e91c3b2f 100644 --- a/include/tscore/HashSip.h +++ b/include/tscore/HashWyhash.h @@ -24,32 +24,21 @@ #include "tscore/Hash.h" #include <cstdint> -/* - Siphash is a Hash Message Authentication Code and can take a key. - - If you don't care about MAC use the void constructor and it will use - a zero key for you. - */ - -struct ATSHash64Sip24 : ATSHash64 { - ATSHash64Sip24(); - ATSHash64Sip24(const unsigned char key[16]); - ATSHash64Sip24(std::uint64_t key0, std::uint64_t key1); +struct ATSHash64Wyhash : ATSHash64 { + ATSHash64Wyhash(); + ATSHash64Wyhash(std::uint64_t seed); void update(const void *data, std::size_t len) override; void final() override; std::uint64_t get() const override; void clear() override; private: - unsigned char block_buffer[8] = {0}; - std::uint8_t block_buffer_len = 0; - std::uint64_t k0 = 0; - std::uint64_t k1 = 0; - std::uint64_t v0 = 0; - std::uint64_t v1 = 0; - std::uint64_t v2 = 0; - std::uint64_t v3 = 0; - std::uint64_t hfinal = 0; - std::size_t total_len = 0; - bool finalized = false; + std::uint64_t seed = 0; + std::uint64_t state = 0; + std::uint64_t total_len = 0; + std::uint64_t hfinal = 0; + bool finalized = false; + + unsigned char buffer[32] = {0}; + std::uint8_t buffer_len = 0; }; diff --git a/src/proxy/CMakeLists.txt b/src/proxy/CMakeLists.txt index f145034fb6..93c1669784 100644 --- a/src/proxy/CMakeLists.txt +++ b/src/proxy/CMakeLists.txt @@ -55,4 +55,8 @@ if(TS_USE_QUIC) add_subdirectory(http3) endif() +if(BUILD_TESTING) + add_subdirectory(unit_tests) +endif() + clang_tidy_check(proxy) diff --git a/src/proxy/ParentConsistentHash.cc b/src/proxy/ParentConsistentHash.cc index a9dab9a1d3..11fa5d46e9 100644 --- a/src/proxy/ParentConsistentHash.cc +++ b/src/proxy/ParentConsistentHash.cc @@ -23,12 +23,30 @@ #include <atomic> #include "proxy/HostStatus.h" #include "proxy/ParentConsistentHash.h" +#include "tscore/HashSip.h" +#include "tscore/HashWyhash.h" namespace { DbgCtl dbg_ctl_parent_select{"parent_select"}; } +std::unique_ptr<ATSHash64> +ParentConsistentHash::createHashInstance(ParentHashAlgorithm algo) +{ + switch (algo) { + case ParentHashAlgorithm::SIPHASH24: + return std::make_unique<ATSHash64Sip24>(); + case ParentHashAlgorithm::SIPHASH13: + return std::make_unique<ATSHash64Sip13>(); + case ParentHashAlgorithm::WYHASH: + return std::make_unique<ATSHash64Wyhash>(); + default: + Warning("Unknown hash algorithm %d, using SipHash-2-4", static_cast<int>(algo)); + return std::make_unique<ATSHash64Sip24>(); + } +} + ParentConsistentHash::ParentConsistentHash(ParentRecord *parent_record) { int i; @@ -38,21 +56,24 @@ ParentConsistentHash::ParentConsistentHash(ParentRecord *parent_record) parents[SECONDARY] = parent_record->secondary_parents; ignore_query = parent_record->ignore_query; secondary_mode = parent_record->secondary_mode; + selected_algorithm = parent_record->consistent_hash_algorithm; ink_zero(foundParents); + hash[PRIMARY] = createHashInstance(selected_algorithm); chash[PRIMARY] = std::make_unique<ATSConsistentHash>(); for (i = 0; i < parent_record->num_parents; i++) { - chash[PRIMARY]->insert(&(parent_record->parents[i]), parent_record->parents[i].weight, (ATSHash64 *)&hash[PRIMARY]); + chash[PRIMARY]->insert(&(parent_record->parents[i]), parent_record->parents[i].weight, hash[PRIMARY].get()); } if (parent_record->num_secondary_parents > 0) { Dbg(dbg_ctl_parent_select, "ParentConsistentHash(): initializing the secondary parents hash."); + hash[SECONDARY] = createHashInstance(selected_algorithm); chash[SECONDARY] = std::make_unique<ATSConsistentHash>(); for (i = 0; i < parent_record->num_secondary_parents; i++) { chash[SECONDARY]->insert(&(parent_record->secondary_parents[i]), parent_record->secondary_parents[i].weight, - (ATSHash64 *)&hash[SECONDARY]); + hash[SECONDARY].get()); } } else { chash[SECONDARY] = nullptr; @@ -110,8 +131,8 @@ ParentConsistentHash::getPathHash(HttpRequestData *hrdata, ATSHash64 *h) // Helper function to abstract calling ATSConsistentHash lookup_by_hashval() vs lookup(). static pRecord * -chash_lookup(ATSConsistentHash *fhash, uint64_t path_hash, ATSConsistentHashIter *chashIter, bool *wrap_around, - ATSHash64Sip24 *hash, bool *chash_init, bool *mapWrapped) +chash_lookup(ATSConsistentHash *fhash, uint64_t path_hash, ATSConsistentHashIter *chashIter, bool *wrap_around, ATSHash64 *hash, + bool *chash_init, bool *mapWrapped) { pRecord *prtmp; @@ -134,18 +155,18 @@ void ParentConsistentHash::selectParent(bool first_call, ParentResult *result, RequestData *rdata, unsigned int /* fail_threshold ATS_UNUSED */, unsigned int retry_time) { - ATSHash64Sip24 hash; - ATSConsistentHash *fhash; - HttpRequestData *request_info = static_cast<HttpRequestData *>(rdata); - bool firstCall = first_call; - bool parentRetry = false; - bool wrap_around[2] = {false, false}; - int lookups = 0; - uint64_t path_hash = 0; - uint32_t last_lookup; - pRecord *prtmp = nullptr, *pRec = nullptr; - HostStatus &pStatus = HostStatus::instance(); - TSHostStatus host_stat = TSHostStatus::TS_HOST_STATUS_INIT; + std::unique_ptr<ATSHash64> hash = createHashInstance(selected_algorithm); + ATSConsistentHash *fhash; + HttpRequestData *request_info = static_cast<HttpRequestData *>(rdata); + bool firstCall = first_call; + bool parentRetry = false; + bool wrap_around[2] = {false, false}; + int lookups = 0; + uint64_t path_hash = 0; + uint32_t last_lookup; + pRecord *prtmp = nullptr, *pRec = nullptr; + HostStatus &pStatus = HostStatus::instance(); + TSHostStatus host_stat = TSHostStatus::TS_HOST_STATUS_INIT; Dbg(dbg_ctl_parent_select, "ParentConsistentHash::%s(): Using a consistent hash parent selection strategy.", __func__); ink_assert(numParents(result) > 0 || result->rec->go_direct == true); @@ -193,10 +214,10 @@ ParentConsistentHash::selectParent(bool first_call, ParentResult *result, Reques } // Do the initial parent look-up. - path_hash = getPathHash(request_info, (ATSHash64 *)&hash); + path_hash = getPathHash(request_info, hash.get()); fhash = chash[last_lookup].get(); do { // search until we've selected a different parent if !firstCall - prtmp = chash_lookup(fhash, path_hash, &result->chashIter[last_lookup], &wrap_around[last_lookup], &hash, + prtmp = chash_lookup(fhash, path_hash, &result->chashIter[last_lookup], &wrap_around[last_lookup], hash.get(), &result->chash_init[last_lookup], &result->mapWrapped[last_lookup]); lookups++; if (prtmp) { @@ -283,7 +304,7 @@ ParentConsistentHash::selectParent(bool first_call, ParentResult *result, Reques } } fhash = chash[last_lookup].get(); - prtmp = chash_lookup(fhash, path_hash, &result->chashIter[last_lookup], &wrap_around[last_lookup], &hash, + prtmp = chash_lookup(fhash, path_hash, &result->chashIter[last_lookup], &wrap_around[last_lookup], hash.get(), &result->chash_init[last_lookup], &result->mapWrapped[last_lookup]); lookups++; if (prtmp) { diff --git a/src/proxy/ParentSelection.cc b/src/proxy/ParentSelection.cc index a83f0f2a72..42641bf69a 100644 --- a/src/proxy/ParentSelection.cc +++ b/src/proxy/ParentSelection.cc @@ -56,6 +56,21 @@ DbgCtl ParentResult::dbg_ctl_parent_select{"parent_select"}; static DbgCtl &dbg_ctl_parent_select{ParentResult::dbg_ctl_parent_select}; static DbgCtl dbg_ctl_parent_config{"parent_config"}; +ParentHashAlgorithm +parseHashAlgorithm(std::string_view name) +{ + if (name == "siphash24") { + return ParentHashAlgorithm::SIPHASH24; + } else if (name == "siphash13") { + return ParentHashAlgorithm::SIPHASH13; + } else if (name == "wyhash") { + return ParentHashAlgorithm::WYHASH; + } else { + Warning("Unknown hash algorithm '%.*s', defaulting to siphash24", static_cast<int>(name.size()), name.data()); + return ParentHashAlgorithm::SIPHASH24; + } +} + ParentSelectionPolicy::ParentSelectionPolicy() { int32_t retry_time = 0; @@ -647,6 +662,10 @@ ParentRecord::Init(matcher_line *line_info) self_detect = static_cast<int>(rec_self_detect.value()); } + if (auto rec_hash_algo{RecGetRecordStringAlloc("proxy.config.http.parent_proxy.consistent_hash_algorithm")}; rec_hash_algo) { + consistent_hash_algorithm = parseHashAlgorithm(rec_hash_algo.value()); + } + for (int i = 0; i < MATCHER_MAX_TOKENS; i++) { used = false; label = line_info->line[0][i]; diff --git a/src/proxy/CMakeLists.txt b/src/proxy/unit_tests/CMakeLists.txt similarity index 52% copy from src/proxy/CMakeLists.txt copy to src/proxy/unit_tests/CMakeLists.txt index f145034fb6..d3e3410ff7 100644 --- a/src/proxy/CMakeLists.txt +++ b/src/proxy/unit_tests/CMakeLists.txt @@ -15,44 +15,19 @@ # ####################### -add_library( - proxy STATIC - private/SSLProxySession.cc - CacheControl.cc - ControlBase.cc - ControlMatcher.cc - HostStatus.cc - IPAllow.cc - ParentConsistentHash.cc - ParentRoundRobin.cc - ParentSelectionStrategy.cc - ParentSelection.cc - Plugin.cc - PluginVC.cc - ProtocolProbeSessionAccept.cc - ProxySession.cc - ProxyTransaction.cc - ReverseProxy.cc - Transform.cc - FetchSM.cc - PluginHttpConnect.cc +add_executable( + test_proxy + main.cc + test_ParentHashConfig.cc ) -add_library(ts::proxy ALIAS proxy) target_link_libraries( - proxy - PUBLIC ts::inkcache ts::inkevent ts::tsutil ts::tscore ts::inknet - PRIVATE ts::rpcpublichandlers ts::jsonrpc_protocol ts::inkutils ts::tsapibackend + test_proxy + PRIVATE Catch2::Catch2WithMain + ts::proxy + ts::tscore + ts::records + ts::inkevent ) -add_subdirectory(hdrs) -add_subdirectory(shared) -add_subdirectory(http) -add_subdirectory(http2) -add_subdirectory(logging) - -if(TS_USE_QUIC) - add_subdirectory(http3) -endif() - -clang_tidy_check(proxy) +add_catch2_test(NAME test_proxy COMMAND $<TARGET_FILE:test_proxy>) diff --git a/src/proxy/unit_tests/main.cc b/src/proxy/unit_tests/main.cc new file mode 100644 index 0000000000..a4b3b9453c --- /dev/null +++ b/src/proxy/unit_tests/main.cc @@ -0,0 +1,51 @@ +/** @file + + The main file for proxy unit tests + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include <catch2/catch_test_macros.hpp> +#include <catch2/reporters/catch_reporter_event_listener.hpp> +#include <catch2/reporters/catch_reporter_registrars.hpp> + +#include "tscore/Layout.h" +#include "iocore/eventsystem/EventSystem.h" +#include "records/RecordsConfig.h" +#include "iocore/utils/diags.i" + +struct DiagnosticsListener : Catch::EventListenerBase { + using EventListenerBase::EventListenerBase; + + void + testRunStarting(Catch::TestRunInfo const & /* testRunInfo */) override + { + Layout::create(); + init_diags("", nullptr); + RecProcessInit(); + LibRecordsConfigInit(); + } + + void + testRunEnded(Catch::TestRunStats const & /* testRunStats */) override + { + } +}; + +CATCH_REGISTER_LISTENER(DiagnosticsListener); diff --git a/src/proxy/unit_tests/test_ParentHashConfig.cc b/src/proxy/unit_tests/test_ParentHashConfig.cc new file mode 100644 index 0000000000..720b3d1d6c --- /dev/null +++ b/src/proxy/unit_tests/test_ParentHashConfig.cc @@ -0,0 +1,65 @@ +/** @file + + Unit tests for Parent Selection hash algorithm configuration + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "proxy/ParentSelection.h" +#include <catch2/catch_test_macros.hpp> + +// Helper function to test parseHashAlgorithm (normally static, exposed here for testing) +extern ParentHashAlgorithm parseHashAlgorithm(std::string_view name); + +TEST_CASE("parseHashAlgorithm - Valid inputs", "[ParentSelection]") +{ + REQUIRE(parseHashAlgorithm("siphash24") == ParentHashAlgorithm::SIPHASH24); + REQUIRE(parseHashAlgorithm("siphash13") == ParentHashAlgorithm::SIPHASH13); + REQUIRE(parseHashAlgorithm("wyhash") == ParentHashAlgorithm::WYHASH); +} + +TEST_CASE("parseHashAlgorithm - Invalid inputs fallback to default", "[ParentSelection]") +{ + REQUIRE(parseHashAlgorithm("invalid") == ParentHashAlgorithm::SIPHASH24); + REQUIRE(parseHashAlgorithm("") == ParentHashAlgorithm::SIPHASH24); + REQUIRE(parseHashAlgorithm("SIPHASH24") == ParentHashAlgorithm::SIPHASH24); // case sensitive + REQUIRE(parseHashAlgorithm("xxh3") == ParentHashAlgorithm::SIPHASH24); // not yet implemented + REQUIRE(parseHashAlgorithm("md5") == ParentHashAlgorithm::SIPHASH24); +} + +TEST_CASE("parseHashAlgorithm - Case sensitivity", "[ParentSelection]") +{ + // Should be case-sensitive - uppercase should fall back to default + REQUIRE(parseHashAlgorithm("WYHASH") == ParentHashAlgorithm::SIPHASH24); + REQUIRE(parseHashAlgorithm("SipHash24") == ParentHashAlgorithm::SIPHASH24); + REQUIRE(parseHashAlgorithm("Wyhash") == ParentHashAlgorithm::SIPHASH24); +} + +TEST_CASE("ParentHashAlgorithm - Backward compatibility", "[ParentSelection]") +{ + // Verify default is siphash24 for backward compatibility + REQUIRE(parseHashAlgorithm("siphash24") == ParentHashAlgorithm::SIPHASH24); + + // Verify enum default value is SIPHASH24 + ParentHashAlgorithm default_algo = ParentHashAlgorithm::SIPHASH24; + REQUIRE(static_cast<int>(default_algo) == 0); + + // Verify that unrecognized values fall back to siphash24 + REQUIRE(parseHashAlgorithm("unknown") == ParentHashAlgorithm::SIPHASH24); +} diff --git a/src/records/RecordsConfig.cc b/src/records/RecordsConfig.cc index fbc2d3eb65..ed54615f12 100644 --- a/src/records/RecordsConfig.cc +++ b/src/records/RecordsConfig.cc @@ -436,6 +436,8 @@ static constexpr RecordElement RecordsConfig[] = , {RECT_CONFIG, "proxy.config.http.parent_proxy.disable_parent_markdowns", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-1]", RECA_NULL} , + {RECT_CONFIG, "proxy.config.http.parent_proxy.consistent_hash_algorithm", RECD_STRING, "siphash24", RECU_RESTART_TS, RR_NULL, RECC_NULL, nullptr, RECA_NULL} + , {RECT_CONFIG, "proxy.config.http.forward.proxy_auth_to_parent", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_NULL, nullptr, RECA_NULL} , diff --git a/src/tscore/CMakeLists.txt b/src/tscore/CMakeLists.txt index d5d1e2b85d..1f6cfcb17c 100644 --- a/src/tscore/CMakeLists.txt +++ b/src/tscore/CMakeLists.txt @@ -44,7 +44,7 @@ add_library( FrequencyCounter.cc Hash.cc HashFNV.cc - HashSip.cc + HashWyhash.cc HostLookup.cc InkErrno.cc JeMiAllocator.cc @@ -155,6 +155,7 @@ if(BUILD_TESTING) unit_tests/test_Extendible.cc unit_tests/test_Encoding.cc unit_tests/test_FrequencyCounter.cc + unit_tests/test_HashAlgorithms.cc unit_tests/test_HKDF.cc unit_tests/test_Histogram.cc unit_tests/test_History.cc diff --git a/src/tscore/HashSip.cc b/src/tscore/HashSip.cc deleted file mode 100644 index c2487255fe..0000000000 --- a/src/tscore/HashSip.cc +++ /dev/null @@ -1,141 +0,0 @@ -/** - -Algorithm Info: -https://131002.net/siphash/ - -Based off of implementation: -https://github.com/floodyberry/siphash - - */ - -#include "tscore/HashSip.h" -#include <cstring> - -using namespace std; - -#define SIP_BLOCK_SIZE 8 - -#define ROTL64(a, b) (((a) << (b)) | ((a) >> (64 - b))) - -#define U8TO64_LE(p) *(const uint64_t *)(p) - -#define SIPCOMPRESS(x0, x1, x2, x3) \ - x0 += x1; \ - x2 += x3; \ - x1 = ROTL64(x1, 13); \ - x3 = ROTL64(x3, 16); \ - x1 ^= x0; \ - x3 ^= x2; \ - x0 = ROTL64(x0, 32); \ - x2 += x1; \ - x0 += x3; \ - x1 = ROTL64(x1, 17); \ - x3 = ROTL64(x3, 21); \ - x1 ^= x2; \ - x3 ^= x0; \ - x2 = ROTL64(x2, 32); - -ATSHash64Sip24::ATSHash64Sip24() -{ - this->clear(); -} - -ATSHash64Sip24::ATSHash64Sip24(const unsigned char key[16]) : k0(U8TO64_LE(key)), k1(U8TO64_LE(key + sizeof(k0))) -{ - this->clear(); -} - -ATSHash64Sip24::ATSHash64Sip24(uint64_t key0, uint64_t key1) : k0(key0), k1(key1) -{ - this->clear(); -} - -void -ATSHash64Sip24::update(const void *data, size_t len) -{ - size_t i, blocks; - unsigned char *m; - uint64_t mi; - uint8_t block_off = 0; - - if (!finalized) { - m = (unsigned char *)data; - total_len += len; - - if (len + block_buffer_len < SIP_BLOCK_SIZE) { - memcpy(block_buffer + block_buffer_len, m, len); - block_buffer_len += len; - } else { - if (block_buffer_len > 0) { - block_off = SIP_BLOCK_SIZE - block_buffer_len; - memcpy(block_buffer + block_buffer_len, m, block_off); - - mi = U8TO64_LE(block_buffer); - v3 ^= mi; - SIPCOMPRESS(v0, v1, v2, v3); - SIPCOMPRESS(v0, v1, v2, v3); - v0 ^= mi; - } - - for (i = block_off, blocks = ((len - block_off) & ~(SIP_BLOCK_SIZE - 1)); i < blocks; i += SIP_BLOCK_SIZE) { - mi = U8TO64_LE(m + i); - v3 ^= mi; - SIPCOMPRESS(v0, v1, v2, v3); - SIPCOMPRESS(v0, v1, v2, v3); - v0 ^= mi; - } - - block_buffer_len = (len - block_off) & (SIP_BLOCK_SIZE - 1); - memcpy(block_buffer, m + block_off + blocks, block_buffer_len); - } - } -} - -void -ATSHash64Sip24::final() -{ - uint64_t last7; - int i; - - if (!finalized) { - last7 = static_cast<uint64_t>(total_len & 0xff) << 56; - - for (i = block_buffer_len - 1; i >= 0; i--) { - last7 |= static_cast<uint64_t>(block_buffer[i]) << (i * 8); - } - - v3 ^= last7; - SIPCOMPRESS(v0, v1, v2, v3); - SIPCOMPRESS(v0, v1, v2, v3); - v0 ^= last7; - v2 ^= 0xff; - SIPCOMPRESS(v0, v1, v2, v3); - SIPCOMPRESS(v0, v1, v2, v3); - SIPCOMPRESS(v0, v1, v2, v3); - SIPCOMPRESS(v0, v1, v2, v3); - hfinal = v0 ^ v1 ^ v2 ^ v3; - finalized = true; - } -} - -uint64_t -ATSHash64Sip24::get() const -{ - if (finalized) { - return hfinal; - } else { - return 0; - } -} - -void -ATSHash64Sip24::clear() -{ - v0 = k0 ^ 0x736f6d6570736575ull; - v1 = k1 ^ 0x646f72616e646f6dull; - v2 = k0 ^ 0x6c7967656e657261ull; - v3 = k1 ^ 0x7465646279746573ull; - finalized = false; - total_len = 0; - block_buffer_len = 0; -} diff --git a/src/tscore/HashWyhash.cc b/src/tscore/HashWyhash.cc new file mode 100644 index 0000000000..7cbaf89400 --- /dev/null +++ b/src/tscore/HashWyhash.cc @@ -0,0 +1,154 @@ +/** + +Algorithm Info: +https://github.com/wangyi-fudan/wyhash + +Wyhash v4.1 - Fast non-cryptographic hash with DoS resistance +Public Domain / Unlicense + + */ + +#include "tscore/HashWyhash.h" +#include <cstring> + +using namespace std; + +static const uint64_t _wyp0 = 0xa0761d6478bd642full; +static const uint64_t _wyp1 = 0xe7037ed1a0b428dbull; +static const uint64_t _wyp2 = 0x8ebc6af09c88c6e3ull; +static const uint64_t _wyp3 = 0x589965cc75374cc3ull; +static const uint64_t _wyp4 = 0x1d8e4e27c47d124full; + +static inline uint64_t +_wymix(uint64_t A, uint64_t B) +{ + __uint128_t r = A; + r *= B; + return (r >> 64) ^ r; +} + +static inline uint64_t +_wyr8(const uint8_t *p) +{ + uint64_t v; + memcpy(&v, p, 8); + return v; +} + +static inline uint64_t +_wyr4(const uint8_t *p) +{ + uint32_t v; + memcpy(&v, p, 4); + return v; +} + +static inline uint64_t +_wyr3(const uint8_t *p, size_t k) +{ + return (((uint64_t)p[0]) << 16) | (((uint64_t)p[k >> 1]) << 8) | p[k - 1]; +} + +ATSHash64Wyhash::ATSHash64Wyhash() : seed(0) +{ + this->clear(); +} + +ATSHash64Wyhash::ATSHash64Wyhash(uint64_t s) : seed(s) +{ + this->clear(); +} + +void +ATSHash64Wyhash::update(const void *data, size_t len) +{ + if (finalized) { + return; + } + + const uint8_t *p = static_cast<const uint8_t *>(data); + total_len += len; + + if (buffer_len + len < 32) { + memcpy(buffer + buffer_len, p, len); + buffer_len += len; + return; + } + + if (buffer_len > 0) { + size_t to_copy = 32 - buffer_len; + memcpy(buffer + buffer_len, p, to_copy); + state = + _wymix(_wyr8(buffer) ^ _wyp0, _wyr8(buffer + 8) ^ state) ^ _wymix(_wyr8(buffer + 16) ^ _wyp1, _wyr8(buffer + 24) ^ state); + p += to_copy; + len -= to_copy; + buffer_len = 0; + } + + while (len >= 32) { + state = _wymix(_wyr8(p) ^ _wyp0, _wyr8(p + 8) ^ state) ^ _wymix(_wyr8(p + 16) ^ _wyp1, _wyr8(p + 24) ^ state); + p += 32; + len -= 32; + } + + if (len > 0) { + memcpy(buffer, p, len); + buffer_len = len; + } +} + +void +ATSHash64Wyhash::final() +{ + if (finalized) { + return; + } + + uint64_t a = 0, b = 0; + const uint8_t *p = buffer; + size_t len = buffer_len; + + if (len <= 16) { + if (len >= 4) { + a = (_wyr4(p) << 32) | _wyr4(p + ((len >> 3) << 2)); + b = (_wyr4(p + len - 4) << 32) | _wyr4(p + len - 4 - ((len >> 3) << 2)); + } else if (len > 0) { + a = _wyr3(p, len); + } + } else { + size_t i = len; + if (i > 32) { + i = 32; + } + a = _wyr8(p); + b = _wyr8(p + 8); + if (i > 16) { + a ^= _wyr8(p + i - 16); + b ^= _wyr8(p + i - 8); + } + } + + hfinal = _wymix(state ^ _wyp4, total_len ^ (_wymix(a ^ _wyp2, b ^ _wyp3) ^ seed)); + finalized = true; +} + +uint64_t +ATSHash64Wyhash::get() const +{ + if (finalized) { + return hfinal; + } else { + return 0; + } +} + +void +ATSHash64Wyhash::clear() +{ + state = seed; + total_len = 0; + hfinal = 0; + finalized = false; + buffer_len = 0; + memset(buffer, 0, sizeof(buffer)); +} diff --git a/src/tscore/unit_tests/test_HashAlgorithms.cc b/src/tscore/unit_tests/test_HashAlgorithms.cc new file mode 100644 index 0000000000..a1cf58c6b0 --- /dev/null +++ b/src/tscore/unit_tests/test_HashAlgorithms.cc @@ -0,0 +1,308 @@ +/** @file + + Unit tests for hash algorithms (SipHash-1-3, Wyhash) + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "tscore/HashSip.h" +#include "tscore/HashWyhash.h" +#include <catch2/catch_test_macros.hpp> +#include <string> +#include <vector> + +TEST_CASE("HashSip13 - Deterministic output", "[libts][HashSip13]") +{ + ATSHash64Sip13 hash1, hash2; + const char *input = "test"; + + hash1.update(input, 4); + hash1.final(); + + hash2.update(input, 4); + hash2.final(); + + REQUIRE(hash1.get() == hash2.get()); + REQUIRE(hash1.get() != 0); +} + +TEST_CASE("HashSip13 - Empty input", "[libts][HashSip13]") +{ + ATSHash64Sip13 hash; + hash.update("", 0); + hash.final(); + REQUIRE(hash.get() != 0); +} + +TEST_CASE("HashSip13 - Single byte", "[libts][HashSip13]") +{ + ATSHash64Sip13 hash; + hash.update("a", 1); + hash.final(); + REQUIRE(hash.get() != 0); +} + +TEST_CASE("HashSip13 - Block boundaries", "[libts][HashSip13]") +{ + std::vector<size_t> sizes = {7, 8, 9, 16, 17, 31, 32, 33}; + std::string input(64, 'x'); + + for (auto size : sizes) { + ATSHash64Sip13 hash; + hash.update(input.c_str(), size); + hash.final(); + REQUIRE(hash.get() != 0); + } +} + +TEST_CASE("HashSip13 - Incremental vs single update", "[libts][HashSip13]") +{ + ATSHash64Sip13 hash1, hash2; + + hash1.update("hello", 5); + hash1.update(" world", 6); + hash1.final(); + + hash2.update("hello world", 11); + hash2.final(); + + REQUIRE(hash1.get() == hash2.get()); +} + +TEST_CASE("HashSip13 - Typical URL paths", "[libts][HashSip13]") +{ + std::vector<std::string> urls = {"/", "/index.html", "/api/v1/users/123", "/images/photos/vacation/beach/2024/photo_12345.jpg"}; + + for (const auto &url : urls) { + ATSHash64Sip13 hash; + hash.update(url.c_str(), url.size()); + hash.final(); + REQUIRE(hash.get() != 0); + } +} + +TEST_CASE("HashSip13 - Long URLs with query strings", "[libts][HashSip13]") +{ + std::string long_url = "/search?"; + for (int i = 0; i < 200; i++) { + long_url += "parameter" + std::to_string(i) + "=some_longer_value" + std::to_string(i) + "&"; + } + + ATSHash64Sip13 hash; + hash.update(long_url.c_str(), long_url.size()); + hash.final(); + REQUIRE(hash.get() != 0); + REQUIRE(long_url.size() > 2000); +} + +TEST_CASE("HashSip13 - Different inputs produce different hashes", "[libts][HashSip13]") +{ + ATSHash64Sip13 hash1, hash2; + + hash1.update("parent1", 7); + hash1.final(); + + hash2.update("parent2", 7); + hash2.final(); + + REQUIRE(hash1.get() != hash2.get()); +} + +TEST_CASE("HashSip13 - Clear and reuse", "[libts][HashSip13]") +{ + ATSHash64Sip13 hash; + + hash.update("first", 5); + hash.final(); + uint64_t first_result = hash.get(); + + hash.clear(); + hash.update("first", 5); + hash.final(); + + REQUIRE(hash.get() == first_result); +} + +TEST_CASE("HashSip13 - Comparison with SipHash-2-4", "[libts][HashSip13]") +{ + ATSHash64Sip13 hash13; + ATSHash64Sip24 hash24; + const char *input = "test"; + + hash13.update(input, 4); + hash13.final(); + + hash24.update(input, 4); + hash24.final(); + + REQUIRE(hash13.get() != 0); + REQUIRE(hash24.get() != 0); +} + +TEST_CASE("HashWyhash - Deterministic output", "[libts][HashWyhash]") +{ + ATSHash64Wyhash hash1, hash2; + const char *input = "test"; + + hash1.update(input, 4); + hash1.final(); + + hash2.update(input, 4); + hash2.final(); + + REQUIRE(hash1.get() == hash2.get()); + REQUIRE(hash1.get() != 0); +} + +TEST_CASE("HashWyhash - Empty input", "[libts][HashWyhash]") +{ + ATSHash64Wyhash hash; + hash.update("", 0); + hash.final(); + REQUIRE(hash.get() != 0); +} + +TEST_CASE("HashWyhash - Single byte", "[libts][HashWyhash]") +{ + ATSHash64Wyhash hash; + hash.update("a", 1); + hash.final(); + REQUIRE(hash.get() != 0); +} + +TEST_CASE("HashWyhash - Block boundaries (32-byte blocks)", "[libts][HashWyhash]") +{ + std::vector<size_t> sizes = {31, 32, 33, 64, 65, 96, 97}; + std::string input(128, 'x'); + + for (auto size : sizes) { + ATSHash64Wyhash hash; + hash.update(input.c_str(), size); + hash.final(); + REQUIRE(hash.get() != 0); + } +} + +TEST_CASE("HashWyhash - Incremental vs single update", "[libts][HashWyhash]") +{ + ATSHash64Wyhash hash1, hash2; + + hash1.update("hello", 5); + hash1.update(" world", 6); + hash1.final(); + + hash2.update("hello world", 11); + hash2.final(); + + REQUIRE(hash1.get() == hash2.get()); +} + +TEST_CASE("HashWyhash - Typical URL paths", "[libts][HashWyhash]") +{ + std::vector<std::string> urls = {"/", "/index.html", "/api/v1/users/123", "/images/photos/vacation/beach/2024/photo_12345.jpg"}; + + for (const auto &url : urls) { + ATSHash64Wyhash hash; + hash.update(url.c_str(), url.size()); + hash.final(); + REQUIRE(hash.get() != 0); + } +} + +TEST_CASE("HashWyhash - Long URLs with query strings", "[libts][HashWyhash]") +{ + std::string long_url = "/search?"; + for (int i = 0; i < 200; i++) { + long_url += "parameter" + std::to_string(i) + "=some_longer_value" + std::to_string(i) + "&"; + } + + ATSHash64Wyhash hash; + hash.update(long_url.c_str(), long_url.size()); + hash.final(); + REQUIRE(hash.get() != 0); + REQUIRE(long_url.size() > 2000); +} + +TEST_CASE("HashWyhash - Different inputs produce different hashes", "[libts][HashWyhash]") +{ + ATSHash64Wyhash hash1, hash2; + + hash1.update("parent1", 7); + hash1.final(); + + hash2.update("parent2", 7); + hash2.final(); + + REQUIRE(hash1.get() != hash2.get()); +} + +TEST_CASE("HashWyhash - Clear and reuse", "[libts][HashWyhash]") +{ + ATSHash64Wyhash hash; + + hash.update("first", 5); + hash.final(); + uint64_t first_result = hash.get(); + + hash.clear(); + hash.update("first", 5); + hash.final(); + + REQUIRE(hash.get() == first_result); +} + +TEST_CASE("HashWyhash - Custom seed", "[libts][HashWyhash]") +{ + ATSHash64Wyhash hash1(123456); + ATSHash64Wyhash hash2(789012); + const char *input = "test"; + + hash1.update(input, 4); + hash1.final(); + + hash2.update(input, 4); + hash2.final(); + + REQUIRE(hash1.get() != hash2.get()); +} + +TEST_CASE("Hash algorithms produce different outputs for same input", "[libts][Hash]") +{ + const char *input = "test"; + ATSHash64Sip13 hash13; + ATSHash64Sip24 hash24; + ATSHash64Wyhash wyhash; + + hash13.update(input, 4); + hash13.final(); + + hash24.update(input, 4); + hash24.final(); + + wyhash.update(input, 4); + wyhash.final(); + + uint64_t result13 = hash13.get(); + uint64_t result24 = hash24.get(); + uint64_t result_wy = wyhash.get(); + + REQUIRE(result13 != 0); + REQUIRE(result24 != 0); + REQUIRE(result_wy != 0); +}
