This is an automated email from the ASF dual-hosted git repository.
bneradt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/master by this push:
new b8ada6fe06 Use ls-hpack's fast Huffman decoder for HPACK/QPACK strings
(#13259)
b8ada6fe06 is described below
commit b8ada6fe06dd4e2161d8480a1a1fc75346a43153
Author: Phong Nguyen <[email protected]>
AuthorDate: Wed Jun 24 10:34:59 2026 -0700
Use ls-hpack's fast Huffman decoder for HPACK/QPACK strings (#13259)
When the home-grown Huffman codec was replaced with vendored LiteSpeed
ls-hpack code (#12357), only the conservative 4-bit FSM decoder
(lshpack_dec_huff_decode_full) was ported — although huff-tables.h has carried
the 64K-entry table for upstream's fast decoder all along. This PR ports the
fast decoder (lshpack_dec_huff_decode, from ls-hpack v2.3.5) and switches
huffman_decode() to it.
The fast decoder consumes 16 bits of input per table lookup and emits up to
3 bytes, falling back to the FSM decoder for the rare codes longer than 16
bits. HPACK and QPACK share the wrapper through xpack_decode_string(), so both
HTTP/2 and HTTP/3 header decoding benefit. No new memory footprint: the hdecs
table has been compiled into the binary since the original vendoring.
---
include/proxy/hdrs/HuffmanCodec.h | 10 ++
lib/ls-hpack/README.md | 16 ++-
lib/ls-hpack/lshpack.cc | 190 +++++++++++++++++++++++++-
lib/ls-hpack/lshpack.h | 6 +
src/proxy/hdrs/CMakeLists.txt | 2 +
src/proxy/hdrs/HuffmanCodec.cc | 4 +-
src/proxy/hdrs/unit_tests/test_Huffmancode.cc | 137 +++++++++++++++++++
tools/benchmark/CMakeLists.txt | 4 +
tools/benchmark/benchmark_HuffmanDecode.cc | 142 +++++++++++++++++++
9 files changed, 507 insertions(+), 4 deletions(-)
diff --git a/include/proxy/hdrs/HuffmanCodec.h
b/include/proxy/hdrs/HuffmanCodec.h
index e979bdc315..9d1aa7316f 100644
--- a/include/proxy/hdrs/HuffmanCodec.h
+++ b/include/proxy/hdrs/HuffmanCodec.h
@@ -25,5 +25,15 @@
#include <cstdint>
+/** Decode a Huffman-encoded string per RFC 7541 section 5.2.
+
+ @return The decoded length, or a negative value on invalid input or
+ insufficient destination space.
+
+ @note dst_len must be strictly greater than the decoded length; with an
+ exactly-sized destination the decoder may report insufficient space.
+ Huffman expands to at most 8/5 of the encoded length, so sizing dst at
+ 2x src_len always suffices (see xpack_decode_string).
+ */
int64_t huffman_decode(char *dst, uint32_t dst_len, uint8_t const *src,
uint32_t src_len);
int64_t huffman_encode(uint8_t *dst, uint32_t dst_len, uint8_t const *src,
uint32_t src_len);
diff --git a/lib/ls-hpack/README.md b/lib/ls-hpack/README.md
index b6ba2c769f..2b7ccb1948 100644
--- a/lib/ls-hpack/README.md
+++ b/lib/ls-hpack/README.md
@@ -23,4 +23,18 @@ Huffman decoding acceleration.
The current implementation pulled into ATS is based upon what is currently the
latest version,
-[v2.3.4](https://github.com/litespeedtech/ls-hpack/releases/tag/v2.3.4).
+[v2.3.5](https://github.com/litespeedtech/ls-hpack/releases/tag/v2.3.5).
+
+# ATS Modifications
+
+The code is kept as close to upstream as practical, with one deliberate
+behavioral divergence that any future re-sync must preserve:
+
+- `lshpack_dec_huff_decode` (the fast decoder) rejects Huffman padding of 8 or
+ more bits, as required by RFC 7541 section 5.2. Upstream's fast decoder
+ accepts such padding when it follows the final symbol near the end of the
+ input; the 4-bit FSM decoder (`lshpack_dec_huff_decode_full`) has always
+ rejected it. See the commented tail check in `lshpack.cc` and the
+ `decode_overlong_padding` test in
+ `src/proxy/hdrs/unit_tests/test_Huffmancode.cc`, which fails if the check is
+ dropped.
diff --git a/lib/ls-hpack/lshpack.cc b/lib/ls-hpack/lshpack.cc
index 6e7f0ee0e6..b1d4167c65 100644
--- a/lib/ls-hpack/lshpack.cc
+++ b/lib/ls-hpack/lshpack.cc
@@ -52,6 +52,9 @@ SOFTWARE.
*/
#include "huff-tables.h"
+#include "lshpack.h"
+#include <cassert>
+#include <cstddef>
#include <cstdint>
#include <cstring>
@@ -61,7 +64,7 @@ namespace litespeed {
namespace
{
-constexpr int64_t LSHPACK_ERR_MORE_BUF = -3;
+constexpr unsigned SHORTEST_CODE = 5;
struct decode_status {
uint8_t state;
@@ -243,4 +246,189 @@ lshpack_dec_huff_decode_full(uint8_t const *src, uint32_t
src_len, char *dst, ui
return p_dst - dst;
}
+// Implementation taken from LiteSpeed:
+// lshpack_dec_huff_decode
+//
+// The decoder is optimized for the common case. Most of the time, we decode
+// data whose encoding is 16 bits or shorter. This lets us use a 64 KB table
+// indexed by two bytes of input and outputs 1, 2, or 3 bytes at a time.
+//
+// In the case a longer code is encountered, we fall back to the original
+// Huffman decoder that supports all code lengths.
+int64_t
+lshpack_dec_huff_decode(uint8_t const *src, uint32_t src_len, char *dst,
uint32_t dst_len)
+{
+ char *const orig_dst = dst;
+ uint8_t const *const src_end = src + src_len;
+ char *const dst_end = dst + dst_len;
+ uintptr_t buf = 0;
+ unsigned avail_bits, len;
+ struct hdec hdec = {0, {0, 0, 0}};
+ uint16_t idx;
+ int64_t r;
+
+ avail_bits = 0;
+ while (true) {
+ if (src + sizeof(buf) <= src_end) {
+ len = (sizeof(buf) * 8 - avail_bits) >> 3;
+ avail_bits += len << 3;
+ switch (len) {
+#if UINTPTR_MAX == 18446744073709551615ull
+ case 8:
+ buf <<= 8;
+ buf |= static_cast<uintptr_t>(*src++);
+ [[fallthrough]];
+ case 7:
+ buf <<= 8;
+ buf |= static_cast<uintptr_t>(*src++);
+ [[fallthrough]];
+ default:
+ buf <<= 48;
+ buf |= static_cast<uintptr_t>(*src++) << 40;
+ buf |= static_cast<uintptr_t>(*src++) << 32;
+ buf |= static_cast<uintptr_t>(*src++) << 24;
+ buf |= static_cast<uintptr_t>(*src++) << 16;
+#else
+ [[fallthrough]];
+ case 4:
+ buf <<= 8;
+ buf |= static_cast<uintptr_t>(*src++);
+ [[fallthrough]];
+ case 3:
+ buf <<= 8;
+ buf |= static_cast<uintptr_t>(*src++);
+ [[fallthrough]];
+ default:
+ buf <<= 16;
+#endif
+ buf |= static_cast<uintptr_t>(*src++) << 8;
+ buf |= static_cast<uintptr_t>(*src++) << 0;
+ }
+ } else if (src < src_end) {
+ do {
+ buf <<= 8;
+ buf |= static_cast<uintptr_t>(*src++);
+ avail_bits += 8;
+ } while (src < src_end && avail_bits <= sizeof(buf) * 8 - 8);
+ } else {
+ break; // Normal case terminating condition: out of input
+ }
+
+ if (dst_end - dst >= static_cast<ptrdiff_t>(8 * sizeof(buf) /
SHORTEST_CODE) && avail_bits >= 16) {
+ // Fast path: don't check destination bounds
+ do {
+ idx = static_cast<uint16_t>(buf >> (avail_bits - 16));
+ hdec = hdecs[idx];
+ dst[0] = static_cast<char>(hdec.out[0]);
+ dst[1] = static_cast<char>(hdec.out[1]);
+ dst[2] = static_cast<char>(hdec.out[2]);
+ dst += hdec.lens & 3;
+ avail_bits -= hdec.lens >> 2;
+ } while (avail_bits >= 16 && hdec.lens);
+ if (avail_bits < 16) {
+ continue;
+ }
+ goto slow_path;
+ } else {
+ while (avail_bits >= 16) {
+ idx = static_cast<uint16_t>(buf >> (avail_bits - 16));
+ hdec = hdecs[idx];
+ len = hdec.lens & 3;
+ if (len && dst + len <= dst_end) {
+ switch (len) {
+ case 3:
+ *dst++ = static_cast<char>(hdec.out[0]);
+ *dst++ = static_cast<char>(hdec.out[1]);
+ *dst++ = static_cast<char>(hdec.out[2]);
+ break;
+ case 2:
+ *dst++ = static_cast<char>(hdec.out[0]);
+ *dst++ = static_cast<char>(hdec.out[1]);
+ break;
+ default:
+ *dst++ = static_cast<char>(hdec.out[0]);
+ break;
+ }
+ avail_bits -= hdec.lens >> 2;
+ } else if (dst + len > dst_end) {
+ r = dst_end - dst - static_cast<ptrdiff_t>(len);
+ if (r > LSHPACK_ERR_MORE_BUF) {
+ r = LSHPACK_ERR_MORE_BUF;
+ }
+ return r;
+ } else {
+ goto slow_path;
+ }
+ }
+ }
+ }
+
+ if (avail_bits >= SHORTEST_CODE) {
+ idx = static_cast<uint16_t>(buf << (16 - avail_bits));
+ idx |= (1 << (16 - avail_bits)) - 1; // EOF
+ if (idx == 0xFFFF && avail_bits < 8) {
+ goto end;
+ }
+ // If a byte or more of input is left, this means there is a valid
+ // encoding, not just EOF.
+ hdec = hdecs[idx];
+ len = hdec.lens & 3;
+ if ((static_cast<unsigned>(hdec.lens) >> 2) > avail_bits) {
+ return -1;
+ }
+ if (len && dst + len <= dst_end) {
+ switch (len) {
+ case 3:
+ *dst++ = static_cast<char>(hdec.out[0]);
+ *dst++ = static_cast<char>(hdec.out[1]);
+ *dst++ = static_cast<char>(hdec.out[2]);
+ break;
+ case 2:
+ *dst++ = static_cast<char>(hdec.out[0]);
+ *dst++ = static_cast<char>(hdec.out[1]);
+ break;
+ default:
+ *dst++ = static_cast<char>(hdec.out[0]);
+ break;
+ }
+ avail_bits -= hdec.lens >> 2;
+ } else if (dst + len > dst_end) {
+ r = dst_end - dst - static_cast<ptrdiff_t>(len);
+ if (r > LSHPACK_ERR_MORE_BUF) {
+ r = LSHPACK_ERR_MORE_BUF;
+ }
+ return r;
+ } else {
+ // This must be an invalid code, otherwise it would have fit
+ return -1;
+ }
+ }
+
+ if (avail_bits > 0) {
+ // ATS: unlike upstream ls-hpack, also reject padding of 8 or more bits
+ // (possible here after the final symbol consumed only part of the tail).
+ // RFC 7541 5.2: "A padding strictly longer than 7 bits MUST be treated as
+ // a decoding error." This keeps the strictness of the 4-bit FSM decoder.
+ if (avail_bits >= 8 || ((1u << avail_bits) - 1) != (buf & ((1u <<
avail_bits) - 1))) {
+ return -1; // Not EOF as expected
+ }
+ }
+
+end:
+ return dst - orig_dst;
+
+slow_path:
+ // Find previous byte boundary and finish decoding thence.
+ while ((avail_bits & 7) && dst > orig_dst) {
+ avail_bits += encode_table[static_cast<uint8_t>(*--dst)].bits;
+ }
+ assert((avail_bits & 7) == 0);
+ src -= avail_bits >> 3;
+ r = lshpack_dec_huff_decode_full(src, static_cast<uint32_t>(src_end - src),
dst, static_cast<uint32_t>(dst_end - dst));
+ if (r >= 0) {
+ return dst - orig_dst + r;
+ }
+ return r;
+}
+
} // namespace litespeed
diff --git a/lib/ls-hpack/lshpack.h b/lib/ls-hpack/lshpack.h
index 7069317b28..c64c1f19cc 100644
--- a/lib/ls-hpack/lshpack.h
+++ b/lib/ls-hpack/lshpack.h
@@ -59,10 +59,16 @@ SOFTWARE.
namespace litespeed {
+// Matches upstream lshpack.h's LSHPACK_ERR_MORE_BUF.
+constexpr int64_t LSHPACK_ERR_MORE_BUF = -3;
+
int64_t lshpack_enc_huff_encode(uint8_t const *src,
uint8_t const *const src_end, uint8_t *dst, uint32_t dst_len);
int64_t lshpack_dec_huff_decode_full(uint8_t const *src, uint32_t src_len,
char *dst, uint32_t dst_len);
+int64_t lshpack_dec_huff_decode(uint8_t const *src, uint32_t src_len,
+ char *dst, uint32_t dst_len);
+
} // namespace litespeed
diff --git a/src/proxy/hdrs/CMakeLists.txt b/src/proxy/hdrs/CMakeLists.txt
index 45a084dd6e..4d036a654d 100644
--- a/src/proxy/hdrs/CMakeLists.txt
+++ b/src/proxy/hdrs/CMakeLists.txt
@@ -64,6 +64,8 @@ if(BUILD_TESTING)
lshpack
configmanager
)
+ # For the ls-hpack decoder parity tests.
+ target_include_directories(test_proxy_hdrs PRIVATE ${CMAKE_SOURCE_DIR}/lib)
add_catch2_test(NAME test_proxy_hdrs COMMAND test_proxy_hdrs)
add_executable(test_proxy_hdrs_xpack unit_tests/test_XPACK.cc)
diff --git a/src/proxy/hdrs/HuffmanCodec.cc b/src/proxy/hdrs/HuffmanCodec.cc
index e8a1427584..5f144d7294 100644
--- a/src/proxy/hdrs/HuffmanCodec.cc
+++ b/src/proxy/hdrs/HuffmanCodec.cc
@@ -26,11 +26,11 @@
#include <cstdint>
// Implementation taken from LiteSpeed:
-// lshpack_dec_huff_decode_full
+// lshpack_dec_huff_decode
int64_t
huffman_decode(char *dst, uint32_t dst_len, const uint8_t *src, uint32_t
src_len)
{
- return litespeed::lshpack_dec_huff_decode_full(src, src_len, dst, dst_len);
+ return litespeed::lshpack_dec_huff_decode(src, src_len, dst, dst_len);
}
// Implementation taken from LiteSpeed:
diff --git a/src/proxy/hdrs/unit_tests/test_Huffmancode.cc
b/src/proxy/hdrs/unit_tests/test_Huffmancode.cc
index 4b942d1f88..6a7c2cb22a 100644
--- a/src/proxy/hdrs/unit_tests/test_Huffmancode.cc
+++ b/src/proxy/hdrs/unit_tests/test_Huffmancode.cc
@@ -22,10 +22,14 @@
*/
#include "proxy/hdrs/HuffmanCodec.h"
+#include "ls-hpack/lshpack.h"
+#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <cassert>
#include <cstring>
+#include <random>
+#include <vector>
#include <catch2/catch_test_macros.hpp>
using namespace std;
@@ -207,3 +211,136 @@ TEST_CASE("decode_errors", "[proxy][huffman]")
free(dst);
}
}
+
+TEST_CASE("decode_known_vectors", "[proxy][huffman]")
+{
+ for (const auto &i : huffman_encode_test_data) {
+ char dst[64];
+ int64_t len = huffman_decode(dst, sizeof(dst), i.expect, i.expect_len);
+
+ REQUIRE(len == i.src_len);
+ REQUIRE(memcmp(dst, i.src, len) == 0);
+ }
+}
+
+// The fast table-based decoder must agree with the original 4-bit FSM decoder
+// on input validity and decoded content. The full decoder run with a roomy
+// destination serves as the oracle. The one permitted difference is an
+// exactly-sized destination (dst_len == decoded length): depending on how the
+// trailing padding falls on nibble boundaries, either decoder may report
+// LSHPACK_ERR_MORE_BUF where the other succeeds. One byte of headroom
+// guarantees success for both; ATS callers always allocate twice the encoded
+// length, which is strictly more than any decoded length. Sentinel bytes
+// verify nothing is written past dst_len.
+static void
+require_decoder_parity(const uint8_t *src, uint32_t src_len, uint32_t dst_len)
+{
+ constexpr size_t SENTINEL_LEN = 64;
+ std::vector<char> oracle_buf(2 * src_len + 8);
+ int64_t oracle = litespeed::lshpack_dec_huff_decode_full(src,
src_len, oracle_buf.data(), oracle_buf.size());
+
+ std::vector<char> fast_buf(dst_len + SENTINEL_LEN, '\xa5');
+ int64_t fast = litespeed::lshpack_dec_huff_decode(src, src_len,
fast_buf.data(), dst_len);
+
+ if (oracle < 0 || static_cast<uint64_t>(oracle) > dst_len) {
+ REQUIRE(fast < 0);
+ } else if (static_cast<uint64_t>(oracle) == dst_len && fast < 0) {
+ REQUIRE(fast == litespeed::LSHPACK_ERR_MORE_BUF); // exact-fit destination
+ } else {
+ REQUIRE(fast == oracle);
+ REQUIRE(memcmp(fast_buf.data(), oracle_buf.data(), oracle) == 0);
+ }
+ REQUIRE(std::all_of(fast_buf.begin() + dst_len, fast_buf.end(), [](char c) {
return c == '\xa5'; }));
+}
+
+TEST_CASE("decoder_parity_exhaustive_short", "[proxy][huffman]")
+{
+ uint8_t src[2];
+
+ for (uint32_t hi = 0; hi < 256; ++hi) {
+ src[0] = static_cast<uint8_t>(hi);
+ for (uint32_t dst_len = 0; dst_len <= 4; ++dst_len) {
+ require_decoder_parity(src, 1, dst_len);
+ }
+ for (uint32_t lo = 0; lo < 256; ++lo) {
+ src[1] = static_cast<uint8_t>(lo);
+ require_decoder_parity(src, 2, 8);
+ }
+ }
+}
+
+TEST_CASE("decoder_parity_fuzz", "[proxy][huffman]")
+{
+ std::mt19937 rng(0x5eed);
+ std::uniform_int_distribution<int> len_dist(0, 64);
+ std::uniform_int_distribution<int> byte_dist(0, 255);
+ uint8_t src[64];
+
+ for (int i = 0; i < 100000; ++i) {
+ int src_len = len_dist(rng);
+ for (int j = 0; j < src_len; ++j) {
+ src[j] = static_cast<uint8_t>(byte_dist(rng));
+ }
+ // Bias some iterations toward trailing 0xff runs to exercise the
+ // EOS-prefix and padding paths.
+ if (i % 4 == 0 && src_len > 0) {
+ for (int j = byte_dist(rng) % src_len; j < src_len; ++j) {
+ src[j] = 0xff;
+ }
+ }
+ // Mostly roomy destination, sometimes a tight one to exercise the
+ // bounds-checked path and LSHPACK_ERR_MORE_BUF.
+ uint32_t dst_len = (i % 8 == 0) ? static_cast<uint32_t>(byte_dist(rng) %
16) : static_cast<uint32_t>(2 * src_len + 8);
+ require_decoder_parity(src, src_len, dst_len);
+ }
+}
+
+// Streams whose final symbol is followed by 8 or more bits of all-ones
+// padding. RFC 7541 5.2 requires treating padding longer than 7 bits as a
+// decoding error. Upstream ls-hpack's fast decoder accepts these; the ATS
+// copy deliberately rejects them (see the tail check in
+// lib/ls-hpack/lshpack.cc), and this test pins that divergence against a
+// future re-sync. The exhaustive parity test cannot cover this branch: it is
+// only reachable with inputs of four or more bytes.
+TEST_CASE("decode_overlong_padding", "[proxy][huffman]")
+{
+ const static struct {
+ const uint8_t *input;
+ uint32_t input_len;
+ } test_cases[] = {
+ {reinterpret_cast<const uint8_t *>("\xbf\xd0\x37\xd9\xff"),
5 },
+ {reinterpret_cast<const uint8_t
*>("\xd9\x68\x63\x84\x4c\xfe\x77\xa0\xbc\xc9\xff"), 11},
+ };
+
+ for (const auto &tc : test_cases) {
+ char dst[64];
+ REQUIRE(litespeed::lshpack_dec_huff_decode(tc.input, tc.input_len, dst,
sizeof(dst)) == -1);
+ REQUIRE(litespeed::lshpack_dec_huff_decode_full(tc.input, tc.input_len,
dst, sizeof(dst)) == -1);
+ }
+}
+
+TEST_CASE("decoder_roundtrip_fuzz", "[proxy][huffman]")
+{
+ std::mt19937 rng(0xc0ffee);
+ std::uniform_int_distribution<int> len_dist(0, 300);
+ std::uniform_int_distribution<int> byte_dist(0, 255);
+
+ for (int i = 0; i < 20000; ++i) {
+ uint32_t src_len = static_cast<uint32_t>(len_dist(rng));
+ std::vector<uint8_t> src(src_len);
+ for (auto &c : src) {
+ c = static_cast<uint8_t>(byte_dist(rng));
+ }
+
+ // Worst-case Huffman expansion is 30 bits per input byte.
+ std::vector<uint8_t> encoded(src_len * 4 + 8);
+ int64_t enc_len = huffman_encode(encoded.data(),
encoded.size(), src.data(), src_len);
+ REQUIRE(enc_len >= 0);
+
+ // One byte of headroom guarantees success (see require_decoder_parity).
+ std::vector<char> decoded(src_len + 1);
+ int64_t dec_len = huffman_decode(decoded.data(), src_len + 1,
encoded.data(), enc_len);
+ REQUIRE(dec_len == static_cast<int64_t>(src_len));
+ REQUIRE(memcmp(decoded.data(), src.data(), src_len) == 0);
+ }
+}
diff --git a/tools/benchmark/CMakeLists.txt b/tools/benchmark/CMakeLists.txt
index 49f25fad1c..e58e8dcf26 100644
--- a/tools/benchmark/CMakeLists.txt
+++ b/tools/benchmark/CMakeLists.txt
@@ -47,3 +47,7 @@ target_link_libraries(
ts::inkcache
ts::inkhostdb
)
+
+add_executable(benchmark_HuffmanDecode benchmark_HuffmanDecode.cc)
+target_link_libraries(benchmark_HuffmanDecode PRIVATE Catch2::Catch2WithMain
lshpack)
+target_include_directories(benchmark_HuffmanDecode PRIVATE
${CMAKE_SOURCE_DIR}/lib)
diff --git a/tools/benchmark/benchmark_HuffmanDecode.cc
b/tools/benchmark/benchmark_HuffmanDecode.cc
new file mode 100644
index 0000000000..188501a5f0
--- /dev/null
+++ b/tools/benchmark/benchmark_HuffmanDecode.cc
@@ -0,0 +1,142 @@
+/** @file
+
+ Benchmark comparing the two ls-hpack Huffman decoders: the original 4-bit
+ FSM decoder (lshpack_dec_huff_decode_full) and the 16-bit table decoder
+ (lshpack_dec_huff_decode).
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#include "ls-hpack/lshpack.h"
+
+#include <cstdint>
+#include <cstring>
+#include <string_view>
+#include <vector>
+
+#define CATCH_CONFIG_ENABLE_BENCHMARKING
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/benchmark/catch_benchmark.hpp>
+
+namespace
+{
+
+// Representative header field values seen on the wire.
+constexpr std::string_view SHORT_VALUE = "text/css";
+constexpr std::string_view MEDIUM_VALUE =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
+constexpr std::string_view LONG_VALUE =
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like
Gecko) Chrome/126.0.0.0 Safari/537.36";
+constexpr std::string_view CORPUS[] = {
+ SHORT_VALUE,
+ MEDIUM_VALUE,
+ LONG_VALUE,
+ "/",
+ "gzip, deflate, br, zstd",
+ "Mon, 21 Oct 2013 20:13:21 GMT",
+ "https://www.example.com/images/2026/06/some-article/hero.webp?w=1200&q=75",
+ "max-age=31536000, public, immutable",
+ "session=8f14e45fceea167a5a36dedd4bea2543; theme=dark; region=us-west-2;
ab_bucket=37",
+ "ATS/10.1.0",
+};
+
+std::vector<uint8_t>
+encode(std::string_view text)
+{
+ std::vector<uint8_t> encoded(text.size() * 4 + 8);
+ int64_t len =
litespeed::lshpack_enc_huff_encode(reinterpret_cast<const uint8_t
*>(text.data()), //
+
reinterpret_cast<const uint8_t *>(text.data()) + text.size(), encoded.data(),
+
static_cast<uint32_t>(encoded.size()));
+ REQUIRE(len > 0);
+ encoded.resize(len);
+ return encoded;
+}
+
+} // namespace
+
+TEST_CASE("huffman decode: full vs fast", "[bench][huffman]")
+{
+ char dst[4096];
+
+ // Sanity: both decoders agree on the corpus.
+ for (auto text : CORPUS) {
+ auto encoded = encode(text);
+ int64_t full = litespeed::lshpack_dec_huff_decode_full(encoded.data(),
encoded.size(), dst, sizeof(dst));
+ REQUIRE(full == static_cast<int64_t>(text.size()));
+ REQUIRE(memcmp(dst, text.data(), text.size()) == 0);
+ int64_t fast = litespeed::lshpack_dec_huff_decode(encoded.data(),
encoded.size(), dst, sizeof(dst));
+ REQUIRE(fast == full);
+ REQUIRE(memcmp(dst, text.data(), text.size()) == 0);
+ }
+
+ auto short_enc = encode(SHORT_VALUE);
+ auto medium_enc = encode(MEDIUM_VALUE);
+ auto long_enc = encode(LONG_VALUE);
+
+ BENCHMARK("full: short (8B)")
+ {
+ return litespeed::lshpack_dec_huff_decode_full(short_enc.data(),
short_enc.size(), dst, sizeof(dst));
+ };
+ BENCHMARK("fast: short (8B)")
+ {
+ return litespeed::lshpack_dec_huff_decode(short_enc.data(),
short_enc.size(), dst, sizeof(dst));
+ };
+
+ BENCHMARK("full: medium (86B)")
+ {
+ return litespeed::lshpack_dec_huff_decode_full(medium_enc.data(),
medium_enc.size(), dst, sizeof(dst));
+ };
+ BENCHMARK("fast: medium (86B)")
+ {
+ return litespeed::lshpack_dec_huff_decode(medium_enc.data(),
medium_enc.size(), dst, sizeof(dst));
+ };
+
+ BENCHMARK("full: long (113B)")
+ {
+ return litespeed::lshpack_dec_huff_decode_full(long_enc.data(),
long_enc.size(), dst, sizeof(dst));
+ };
+ BENCHMARK("fast: long (113B)")
+ {
+ return litespeed::lshpack_dec_huff_decode(long_enc.data(),
long_enc.size(), dst, sizeof(dst));
+ };
+
+ std::vector<std::vector<uint8_t>> corpus_enc;
+ size_t corpus_bytes = 0;
+ for (auto text : CORPUS) {
+ corpus_enc.push_back(encode(text));
+ corpus_bytes += text.size();
+ }
+ WARN("corpus: " << corpus_bytes << " decoded bytes per iteration");
+
+ BENCHMARK("full: corpus")
+ {
+ int64_t acc = 0;
+ for (auto const &encoded : corpus_enc) {
+ acc += litespeed::lshpack_dec_huff_decode_full(encoded.data(),
encoded.size(), dst, sizeof(dst));
+ }
+ return acc;
+ };
+ BENCHMARK("fast: corpus")
+ {
+ int64_t acc = 0;
+ for (auto const &encoded : corpus_enc) {
+ acc += litespeed::lshpack_dec_huff_decode(encoded.data(),
encoded.size(), dst, sizeof(dst));
+ }
+ return acc;
+ };
+}