Copilot commented on code in PR #13072:
URL: https://github.com/apache/trafficserver/pull/13072#discussion_r3054443691
##########
plugins/experimental/jax_fingerprint/ja4/tls_client_hello_summary.cc:
##########
@@ -22,91 +22,178 @@
*/
+#include "ts/ts.h"
+#include <plugin.h>
#include "ja4.h"
-#include <algorithm>
-#include <array>
+#include "tls_client_hello_summary.h"
+
+#include <openssl/sha.h>
#include <cstdint>
-#include <functional>
-#include <vector>
-namespace
+TLSClientHelloSummary::TLSClientHelloSummary(ja4::Datasource::Protocol
protocol, TSClientHello ch) : _ch(ch)
{
+ const uint8_t *buf;
+ size_t buflen;
+
+ // Protocol
+ this->_protocol = protocol;
+
+ // Version
+ if (TS_SUCCESS == TSClientHelloExtensionGet(this->_ch,
EXT_SUPPORTED_VERSIONS, &buf, &buflen)) {
+ uint16_t max_version{0};
+ size_t versions_len = buf[0];
+
+ if (buflen < versions_len + 1) {
+ Dbg(dbg_ctl, "Malformed supported_versions extension (truncated
vector)... using legacy version.");
+ this->_version = this->_ch.get_version();
+ } else {
+ for (size_t i = 1; (i + 1) < (versions_len + 1); i += 2) {
+ uint16_t version = (buf[i] << 8) | buf[i + 1];
+ if (!this->_is_GREASE(version) && version > max_version) {
+ max_version = version;
+ }
+ }
+ this->_version = max_version;
+ }
+ } else {
+ Dbg(dbg_ctl, "No supported_versions extension... using legacy version.");
+ this->_version = this->_ch.get_version();
+ }
-constexpr std::array<std::uint16_t, 16> GREASE_values{0x0a0a, 0x1a1a, 0x2a2a,
0x3a3a, 0x4a4a, 0x5a5a, 0x6a6a, 0x7a7a,
- 0x8a8a, 0x9a9a, 0xaaaa,
0xbaba, 0xcaca, 0xdada, 0xeaea, 0xfafa};
-constexpr std::uint16_t extension_SNI{0x0};
-constexpr std::uint16_t extension_ALPN{0x10};
-
-} // end anonymous namespace
+ // Ciphers
+ buf = this->_ch.get_cipher_suites();
+ buflen = this->_ch.get_cipher_suites_len();
+
+ if (buflen / 2 <= MAX_CIPHERS_FOR_FAST_PATH) {
+ // Fast path
+ this->_ciphers = this->_fast_cipher_storage.data();
+ } else {
+ // Slow path
+ this->_slow_cipher_storage = std::make_unique<uint16_t[]>(buflen / 2);
+ this->_ciphers = this->_slow_cipher_storage.get();
+ }
+ for (size_t i = 0; i + 1 < buflen; i += 2) {
+ uint16_t cipher = (static_cast<uint16_t>(buf[i]) << 8) + buf[i + 1];
+ if (this->_is_GREASE(cipher)) {
+ continue;
+ }
+ this->_ciphers[++this->_n_ciphers] = cipher;
+ }
+ std::sort(this->_ciphers, this->_ciphers + this->_n_ciphers);
Review Comment:
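Cipher suite storage uses pre-increment when writing into the array
(`_ciphers[++_n_ciphers] = ...`). Since `_n_ciphers` starts at 0, the first
cipher is stored at index 1 and index 0 is left uninitialized. The subsequent
`std::sort(_ciphers, _ciphers + _n_ciphers)` (and the hashing) then covers the
wrong range: it includes the unwritten slot 0 and omits the last cipher stored.
The final write can also land one element past the allocated buffer when the
storage is sized to exactly the cipher count (e.g. the slow path's `buflen / 2`
allocation with no GREASE values skipped). Write to `_ciphers[_n_ciphers++]`
(or keep a separate write index) so the first element is stored at index 0 and
the sorted range matches the count.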
##########
plugins/experimental/jax_fingerprint/ja4/tls_client_hello_summary.cc:
##########
@@ -22,91 +22,178 @@
*/
+#include "ts/ts.h"
+#include <plugin.h>
#include "ja4.h"
-#include <algorithm>
-#include <array>
+#include "tls_client_hello_summary.h"
+
+#include <openssl/sha.h>
#include <cstdint>
-#include <functional>
-#include <vector>
-namespace
+TLSClientHelloSummary::TLSClientHelloSummary(ja4::Datasource::Protocol
protocol, TSClientHello ch) : _ch(ch)
{
+ const uint8_t *buf;
+ size_t buflen;
+
+ // Protocol
+ this->_protocol = protocol;
+
+ // Version
+ if (TS_SUCCESS == TSClientHelloExtensionGet(this->_ch,
EXT_SUPPORTED_VERSIONS, &buf, &buflen)) {
+ uint16_t max_version{0};
+ size_t versions_len = buf[0];
+
+ if (buflen < versions_len + 1) {
+ Dbg(dbg_ctl, "Malformed supported_versions extension (truncated
vector)... using legacy version.");
+ this->_version = this->_ch.get_version();
+ } else {
+ for (size_t i = 1; (i + 1) < (versions_len + 1); i += 2) {
+ uint16_t version = (buf[i] << 8) | buf[i + 1];
+ if (!this->_is_GREASE(version) && version > max_version) {
+ max_version = version;
+ }
+ }
+ this->_version = max_version;
Review Comment:
`supported_versions` parsing reads `buf[0]` without first validating `buflen
>= 1`, which can lead to out-of-bounds reads if the extension is
empty/truncated. Add a length check before accessing `buf[0]` and fall back to
the legacy version when malformed.
```suggestion
    if (buflen < 1) {
      Dbg(dbg_ctl, "Malformed supported_versions extension (empty/truncated)... using legacy version.");
      this->_version = this->_ch.get_version();
    } else {
      size_t versions_len = buf[0];
      if (buflen < versions_len + 1) {
        Dbg(dbg_ctl, "Malformed supported_versions extension (truncated vector)... using legacy version.");
        this->_version = this->_ch.get_version();
      } else {
        for (size_t i = 1; (i + 1) < (versions_len + 1); i += 2) {
          uint16_t version = (buf[i] << 8) | buf[i + 1];
          if (!this->_is_GREASE(version) && version > max_version) {
            max_version = version;
          }
        }
        this->_version = max_version;
      }
```
##########
plugins/experimental/jax_fingerprint/ja4/tls_client_hello_summary.cc:
##########
@@ -22,91 +22,178 @@
*/
+#include "ts/ts.h"
+#include <plugin.h>
#include "ja4.h"
-#include <algorithm>
-#include <array>
+#include "tls_client_hello_summary.h"
+
+#include <openssl/sha.h>
#include <cstdint>
-#include <functional>
-#include <vector>
-namespace
+TLSClientHelloSummary::TLSClientHelloSummary(ja4::Datasource::Protocol
protocol, TSClientHello ch) : _ch(ch)
{
+ const uint8_t *buf;
+ size_t buflen;
+
+ // Protocol
+ this->_protocol = protocol;
+
+ // Version
+ if (TS_SUCCESS == TSClientHelloExtensionGet(this->_ch,
EXT_SUPPORTED_VERSIONS, &buf, &buflen)) {
+ uint16_t max_version{0};
+ size_t versions_len = buf[0];
+
+ if (buflen < versions_len + 1) {
+ Dbg(dbg_ctl, "Malformed supported_versions extension (truncated
vector)... using legacy version.");
+ this->_version = this->_ch.get_version();
+ } else {
+ for (size_t i = 1; (i + 1) < (versions_len + 1); i += 2) {
+ uint16_t version = (buf[i] << 8) | buf[i + 1];
+ if (!this->_is_GREASE(version) && version > max_version) {
+ max_version = version;
+ }
+ }
+ this->_version = max_version;
+ }
+ } else {
+ Dbg(dbg_ctl, "No supported_versions extension... using legacy version.");
+ this->_version = this->_ch.get_version();
+ }
-constexpr std::array<std::uint16_t, 16> GREASE_values{0x0a0a, 0x1a1a, 0x2a2a,
0x3a3a, 0x4a4a, 0x5a5a, 0x6a6a, 0x7a7a,
- 0x8a8a, 0x9a9a, 0xaaaa,
0xbaba, 0xcaca, 0xdada, 0xeaea, 0xfafa};
-constexpr std::uint16_t extension_SNI{0x0};
-constexpr std::uint16_t extension_ALPN{0x10};
-
-} // end anonymous namespace
+ // Ciphers
+ buf = this->_ch.get_cipher_suites();
+ buflen = this->_ch.get_cipher_suites_len();
+
+ if (buflen / 2 <= MAX_CIPHERS_FOR_FAST_PATH) {
+ // Fast path
+ this->_ciphers = this->_fast_cipher_storage.data();
+ } else {
+ // Slow path
+ this->_slow_cipher_storage = std::make_unique<uint16_t[]>(buflen / 2);
+ this->_ciphers = this->_slow_cipher_storage.get();
+ }
+ for (size_t i = 0; i + 1 < buflen; i += 2) {
+ uint16_t cipher = (static_cast<uint16_t>(buf[i]) << 8) + buf[i + 1];
+ if (this->_is_GREASE(cipher)) {
+ continue;
+ }
+ this->_ciphers[++this->_n_ciphers] = cipher;
+ }
+ std::sort(this->_ciphers, this->_ciphers + this->_n_ciphers);
-static bool is_ignored_non_GREASE_extension(std::uint16_t extension);
+ // Extensions
+ auto count = 0;
+ for (auto &&type : this->_ch.get_extension_types()) {
+ (void)type;
+ ++count;
+ }
+ if (count <= MAX_EXTENSIONS_FOR_FAST_PATH) {
+ // Fast path
+ this->_extensions = this->_fast_extension_storage.data();
+ } else {
+ // Slow path
+ this->_slow_extension_storage = std::make_unique<uint16_t[]>(count);
+ this->_extensions = this->_slow_extension_storage.get();
+ }
+ for (auto &&type : this->_ch.get_extension_types()) {
+ if (type == EXT_SNI) {
+ this->_has_SNI = true;
+ continue;
+ }
+ if (type == EXT_ALPN) {
+ this->_has_ALPN = true;
+ continue;
+ }
+ if (this->_is_GREASE(type)) {
+ continue;
+ }
+ this->_extensions[++this->_n_extensions] = type;
+ }
+ std::sort(this->_extensions, this->_extensions + this->_n_extensions);
+}
-std::vector<std::uint16_t> const &
-JA4::TLSClientHelloSummary::get_ciphers() const
+std::string_view
+TLSClientHelloSummary::get_first_alpn()
{
- return this->_ciphers;
+ unsigned char const *buf{};
+ std::size_t buflen{};
+
+ if (TS_SUCCESS == TSClientHelloExtensionGet(this->_ch, EXT_ALPN, &buf,
&buflen)) {
+ // The first two bytes are a 16bit encoding of the total length.
+ unsigned char first_ALPN_length{buf[2]};
+ TSAssert(buflen > 4);
+ TSAssert(0 != first_ALPN_length);
+ return {reinterpret_cast<const char *>(&(buf[3])), first_ALPN_length};
Review Comment:
`get_first_alpn()` reads `buf[2]` before verifying `buflen` is large enough,
and it doesn't validate that the first ALPN string fits within `buflen`.
Replace the `TSAssert`-only checks with explicit `buflen`/length validation
before dereferencing, and return empty on malformed ALPN data instead of
risking OOB/abort.
```suggestion
    // The first two bytes are a 16-bit encoding of the total protocol name list length.
    if (buflen < 3) {
      return {};
    }
    std::size_t const alpn_list_length = (static_cast<std::size_t>(buf[0]) << 8) | static_cast<std::size_t>(buf[1]);
    if (alpn_list_length == 0 || buflen < 2 + alpn_list_length) {
      return {};
    }
    unsigned char const first_ALPN_length{buf[2]};
    if (first_ALPN_length == 0 || static_cast<std::size_t>(first_ALPN_length) > (alpn_list_length - 1) ||
        buflen < 3 + static_cast<std::size_t>(first_ALPN_length)) {
      return {};
    }
    return {reinterpret_cast<char const *>(&(buf[3])), first_ALPN_length};
```
##########
plugins/experimental/jax_fingerprint/ja4/test.cc:
##########
@@ -0,0 +1,531 @@
+/** @file
+ *
+ Unit tests for JA4 fingerprint calculation.
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ */
+
+#include "ja4.h"
+#include "datasource.h"
+
+#include <catch2/catch_test_macros.hpp>
+#include <openssl/sha.h>
+
+#include <algorithm>
+#include <cctype>
+#include <cstring>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+
+namespace
+{
+
+class MockDatasource : public ja4::Datasource
+{
+public:
+ std::string_view
+ get_first_alpn() override
+ {
+ return this->_first_alpn;
+ }
+
+ void
+ get_cipher_suites_hash(unsigned char out[32]) override
+ {
+ if (this->_ciphers.empty()) {
+ memset(out, 0, 32);
+ return;
+ }
+ auto sorted = this->_ciphers;
+ std::sort(sorted.begin(), sorted.end());
+ SHA256_CTX ctx;
+ SHA256_Init(&ctx);
+ for (size_t i = 0; i < sorted.size(); ++i) {
+ char buf[5];
+ char *p = buf;
+ if (i != 0) {
+ *p = ',';
+ p += 1;
+ }
+ uint16_t c = sorted[i];
+ uint8_t h1 = (c & 0xF000) >> 12;
+ uint8_t l1 = (c & 0x0F00) >> 8;
+ uint8_t h2 = (c & 0x00F0) >> 4;
+ uint8_t l2 = c & 0x000F;
+ p[0] = h1 <= 9 ? ('0' + h1) : ('a' + h1 - 10);
+ p[1] = l1 <= 9 ? ('0' + l1) : ('a' + l1 - 10);
+ p[2] = h2 <= 9 ? ('0' + h2) : ('a' + h2 - 10);
+ p[3] = l2 <= 9 ? ('0' + l2) : ('a' + l2 - 10);
+ p += 4;
+ SHA256_Update(&ctx, buf, p - buf);
+ }
+ SHA256_Final(out, &ctx);
+ }
+
+ void
+ get_extension_hash(unsigned char out[32]) override
+ {
+ if (this->_extensions.empty()) {
+ memset(out, 0, 32);
+ return;
+ }
+ auto sorted = this->_extensions;
+ std::sort(sorted.begin(), sorted.end());
+ SHA256_CTX ctx;
+ SHA256_Init(&ctx);
+ for (size_t i = 0; i < sorted.size(); ++i) {
+ char buf[5];
+ char *p = buf;
+ if (i != 0) {
+ *p = ',';
+ p += 1;
+ }
+ uint16_t e = sorted[i];
+ uint8_t h1 = (e & 0xF000) >> 12;
+ uint8_t l1 = (e & 0x0F00) >> 8;
+ uint8_t h2 = (e & 0x00F0) >> 4;
+ uint8_t l2 = e & 0x000F;
+ p[0] = h1 <= 9 ? ('0' + h1) : ('a' + h1 - 10);
+ p[1] = l1 <= 9 ? ('0' + l1) : ('a' + l1 - 10);
+ p[2] = h2 <= 9 ? ('0' + h2) : ('a' + h2 - 10);
+ p[3] = l2 <= 9 ? ('0' + l2) : ('a' + l2 - 10);
+ p += 4;
+ SHA256_Update(&ctx, buf, p - buf);
+ }
+ SHA256_Final(out, &ctx);
+ }
+
+ void
+ set_protocol(ja4::Datasource::Protocol protocol)
+ {
+ this->_protocol = protocol;
+ }
+ void
+ set_version(int version)
+ {
+ this->_version = version;
+ }
+ void
+ set_first_alpn(std::string first_alpn)
+ {
+ this->_first_alpn = first_alpn;
+ }
+ void
+ add_cipher(std::uint16_t cipher)
+ {
+ if (_is_GREASE(cipher)) {
+ return;
+ }
+
+ ++this->_n_ciphers;
+ this->_ciphers.push_back(cipher);
+ }
+
+ void
+ add_extension(uint16_t extension)
+ {
+ if (EXT_SNI == extension) {
+ this->_SNI_type = SNI::to_domain;
+ this->_has_SNI = true;
+ return;
+ }
+ if (EXT_ALPN == extension) {
+ this->_has_ALPN = true;
+ return;
+ }
+ if (_is_GREASE(extension)) {
+ return;
+ }
+
+ ++this->_n_extensions;
+ this->_extensions.push_back(extension);
+ }
+
+private:
+ std::string _first_alpn;
+
+ std::vector<std::uint16_t> _ciphers;
+ std::vector<std::uint16_t> _extensions;
+ SNI _SNI_type{SNI::to_IP};
+};
+
+std::string_view
+SHA256_12(std::string_view in)
+{
+ uint8_t hash[32];
+ SHA256(reinterpret_cast<const uint8_t *>(in.data()), in.size(), hash);
+
+ static char out[12];
+ for (int i = 0; i < 6; ++i) {
+ uint8_t h = hash[i] >> 4;
+ uint8_t l = hash[i] & 0x0F;
+ out[i * 2] = h <= 9 ? '0' + h : 'a' + h - 10;
+ out[i * 2 + 1] = l <= 9 ? '0' + l : 'a' + l - 10;
+ }
+ return {out, sizeof(out)};
+}
+
+} // namespace
+
+static std::string call_JA4(ja4::Datasource &datasource);
+
+TEST_CASE("JA4")
+{
+ MockDatasource datasource{};
+
+ SECTION("Given the protocol is TCP, "
+ "when we create a JA4 fingerprint, "
+ "then the first character thereof should be 't'.")
+ {
+ datasource.set_protocol(ja4::Datasource::Protocol::TLS);
+
+ CHECK("t" == call_JA4(datasource).substr(0, 1));
+ }
+
+ SECTION("Given the protocol is QUIC, "
+ "when we create a JA4 fingerprint, "
+ "then the first character thereof should be 'q'.")
+ {
+ datasource.set_protocol(ja4::Datasource::Protocol::QUIC);
+ CHECK(call_JA4(datasource).starts_with('q'));
+ }
+
+ SECTION("Given the protocol is DTLS, "
+ "when we create a JA4 fingerprint, "
+ "then the first character thereof should be 'd'.")
+ {
+ datasource.set_protocol(ja4::Datasource::Protocol::DTLS);
+ CHECK(call_JA4(datasource).starts_with('d'));
+ }
+
+ SECTION("Given the TLS version is unknown, "
+ "when we create a JA4 fingerprint, "
+ "then indices [1,2] thereof should contain \"00\".")
+ {
+ datasource.set_version(0x123);
+ CHECK("00" == call_JA4(datasource).substr(1, 2));
+ datasource.set_version(0x234);
+ CHECK("00" == call_JA4(datasource).substr(1, 2));
+ }
+
+ SECTION("Given the TLS version is known, "
+ "when we create a JA4 fingerprint, "
+ "then indices [1,2] thereof should contain the correct value.")
+ {
+ std::unordered_map<std::uint16_t, std::string> values{
+ {0x304, "13"},
+ {0x303, "12"},
+ {0x302, "11"},
+ {0x301, "10"},
+ {0x300, "s3"},
+ {0x200, "s2"},
+ {0x100, "s1"},
+ {0xfeff, "d1"},
+ {0xfefd, "d2"},
+ {0xfefc, "d3"}
+ };
+ for (auto const &[version, expected] : values) {
+ CAPTURE(version, expected);
+ datasource.set_version(version);
+ CHECK(expected == call_JA4(datasource).substr(1, 2));
+ }
+ }
+
+ SECTION("Given the SNI extension is present, "
+ "when we create a JA4 fingerprint, "
+ "then index 3 thereof should contain 'd'.")
+ {
+ datasource.add_extension(0x0);
+ CHECK("d" == call_JA4(datasource).substr(3, 1));
+ }
+
+ SECTION("Given the SNI extension is not present, "
+ "when we create a JA4 fingerprint, "
+ "then index 3 thereof should contain 'i'.")
+ {
+ datasource.add_extension(0x31);
+ CHECK("i" == call_JA4(datasource).substr(3, 1));
+ }
+
+ SECTION("Given there is one cipher, "
+ "when we create a JA4 fingerprint, "
+ "then indices [4,5] thereof should contain \"01\".")
+ {
+ datasource.add_cipher(1);
+ CHECK("01" == call_JA4(datasource).substr(4, 2));
+ }
+
+ SECTION("Given there are 9 ciphers, "
+ "when we create a JA4 fingerprint, "
+ "then indices [4,5] thereof should contain \"09\".")
+ {
+ for (int i{0}; i < 9; ++i) {
+ datasource.add_cipher(i);
+ }
+ CHECK("09" == call_JA4(datasource).substr(4, 2));
+ }
+
+ SECTION("Given there are 10 ciphers, "
+ "when we create a JA4 fingerprint, "
+ "then indices [4,5] thereof should contain \"10\".")
+ {
+ for (int i{0}; i < 10; ++i) {
+ datasource.add_cipher(i);
+ }
+ CHECK("10" == call_JA4(datasource).substr(4, 2));
+ }
+
+ SECTION("Given there are more than 99 ciphers, "
+ "when we create a JA4 fingerprint, "
+ "then indices [4,5] thereof should contain \"99\".")
+ {
+ for (int i{0}; i < 100; ++i) {
+ datasource.add_cipher(i);
+ }
+ CHECK("99" == call_JA4(datasource).substr(4, 2));
+ }
+
+ SECTION("Given the ciphers include a GREASE value, "
+ "when we create a JA4 fingerprint, "
+ "then that value should not be included in the count.")
+ {
+ datasource.add_cipher(0x0a0a);
+ datasource.add_cipher(72);
+ CHECK("01" == call_JA4(datasource).substr(4, 2));
+ }
+
+ SECTION("Given there are no extensions, "
+ "when we create a JA4 fingerprint, "
+ "then indices [6,7] thereof should contain \"00\".")
+ {
+ CHECK("00" == call_JA4(datasource).substr(6, 2));
+ }
+
+ SECTION("Given there are 9 extensions, "
+ "when we create a JA4 fingerprint, "
+ "then indices [6,7] thereof should contain \"09\".")
+ {
+ for (int i{0}; i < 9; ++i) {
+ datasource.add_extension(i);
+ }
+ CHECK("09" == call_JA4(datasource).substr(6, 2));
+ }
+
+ SECTION("Given there are 99 extensions, "
+ "when we create a JA4 fingerprint, "
+ "then indices [6,7] thereof should contain \"99\".")
+ {
+ for (int i{0}; i < 99; ++i) {
+ datasource.add_extension(i);
+ }
+ CHECK("99" == call_JA4(datasource).substr(6, 2));
+ }
+
+ SECTION("Given there are more than 99 extensions, "
+ "when we create a JA4 fingerprint, "
+ "then indices [6,7] thereof should contain \"99\".")
+ {
+ for (int i{0}; i < 100; ++i) {
+ datasource.add_extension(i);
+ }
+ CHECK("99" == call_JA4(datasource).substr(6, 2));
+ }
+
+ SECTION("Given the extensions include a GREASE value, "
+ "when we create a JA4 fingerprint, "
+ "then that value should not be included in the count.")
+ {
+ datasource.add_extension(2);
+ datasource.add_extension(0x0a0a);
+ CHECK("01" == call_JA4(datasource).substr(6, 2));
+ }
+
+ // These may be covered by the earlier tests as well, but this documents the
+ // behavior explicitly.
+ SECTION("When we create a JA4 fingerprint, "
+ "then the SNI and ALPN extensions should be included in the count.")
+ {
+ datasource.add_extension(0x0);
+ datasource.add_extension(0x10);
+ CHECK("02" == call_JA4(datasource).substr(6, 2));
+ }
+
+ SECTION("Given the ALPN value is empty, "
+ "when we create a JA4 fingerprint, "
+ "then indices [8,9] thereof should contain \"00\".")
+ {
+ datasource.set_first_alpn("");
+ CHECK("00" == call_JA4(datasource).substr(8, 2));
+ }
+
+ // This should never happen in practice because all registered ALPN values
+ // are at least 2 characters long, but it's the correct behavior according
+ // to the spec. :-)
+ SECTION("Given the ALPN value is \"a\", "
+ "when we create a JA4 fingerprint, "
+ "then indices [8,9] thereof should contain \"aa\".")
+ {
+ datasource.set_first_alpn("a");
+ CHECK("aa" == call_JA4(datasource).substr(8, 2));
+ }
+
+ SECTION("Given the ALPN value is \"h3\", "
+ "when we create a JA4 fingerprint, "
+ "then indices [8,9] thereof should contain \"h3\".")
+ {
+ datasource.set_first_alpn("h3");
+ CHECK("h3" == call_JA4(datasource).substr(8, 2));
+ }
+
+ SECTION("Given the ALPN value is \"imap\", "
+ "when we create a JA4 fingerprint, "
+ "then indices [8,9] thereof should contain \"ip\".")
+ {
+ datasource.set_first_alpn("imap");
+ CHECK("ip" == call_JA4(datasource).substr(8, 2));
+ }
+
+ SECTION("When we create a JA4 fingeprint, "
Review Comment:
Spelling: "fingeprint" should be "fingerprint" in this test description
string.
```suggestion
SECTION("When we create a JA4 fingerprint, "
```
##########
plugins/experimental/jax_fingerprint/ja4/test.cc:
##########
@@ -0,0 +1,531 @@
+/** @file
+ *
+ Unit tests for JA4 fingerprint calculation.
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ */
+
+#include "ja4.h"
+#include "datasource.h"
+
+#include <catch2/catch_test_macros.hpp>
+#include <openssl/sha.h>
+
+#include <algorithm>
+#include <cctype>
+#include <cstring>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+
+namespace
+{
+
+class MockDatasource : public ja4::Datasource
+{
+public:
+ std::string_view
+ get_first_alpn() override
+ {
+ return this->_first_alpn;
+ }
+
+ void
+ get_cipher_suites_hash(unsigned char out[32]) override
+ {
+ if (this->_ciphers.empty()) {
+ memset(out, 0, 32);
+ return;
+ }
+ auto sorted = this->_ciphers;
+ std::sort(sorted.begin(), sorted.end());
+ SHA256_CTX ctx;
+ SHA256_Init(&ctx);
+ for (size_t i = 0; i < sorted.size(); ++i) {
+ char buf[5];
+ char *p = buf;
+ if (i != 0) {
+ *p = ',';
+ p += 1;
+ }
+ uint16_t c = sorted[i];
+ uint8_t h1 = (c & 0xF000) >> 12;
+ uint8_t l1 = (c & 0x0F00) >> 8;
+ uint8_t h2 = (c & 0x00F0) >> 4;
+ uint8_t l2 = c & 0x000F;
+ p[0] = h1 <= 9 ? ('0' + h1) : ('a' + h1 - 10);
+ p[1] = l1 <= 9 ? ('0' + l1) : ('a' + l1 - 10);
+ p[2] = h2 <= 9 ? ('0' + h2) : ('a' + h2 - 10);
+ p[3] = l2 <= 9 ? ('0' + l2) : ('a' + l2 - 10);
+ p += 4;
+ SHA256_Update(&ctx, buf, p - buf);
+ }
+ SHA256_Final(out, &ctx);
+ }
+
+ void
+ get_extension_hash(unsigned char out[32]) override
+ {
+ if (this->_extensions.empty()) {
+ memset(out, 0, 32);
+ return;
+ }
+ auto sorted = this->_extensions;
+ std::sort(sorted.begin(), sorted.end());
+ SHA256_CTX ctx;
+ SHA256_Init(&ctx);
+ for (size_t i = 0; i < sorted.size(); ++i) {
+ char buf[5];
+ char *p = buf;
+ if (i != 0) {
+ *p = ',';
+ p += 1;
+ }
+ uint16_t e = sorted[i];
+ uint8_t h1 = (e & 0xF000) >> 12;
+ uint8_t l1 = (e & 0x0F00) >> 8;
+ uint8_t h2 = (e & 0x00F0) >> 4;
+ uint8_t l2 = e & 0x000F;
+ p[0] = h1 <= 9 ? ('0' + h1) : ('a' + h1 - 10);
+ p[1] = l1 <= 9 ? ('0' + l1) : ('a' + l1 - 10);
+ p[2] = h2 <= 9 ? ('0' + h2) : ('a' + h2 - 10);
+ p[3] = l2 <= 9 ? ('0' + l2) : ('a' + l2 - 10);
+ p += 4;
+ SHA256_Update(&ctx, buf, p - buf);
+ }
+ SHA256_Final(out, &ctx);
+ }
+
+ void
+ set_protocol(ja4::Datasource::Protocol protocol)
+ {
+ this->_protocol = protocol;
+ }
+ void
+ set_version(int version)
+ {
+ this->_version = version;
+ }
+ void
+ set_first_alpn(std::string first_alpn)
+ {
+ this->_first_alpn = first_alpn;
+ }
+ void
+ add_cipher(std::uint16_t cipher)
+ {
+ if (_is_GREASE(cipher)) {
+ return;
+ }
+
+ ++this->_n_ciphers;
+ this->_ciphers.push_back(cipher);
+ }
+
+ void
+ add_extension(uint16_t extension)
+ {
+ if (EXT_SNI == extension) {
+ this->_SNI_type = SNI::to_domain;
+ this->_has_SNI = true;
+ return;
+ }
+ if (EXT_ALPN == extension) {
+ this->_has_ALPN = true;
+ return;
+ }
+ if (_is_GREASE(extension)) {
+ return;
+ }
+
+ ++this->_n_extensions;
+ this->_extensions.push_back(extension);
+ }
+
+private:
+ std::string _first_alpn;
+
+ std::vector<std::uint16_t> _ciphers;
+ std::vector<std::uint16_t> _extensions;
+ SNI _SNI_type{SNI::to_IP};
+};
+
+std::string_view
+SHA256_12(std::string_view in)
+{
+ uint8_t hash[32];
+ SHA256(reinterpret_cast<const uint8_t *>(in.data()), in.size(), hash);
+
+ static char out[12];
+ for (int i = 0; i < 6; ++i) {
+ uint8_t h = hash[i] >> 4;
+ uint8_t l = hash[i] & 0x0F;
+ out[i * 2] = h <= 9 ? '0' + h : 'a' + h - 10;
+ out[i * 2 + 1] = l <= 9 ? '0' + l : 'a' + l - 10;
+ }
+ return {out, sizeof(out)};
+}
+
+} // namespace
+
+static std::string call_JA4(ja4::Datasource &datasource);
+
+TEST_CASE("JA4")
+{
+ MockDatasource datasource{};
+
+ SECTION("Given the protocol is TCP, "
+ "when we create a JA4 fingerprint, "
+ "then the first character thereof should be 't'.")
+ {
+ datasource.set_protocol(ja4::Datasource::Protocol::TLS);
+
+ CHECK("t" == call_JA4(datasource).substr(0, 1));
+ }
+
+ SECTION("Given the protocol is QUIC, "
+ "when we create a JA4 fingerprint, "
+ "then the first character thereof should be 'q'.")
+ {
+ datasource.set_protocol(ja4::Datasource::Protocol::QUIC);
+ CHECK(call_JA4(datasource).starts_with('q'));
+ }
+
+ SECTION("Given the protocol is DTLS, "
+ "when we create a JA4 fingerprint, "
+ "then the first character thereof should be 'd'.")
+ {
+ datasource.set_protocol(ja4::Datasource::Protocol::DTLS);
+ CHECK(call_JA4(datasource).starts_with('d'));
+ }
+
+ SECTION("Given the TLS version is unknown, "
+ "when we create a JA4 fingerprint, "
+ "then indices [1,2] thereof should contain \"00\".")
+ {
+ datasource.set_version(0x123);
+ CHECK("00" == call_JA4(datasource).substr(1, 2));
+ datasource.set_version(0x234);
+ CHECK("00" == call_JA4(datasource).substr(1, 2));
+ }
+
+ SECTION("Given the TLS version is known, "
+ "when we create a JA4 fingerprint, "
+ "then indices [1,2] thereof should contain the correct value.")
+ {
+ std::unordered_map<std::uint16_t, std::string> values{
+ {0x304, "13"},
+ {0x303, "12"},
+ {0x302, "11"},
+ {0x301, "10"},
+ {0x300, "s3"},
+ {0x200, "s2"},
+ {0x100, "s1"},
+ {0xfeff, "d1"},
+ {0xfefd, "d2"},
+ {0xfefc, "d3"}
+ };
+ for (auto const &[version, expected] : values) {
+ CAPTURE(version, expected);
+ datasource.set_version(version);
+ CHECK(expected == call_JA4(datasource).substr(1, 2));
+ }
+ }
+
+ SECTION("Given the SNI extension is present, "
+ "when we create a JA4 fingerprint, "
+ "then index 3 thereof should contain 'd'.")
+ {
+ datasource.add_extension(0x0);
+ CHECK("d" == call_JA4(datasource).substr(3, 1));
+ }
+
+ SECTION("Given the SNI extension is not present, "
+ "when we create a JA4 fingerprint, "
+ "then index 3 thereof should contain 'i'.")
+ {
+ datasource.add_extension(0x31);
+ CHECK("i" == call_JA4(datasource).substr(3, 1));
+ }
+
+ SECTION("Given there is one cipher, "
+ "when we create a JA4 fingerprint, "
+ "then indices [4,5] thereof should contain \"01\".")
+ {
+ datasource.add_cipher(1);
+ CHECK("01" == call_JA4(datasource).substr(4, 2));
+ }
+
+ SECTION("Given there are 9 ciphers, "
+ "when we create a JA4 fingerprint, "
+ "then indices [4,5] thereof should contain \"09\".")
+ {
+ for (int i{0}; i < 9; ++i) {
+ datasource.add_cipher(i);
+ }
+ CHECK("09" == call_JA4(datasource).substr(4, 2));
+ }
+
+ SECTION("Given there are 10 ciphers, "
+ "when we create a JA4 fingerprint, "
+ "then indices [4,5] thereof should contain \"10\".")
+ {
+ for (int i{0}; i < 10; ++i) {
+ datasource.add_cipher(i);
+ }
+ CHECK("10" == call_JA4(datasource).substr(4, 2));
+ }
+
+ SECTION("Given there are more than 99 ciphers, "
+ "when we create a JA4 fingerprint, "
+ "then indices [4,5] thereof should contain \"99\".")
+ {
+ for (int i{0}; i < 100; ++i) {
+ datasource.add_cipher(i);
+ }
+ CHECK("99" == call_JA4(datasource).substr(4, 2));
+ }
+
+ SECTION("Given the ciphers include a GREASE value, "
+ "when we create a JA4 fingerprint, "
+ "then that value should not be included in the count.")
+ {
+ datasource.add_cipher(0x0a0a);
+ datasource.add_cipher(72);
+ CHECK("01" == call_JA4(datasource).substr(4, 2));
+ }
+
+ SECTION("Given there are no extensions, "
+ "when we create a JA4 fingerprint, "
+ "then indices [6,7] thereof should contain \"00\".")
+ {
+ CHECK("00" == call_JA4(datasource).substr(6, 2));
+ }
+
+ SECTION("Given there are 9 extensions, "
+ "when we create a JA4 fingerprint, "
+ "then indices [6,7] thereof should contain \"09\".")
+ {
+ for (int i{0}; i < 9; ++i) {
+ datasource.add_extension(i);
+ }
+ CHECK("09" == call_JA4(datasource).substr(6, 2));
+ }
+
+ SECTION("Given there are 99 extensions, "
+ "when we create a JA4 fingerprint, "
+ "then indices [6,7] thereof should contain \"99\".")
+ {
+ for (int i{0}; i < 99; ++i) {
+ datasource.add_extension(i);
+ }
+ CHECK("99" == call_JA4(datasource).substr(6, 2));
+ }
+
+ SECTION("Given there are more than 99 extensions, "
+ "when we create a JA4 fingerprint, "
+ "then indices [6,7] thereof should contain \"99\".")
+ {
+ for (int i{0}; i < 100; ++i) {
+ datasource.add_extension(i);
+ }
+ CHECK("99" == call_JA4(datasource).substr(6, 2));
+ }
+
+ SECTION("Given the extensions include a GREASE value, "
+ "when we create a JA4 fingerprint, "
+ "then that value should not be included in the count.")
+ {
+ datasource.add_extension(2);
+ datasource.add_extension(0x0a0a);
+ CHECK("01" == call_JA4(datasource).substr(6, 2));
+ }
+
+ // These may be covered by the earlier tests as well, but this documents the
+ // behavior explicitly.
+ SECTION("When we create a JA4 fingerprint, "
+ "then the SNI and ALPN extensions should be included in the count.")
+ {
+ datasource.add_extension(0x0);
+ datasource.add_extension(0x10);
+ CHECK("02" == call_JA4(datasource).substr(6, 2));
+ }
+
+ SECTION("Given the ALPN value is empty, "
+ "when we create a JA4 fingerprint, "
+ "then indices [8,9] thereof should contain \"00\".")
+ {
+ datasource.set_first_alpn("");
+ CHECK("00" == call_JA4(datasource).substr(8, 2));
+ }
+
+ // This should never happen in practice because all registered ALPN values
+ // are at least 2 characters long, but it's the correct behavior according
+ // to the spec. :-)
+ SECTION("Given the ALPN value is \"a\", "
+ "when we create a JA4 fingerprint, "
+ "then indices [8,9] thereof should contain \"aa\".")
+ {
+ datasource.set_first_alpn("a");
+ CHECK("aa" == call_JA4(datasource).substr(8, 2));
+ }
+
+ SECTION("Given the ALPN value is \"h3\", "
+ "when we create a JA4 fingerprint, "
+ "then indices [8,9] thereof should contain \"h3\".")
+ {
+ datasource.set_first_alpn("h3");
+ CHECK("h3" == call_JA4(datasource).substr(8, 2));
+ }
+
+ SECTION("Given the ALPN value is \"imap\", "
+ "when we create a JA4 fingerprint, "
+ "then indices [8,9] thereof should contain \"ip\".")
+ {
+ datasource.set_first_alpn("imap");
+ CHECK("ip" == call_JA4(datasource).substr(8, 2));
+ }
+
+ SECTION("When we create a JA4 fingeprint, "
+ "then index 10 thereof should contain '_'.")
+ {
+ CHECK("_" == call_JA4(datasource).substr(10, 1));
+ }
+
+ SECTION("When we create a JA4 fingerprint, "
+ "then the b section should be passed through the hash function.")
+ {
+ char buf[36];
+ datasource.add_cipher(10);
+ CHECK(SHA256_12("000a") == ja4::generate_fingerprint(buf,
datasource).substr(11, 12));
+ }
+
+ // As per the spec, we expect 4-character, comma-delimited hex values.
+ SECTION("Given only ciphers 2, 12, and 17 in that order, "
+ "when we create a JA4 fingerprint, "
+ "then the hash should be invoked with \"0002,000c,0011\".")
+ {
+ datasource.add_cipher(2);
+ datasource.add_cipher(12);
+ datasource.add_cipher(17);
+ char buf[36];
+ CHECK(SHA256_12("0002,000c,0011") == ja4::generate_fingerprint(buf,
datasource).substr(11, 12));
+ }
+
+ SECTION("When we create a JA4 fingerprint, "
+ "then the cipher values should be sorted before hashing.")
+ {
+ datasource.add_cipher(17);
+ datasource.add_cipher(2);
+ datasource.add_cipher(12);
+ char buf[36];
+ CHECK(SHA256_12("0002,000c,0011") == ja4::generate_fingerprint(buf,
datasource).substr(11, 12));
+ }
+
+ SECTION("When we create a JA4 fingerprint, "
+ "then GREASE values in the cipher list should be ignored.")
+ {
+ datasource.add_cipher(0x0a0a);
+ datasource.add_cipher(2);
+ char buf[36];
+ CHECK(SHA256_12("0002") == ja4::generate_fingerprint(buf,
datasource).substr(11, 12));
+ }
+
+ // All the tests from now on have enough ciphers to ensure a long enough
+ // hash using our default hash (the id function) so that the length of the
+ // JA4 fingerprint will be valid.
+ datasource.add_cipher(1);
+ datasource.add_cipher(2);
+ datasource.add_cipher(3);
+
+ SECTION("When we create a JA4 fingerprint, "
+ "then we should truncate the section b hash to 12 characters.")
+ {
+ char buf[36];
+ CHECK(SHA256_12("0001,0002,0003") == ja4::generate_fingerprint(buf,
datasource).substr(11, 12));
+ }
+
+ SECTION("When we create a JA4 fingeprint, "
Review Comment:
Spelling: "fingeprint" should be "fingerprint" in this test description
string.
```suggestion
SECTION("When we create a JA4 fingerprint, "
```
##########
plugins/experimental/jax_fingerprint/ja4/tls_client_hello_summary.cc:
##########
@@ -22,91 +22,178 @@
*/
+#include "ts/ts.h"
+#include <plugin.h>
#include "ja4.h"
-#include <algorithm>
-#include <array>
+#include "tls_client_hello_summary.h"
+
+#include <openssl/sha.h>
#include <cstdint>
-#include <functional>
-#include <vector>
-namespace
+TLSClientHelloSummary::TLSClientHelloSummary(ja4::Datasource::Protocol
protocol, TSClientHello ch) : _ch(ch)
{
+ const uint8_t *buf;
+ size_t buflen;
+
+ // Protocol
+ this->_protocol = protocol;
+
+ // Version
+ if (TS_SUCCESS == TSClientHelloExtensionGet(this->_ch,
EXT_SUPPORTED_VERSIONS, &buf, &buflen)) {
+ uint16_t max_version{0};
+ size_t versions_len = buf[0];
+
+ if (buflen < versions_len + 1) {
+ Dbg(dbg_ctl, "Malformed supported_versions extension (truncated
vector)... using legacy version.");
+ this->_version = this->_ch.get_version();
+ } else {
+ for (size_t i = 1; (i + 1) < (versions_len + 1); i += 2) {
+ uint16_t version = (buf[i] << 8) | buf[i + 1];
+ if (!this->_is_GREASE(version) && version > max_version) {
+ max_version = version;
+ }
+ }
+ this->_version = max_version;
+ }
+ } else {
+ Dbg(dbg_ctl, "No supported_versions extension... using legacy version.");
+ this->_version = this->_ch.get_version();
+ }
-constexpr std::array<std::uint16_t, 16> GREASE_values{0x0a0a, 0x1a1a, 0x2a2a,
0x3a3a, 0x4a4a, 0x5a5a, 0x6a6a, 0x7a7a,
- 0x8a8a, 0x9a9a, 0xaaaa,
0xbaba, 0xcaca, 0xdada, 0xeaea, 0xfafa};
-constexpr std::uint16_t extension_SNI{0x0};
-constexpr std::uint16_t extension_ALPN{0x10};
-
-} // end anonymous namespace
+ // Ciphers
+ buf = this->_ch.get_cipher_suites();
+ buflen = this->_ch.get_cipher_suites_len();
+
+ if (buflen / 2 <= MAX_CIPHERS_FOR_FAST_PATH) {
+ // Fast path
+ this->_ciphers = this->_fast_cipher_storage.data();
+ } else {
+ // Slow path
+ this->_slow_cipher_storage = std::make_unique<uint16_t[]>(buflen / 2);
+ this->_ciphers = this->_slow_cipher_storage.get();
+ }
+ for (size_t i = 0; i + 1 < buflen; i += 2) {
+ uint16_t cipher = (static_cast<uint16_t>(buf[i]) << 8) + buf[i + 1];
+ if (this->_is_GREASE(cipher)) {
+ continue;
+ }
+ this->_ciphers[++this->_n_ciphers] = cipher;
+ }
+ std::sort(this->_ciphers, this->_ciphers + this->_n_ciphers);
-static bool is_ignored_non_GREASE_extension(std::uint16_t extension);
+ // Extensions
+ auto count = 0;
+ for (auto &&type : this->_ch.get_extension_types()) {
+ (void)type;
+ ++count;
+ }
+ if (count <= MAX_EXTENSIONS_FOR_FAST_PATH) {
+ // Fast path
+ this->_extensions = this->_fast_extension_storage.data();
+ } else {
+ // Slow path
+ this->_slow_extension_storage = std::make_unique<uint16_t[]>(count);
+ this->_extensions = this->_slow_extension_storage.get();
+ }
+ for (auto &&type : this->_ch.get_extension_types()) {
+ if (type == EXT_SNI) {
+ this->_has_SNI = true;
+ continue;
+ }
+ if (type == EXT_ALPN) {
+ this->_has_ALPN = true;
+ continue;
+ }
+ if (this->_is_GREASE(type)) {
+ continue;
+ }
+ this->_extensions[++this->_n_extensions] = type;
+ }
Review Comment:
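Extension storage has the same off-by-one issue as ciphers:
`_extensions[++_n_extensions] = type` pre-increments, so index 0 is never
written and the values land in `[1, _n_extensions]`. The subsequent
`std::sort(_extensions, _extensions + _n_extensions)` then operates on an
uninitialized first element and leaves out the last value stored. In addition,
when no extension is filtered out (no SNI/ALPN/GREASE), the final write goes one
element past the end of the storage (index `count` of a `count`-element buffer
in the slow path). Use `_extensions[_n_extensions++] = type` (or a separate
write index) so the array contents and count stay consistent.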
##########
plugins/experimental/jax_fingerprint/ja4/tls_client_hello_summary.h:
##########
@@ -0,0 +1,53 @@
+/** @file
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ */
+
+#pragma once
+
Review Comment:
This header uses `std::array` and `std::unique_ptr` but doesn't include
`<array>` / `<memory>` (and relies on transitive includes). Add the required
standard headers so this file is self-contained and doesn't break depending on
include order.
```suggestion
#include <array>
#include <memory>
```
##########
plugins/experimental/jax_fingerprint/ja4/datasource.h:
##########
@@ -0,0 +1,71 @@
+/** @file
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ */
+
+#pragma once
+
+#include <string_view>
+
+constexpr uint16_t EXT_SNI{0x0};
+constexpr uint16_t EXT_ALPN{0x10};
+constexpr uint16_t EXT_SUPPORTED_VERSIONS{0x2b};
Review Comment:
`datasource.h` uses `uint16_t` but does not include `<cstdint>` (or another
header that guarantees the typedef). This can break compilation for translation
units/tests that include this header without pulling in `<cstdint>` first.
Include `<cstdint>` and consider using `std::uint16_t` for consistency.
##########
plugins/experimental/jax_fingerprint/ja4/test.cc:
##########
@@ -0,0 +1,531 @@
+/** @file
+ *
+ Unit tests for JA4 fingerprint calculation.
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ */
+
+#include "ja4.h"
+#include "datasource.h"
+
+#include <catch2/catch_test_macros.hpp>
+#include <openssl/sha.h>
+
+#include <algorithm>
+#include <cctype>
+#include <cstring>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+
+namespace
+{
+
+class MockDatasource : public ja4::Datasource
+{
+public:
+ std::string_view
+ get_first_alpn() override
+ {
+ return this->_first_alpn;
+ }
+
+ void
+ get_cipher_suites_hash(unsigned char out[32]) override
+ {
+ if (this->_ciphers.empty()) {
+ memset(out, 0, 32);
+ return;
+ }
+ auto sorted = this->_ciphers;
+ std::sort(sorted.begin(), sorted.end());
+ SHA256_CTX ctx;
+ SHA256_Init(&ctx);
+ for (size_t i = 0; i < sorted.size(); ++i) {
+ char buf[5];
+ char *p = buf;
+ if (i != 0) {
+ *p = ',';
+ p += 1;
+ }
+ uint16_t c = sorted[i];
+ uint8_t h1 = (c & 0xF000) >> 12;
+ uint8_t l1 = (c & 0x0F00) >> 8;
+ uint8_t h2 = (c & 0x00F0) >> 4;
+ uint8_t l2 = c & 0x000F;
+ p[0] = h1 <= 9 ? ('0' + h1) : ('a' + h1 - 10);
+ p[1] = l1 <= 9 ? ('0' + l1) : ('a' + l1 - 10);
+ p[2] = h2 <= 9 ? ('0' + h2) : ('a' + h2 - 10);
+ p[3] = l2 <= 9 ? ('0' + l2) : ('a' + l2 - 10);
+ p += 4;
+ SHA256_Update(&ctx, buf, p - buf);
+ }
+ SHA256_Final(out, &ctx);
+ }
+
+ void
+ get_extension_hash(unsigned char out[32]) override
+ {
+ if (this->_extensions.empty()) {
+ memset(out, 0, 32);
+ return;
+ }
+ auto sorted = this->_extensions;
+ std::sort(sorted.begin(), sorted.end());
+ SHA256_CTX ctx;
+ SHA256_Init(&ctx);
+ for (size_t i = 0; i < sorted.size(); ++i) {
+ char buf[5];
+ char *p = buf;
+ if (i != 0) {
+ *p = ',';
+ p += 1;
+ }
+ uint16_t e = sorted[i];
+ uint8_t h1 = (e & 0xF000) >> 12;
+ uint8_t l1 = (e & 0x0F00) >> 8;
+ uint8_t h2 = (e & 0x00F0) >> 4;
+ uint8_t l2 = e & 0x000F;
+ p[0] = h1 <= 9 ? ('0' + h1) : ('a' + h1 - 10);
+ p[1] = l1 <= 9 ? ('0' + l1) : ('a' + l1 - 10);
+ p[2] = h2 <= 9 ? ('0' + h2) : ('a' + h2 - 10);
+ p[3] = l2 <= 9 ? ('0' + l2) : ('a' + l2 - 10);
+ p += 4;
+ SHA256_Update(&ctx, buf, p - buf);
+ }
+ SHA256_Final(out, &ctx);
+ }
+
+ void
+ set_protocol(ja4::Datasource::Protocol protocol)
+ {
+ this->_protocol = protocol;
+ }
+ void
+ set_version(int version)
+ {
+ this->_version = version;
+ }
+ void
+ set_first_alpn(std::string first_alpn)
+ {
+ this->_first_alpn = first_alpn;
+ }
+ void
+ add_cipher(std::uint16_t cipher)
+ {
+ if (_is_GREASE(cipher)) {
+ return;
+ }
+
+ ++this->_n_ciphers;
+ this->_ciphers.push_back(cipher);
+ }
+
+ void
+ add_extension(uint16_t extension)
+ {
+ if (EXT_SNI == extension) {
+ this->_SNI_type = SNI::to_domain;
+ this->_has_SNI = true;
+ return;
+ }
+ if (EXT_ALPN == extension) {
+ this->_has_ALPN = true;
+ return;
+ }
+ if (_is_GREASE(extension)) {
+ return;
+ }
+
+ ++this->_n_extensions;
+ this->_extensions.push_back(extension);
+ }
+
+private:
+ std::string _first_alpn;
+
+ std::vector<std::uint16_t> _ciphers;
+ std::vector<std::uint16_t> _extensions;
+ SNI _SNI_type{SNI::to_IP};
+};
+
+std::string_view
+SHA256_12(std::string_view in)
+{
+ uint8_t hash[32];
+ SHA256(reinterpret_cast<const uint8_t *>(in.data()), in.size(), hash);
+
+ static char out[12];
+ for (int i = 0; i < 6; ++i) {
+ uint8_t h = hash[i] >> 4;
+ uint8_t l = hash[i] & 0x0F;
+ out[i * 2] = h <= 9 ? '0' + h : 'a' + h - 10;
+ out[i * 2 + 1] = l <= 9 ? '0' + l : 'a' + l - 10;
+ }
+ return {out, sizeof(out)};
+}
+
+} // namespace
+
+static std::string call_JA4(ja4::Datasource &datasource);
+
+TEST_CASE("JA4")
+{
+ MockDatasource datasource{};
+
+ SECTION("Given the protocol is TCP, "
+ "when we create a JA4 fingerprint, "
+ "then the first character thereof should be 't'.")
+ {
+ datasource.set_protocol(ja4::Datasource::Protocol::TLS);
+
+ CHECK("t" == call_JA4(datasource).substr(0, 1));
+ }
+
+ SECTION("Given the protocol is QUIC, "
+ "when we create a JA4 fingerprint, "
+ "then the first character thereof should be 'q'.")
+ {
+ datasource.set_protocol(ja4::Datasource::Protocol::QUIC);
+ CHECK(call_JA4(datasource).starts_with('q'));
+ }
+
+ SECTION("Given the protocol is DTLS, "
+ "when we create a JA4 fingerprint, "
+ "then the first character thereof should be 'd'.")
+ {
+ datasource.set_protocol(ja4::Datasource::Protocol::DTLS);
+ CHECK(call_JA4(datasource).starts_with('d'));
+ }
+
+ SECTION("Given the TLS version is unknown, "
+ "when we create a JA4 fingerprint, "
+ "then indices [1,2] thereof should contain \"00\".")
+ {
+ datasource.set_version(0x123);
+ CHECK("00" == call_JA4(datasource).substr(1, 2));
+ datasource.set_version(0x234);
+ CHECK("00" == call_JA4(datasource).substr(1, 2));
Review Comment:
Several sections call `call_JA4(datasource)` without first setting the
protocol; `ja4::Datasource::_protocol` is currently uninitialized by default,
so these tests can be non-deterministic/UB. Either initialize `_protocol` in
`ja4::Datasource` (e.g., default to TLS) or set the protocol explicitly in each
section before generating a fingerprint.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]