Copilot commented on code in PR #13072:
URL: https://github.com/apache/trafficserver/pull/13072#discussion_r3054443691


##########
plugins/experimental/jax_fingerprint/ja4/tls_client_hello_summary.cc:
##########
@@ -22,91 +22,178 @@
 
  */
 
+#include "ts/ts.h"
+#include <plugin.h>
 #include "ja4.h"
 
-#include <algorithm>
-#include <array>
+#include "tls_client_hello_summary.h"
+
+#include <openssl/sha.h>
 #include <cstdint>
-#include <functional>
-#include <vector>
 
-namespace
+TLSClientHelloSummary::TLSClientHelloSummary(ja4::Datasource::Protocol protocol, TSClientHello ch) : _ch(ch)
 {
+  const uint8_t *buf;
+  size_t         buflen;
+
+  // Protocol
+  this->_protocol = protocol;
+
+  // Version
+  if (TS_SUCCESS == TSClientHelloExtensionGet(this->_ch, EXT_SUPPORTED_VERSIONS, &buf, &buflen)) {
+    uint16_t max_version{0};
+    size_t   versions_len = buf[0];
+
+    if (buflen < versions_len + 1) {
+      Dbg(dbg_ctl, "Malformed supported_versions extension (truncated vector)... using legacy version.");
+      this->_version = this->_ch.get_version();
+    } else {
+      for (size_t i = 1; (i + 1) < (versions_len + 1); i += 2) {
+        uint16_t version = (buf[i] << 8) | buf[i + 1];
+        if (!this->_is_GREASE(version) && version > max_version) {
+          max_version = version;
+        }
+      }
+      this->_version = max_version;
+    }
+  } else {
+    Dbg(dbg_ctl, "No supported_versions extension... using legacy version.");
+    this->_version = this->_ch.get_version();
+  }
 
-constexpr std::array<std::uint16_t, 16> GREASE_values{0x0a0a, 0x1a1a, 0x2a2a, 0x3a3a, 0x4a4a, 0x5a5a, 0x6a6a, 0x7a7a,
-                                                      0x8a8a, 0x9a9a, 0xaaaa, 0xbaba, 0xcaca, 0xdada, 0xeaea, 0xfafa};
-constexpr std::uint16_t                 extension_SNI{0x0};
-constexpr std::uint16_t                 extension_ALPN{0x10};
-
-} // end anonymous namespace
+  // Ciphers
+  buf    = this->_ch.get_cipher_suites();
+  buflen = this->_ch.get_cipher_suites_len();
+
+  if (buflen / 2 <= MAX_CIPHERS_FOR_FAST_PATH) {
+    // Fast path
+    this->_ciphers = this->_fast_cipher_storage.data();
+  } else {
+    // Slow path
+    this->_slow_cipher_storage = std::make_unique<uint16_t[]>(buflen / 2);
+    this->_ciphers             = this->_slow_cipher_storage.get();
+  }
+  for (size_t i = 0; i + 1 < buflen; i += 2) {
+    uint16_t cipher = (static_cast<uint16_t>(buf[i]) << 8) + buf[i + 1];
+    if (this->_is_GREASE(cipher)) {
+      continue;
+    }
+    this->_ciphers[++this->_n_ciphers] = cipher;
+  }
+  std::sort(this->_ciphers, this->_ciphers + this->_n_ciphers);

Review Comment:
   Cipher suite storage uses pre-increment when writing into the array 
(`_ciphers[++_n_ciphers] = ...`). Since `_n_ciphers` starts at 0, this leaves 
index 0 uninitialized, sorts/hashes the wrong range, and can write past the 
allocated buffer. Write to `_ciphers[_n_ciphers++]` (or keep a separate write 
index) so the first element is stored at index 0 and the sorted range matches 
the count.



##########
plugins/experimental/jax_fingerprint/ja4/tls_client_hello_summary.cc:
##########
@@ -22,91 +22,178 @@
 
  */
 
+#include "ts/ts.h"
+#include <plugin.h>
 #include "ja4.h"
 
-#include <algorithm>
-#include <array>
+#include "tls_client_hello_summary.h"
+
+#include <openssl/sha.h>
 #include <cstdint>
-#include <functional>
-#include <vector>
 
-namespace
+TLSClientHelloSummary::TLSClientHelloSummary(ja4::Datasource::Protocol protocol, TSClientHello ch) : _ch(ch)
 {
+  const uint8_t *buf;
+  size_t         buflen;
+
+  // Protocol
+  this->_protocol = protocol;
+
+  // Version
+  if (TS_SUCCESS == TSClientHelloExtensionGet(this->_ch, EXT_SUPPORTED_VERSIONS, &buf, &buflen)) {
+    uint16_t max_version{0};
+    size_t   versions_len = buf[0];
+
+    if (buflen < versions_len + 1) {
+      Dbg(dbg_ctl, "Malformed supported_versions extension (truncated vector)... using legacy version.");
+      this->_version = this->_ch.get_version();
+    } else {
+      for (size_t i = 1; (i + 1) < (versions_len + 1); i += 2) {
+        uint16_t version = (buf[i] << 8) | buf[i + 1];
+        if (!this->_is_GREASE(version) && version > max_version) {
+          max_version = version;
+        }
+      }
+      this->_version = max_version;

Review Comment:
   `supported_versions` parsing reads `buf[0]` without first validating `buflen 
>= 1`, which can lead to out-of-bounds reads if the extension is 
empty/truncated. Add a length check before accessing `buf[0]` and fall back to 
the legacy version when malformed.
   ```suggestion
   
       if (buflen < 1) {
         Dbg(dbg_ctl, "Malformed supported_versions extension (empty/truncated)... using legacy version.");
         this->_version = this->_ch.get_version();
       } else {
         size_t versions_len = buf[0];
   
         if (buflen < versions_len + 1) {
           Dbg(dbg_ctl, "Malformed supported_versions extension (truncated vector)... using legacy version.");
           this->_version = this->_ch.get_version();
         } else {
           for (size_t i = 1; (i + 1) < (versions_len + 1); i += 2) {
             uint16_t version = (buf[i] << 8) | buf[i + 1];
             if (!this->_is_GREASE(version) && version > max_version) {
               max_version = version;
             }
           }
           this->_version = max_version;
         }
   ```



##########
plugins/experimental/jax_fingerprint/ja4/tls_client_hello_summary.cc:
##########
@@ -22,91 +22,178 @@
 
  */
 
+#include "ts/ts.h"
+#include <plugin.h>
 #include "ja4.h"
 
-#include <algorithm>
-#include <array>
+#include "tls_client_hello_summary.h"
+
+#include <openssl/sha.h>
 #include <cstdint>
-#include <functional>
-#include <vector>
 
-namespace
+TLSClientHelloSummary::TLSClientHelloSummary(ja4::Datasource::Protocol protocol, TSClientHello ch) : _ch(ch)
 {
+  const uint8_t *buf;
+  size_t         buflen;
+
+  // Protocol
+  this->_protocol = protocol;
+
+  // Version
+  if (TS_SUCCESS == TSClientHelloExtensionGet(this->_ch, EXT_SUPPORTED_VERSIONS, &buf, &buflen)) {
+    uint16_t max_version{0};
+    size_t   versions_len = buf[0];
+
+    if (buflen < versions_len + 1) {
+      Dbg(dbg_ctl, "Malformed supported_versions extension (truncated vector)... using legacy version.");
+      this->_version = this->_ch.get_version();
+    } else {
+      for (size_t i = 1; (i + 1) < (versions_len + 1); i += 2) {
+        uint16_t version = (buf[i] << 8) | buf[i + 1];
+        if (!this->_is_GREASE(version) && version > max_version) {
+          max_version = version;
+        }
+      }
+      this->_version = max_version;
+    }
+  } else {
+    Dbg(dbg_ctl, "No supported_versions extension... using legacy version.");
+    this->_version = this->_ch.get_version();
+  }
 
-constexpr std::array<std::uint16_t, 16> GREASE_values{0x0a0a, 0x1a1a, 0x2a2a, 0x3a3a, 0x4a4a, 0x5a5a, 0x6a6a, 0x7a7a,
-                                                      0x8a8a, 0x9a9a, 0xaaaa, 0xbaba, 0xcaca, 0xdada, 0xeaea, 0xfafa};
-constexpr std::uint16_t                 extension_SNI{0x0};
-constexpr std::uint16_t                 extension_ALPN{0x10};
-
-} // end anonymous namespace
+  // Ciphers
+  buf    = this->_ch.get_cipher_suites();
+  buflen = this->_ch.get_cipher_suites_len();
+
+  if (buflen / 2 <= MAX_CIPHERS_FOR_FAST_PATH) {
+    // Fast path
+    this->_ciphers = this->_fast_cipher_storage.data();
+  } else {
+    // Slow path
+    this->_slow_cipher_storage = std::make_unique<uint16_t[]>(buflen / 2);
+    this->_ciphers             = this->_slow_cipher_storage.get();
+  }
+  for (size_t i = 0; i + 1 < buflen; i += 2) {
+    uint16_t cipher = (static_cast<uint16_t>(buf[i]) << 8) + buf[i + 1];
+    if (this->_is_GREASE(cipher)) {
+      continue;
+    }
+    this->_ciphers[++this->_n_ciphers] = cipher;
+  }
+  std::sort(this->_ciphers, this->_ciphers + this->_n_ciphers);
 
-static bool is_ignored_non_GREASE_extension(std::uint16_t extension);
+  // Extensions
+  auto count = 0;
+  for (auto &&type : this->_ch.get_extension_types()) {
+    (void)type;
+    ++count;
+  }
+  if (count <= MAX_EXTENSIONS_FOR_FAST_PATH) {
+    // Fast path
+    this->_extensions = this->_fast_extension_storage.data();
+  } else {
+    // Slow path
+    this->_slow_extension_storage = std::make_unique<uint16_t[]>(count);
+    this->_extensions             = this->_slow_extension_storage.get();
+  }
+  for (auto &&type : this->_ch.get_extension_types()) {
+    if (type == EXT_SNI) {
+      this->_has_SNI = true;
+      continue;
+    }
+    if (type == EXT_ALPN) {
+      this->_has_ALPN = true;
+      continue;
+    }
+    if (this->_is_GREASE(type)) {
+      continue;
+    }
+    this->_extensions[++this->_n_extensions] = type;
+  }
+  std::sort(this->_extensions, this->_extensions + this->_n_extensions);
+}
 
-std::vector<std::uint16_t> const &
-JA4::TLSClientHelloSummary::get_ciphers() const
+std::string_view
+TLSClientHelloSummary::get_first_alpn()
 {
-  return this->_ciphers;
+  unsigned char const *buf{};
+  std::size_t          buflen{};
+
+  if (TS_SUCCESS == TSClientHelloExtensionGet(this->_ch, EXT_ALPN, &buf, &buflen)) {
+    // The first two bytes are a 16bit encoding of the total length.
+    unsigned char first_ALPN_length{buf[2]};
+    TSAssert(buflen > 4);
+    TSAssert(0 != first_ALPN_length);
+    return {reinterpret_cast<const char *>(&(buf[3])), first_ALPN_length};

Review Comment:
   `get_first_alpn()` reads `buf[2]` before verifying `buflen` is large enough, 
and it doesn't validate that the first ALPN string fits within `buflen`. 
Replace the `TSAssert`-only checks with explicit `buflen`/length validation 
before dereferencing, and return empty on malformed ALPN data instead of 
risking OOB/abort.
   ```suggestion
       // The first two bytes are a 16-bit encoding of the total protocol name list length.
       if (buflen < 3) {
         return {};
       }
   
       std::size_t const alpn_list_length = (static_cast<std::size_t>(buf[0]) << 8) | static_cast<std::size_t>(buf[1]);
       if (alpn_list_length == 0 || buflen < 2 + alpn_list_length) {
         return {};
       }
   
       unsigned char const first_ALPN_length{buf[2]};
       if (first_ALPN_length == 0 || static_cast<std::size_t>(first_ALPN_length) > (alpn_list_length - 1) ||
           buflen < 3 + static_cast<std::size_t>(first_ALPN_length)) {
         return {};
       }
   
       return {reinterpret_cast<char const *>(&(buf[3])), first_ALPN_length};
   ```



##########
plugins/experimental/jax_fingerprint/ja4/test.cc:
##########
@@ -0,0 +1,531 @@
+/** @file
+ *
+  Unit tests for JA4 fingerprint calculation.
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+
+ */
+
+#include "ja4.h"
+#include "datasource.h"
+
+#include <catch2/catch_test_macros.hpp>
+#include <openssl/sha.h>
+
+#include <algorithm>
+#include <cctype>
+#include <cstring>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+
+namespace
+{
+
+class MockDatasource : public ja4::Datasource
+{
+public:
+  std::string_view
+  get_first_alpn() override
+  {
+    return this->_first_alpn;
+  }
+
+  void
+  get_cipher_suites_hash(unsigned char out[32]) override
+  {
+    if (this->_ciphers.empty()) {
+      memset(out, 0, 32);
+      return;
+    }
+    auto sorted = this->_ciphers;
+    std::sort(sorted.begin(), sorted.end());
+    SHA256_CTX ctx;
+    SHA256_Init(&ctx);
+    for (size_t i = 0; i < sorted.size(); ++i) {
+      char  buf[5];
+      char *p = buf;
+      if (i != 0) {
+        *p  = ',';
+        p  += 1;
+      }
+      uint16_t c   = sorted[i];
+      uint8_t  h1  = (c & 0xF000) >> 12;
+      uint8_t  l1  = (c & 0x0F00) >> 8;
+      uint8_t  h2  = (c & 0x00F0) >> 4;
+      uint8_t  l2  = c & 0x000F;
+      p[0]         = h1 <= 9 ? ('0' + h1) : ('a' + h1 - 10);
+      p[1]         = l1 <= 9 ? ('0' + l1) : ('a' + l1 - 10);
+      p[2]         = h2 <= 9 ? ('0' + h2) : ('a' + h2 - 10);
+      p[3]         = l2 <= 9 ? ('0' + l2) : ('a' + l2 - 10);
+      p           += 4;
+      SHA256_Update(&ctx, buf, p - buf);
+    }
+    SHA256_Final(out, &ctx);
+  }
+
+  void
+  get_extension_hash(unsigned char out[32]) override
+  {
+    if (this->_extensions.empty()) {
+      memset(out, 0, 32);
+      return;
+    }
+    auto sorted = this->_extensions;
+    std::sort(sorted.begin(), sorted.end());
+    SHA256_CTX ctx;
+    SHA256_Init(&ctx);
+    for (size_t i = 0; i < sorted.size(); ++i) {
+      char  buf[5];
+      char *p = buf;
+      if (i != 0) {
+        *p  = ',';
+        p  += 1;
+      }
+      uint16_t e   = sorted[i];
+      uint8_t  h1  = (e & 0xF000) >> 12;
+      uint8_t  l1  = (e & 0x0F00) >> 8;
+      uint8_t  h2  = (e & 0x00F0) >> 4;
+      uint8_t  l2  = e & 0x000F;
+      p[0]         = h1 <= 9 ? ('0' + h1) : ('a' + h1 - 10);
+      p[1]         = l1 <= 9 ? ('0' + l1) : ('a' + l1 - 10);
+      p[2]         = h2 <= 9 ? ('0' + h2) : ('a' + h2 - 10);
+      p[3]         = l2 <= 9 ? ('0' + l2) : ('a' + l2 - 10);
+      p           += 4;
+      SHA256_Update(&ctx, buf, p - buf);
+    }
+    SHA256_Final(out, &ctx);
+  }
+
+  void
+  set_protocol(ja4::Datasource::Protocol protocol)
+  {
+    this->_protocol = protocol;
+  }
+  void
+  set_version(int version)
+  {
+    this->_version = version;
+  }
+  void
+  set_first_alpn(std::string first_alpn)
+  {
+    this->_first_alpn = first_alpn;
+  }
+  void
+  add_cipher(std::uint16_t cipher)
+  {
+    if (_is_GREASE(cipher)) {
+      return;
+    }
+
+    ++this->_n_ciphers;
+    this->_ciphers.push_back(cipher);
+  }
+
+  void
+  add_extension(uint16_t extension)
+  {
+    if (EXT_SNI == extension) {
+      this->_SNI_type = SNI::to_domain;
+      this->_has_SNI  = true;
+      return;
+    }
+    if (EXT_ALPN == extension) {
+      this->_has_ALPN = true;
+      return;
+    }
+    if (_is_GREASE(extension)) {
+      return;
+    }
+
+    ++this->_n_extensions;
+    this->_extensions.push_back(extension);
+  }
+
+private:
+  std::string _first_alpn;
+
+  std::vector<std::uint16_t> _ciphers;
+  std::vector<std::uint16_t> _extensions;
+  SNI                        _SNI_type{SNI::to_IP};
+};
+
+std::string_view
+SHA256_12(std::string_view in)
+{
+  uint8_t hash[32];
+  SHA256(reinterpret_cast<const uint8_t *>(in.data()), in.size(), hash);
+
+  static char out[12];
+  for (int i = 0; i < 6; ++i) {
+    uint8_t h      = hash[i] >> 4;
+    uint8_t l      = hash[i] & 0x0F;
+    out[i * 2]     = h <= 9 ? '0' + h : 'a' + h - 10;
+    out[i * 2 + 1] = l <= 9 ? '0' + l : 'a' + l - 10;
+  }
+  return {out, sizeof(out)};
+}
+
+} // namespace
+
+static std::string call_JA4(ja4::Datasource &datasource);
+
+TEST_CASE("JA4")
+{
+  MockDatasource datasource{};
+
+  SECTION("Given the protocol is TCP, "
+          "when we create a JA4 fingerprint, "
+          "then the first character thereof should be 't'.")
+  {
+    datasource.set_protocol(ja4::Datasource::Protocol::TLS);
+
+    CHECK("t" == call_JA4(datasource).substr(0, 1));
+  }
+
+  SECTION("Given the protocol is QUIC, "
+          "when we create a JA4 fingerprint, "
+          "then the first character thereof should be 'q'.")
+  {
+    datasource.set_protocol(ja4::Datasource::Protocol::QUIC);
+    CHECK(call_JA4(datasource).starts_with('q'));
+  }
+
+  SECTION("Given the protocol is DTLS, "
+          "when we create a JA4 fingerprint, "
+          "then the first character thereof should be 'd'.")
+  {
+    datasource.set_protocol(ja4::Datasource::Protocol::DTLS);
+    CHECK(call_JA4(datasource).starts_with('d'));
+  }
+
+  SECTION("Given the TLS version is unknown, "
+          "when we create a JA4 fingerprint, "
+          "then indices [1,2] thereof should contain \"00\".")
+  {
+    datasource.set_version(0x123);
+    CHECK("00" == call_JA4(datasource).substr(1, 2));
+    datasource.set_version(0x234);
+    CHECK("00" == call_JA4(datasource).substr(1, 2));
+  }
+
+  SECTION("Given the TLS version is known, "
+          "when we create a JA4 fingerprint, "
+          "then indices [1,2] thereof should contain the correct value.")
+  {
+    std::unordered_map<std::uint16_t, std::string> values{
+      {0x304,  "13"},
+      {0x303,  "12"},
+      {0x302,  "11"},
+      {0x301,  "10"},
+      {0x300,  "s3"},
+      {0x200,  "s2"},
+      {0x100,  "s1"},
+      {0xfeff, "d1"},
+      {0xfefd, "d2"},
+      {0xfefc, "d3"}
+    };
+    for (auto const &[version, expected] : values) {
+      CAPTURE(version, expected);
+      datasource.set_version(version);
+      CHECK(expected == call_JA4(datasource).substr(1, 2));
+    }
+  }
+
+  SECTION("Given the SNI extension is present, "
+          "when we create a JA4 fingerprint, "
+          "then index 3 thereof should contain 'd'.")
+  {
+    datasource.add_extension(0x0);
+    CHECK("d" == call_JA4(datasource).substr(3, 1));
+  }
+
+  SECTION("Given the SNI extension is not present, "
+          "when we create a JA4 fingerprint, "
+          "then index 3 thereof should contain 'i'.")
+  {
+    datasource.add_extension(0x31);
+    CHECK("i" == call_JA4(datasource).substr(3, 1));
+  }
+
+  SECTION("Given there is one cipher, "
+          "when we create a JA4 fingerprint, "
+          "then indices [4,5] thereof should contain \"01\".")
+  {
+    datasource.add_cipher(1);
+    CHECK("01" == call_JA4(datasource).substr(4, 2));
+  }
+
+  SECTION("Given there are 9 ciphers, "
+          "when we create a JA4 fingerprint, "
+          "then indices [4,5] thereof should contain \"09\".")
+  {
+    for (int i{0}; i < 9; ++i) {
+      datasource.add_cipher(i);
+    }
+    CHECK("09" == call_JA4(datasource).substr(4, 2));
+  }
+
+  SECTION("Given there are 10 ciphers, "
+          "when we create a JA4 fingerprint, "
+          "then indices [4,5] thereof should contain \"10\".")
+  {
+    for (int i{0}; i < 10; ++i) {
+      datasource.add_cipher(i);
+    }
+    CHECK("10" == call_JA4(datasource).substr(4, 2));
+  }
+
+  SECTION("Given there are more than 99 ciphers, "
+          "when we create a JA4 fingerprint, "
+          "then indices [4,5] thereof should contain \"99\".")
+  {
+    for (int i{0}; i < 100; ++i) {
+      datasource.add_cipher(i);
+    }
+    CHECK("99" == call_JA4(datasource).substr(4, 2));
+  }
+
+  SECTION("Given the ciphers include a GREASE value, "
+          "when we create a JA4 fingerprint, "
+          "then that value should not be included in the count.")
+  {
+    datasource.add_cipher(0x0a0a);
+    datasource.add_cipher(72);
+    CHECK("01" == call_JA4(datasource).substr(4, 2));
+  }
+
+  SECTION("Given there are no extensions, "
+          "when we create a JA4 fingerprint, "
+          "then indices [6,7] thereof should contain \"00\".")
+  {
+    CHECK("00" == call_JA4(datasource).substr(6, 2));
+  }
+
+  SECTION("Given there are 9 extensions, "
+          "when we create a JA4 fingerprint, "
+          "then indices [6,7] thereof should contain \"09\".")
+  {
+    for (int i{0}; i < 9; ++i) {
+      datasource.add_extension(i);
+    }
+    CHECK("09" == call_JA4(datasource).substr(6, 2));
+  }
+
+  SECTION("Given there are 99 extensions, "
+          "when we create a JA4 fingerprint, "
+          "then indices [6,7] thereof should contain \"99\".")
+  {
+    for (int i{0}; i < 99; ++i) {
+      datasource.add_extension(i);
+    }
+    CHECK("99" == call_JA4(datasource).substr(6, 2));
+  }
+
+  SECTION("Given there are more than 99 extensions, "
+          "when we create a JA4 fingerprint, "
+          "then indices [6,7] thereof should contain \"99\".")
+  {
+    for (int i{0}; i < 100; ++i) {
+      datasource.add_extension(i);
+    }
+    CHECK("99" == call_JA4(datasource).substr(6, 2));
+  }
+
+  SECTION("Given the extensions include a GREASE value, "
+          "when we create a JA4 fingerprint, "
+          "then that value should not be included in the count.")
+  {
+    datasource.add_extension(2);
+    datasource.add_extension(0x0a0a);
+    CHECK("01" == call_JA4(datasource).substr(6, 2));
+  }
+
+  // These may be covered by the earlier tests as well, but this documents the
+  // behavior explicitly.
+  SECTION("When we create a JA4 fingerprint, "
+          "then the SNI and ALPN extensions should be included in the count.")
+  {
+    datasource.add_extension(0x0);
+    datasource.add_extension(0x10);
+    CHECK("02" == call_JA4(datasource).substr(6, 2));
+  }
+
+  SECTION("Given the ALPN value is empty, "
+          "when we create a JA4 fingerprint, "
+          "then indices [8,9] thereof should contain \"00\".")
+  {
+    datasource.set_first_alpn("");
+    CHECK("00" == call_JA4(datasource).substr(8, 2));
+  }
+
+  // This should never happen in practice because all registered ALPN values
+  // are at least 2 characters long, but it's the correct behavior according
+  // to the spec. :-)
+  SECTION("Given the ALPN value is \"a\", "
+          "when we create a JA4 fingerprint, "
+          "then indices [8,9] thereof should contain \"aa\".")
+  {
+    datasource.set_first_alpn("a");
+    CHECK("aa" == call_JA4(datasource).substr(8, 2));
+  }
+
+  SECTION("Given the ALPN value is \"h3\", "
+          "when we create a JA4 fingerprint, "
+          "then indices [8,9] thereof should contain \"h3\".")
+  {
+    datasource.set_first_alpn("h3");
+    CHECK("h3" == call_JA4(datasource).substr(8, 2));
+  }
+
+  SECTION("Given the ALPN value is \"imap\", "
+          "when we create a JA4 fingerprint, "
+          "then indices [8,9] thereof should contain \"ip\".")
+  {
+    datasource.set_first_alpn("imap");
+    CHECK("ip" == call_JA4(datasource).substr(8, 2));
+  }
+
+  SECTION("When we create a JA4 fingeprint, "

Review Comment:
   Spelling: "fingeprint" should be "fingerprint" in this test description 
string.
   ```suggestion
     SECTION("When we create a JA4 fingerprint, "
   ```



##########
plugins/experimental/jax_fingerprint/ja4/test.cc:
##########
@@ -0,0 +1,531 @@
+/** @file
+ *
+  Unit tests for JA4 fingerprint calculation.
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+
+ */
+
+#include "ja4.h"
+#include "datasource.h"
+
+#include <catch2/catch_test_macros.hpp>
+#include <openssl/sha.h>
+
+#include <algorithm>
+#include <cctype>
+#include <cstring>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+
+namespace
+{
+
+class MockDatasource : public ja4::Datasource
+{
+public:
+  std::string_view
+  get_first_alpn() override
+  {
+    return this->_first_alpn;
+  }
+
+  void
+  get_cipher_suites_hash(unsigned char out[32]) override
+  {
+    if (this->_ciphers.empty()) {
+      memset(out, 0, 32);
+      return;
+    }
+    auto sorted = this->_ciphers;
+    std::sort(sorted.begin(), sorted.end());
+    SHA256_CTX ctx;
+    SHA256_Init(&ctx);
+    for (size_t i = 0; i < sorted.size(); ++i) {
+      char  buf[5];
+      char *p = buf;
+      if (i != 0) {
+        *p  = ',';
+        p  += 1;
+      }
+      uint16_t c   = sorted[i];
+      uint8_t  h1  = (c & 0xF000) >> 12;
+      uint8_t  l1  = (c & 0x0F00) >> 8;
+      uint8_t  h2  = (c & 0x00F0) >> 4;
+      uint8_t  l2  = c & 0x000F;
+      p[0]         = h1 <= 9 ? ('0' + h1) : ('a' + h1 - 10);
+      p[1]         = l1 <= 9 ? ('0' + l1) : ('a' + l1 - 10);
+      p[2]         = h2 <= 9 ? ('0' + h2) : ('a' + h2 - 10);
+      p[3]         = l2 <= 9 ? ('0' + l2) : ('a' + l2 - 10);
+      p           += 4;
+      SHA256_Update(&ctx, buf, p - buf);
+    }
+    SHA256_Final(out, &ctx);
+  }
+
+  void
+  get_extension_hash(unsigned char out[32]) override
+  {
+    if (this->_extensions.empty()) {
+      memset(out, 0, 32);
+      return;
+    }
+    auto sorted = this->_extensions;
+    std::sort(sorted.begin(), sorted.end());
+    SHA256_CTX ctx;
+    SHA256_Init(&ctx);
+    for (size_t i = 0; i < sorted.size(); ++i) {
+      char  buf[5];
+      char *p = buf;
+      if (i != 0) {
+        *p  = ',';
+        p  += 1;
+      }
+      uint16_t e   = sorted[i];
+      uint8_t  h1  = (e & 0xF000) >> 12;
+      uint8_t  l1  = (e & 0x0F00) >> 8;
+      uint8_t  h2  = (e & 0x00F0) >> 4;
+      uint8_t  l2  = e & 0x000F;
+      p[0]         = h1 <= 9 ? ('0' + h1) : ('a' + h1 - 10);
+      p[1]         = l1 <= 9 ? ('0' + l1) : ('a' + l1 - 10);
+      p[2]         = h2 <= 9 ? ('0' + h2) : ('a' + h2 - 10);
+      p[3]         = l2 <= 9 ? ('0' + l2) : ('a' + l2 - 10);
+      p           += 4;
+      SHA256_Update(&ctx, buf, p - buf);
+    }
+    SHA256_Final(out, &ctx);
+  }
+
+  void
+  set_protocol(ja4::Datasource::Protocol protocol)
+  {
+    this->_protocol = protocol;
+  }
+  void
+  set_version(int version)
+  {
+    this->_version = version;
+  }
+  void
+  set_first_alpn(std::string first_alpn)
+  {
+    this->_first_alpn = first_alpn;
+  }
+  void
+  add_cipher(std::uint16_t cipher)
+  {
+    if (_is_GREASE(cipher)) {
+      return;
+    }
+
+    ++this->_n_ciphers;
+    this->_ciphers.push_back(cipher);
+  }
+
+  void
+  add_extension(uint16_t extension)
+  {
+    if (EXT_SNI == extension) {
+      this->_SNI_type = SNI::to_domain;
+      this->_has_SNI  = true;
+      return;
+    }
+    if (EXT_ALPN == extension) {
+      this->_has_ALPN = true;
+      return;
+    }
+    if (_is_GREASE(extension)) {
+      return;
+    }
+
+    ++this->_n_extensions;
+    this->_extensions.push_back(extension);
+  }
+
+private:
+  std::string _first_alpn;
+
+  std::vector<std::uint16_t> _ciphers;
+  std::vector<std::uint16_t> _extensions;
+  SNI                        _SNI_type{SNI::to_IP};
+};
+
+std::string_view
+SHA256_12(std::string_view in)
+{
+  uint8_t hash[32];
+  SHA256(reinterpret_cast<const uint8_t *>(in.data()), in.size(), hash);
+
+  static char out[12];
+  for (int i = 0; i < 6; ++i) {
+    uint8_t h      = hash[i] >> 4;
+    uint8_t l      = hash[i] & 0x0F;
+    out[i * 2]     = h <= 9 ? '0' + h : 'a' + h - 10;
+    out[i * 2 + 1] = l <= 9 ? '0' + l : 'a' + l - 10;
+  }
+  return {out, sizeof(out)};
+}
+
+} // namespace
+
+static std::string call_JA4(ja4::Datasource &datasource);
+
+TEST_CASE("JA4")
+{
+  MockDatasource datasource{};
+
+  SECTION("Given the protocol is TCP, "
+          "when we create a JA4 fingerprint, "
+          "then the first character thereof should be 't'.")
+  {
+    datasource.set_protocol(ja4::Datasource::Protocol::TLS);
+
+    CHECK("t" == call_JA4(datasource).substr(0, 1));
+  }
+
+  SECTION("Given the protocol is QUIC, "
+          "when we create a JA4 fingerprint, "
+          "then the first character thereof should be 'q'.")
+  {
+    datasource.set_protocol(ja4::Datasource::Protocol::QUIC);
+    CHECK(call_JA4(datasource).starts_with('q'));
+  }
+
+  SECTION("Given the protocol is DTLS, "
+          "when we create a JA4 fingerprint, "
+          "then the first character thereof should be 'd'.")
+  {
+    datasource.set_protocol(ja4::Datasource::Protocol::DTLS);
+    CHECK(call_JA4(datasource).starts_with('d'));
+  }
+
+  SECTION("Given the TLS version is unknown, "
+          "when we create a JA4 fingerprint, "
+          "then indices [1,2] thereof should contain \"00\".")
+  {
+    datasource.set_version(0x123);
+    CHECK("00" == call_JA4(datasource).substr(1, 2));
+    datasource.set_version(0x234);
+    CHECK("00" == call_JA4(datasource).substr(1, 2));
+  }
+
+  SECTION("Given the TLS version is known, "
+          "when we create a JA4 fingerprint, "
+          "then indices [1,2] thereof should contain the correct value.")
+  {
+    std::unordered_map<std::uint16_t, std::string> values{
+      {0x304,  "13"},
+      {0x303,  "12"},
+      {0x302,  "11"},
+      {0x301,  "10"},
+      {0x300,  "s3"},
+      {0x200,  "s2"},
+      {0x100,  "s1"},
+      {0xfeff, "d1"},
+      {0xfefd, "d2"},
+      {0xfefc, "d3"}
+    };
+    for (auto const &[version, expected] : values) {
+      CAPTURE(version, expected);
+      datasource.set_version(version);
+      CHECK(expected == call_JA4(datasource).substr(1, 2));
+    }
+  }
+
+  SECTION("Given the SNI extension is present, "
+          "when we create a JA4 fingerprint, "
+          "then index 3 thereof should contain 'd'.")
+  {
+    datasource.add_extension(0x0);
+    CHECK("d" == call_JA4(datasource).substr(3, 1));
+  }
+
+  SECTION("Given the SNI extension is not present, "
+          "when we create a JA4 fingerprint, "
+          "then index 3 thereof should contain 'i'.")
+  {
+    datasource.add_extension(0x31);
+    CHECK("i" == call_JA4(datasource).substr(3, 1));
+  }
+
+  SECTION("Given there is one cipher, "
+          "when we create a JA4 fingerprint, "
+          "then indices [4,5] thereof should contain \"01\".")
+  {
+    datasource.add_cipher(1);
+    CHECK("01" == call_JA4(datasource).substr(4, 2));
+  }
+
+  SECTION("Given there are 9 ciphers, "
+          "when we create a JA4 fingerprint, "
+          "then indices [4,5] thereof should contain \"09\".")
+  {
+    for (int i{0}; i < 9; ++i) {
+      datasource.add_cipher(i);
+    }
+    CHECK("09" == call_JA4(datasource).substr(4, 2));
+  }
+
+  SECTION("Given there are 10 ciphers, "
+          "when we create a JA4 fingerprint, "
+          "then indices [4,5] thereof should contain \"10\".")
+  {
+    for (int i{0}; i < 10; ++i) {
+      datasource.add_cipher(i);
+    }
+    CHECK("10" == call_JA4(datasource).substr(4, 2));
+  }
+
+  SECTION("Given there are more than 99 ciphers, "
+          "when we create a JA4 fingerprint, "
+          "then indices [4,5] thereof should contain \"99\".")
+  {
+    for (int i{0}; i < 100; ++i) {
+      datasource.add_cipher(i);
+    }
+    CHECK("99" == call_JA4(datasource).substr(4, 2));
+  }
+
+  SECTION("Given the ciphers include a GREASE value, "
+          "when we create a JA4 fingerprint, "
+          "then that value should not be included in the count.")
+  {
+    datasource.add_cipher(0x0a0a);
+    datasource.add_cipher(72);
+    CHECK("01" == call_JA4(datasource).substr(4, 2));
+  }
+
+  SECTION("Given there are no extensions, "
+          "when we create a JA4 fingerprint, "
+          "then indices [6,7] thereof should contain \"00\".")
+  {
+    CHECK("00" == call_JA4(datasource).substr(6, 2));
+  }
+
+  SECTION("Given there are 9 extensions, "
+          "when we create a JA4 fingerprint, "
+          "then indices [6,7] thereof should contain \"09\".")
+  {
+    for (int i{0}; i < 9; ++i) {
+      datasource.add_extension(i);
+    }
+    CHECK("09" == call_JA4(datasource).substr(6, 2));
+  }
+
+  SECTION("Given there are 99 extensions, "
+          "when we create a JA4 fingerprint, "
+          "then indices [6,7] thereof should contain \"99\".")
+  {
+    for (int i{0}; i < 99; ++i) {
+      datasource.add_extension(i);
+    }
+    CHECK("99" == call_JA4(datasource).substr(6, 2));
+  }
+
+  SECTION("Given there are more than 99 extensions, "
+          "when we create a JA4 fingerprint, "
+          "then indices [6,7] thereof should contain \"99\".")
+  {
+    for (int i{0}; i < 100; ++i) {
+      datasource.add_extension(i);
+    }
+    CHECK("99" == call_JA4(datasource).substr(6, 2));
+  }
+
+  SECTION("Given the extensions include a GREASE value, "
+          "when we create a JA4 fingerprint, "
+          "then that value should not be included in the count.")
+  {
+    datasource.add_extension(2);
+    datasource.add_extension(0x0a0a);
+    CHECK("01" == call_JA4(datasource).substr(6, 2));
+  }
+
+  // These may be covered by the earlier tests as well, but this documents the
+  // behavior explicitly.
+  SECTION("When we create a JA4 fingerprint, "
+          "then the SNI and ALPN extensions should be included in the count.")
+  {
+    datasource.add_extension(0x0);
+    datasource.add_extension(0x10);
+    CHECK("02" == call_JA4(datasource).substr(6, 2));
+  }
+
+  SECTION("Given the ALPN value is empty, "
+          "when we create a JA4 fingerprint, "
+          "then indices [8,9] thereof should contain \"00\".")
+  {
+    datasource.set_first_alpn("");
+    CHECK("00" == call_JA4(datasource).substr(8, 2));
+  }
+
+  // This should never happen in practice because all registered ALPN values
+  // are at least 2 characters long, but it's the correct behavior according
+  // to the spec. :-)
+  SECTION("Given the ALPN value is \"a\", "
+          "when we create a JA4 fingerprint, "
+          "then indices [8,9] thereof should contain \"aa\".")
+  {
+    datasource.set_first_alpn("a");
+    CHECK("aa" == call_JA4(datasource).substr(8, 2));
+  }
+
+  SECTION("Given the ALPN value is \"h3\", "
+          "when we create a JA4 fingerprint, "
+          "then indices [8,9] thereof should contain \"h3\".")
+  {
+    datasource.set_first_alpn("h3");
+    CHECK("h3" == call_JA4(datasource).substr(8, 2));
+  }
+
+  SECTION("Given the ALPN value is \"imap\", "
+          "when we create a JA4 fingerprint, "
+          "then indices [8,9] thereof should contain \"ip\".")
+  {
+    datasource.set_first_alpn("imap");
+    CHECK("ip" == call_JA4(datasource).substr(8, 2));
+  }
+
+  SECTION("When we create a JA4 fingeprint, "
+          "then index 10 thereof should contain '_'.")
+  {
+    CHECK("_" == call_JA4(datasource).substr(10, 1));
+  }
+
+  SECTION("When we create a JA4 fingerprint, "
+          "then the b section should be passed through the hash function.")
+  {
+    char buf[36];
+    datasource.add_cipher(10);
+    CHECK(SHA256_12("000a") == ja4::generate_fingerprint(buf, datasource).substr(11, 12));
+  }
+
+  // As per the spec, we expect 4-character, comma-delimited hex values.
+  SECTION("Given only ciphers 2, 12, and 17 in that order, "
+          "when we create a JA4 fingerprint, "
+          "then the hash should be invoked with \"0002,000c,0011\".")
+  {
+    datasource.add_cipher(2);
+    datasource.add_cipher(12);
+    datasource.add_cipher(17);
+    char buf[36];
+    CHECK(SHA256_12("0002,000c,0011") == ja4::generate_fingerprint(buf, datasource).substr(11, 12));
+  }
+
+  SECTION("When we create a JA4 fingerprint, "
+          "then the cipher values should be sorted before hashing.")
+  {
+    datasource.add_cipher(17);
+    datasource.add_cipher(2);
+    datasource.add_cipher(12);
+    char buf[36];
+    CHECK(SHA256_12("0002,000c,0011") == ja4::generate_fingerprint(buf, datasource).substr(11, 12));
+  }
+
+  SECTION("When we create a JA4 fingerprint, "
+          "then GREASE values in the cipher list should be ignored.")
+  {
+    datasource.add_cipher(0x0a0a);
+    datasource.add_cipher(2);
+    char buf[36];
+    CHECK(SHA256_12("0002") == ja4::generate_fingerprint(buf, datasource).substr(11, 12));
+  }
+
+  // All the tests from now on have enough ciphers to ensure a long enough
+  // hash using our default hash (the id function) so that the length of the
+  // JA4 fingerprint will be valid.
+  datasource.add_cipher(1);
+  datasource.add_cipher(2);
+  datasource.add_cipher(3);
+
+  SECTION("When we create a JA4 fingerprint, "
+          "then we should truncate the section b hash to 12 characters.")
+  {
+    char buf[36];
+    CHECK(SHA256_12("0001,0002,0003") == ja4::generate_fingerprint(buf, datasource).substr(11, 12));
+  }
+
+  SECTION("When we create a JA4 fingeprint, "

Review Comment:
   Spelling: "fingeprint" should be "fingerprint" in this test description 
string.
   ```suggestion
     SECTION("When we create a JA4 fingerprint, "
   ```



##########
plugins/experimental/jax_fingerprint/ja4/tls_client_hello_summary.cc:
##########
@@ -22,91 +22,178 @@
 
  */
 
+#include "ts/ts.h"
+#include <plugin.h>
 #include "ja4.h"
 
-#include <algorithm>
-#include <array>
+#include "tls_client_hello_summary.h"
+
+#include <openssl/sha.h>
 #include <cstdint>
-#include <functional>
-#include <vector>
 
-namespace
+TLSClientHelloSummary::TLSClientHelloSummary(ja4::Datasource::Protocol protocol, TSClientHello ch) : _ch(ch)
 {
+  const uint8_t *buf;
+  size_t         buflen;
+
+  // Protocol
+  this->_protocol = protocol;
+
+  // Version
+  if (TS_SUCCESS == TSClientHelloExtensionGet(this->_ch, EXT_SUPPORTED_VERSIONS, &buf, &buflen)) {
+    uint16_t max_version{0};
+    size_t   versions_len = buf[0];
+
+    if (buflen < versions_len + 1) {
+      Dbg(dbg_ctl, "Malformed supported_versions extension (truncated vector)... using legacy version.");
+      this->_version = this->_ch.get_version();
+    } else {
+      for (size_t i = 1; (i + 1) < (versions_len + 1); i += 2) {
+        uint16_t version = (buf[i] << 8) | buf[i + 1];
+        if (!this->_is_GREASE(version) && version > max_version) {
+          max_version = version;
+        }
+      }
+      this->_version = max_version;
+    }
+  } else {
+    Dbg(dbg_ctl, "No supported_versions extension... using legacy version.");
+    this->_version = this->_ch.get_version();
+  }
 
-constexpr std::array<std::uint16_t, 16> GREASE_values{0x0a0a, 0x1a1a, 0x2a2a, 0x3a3a, 0x4a4a, 0x5a5a, 0x6a6a, 0x7a7a,
-                                                      0x8a8a, 0x9a9a, 0xaaaa, 0xbaba, 0xcaca, 0xdada, 0xeaea, 0xfafa};
-constexpr std::uint16_t                 extension_SNI{0x0};
-constexpr std::uint16_t                 extension_ALPN{0x10};
-
-} // end anonymous namespace
+  // Ciphers
+  buf    = this->_ch.get_cipher_suites();
+  buflen = this->_ch.get_cipher_suites_len();
+
+  if (buflen / 2 <= MAX_CIPHERS_FOR_FAST_PATH) {
+    // Fast path
+    this->_ciphers = this->_fast_cipher_storage.data();
+  } else {
+    // Slow path
+    this->_slow_cipher_storage = std::make_unique<uint16_t[]>(buflen / 2);
+    this->_ciphers             = this->_slow_cipher_storage.get();
+  }
+  for (size_t i = 0; i + 1 < buflen; i += 2) {
+    uint16_t cipher = (static_cast<uint16_t>(buf[i]) << 8) + buf[i + 1];
+    if (this->_is_GREASE(cipher)) {
+      continue;
+    }
+    this->_ciphers[++this->_n_ciphers] = cipher;
+  }
+  std::sort(this->_ciphers, this->_ciphers + this->_n_ciphers);
 
-static bool is_ignored_non_GREASE_extension(std::uint16_t extension);
+  // Extensions
+  auto count = 0;
+  for (auto &&type : this->_ch.get_extension_types()) {
+    (void)type;
+    ++count;
+  }
+  if (count <= MAX_EXTENSIONS_FOR_FAST_PATH) {
+    // Fast path
+    this->_extensions = this->_fast_extension_storage.data();
+  } else {
+    // Slow path
+    this->_slow_extension_storage = std::make_unique<uint16_t[]>(count);
+    this->_extensions             = this->_slow_extension_storage.get();
+  }
+  for (auto &&type : this->_ch.get_extension_types()) {
+    if (type == EXT_SNI) {
+      this->_has_SNI = true;
+      continue;
+    }
+    if (type == EXT_ALPN) {
+      this->_has_ALPN = true;
+      continue;
+    }
+    if (this->_is_GREASE(type)) {
+      continue;
+    }
+    this->_extensions[++this->_n_extensions] = type;
+  }

Review Comment:
   Extension storage has the same off-by-one issue as ciphers: 
`_extensions[++_n_extensions] = type` skips index 0 and makes the subsequent 
`std::sort(_extensions, _extensions + _n_extensions)` operate on uninitialized 
data. Use `_extensions[_n_extensions++] = type` (or a separate write index) so 
the array contents and count stay consistent.



##########
plugins/experimental/jax_fingerprint/ja4/tls_client_hello_summary.h:
##########
@@ -0,0 +1,53 @@
+/** @file
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+
+ */
+
+#pragma once
+

Review Comment:
   This header uses `std::array` and `std::unique_ptr` but doesn't include 
`<array>` / `<memory>` (and relies on transitive includes). Add the required 
standard headers so this file is self-contained and doesn't break depending on 
include order.
   ```suggestion
   
   #include <array>
   #include <memory>
   ```



##########
plugins/experimental/jax_fingerprint/ja4/datasource.h:
##########
@@ -0,0 +1,71 @@
+/** @file
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+
+ */
+
+#pragma once
+
+#include <string_view>
+
+constexpr uint16_t EXT_SNI{0x0};
+constexpr uint16_t EXT_ALPN{0x10};
+constexpr uint16_t EXT_SUPPORTED_VERSIONS{0x2b};

Review Comment:
   `datasource.h` uses `uint16_t` but does not include `<cstdint>` (or another 
header that guarantees the typedef). This can break compilation for translation 
units/tests that include this header without pulling in `<cstdint>` first. 
Include `<cstdint>` and consider using `std::uint16_t` for consistency.



##########
plugins/experimental/jax_fingerprint/ja4/test.cc:
##########
@@ -0,0 +1,531 @@
+/** @file
+ *
+  Unit tests for JA4 fingerprint calculation.
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+
+ */
+
+#include "ja4.h"
+#include "datasource.h"
+
+#include <catch2/catch_test_macros.hpp>
+#include <openssl/sha.h>
+
+#include <algorithm>
+#include <cctype>
+#include <cstring>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+
+namespace
+{
+
+class MockDatasource : public ja4::Datasource
+{
+public:
+  std::string_view
+  get_first_alpn() override
+  {
+    return this->_first_alpn;
+  }
+
+  void
+  get_cipher_suites_hash(unsigned char out[32]) override
+  {
+    if (this->_ciphers.empty()) {
+      memset(out, 0, 32);
+      return;
+    }
+    auto sorted = this->_ciphers;
+    std::sort(sorted.begin(), sorted.end());
+    SHA256_CTX ctx;
+    SHA256_Init(&ctx);
+    for (size_t i = 0; i < sorted.size(); ++i) {
+      char  buf[5];
+      char *p = buf;
+      if (i != 0) {
+        *p  = ',';
+        p  += 1;
+      }
+      uint16_t c   = sorted[i];
+      uint8_t  h1  = (c & 0xF000) >> 12;
+      uint8_t  l1  = (c & 0x0F00) >> 8;
+      uint8_t  h2  = (c & 0x00F0) >> 4;
+      uint8_t  l2  = c & 0x000F;
+      p[0]         = h1 <= 9 ? ('0' + h1) : ('a' + h1 - 10);
+      p[1]         = l1 <= 9 ? ('0' + l1) : ('a' + l1 - 10);
+      p[2]         = h2 <= 9 ? ('0' + h2) : ('a' + h2 - 10);
+      p[3]         = l2 <= 9 ? ('0' + l2) : ('a' + l2 - 10);
+      p           += 4;
+      SHA256_Update(&ctx, buf, p - buf);
+    }
+    SHA256_Final(out, &ctx);
+  }
+
+  void
+  get_extension_hash(unsigned char out[32]) override
+  {
+    if (this->_extensions.empty()) {
+      memset(out, 0, 32);
+      return;
+    }
+    auto sorted = this->_extensions;
+    std::sort(sorted.begin(), sorted.end());
+    SHA256_CTX ctx;
+    SHA256_Init(&ctx);
+    for (size_t i = 0; i < sorted.size(); ++i) {
+      char  buf[5];
+      char *p = buf;
+      if (i != 0) {
+        *p  = ',';
+        p  += 1;
+      }
+      uint16_t e   = sorted[i];
+      uint8_t  h1  = (e & 0xF000) >> 12;
+      uint8_t  l1  = (e & 0x0F00) >> 8;
+      uint8_t  h2  = (e & 0x00F0) >> 4;
+      uint8_t  l2  = e & 0x000F;
+      p[0]         = h1 <= 9 ? ('0' + h1) : ('a' + h1 - 10);
+      p[1]         = l1 <= 9 ? ('0' + l1) : ('a' + l1 - 10);
+      p[2]         = h2 <= 9 ? ('0' + h2) : ('a' + h2 - 10);
+      p[3]         = l2 <= 9 ? ('0' + l2) : ('a' + l2 - 10);
+      p           += 4;
+      SHA256_Update(&ctx, buf, p - buf);
+    }
+    SHA256_Final(out, &ctx);
+  }
+
+  void
+  set_protocol(ja4::Datasource::Protocol protocol)
+  {
+    this->_protocol = protocol;
+  }
+  void
+  set_version(int version)
+  {
+    this->_version = version;
+  }
+  void
+  set_first_alpn(std::string first_alpn)
+  {
+    this->_first_alpn = first_alpn;
+  }
+  void
+  add_cipher(std::uint16_t cipher)
+  {
+    if (_is_GREASE(cipher)) {
+      return;
+    }
+
+    ++this->_n_ciphers;
+    this->_ciphers.push_back(cipher);
+  }
+
+  void
+  add_extension(uint16_t extension)
+  {
+    if (EXT_SNI == extension) {
+      this->_SNI_type = SNI::to_domain;
+      this->_has_SNI  = true;
+      return;
+    }
+    if (EXT_ALPN == extension) {
+      this->_has_ALPN = true;
+      return;
+    }
+    if (_is_GREASE(extension)) {
+      return;
+    }
+
+    ++this->_n_extensions;
+    this->_extensions.push_back(extension);
+  }
+
+private:
+  std::string _first_alpn;
+
+  std::vector<std::uint16_t> _ciphers;
+  std::vector<std::uint16_t> _extensions;
+  SNI                        _SNI_type{SNI::to_IP};
+};
+
+std::string_view
+SHA256_12(std::string_view in)
+{
+  uint8_t hash[32];
+  SHA256(reinterpret_cast<const uint8_t *>(in.data()), in.size(), hash);
+
+  static char out[12];
+  for (int i = 0; i < 6; ++i) {
+    uint8_t h      = hash[i] >> 4;
+    uint8_t l      = hash[i] & 0x0F;
+    out[i * 2]     = h <= 9 ? '0' + h : 'a' + h - 10;
+    out[i * 2 + 1] = l <= 9 ? '0' + l : 'a' + l - 10;
+  }
+  return {out, sizeof(out)};
+}
+
+} // namespace
+
+static std::string call_JA4(ja4::Datasource &datasource);
+
+TEST_CASE("JA4")
+{
+  MockDatasource datasource{};
+
+  SECTION("Given the protocol is TCP, "
+          "when we create a JA4 fingerprint, "
+          "then the first character thereof should be 't'.")
+  {
+    datasource.set_protocol(ja4::Datasource::Protocol::TLS);
+
+    CHECK("t" == call_JA4(datasource).substr(0, 1));
+  }
+
+  SECTION("Given the protocol is QUIC, "
+          "when we create a JA4 fingerprint, "
+          "then the first character thereof should be 'q'.")
+  {
+    datasource.set_protocol(ja4::Datasource::Protocol::QUIC);
+    CHECK(call_JA4(datasource).starts_with('q'));
+  }
+
+  SECTION("Given the protocol is DTLS, "
+          "when we create a JA4 fingerprint, "
+          "then the first character thereof should be 'd'.")
+  {
+    datasource.set_protocol(ja4::Datasource::Protocol::DTLS);
+    CHECK(call_JA4(datasource).starts_with('d'));
+  }
+
+  SECTION("Given the TLS version is unknown, "
+          "when we create a JA4 fingerprint, "
+          "then indices [1,2] thereof should contain \"00\".")
+  {
+    datasource.set_version(0x123);
+    CHECK("00" == call_JA4(datasource).substr(1, 2));
+    datasource.set_version(0x234);
+    CHECK("00" == call_JA4(datasource).substr(1, 2));

Review Comment:
   Several sections call `call_JA4(datasource)` without first setting the 
protocol; `ja4::Datasource::_protocol` is currently uninitialized by default, 
so these tests can be non-deterministic/UB. Either initialize `_protocol` in 
`ja4::Datasource` (e.g., default to TLS) or set the protocol explicitly in each 
section before generating a fingerprint.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to