This is an automated email from the ASF dual-hosted git repository.

jvanderzee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/master by this push:
     new 43eeac9089 Add algorithm to calculate a JA4 fingerprint (#11671)
43eeac9089 is described below

commit 43eeac9089bcced8b20acbf79b08099291aa26c9
Author: JosiahWI <[email protected]>
AuthorDate: Wed Aug 14 16:53:34 2024 -0500

    Add algorithm to calculate a JA4 fingerprint (#11671)
    
    * Add algorithm to calculate a JA4 fingerprint
    
    This does not introduce a performance improvement on my system with default
    allocators, but it doesn't degrade performance either. Reserving space on
    the full JA4 result degrades performance, as does resizing it and filling in
    the parts with `std::string::replace()`. I'm adding the reserve calls on the
    individual a, b, and c sections because it's good practice and doesn't cause
    a performance issue.
---
 cmake/ExperimentalPlugins.cmake                    |   1 +
 plugins/experimental/CMakeLists.txt                |   3 +
 .../experimental/ja4_fingerprint/CMakeLists.txt    |  23 ++
 plugins/experimental/ja4_fingerprint/ja4.cc        | 162 ++++++++
 plugins/experimental/ja4_fingerprint/ja4.h         | 156 ++++++++
 plugins/experimental/ja4_fingerprint/test_ja4.cc   | 409 +++++++++++++++++++++
 .../ja4_fingerprint/tls_client_hello_summary.cc    | 103 ++++++
 7 files changed, 857 insertions(+)

diff --git a/cmake/ExperimentalPlugins.cmake b/cmake/ExperimentalPlugins.cmake
index 6836d00f8e..2b07f2c563 100644
--- a/cmake/ExperimentalPlugins.cmake
+++ b/cmake/ExperimentalPlugins.cmake
@@ -40,6 +40,7 @@ auto_option(HOOK_TRACE FEATURE_VAR BUILD_HOOK_TRACE DEFAULT 
${_DEFAULT})
 auto_option(HTTP_STATS FEATURE_VAR BUILD_HTTP_STATS DEFAULT ${_DEFAULT})
 auto_option(ICAP FEATURE_VAR BUILD_ICAP DEFAULT ${_DEFAULT})
 auto_option(INLINER FEATURE_VAR BUILD_INLINER DEFAULT ${_DEFAULT})
+auto_option(JA4_FINGERPRINT FEATURE_VAR BUILD_JA4_FINGERPRINT DEFAULT 
${_DEFAULT})
 auto_option(
   MAGICK
   FEATURE_VAR
diff --git a/plugins/experimental/CMakeLists.txt 
b/plugins/experimental/CMakeLists.txt
index 69e6f334de..bec349b33d 100644
--- a/plugins/experimental/CMakeLists.txt
+++ b/plugins/experimental/CMakeLists.txt
@@ -59,6 +59,9 @@ endif()
 if(BUILD_INLINER)
   add_subdirectory(inliner)
 endif()
+if(BUILD_JA4_FINGERPRINT)
+  add_subdirectory(ja4_fingerprint)
+endif()
 if(BUILD_MAGICK)
   add_subdirectory(magick)
 endif()
diff --git a/plugins/experimental/ja4_fingerprint/CMakeLists.txt 
b/plugins/experimental/ja4_fingerprint/CMakeLists.txt
new file mode 100644
index 0000000000..e5a6045c87
--- /dev/null
+++ b/plugins/experimental/ja4_fingerprint/CMakeLists.txt
@@ -0,0 +1,23 @@
+#######################
+#
+#  Licensed to the Apache Software Foundation (ASF) under one or more 
contributor license
+#  agreements.  See the NOTICE file distributed with this work for additional 
information regarding
+#  copyright ownership.  The ASF licenses this file to you under the Apache 
License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with the 
License.  You may obtain
+#  a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software 
distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 
KIND, either express
+#  or implied. See the License for the specific language governing permissions 
and limitations under
+#  the License.
+#
+#######################
+
+if(BUILD_TESTING)
+  add_executable(test_ja4 test_ja4.cc ja4.cc tls_client_hello_summary.cc)
+  target_link_libraries(test_ja4 PRIVATE catch2::catch2)
+
+  add_test(test_ja4 test_ja4)
+endif()
diff --git a/plugins/experimental/ja4_fingerprint/ja4.cc 
b/plugins/experimental/ja4_fingerprint/ja4.cc
new file mode 100644
index 0000000000..bab4b64273
--- /dev/null
+++ b/plugins/experimental/ja4_fingerprint/ja4.cc
@@ -0,0 +1,162 @@
+/** @file ja4.cc
+ *
+  JA4 fingerprint calculation.
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+
+ */
+
+#include "ja4.h"
+
+#include <algorithm>
+#include <cctype>
+#include <cstddef>
+#include <cstdio>
+#include <iterator>
+#include <string>
+#include <string_view>
+
+static char        convert_protocol_to_char(JA4::Protocol protocol);
+static std::string convert_TLS_version_to_string(std::string_view TLS_version);
+static std::string filter_only_digits(std::string_view src);
+static char        convert_SNI_to_char(JA4::SNI SNI_type);
+static std::string convert_count_to_two_digit_string(std::size_t count);
+static std::string convert_ALPN_to_two_char_string(std::string_view ALPN);
+static void        remove_trailing_character(std::string &s);
+static std::string hexify(std::uint16_t n);
+
+namespace
+{
+constexpr std::size_t U16_HEX_BUF_SIZE{4};
+} // end anonymous namespace
+
+// Build the raw "a" portion of the fingerprint by appending, in order: the
+// protocol character, the TLS version digits, the SNI character, the
+// two-digit cipher count, the two-digit extension count, and the
+// two-character ALPN code.
+std::string
+JA4::make_JA4_a_raw(TLSClientHelloSummary const &TLS_summary)
+{
+  // 1 (protocol) + 2 (version) + 1 (SNI) + 2 (ciphers) + 2 (extensions) +
+  // 2 (ALPN) for a TLS version made of two digits.
+  constexpr std::size_t expected_length{10};
+
+  std::string result;
+  result.reserve(expected_length);
+  result.push_back(convert_protocol_to_char(TLS_summary.protocol));
+  result.append(convert_TLS_version_to_string(TLS_summary.TLS_version));
+  result.push_back(convert_SNI_to_char(TLS_summary.SNI_type));
+  result.append(convert_count_to_two_digit_string(TLS_summary.get_cipher_count()));
+  result.append(convert_count_to_two_digit_string(TLS_summary.get_extension_count()));
+  result.append(convert_ALPN_to_two_char_string(TLS_summary.ALPN));
+  return result;
+}
+
+// The enumerator's underlying value is the fingerprint character itself.
+static char
+convert_protocol_to_char(JA4::Protocol protocol)
+{
+  auto const protocol_char{static_cast<char>(protocol)};
+  return protocol_char;
+}
+
+// Reduce a version string such as "1.2" to its digits ("12").
+static std::string
+convert_TLS_version_to_string(std::string_view TLS_version)
+{
+  std::string digits{filter_only_digits(TLS_version)};
+  if (digits.empty()) {
+    // Two spaces stand in for the version when no digits were supplied.
+    return "  ";
+  }
+  return digits;
+}
+
+// Return the decimal digits of src in their original order, dropping every
+// other character.
+static std::string
+filter_only_digits(std::string_view src)
+{
+  std::string digits;
+  // Iterate as unsigned char so std::isdigit never sees a negative value.
+  for (unsigned char const ch : src) {
+    if (std::isdigit(ch)) {
+      digits.push_back(static_cast<char>(ch));
+    }
+  }
+  return digits;
+}
+
+// The enumerator's underlying value is the fingerprint character itself.
+static char
+convert_SNI_to_char(JA4::SNI SNI_type)
+{
+  auto const sni_char{static_cast<char>(SNI_type)};
+  return sni_char;
+}
+
+// Render count as exactly two decimal digits: values above 99 are reported
+// as "99" and single-digit values are zero-padded.
+static std::string
+convert_count_to_two_digit_string(std::size_t count)
+{
+  constexpr std::size_t max_count{99};
+  // There is deliberately no lower bound of 1: an empty list shows up as "00",
+  // which is more useful when debugging than a silently corrected count.
+  std::size_t const capped{count > max_count ? max_count : count};
+
+  std::string digits{std::to_string(capped)};
+  if (digits.size() < 2) {
+    digits.insert(digits.begin(), '0');
+  }
+  return digits;
+}
+
+// Encode the ALPN value as its first and last characters; an empty value is
+// encoded as "00". A one-character value yields that character twice.
+static std::string
+convert_ALPN_to_two_char_string(std::string_view ALPN)
+{
+  if (ALPN.empty()) {
+    return "00";
+  }
+  // Two-element initializer list: {first, last}.
+  return std::string{ALPN.front(), ALPN.back()};
+}
+
+// Sort a copy of values and render it as a comma-delimited list of 4-digit
+// lowercase hex numbers (for example "0002,000c,0011"). An empty input yields
+// an empty string.
+static std::string
+make_sorted_hex_list(std::vector<std::uint16_t> values)
+{
+  std::sort(values.begin(), values.end());
+
+  std::string result;
+  // Every value contributes U16_HEX_BUF_SIZE hex digits plus one delimiter.
+  result.reserve(values.size() * (U16_HEX_BUF_SIZE + 1));
+  for (auto const value : values) {
+    result.append(hexify(value));
+    result.push_back(',');
+  }
+  // Drop the delimiter that follows the last value.
+  remove_trailing_character(result);
+  return result;
+}
+
+// Raw "b" portion: the stored cipher suites, sorted and hex encoded.
+std::string
+JA4::make_JA4_b_raw(TLSClientHelloSummary const &TLS_summary)
+{
+  return make_sorted_hex_list(TLS_summary.get_ciphers());
+}
+
+// Raw "c" portion: the stored extensions, sorted and hex encoded.
+std::string
+JA4::make_JA4_c_raw(TLSClientHelloSummary const &TLS_summary)
+{
+  return make_sorted_hex_list(TLS_summary.get_extensions());
+}
+
+// Remove the last character of s in place; an empty string is left untouched.
+static void
+remove_trailing_character(std::string &s)
+{
+  if (s.empty()) {
+    return;
+  }
+  s.pop_back();
+}
+
+// Format n as exactly four lowercase hexadecimal digits, zero-padded.
+static std::string
+hexify(std::uint16_t n)
+{
+  char buffer[U16_HEX_BUF_SIZE + 1]{};
+  // %x consumes an unsigned int, so convert explicitly instead of relying on
+  // the default promotion of std::uint16_t to (signed) int.
+  std::snprintf(buffer, sizeof(buffer), "%.4x", static_cast<unsigned int>(n));
+  return buffer;
+}
diff --git a/plugins/experimental/ja4_fingerprint/ja4.h 
b/plugins/experimental/ja4_fingerprint/ja4.h
new file mode 100644
index 0000000000..241b5de400
--- /dev/null
+++ b/plugins/experimental/ja4_fingerprint/ja4.h
@@ -0,0 +1,156 @@
+/** @file ja4.h
+ *
+  JA4 fingerprint calculation.
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <vector>
+
+namespace JA4
+{
+
+constexpr char PORTION_DELIMITER{'_'};
+
+/**
+ * Protocol recorded for a Client Hello.
+ *
+ * Each enumerator's value is the character written for it as the first
+ * character of the a portion of the fingerprint.
+ */
+enum class Protocol {
+  DTLS = 'd',
+  QUIC = 'q',
+  TLS  = 't',
+};
+
+/**
+ * Kind of SNI recorded for a Client Hello.
+ *
+ * Each enumerator's value is the character written for it in the a portion
+ * of the fingerprint, directly after the TLS version digits.
+ */
+enum class SNI {
+  to_domain = 'd',
+  to_IP     = 'i',
+};
+
+/**
+ * Represents the data sent in a TLS Client Hello needed for JA4 fingerprints.
+ *
+ * A default-constructed summary describes a TLS connection with a domain SNI,
+ * an empty TLS version, an empty ALPN, and no ciphers or extensions.
+ */
+class TLSClientHelloSummary
+{
+public:
+  using difference_type = std::iterator_traits<std::vector<std::uint16_t>::iterator>::difference_type;
+
+  // The enums are given defaults so that fingerprinting a summary whose
+  // protocol or SNI type was never assigned does not read an indeterminate
+  // value.
+  Protocol    protocol{Protocol::TLS};
+  SNI         SNI_type{SNI::to_domain};
+  std::string TLS_version;
+  std::string ALPN;
+
+  /**
+   * Get the ciphers added so far, in insertion order.
+   *
+   * @return Returns a reference to the stored cipher list.
+   */
+  std::vector<std::uint16_t> const &get_ciphers() const;
+
+  /**
+   * Record a cipher suite from the Client Hello.
+   *
+   * @param cipher The cipher suite value.
+   */
+  void add_cipher(std::uint16_t cipher);
+
+  /**
+   * Get the extensions added so far, in insertion order.
+   *
+   * @return Returns a reference to the stored extension list.
+   */
+  std::vector<std::uint16_t> const &get_extensions() const;
+
+  /**
+   * Record an extension from the Client Hello.
+   *
+   * @param extension The extension value.
+   */
+  void add_extension(std::uint16_t extension);
+
+  /**
+   * Get the number of ciphers excluding GREASE values.
+   *
+   * @return Returns the count of non-GREASE ciphers.
+   */
+  difference_type get_cipher_count() const;
+
+  /**
+   * Get the number of extensions excluding GREASE values.
+   *
+   * @return Returns the count of non-GREASE extensions.
+   */
+  difference_type get_extension_count() const;
+
+private:
+  std::vector<std::uint16_t> _ciphers;
+  std::vector<std::uint16_t> _extensions;
+  int                        _extension_count_including_sni_and_alpn{0};
+};
+
+/**
+ * Calculate the a portion of the JA4 fingerprint for the given client hello.
+ *
+ * The a portion of the fingerprint encodes the protocol, TLS version, SNI
+ * type, number of cipher suites, number of extensions, and first ALPN value.
+ *
+ * For more information see:
+ * https://github.com/FoxIO-LLC/ja4/blob/main/technical_details/JA4.md.
+ *
+ * @param TLS_summary The TLS client hello.
+ * @return Returns a string containing the a portion of the JA4 fingerprint.
+ */
+std::string make_JA4_a_raw(TLSClientHelloSummary const &TLS_summary);
+
+/**
+ * Calculate the b portion of the JA4 fingerprint for the given client hello.
+ *
+ * The b portion of the fingerprint is a comma-delimited list of lowercase hex
+ * numbers representing the cipher suites in sorted order. GREASE values are
+ * ignored.
+ *
+ * For more information see:
+ * https://github.com/FoxIO-LLC/ja4/blob/main/technical_details/JA4.md.
+ *
+ * @param TLS_summary The TLS client hello.
+ * @return Returns a string containing the b portion of the JA4 fingerprint.
+ */
+std::string make_JA4_b_raw(TLSClientHelloSummary const &TLS_summary);
+
+/**
+ * Calculate the c portion of the JA4 fingerprint for the given client hello.
+ *
+ * The c portion of the fingerprint is a comma-delimited list of lowercase hex
+ * numbers representing the extensions in sorted order. GREASE values and the
+ * SNI and ALPN extensions are ignored.
+ *
+ * For more information see:
+ * https://github.com/FoxIO-LLC/ja4/blob/main/technical_details/JA4.md.
+ *
+ * @param TLS_summary The TLS client hello.
+ * @return Returns a string containing the c portion of the JA4 fingerprint.
+ */
+std::string make_JA4_c_raw(TLSClientHelloSummary const &TLS_summary);
+
+/**
+ * Calculate the JA4 fingerprint for the given TLS client hello.
+ *
+ * The fingerprint is the raw a portion, followed by the first 12 characters
+ * of the hashed b portion and the first 12 characters of the hashed c
+ * portion, with JA4::PORTION_DELIMITER between the portions.
+ *
+ * @param TLS_summary The TLS client hello. If there was no ALPN in the
+ * Client Hello, TLS_summary.ALPN should either be empty or set to "00".
+ * Behavior is unspecified when the number of digits in
+ * TLS_summary.TLS_version is greater than 2, or when the number of digits in
+ * TLS_summary.ALPN is greater than 2 (except when TLS_summary.ALPN is empty).
+ * @param hasher A hash function. It is called once with the raw b portion
+ * and once with the raw c portion, and its result must provide substr(). For
+ * a specification-compliant JA4 fingerprint, this should be a sha256 hash.
+ * @return Returns a string containing the JA4 fingerprint.
+ */
+template <typename UnaryOp>
+std::string
+make_JA4_fingerprint(TLSClientHelloSummary const &TLS_summary, UnaryOp hasher)
+{
+  constexpr std::string::size_type hash_prefix_length{12};
+
+  std::string fingerprint{make_JA4_a_raw(TLS_summary)};
+  fingerprint += JA4::PORTION_DELIMITER;
+  fingerprint += hasher(make_JA4_b_raw(TLS_summary)).substr(0, hash_prefix_length);
+  fingerprint += JA4::PORTION_DELIMITER;
+  fingerprint += hasher(make_JA4_c_raw(TLS_summary)).substr(0, hash_prefix_length);
+  return fingerprint;
+}
+
+} // end namespace JA4
diff --git a/plugins/experimental/ja4_fingerprint/test_ja4.cc 
b/plugins/experimental/ja4_fingerprint/test_ja4.cc
new file mode 100644
index 0000000000..32be140735
--- /dev/null
+++ b/plugins/experimental/ja4_fingerprint/test_ja4.cc
@@ -0,0 +1,409 @@
+/** @file test_ja4.cc
+ *
+  Unit tests for JA4 fingerprint calculation.
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+
+ */
+
+#include "ja4.h"
+
+#define CATCH_CONFIG_MAIN
+#include <catch.hpp>
+
+#include <algorithm>
+#include <cctype>
+#include <string>
+#include <string_view>
+
+static std::string call_JA4(JA4::TLSClientHelloSummary const &TLS_summary);
+static std::string inc(std::string_view sv);
+
+TEST_CASE("JA4")
+{
+  JA4::TLSClientHelloSummary TLS_summary;
+
+  SECTION("Given the protocol is TCP, "
+          "when we create a JA4 fingerprint, "
+          "then the first character thereof should be 't'.")
+  {
+    TLS_summary.protocol = JA4::Protocol::TLS;
+
+    CHECK("t" == call_JA4(TLS_summary).substr(0, 1));
+  }
+
+  SECTION("Given the protocol is QUIC, "
+          "when we create a JA4 fingerprint, "
+          "then the first character thereof should be 'q'.")
+  {
+    TLS_summary.protocol = JA4::Protocol::QUIC;
+    CHECK(call_JA4(TLS_summary).starts_with('q'));
+  }
+
+  SECTION("Given the protocol is DTLS, "
+          "when we create a JA4 fingerprint, "
+          "then the first character thereof should be 'd'.")
+  {
+    TLS_summary.protocol = JA4::Protocol::DTLS;
+    CHECK(call_JA4(TLS_summary).starts_with('d'));
+  }
+
+  SECTION("Given the TLS version is 1.2, "
+          "when we create a JA4 fingerprint, "
+          "then indices [1,2] thereof should be \"12\".")
+  {
+    TLS_summary.TLS_version = "1.2";
+    CHECK("12" == call_JA4(TLS_summary).substr(1, 2));
+  }
+
+  // The description must name the version under test; it previously repeated
+  // "1.2" from the section above.
+  SECTION("Given the TLS version is 1.3, "
+          "when we create a JA4 fingerprint, "
+          "then indices [1,2] thereof should contain \"13\".")
+  {
+    TLS_summary.TLS_version = "1.3";
+    CHECK("13" == call_JA4(TLS_summary).substr(1, 2));
+  }
+
+  SECTION("Given the SNI is a domain name, "
+          "when we create a JA4 fingerprint, "
+          "then index 3 thereof should contain 'd'.")
+  {
+    TLS_summary.SNI_type = JA4::SNI::to_domain;
+    INFO(call_JA4(TLS_summary));
+    CHECK("d" == call_JA4(TLS_summary).substr(3, 1));
+  }
+
+  SECTION("Given the SNI is an IP, "
+          "when we create a JA4 fingerprint, "
+          "then index 3 thereof should contain 'i'.")
+  {
+    TLS_summary.SNI_type = JA4::SNI::to_IP;
+    CHECK("i" == call_JA4(TLS_summary).substr(3, 1));
+  }
+
+  SECTION("Given there is one cipher, "
+          "when we create a JA4 fingerprint, "
+          "then indices [4,5] thereof should contain \"01\".")
+  {
+    TLS_summary.add_cipher(1);
+    CHECK("01" == call_JA4(TLS_summary).substr(4, 2));
+  }
+
+  SECTION("Given there are 9 ciphers, "
+          "when we create a JA4 fingerprint, "
+          "then indices [4,5] thereof should contain \"09\".")
+  {
+    for (int i{0}; i < 9; ++i) {
+      TLS_summary.add_cipher(i);
+    }
+    CHECK("09" == call_JA4(TLS_summary).substr(4, 2));
+  }
+
+  SECTION("Given there are 10 ciphers, "
+          "when we create a JA4 fingerprint, "
+          "then indices [4,5] thereof should contain \"10\".")
+  {
+    for (int i{0}; i < 10; ++i) {
+      TLS_summary.add_cipher(i);
+    }
+    CHECK("10" == call_JA4(TLS_summary).substr(4, 2));
+  }
+
+  SECTION("Given there are more than 99 ciphers, "
+          "when we create a JA4 fingerprint, "
+          "then indices [4,5] thereof should contain \"99\".")
+  {
+    for (int i{0}; i < 100; ++i) {
+      TLS_summary.add_cipher(i);
+    }
+    CHECK("99" == call_JA4(TLS_summary).substr(4, 2));
+  }
+
+  SECTION("Given the ciphers include a GREASE value, "
+          "when we create a JA4 fingerprint, "
+          "then that value should not be included in the count.")
+  {
+    TLS_summary.add_cipher(0x0a0a);
+    TLS_summary.add_cipher(72);
+    CHECK("01" == call_JA4(TLS_summary).substr(4, 2));
+  }
+
+  SECTION("Given there are no extensions, "
+          "when we create a JA4 fingerprint, "
+          "then indices [6,7] thereof should contain \"00\".")
+  {
+    CHECK("00" == call_JA4(TLS_summary).substr(6, 2));
+  }
+
+  SECTION("Given there are 9 extensions, "
+          "when we create a JA4 fingerprint, "
+          "then indices [6,7] thereof should contain \"09\".")
+  {
+    for (int i{0}; i < 9; ++i) {
+      TLS_summary.add_extension(i);
+    }
+    CHECK("09" == call_JA4(TLS_summary).substr(6, 2));
+  }
+
+  SECTION("Given there are 99 extensions, "
+          "when we create a JA4 fingerprint, "
+          "then indices [6,7] thereof should contain \"99\".")
+  {
+    for (int i{0}; i < 99; ++i) {
+      TLS_summary.add_extension(i);
+    }
+    CHECK("99" == call_JA4(TLS_summary).substr(6, 2));
+  }
+
+  SECTION("Given there are more than 99 extensions, "
+          "when we create a JA4 fingerprint, "
+          "then indices [6,7] thereof should contain \"99\".")
+  {
+    for (int i{0}; i < 100; ++i) {
+      TLS_summary.add_extension(i);
+    }
+    CHECK("99" == call_JA4(TLS_summary).substr(6, 2));
+  }
+
+  SECTION("Given the extensions include a GREASE value, "
+          "when we create a JA4 fingerprint, "
+          "then that value should not be included in the count.")
+  {
+    TLS_summary.add_extension(2);
+    TLS_summary.add_extension(0x0a0a);
+    CHECK("01" == call_JA4(TLS_summary).substr(6, 2));
+  }
+
+  // These may be covered by the earlier tests as well, but this documents the
+  // behavior explicitly.
+  SECTION("When we create a JA4 fingerprint, "
+          "then the SNI and ALPN extensions should be included in the count.")
+  {
+    TLS_summary.add_extension(0x0);
+    TLS_summary.add_extension(0x10);
+    CHECK("02" == call_JA4(TLS_summary).substr(6, 2));
+  }
+
+  SECTION("Given the ALPN value is empty, "
+          "when we create a JA4 fingerprint, "
+          "then indices [8,9] thereof should contain \"00\".")
+  {
+    TLS_summary.ALPN = "";
+    CHECK("00" == call_JA4(TLS_summary).substr(8, 2));
+  }
+
+  // This should never happen in practice because all registered ALPN values
+  // are at least 2 characters long, but it's the correct behavior according
+  // to the spec. :-)
+  SECTION("Given the ALPN value is \"a\", "
+          "when we create a JA4 fingerprint, "
+          "then indices [8,9] thereof should contain \"aa\".")
+  {
+    TLS_summary.ALPN = "a";
+    CHECK("aa" == call_JA4(TLS_summary).substr(8, 2));
+  }
+
+  SECTION("Given the ALPN value is \"h3\", "
+          "when we create a JA4 fingerprint, "
+          "then indices [8,9] thereof should contain \"h3\".")
+  {
+    TLS_summary.ALPN = "h3";
+    CHECK("h3" == call_JA4(TLS_summary).substr(8, 2));
+  }
+
+  SECTION("Given the ALPN value is \"imap\", "
+          "when we create a JA4 fingerprint, "
+          "then indices [8,9] thereof should contain \"ip\".")
+  {
+    TLS_summary.ALPN = "imap";
+    CHECK("ip" == call_JA4(TLS_summary).substr(8, 2));
+  }
+
+  // The a portion is 10 characters long, so the first delimiter is at index 10.
+  SECTION("When we create a JA4 fingerprint, "
+          "then index 10 thereof should contain '_'.")
+  {
+    CHECK("_" == call_JA4(TLS_summary).substr(10, 1));
+  }
+
+  SECTION("When we create a JA4 fingerprint, "
+          "then the b section should be passed through the hash function.")
+  {
+    TLS_summary.add_cipher(10);
+    CHECK("111b" == JA4::make_JA4_fingerprint(TLS_summary, [](std::string_view 
sv) { return inc(sv); }).substr(11, 4));
+  }
+
+  // As per the spec, we expect 4-character, comma-delimited hex values.
+  SECTION("Given only ciphers 2, 12, and 17 in that order, "
+          "when we create a JA4 fingerprint, "
+          "then the hash should be invoked with \"0002,000c,0011\".")
+  {
+    TLS_summary.add_cipher(2);
+    TLS_summary.add_cipher(12);
+    TLS_summary.add_cipher(17);
+    bool verified{false};
+    // INFO doesn't work from inside the lambda body. :/
+    JA4::make_JA4_fingerprint(TLS_summary, [&verified](std::string_view sv) {
+      if ("0002,000c,0011" == sv) {
+        verified = true;
+      }
+      return sv;
+    });
+    CHECK(verified);
+  }
+
+  SECTION("When we create a JA4 fingerprint, "
+          "then the cipher values should be sorted before hashing.")
+  {
+    TLS_summary.add_cipher(17);
+    TLS_summary.add_cipher(2);
+    TLS_summary.add_cipher(12);
+    bool verified{false};
+    // INFO doesn't work from inside the lambda body. :/
+    JA4::make_JA4_fingerprint(TLS_summary, [&verified](std::string_view sv) {
+      if ("0002,000c,0011" == sv) {
+        verified = true;
+      }
+      return sv;
+    });
+    CHECK(verified);
+  }
+
+  SECTION("When we create a JA4 fingerprint, "
+          "then GREASE values in the cipher list should be ignored.")
+  {
+    TLS_summary.add_cipher(0x0a0a);
+    TLS_summary.add_cipher(2);
+    bool verified{false};
+    // INFO doesn't work from inside the lambda body. :/
+    JA4::make_JA4_fingerprint(TLS_summary, [&verified](std::string_view sv) {
+      if ("0002" == sv) {
+        verified = true;
+      }
+      return sv;
+    });
+    CHECK(verified);
+  }
+
+  // All the tests from now on have enough ciphers to ensure a long enough
+  // hash using our default hash (the id function) so that the length of the
+  // JA4 fingerprint will be valid.
+  TLS_summary.add_cipher(1);
+  TLS_summary.add_cipher(2);
+  TLS_summary.add_cipher(3);
+
+  SECTION("When we create a JA4 fingerprint, "
+          "then we should truncate the section b hash to 12 characters.")
+  {
+    CHECK("001,0002,000_" == JA4::make_JA4_fingerprint(TLS_summary, 
[](std::string_view sv) {
+                               return sv.empty() ? sv : sv.substr(1);
+                             }).substr(11, 13));
+  }
+
+  // 10 characters of a, one delimiter, and 12 characters of truncated b hash
+  // put the second delimiter at index 23.
+  SECTION("When we create a JA4 fingerprint, "
+          "then index 23 thereof should contain '_'.")
+  {
+    CHECK("_" == call_JA4(TLS_summary).substr(23, 1));
+  }
+
+  SECTION("When we create a JA4 fingerprint, "
+          "then the c section should be passed through the hash function.")
+  {
+    TLS_summary.add_extension(10);
+    CHECK("111b" == JA4::make_JA4_fingerprint(TLS_summary, [](std::string_view 
sv) { return inc(sv); }).substr(24, 4));
+  }
+
+  // As per the spec, we expect 4-character, comma-delimited hex values.
+  SECTION("Given only extensions 2, 12, and 17 in that order, "
+          "when we create a JA4 fingerprint, "
+          "then the hash should be invoked with \"0002,000c,0011\".")
+  {
+    TLS_summary.add_extension(2);
+    TLS_summary.add_extension(12);
+    TLS_summary.add_extension(17);
+
+    bool verified{false};
+    // INFO doesn't work from inside the lambda body. :/
+    JA4::make_JA4_fingerprint(TLS_summary, [&verified](std::string_view sv) {
+      if ("0002,000c,0011" == sv) {
+        verified = true;
+      }
+      return sv;
+    });
+    CHECK(verified);
+  }
+
+  SECTION("When we create a JA4 fingerprint, "
+          "then the extension values should be sorted before hashing.")
+  {
+    TLS_summary.add_extension(17);
+    TLS_summary.add_extension(2);
+    TLS_summary.add_extension(12);
+    bool verified{false};
+    // INFO doesn't work from inside the lambda body. :/
+    JA4::make_JA4_fingerprint(TLS_summary, [&verified](std::string_view sv) {
+      if ("0002,000c,0011" == sv) {
+        verified = true;
+      }
+      return sv;
+    });
+    CHECK(verified);
+  }
+
+  // 0x0a0a is a GREASE value, 0x0 is SNI, and 0x10 is ALPN; only extension 5
+  // should reach the hash function.
+  SECTION("When we create a JA4 fingerprint, "
+          "then we ignore GREASE, SNI, and ALPN values in the extensions.")
+  {
+    TLS_summary.add_extension(0x0a0a);
+    TLS_summary.add_extension(0x0);
+    TLS_summary.add_extension(0x10);
+    TLS_summary.add_extension(5);
+    bool verified{false};
+    // INFO doesn't work from inside the lambda body. :/
+    JA4::make_JA4_fingerprint(TLS_summary, [&verified](std::string_view sv) {
+      if ("0005" == sv) {
+        verified = true;
+      }
+      return sv;
+    });
+    CHECK(verified);
+  }
+
+  // 10 (a) + 1 + 12 (b) + 1 + 12 (c) = 36 once both raw lists are at least 12
+  // characters long under the identity hash.
+  SECTION("When we create a JA4 fingerprint, "
+          "then the total length of the fingerprint should be 36 characters.")
+  {
+    TLS_summary.add_extension(1);
+    TLS_summary.add_extension(2);
+    TLS_summary.add_extension(3);
+    CHECK(36 == call_JA4(TLS_summary).size());
+  }
+}
+
+// Compute the fingerprint with an identity "hash", so the raw b and c
+// portions (truncated to 12 characters) appear directly in the result.
+std::string
+call_JA4(JA4::TLSClientHelloSummary const &TLS_summary)
+{
+  auto const identity{[](std::string_view sv) { return sv; }};
+  return JA4::make_JA4_fingerprint(TLS_summary, identity);
+}
+
+// Toy "hash" for the tests: returns a copy of sv with every character code
+// increased by one.
+std::string
+inc(std::string_view sv)
+{
+  std::string shifted;
+  shifted.reserve(sv.size());
+  for (char const c : sv) {
+    shifted.push_back(static_cast<char>(c + 1));
+  }
+  return shifted;
+}
diff --git a/plugins/experimental/ja4_fingerprint/tls_client_hello_summary.cc 
b/plugins/experimental/ja4_fingerprint/tls_client_hello_summary.cc
new file mode 100644
index 0000000000..7efcead441
--- /dev/null
+++ b/plugins/experimental/ja4_fingerprint/tls_client_hello_summary.cc
@@ -0,0 +1,103 @@
+/** @file tls_client_hello_summary.cc
+ *
+  TLSClientHelloSummary data structure for JA4 fingerprint calculation.
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+
+ */
+
+#include "ja4.h"
+
+#include <algorithm>
+#include <array>
+#include <cstdint>
+#include <functional>
+#include <vector>
+
+namespace
+{
+
+// Values treated as GREASE for both ciphers and extensions. The list must
+// stay sorted in ascending order because is_GREASE() searches it with
+// std::binary_search.
+constexpr std::array<std::uint16_t, 16> GREASE_values{0x0a0a, 0x1a1a, 0x2a2a, 
0x3a3a, 0x4a4a, 0x5a5a, 0x6a6a, 0x7a7a,
+                                                      0x8a8a, 0x9a9a, 0xaaaa, 
0xbaba, 0xcaca, 0xdada, 0xeaea, 0xfafa};
+// Extension values that are counted by add_extension() but not stored in the
+// extension list.
+constexpr std::uint16_t                 extension_SNI{0x0};
+constexpr std::uint16_t                 extension_ALPN{0x10};
+
+} // end anonymous namespace
+
+static bool is_GREASE(std::uint16_t value);
+static bool is_ignored_non_GREASE_extension(std::uint16_t extension);
+
+// Expose the stored cipher list (GREASE values were never stored).
+std::vector<std::uint16_t> const &
+JA4::TLSClientHelloSummary::get_ciphers() const
+{
+  return _ciphers;
+}
+
+// Store the cipher unless it is a GREASE value.
+void
+JA4::TLSClientHelloSummary::add_cipher(std::uint16_t cipher)
+{
+  if (!is_GREASE(cipher)) {
+    _ciphers.push_back(cipher);
+  }
+}
+
+// Expose the stored extension list (GREASE, SNI, and ALPN were never stored).
+std::vector<std::uint16_t> const &
+JA4::TLSClientHelloSummary::get_extensions() const
+{
+  return _extensions;
+}
+
+// Record an extension: GREASE values are dropped entirely, while SNI and ALPN
+// are counted but not stored.
+void
+JA4::TLSClientHelloSummary::add_extension(std::uint16_t extension)
+{
+  if (!is_GREASE(extension)) {
+    // Every non-GREASE extension contributes to the count.
+    ++_extension_count_including_sni_and_alpn;
+    if (!is_ignored_non_GREASE_extension(extension)) {
+      _extensions.push_back(extension);
+    }
+  }
+}
+
+// Number of stored ciphers; GREASE values are excluded because add_cipher()
+// never stores them.
+JA4::TLSClientHelloSummary::difference_type
+JA4::TLSClientHelloSummary::get_cipher_count() const
+{
+  auto const stored{_ciphers.size()};
+  return static_cast<difference_type>(stored);
+}
+
+// Report whether value is one of the GREASE_values. Relies on that array
+// being sorted in ascending order.
+bool
+is_GREASE(std::uint16_t value)
+{
+  auto const candidate{std::lower_bound(GREASE_values.begin(), GREASE_values.end(), value)};
+  return candidate != GREASE_values.end() && *candidate == value;
+}
+
+// Number of non-GREASE extensions seen, including SNI and ALPN even though
+// those two are not stored in the extension list.
+JA4::TLSClientHelloSummary::difference_type
+JA4::TLSClientHelloSummary::get_extension_count() const
+{
+  return static_cast<difference_type>(_extension_count_including_sni_and_alpn);
+}
+
+// Report whether extension is SNI or ALPN, the two non-GREASE extensions that
+// are left out of the stored extension list.
+bool
+is_ignored_non_GREASE_extension(std::uint16_t extension)
+{
+  switch (extension) {
+  case extension_SNI:
+  case extension_ALPN:
+    return true;
+  default:
+    return false;
+  }
+}

Reply via email to