This is an automated email from the ASF dual-hosted git repository.
bneradt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/master by this push:
new 75f194c5dc Fix user_agent_session_miss for ATS SSL session cache
(#12405)
75f194c5dc is described below
commit 75f194c5dc50ea7bb1c21594a6456fec8182a8dd
Author: Brian Neradt <[email protected]>
AuthorDate: Mon Sep 22 15:35:27 2025 -0500
Fix user_agent_session_miss for ATS SSL session cache (#12405)
proxy.config.ssl.session_cache.value allows the user to configure
whether OpenSSL internally manages the TLS session cache for resumption
or whether ATS does via callbacks. For our user_agent_session_* metrics,
we use the SSL_CTX_sess_* OpenSSL API to populate them. This works for
hit, timeout, etc., both when the session cache is managed by OpenSSL
and when it is managed by ATS, with the exception of
SSL_CTX_sess_misses. That metric only counts internal cache misses:
> SSL_CTX_sess_misses() returns the number of sessions proposed by
> clients that were not found in the internal session cache in server
> mode.
The result is that when ATS manages the session cache, which is the
default configuration, proxy.process.ssl.user_agent_session_miss is
always zero. This patch uses our ssl_session_cache_miss metric to
augment the stat to be accurate for ATS-managed caching.
---
.../monitoring/statistics/core/ssl.en.rst | 10 ++++-
src/iocore/net/SSLStats.cc | 43 ++++++++++++++++++++++
src/iocore/net/SSLStats.h | 17 +++++++--
src/iocore/net/TLSSessionResumptionSupport.cc | 2 +
4 files changed, 66 insertions(+), 6 deletions(-)
diff --git a/doc/admin-guide/monitoring/statistics/core/ssl.en.rst
b/doc/admin-guide/monitoring/statistics/core/ssl.en.rst
index 22bed6280c..efef309c22 100644
--- a/doc/admin-guide/monitoring/statistics/core/ssl.en.rst
+++ b/doc/admin-guide/monitoring/statistics/core/ssl.en.rst
@@ -100,9 +100,15 @@ SSL/TLS
.. ts:stat:: global proxy.process.ssl.ssl_session_cache_miss integer
:type: counter
+.. ts:stat:: global proxy.process.ssl.ssl_session_cache_timeout integer
+ :type: counter
+
.. ts:stat:: global proxy.process.ssl.ssl_origin_session_cache_miss integer
:type: counter
+.. ts:stat:: global proxy.process.ssl.ssl_origin_session_cache_timeout integer
+ :type: counter
+
.. ts:stat:: global proxy.process.ssl.ssl_session_cache_new_session integer
:type: counter
@@ -247,8 +253,8 @@ SSL/TLS
.. ts:stat:: global proxy.process.ssl.user_agent_session_miss integer
:type: counter
- Incoming client SSL connections which unsuccessfully attempted to use a
- previously negotiated session, since statistics collection began.
+ Incoming client SSL connections which proposed a session for resumption that
+ was not found in the session cache, since statistics collection began.
.. ts:stat:: global proxy.process.ssl.user_agent_sessions integer
:type: counter
diff --git a/src/iocore/net/SSLStats.cc b/src/iocore/net/SSLStats.cc
index 8f2e6597a9..440846731f 100644
--- a/src/iocore/net/SSLStats.cc
+++ b/src/iocore/net/SSLStats.cc
@@ -113,6 +113,7 @@ void
SSLPeriodicMetricsUpdate()
{
SSLCertificateConfig::scoped_config certLookup;
+ SSLConfig::scoped_config sslConfig;
int64_t sessions = 0;
int64_t hits = 0;
@@ -120,6 +121,43 @@ SSLPeriodicMetricsUpdate()
int64_t timeouts = 0;
Dbg(dbg_ctl_ssl, "Starting to update the new session metrics");
+
+ // Check if we're using the ATS session cache implementation rather than the
+ // OpenSSL internal cache.
+ bool const using_ats_session_cache =
+ sslConfig && sslConfig->ssl_session_cache ==
SSLConfigParams::SSL_SESSION_CACHE_MODE_SERVER_ATS_IMPL;
+
+ if (using_ats_session_cache) {
+ // Most of the SSL_CTX_sess_*() metrics are inclusive of OpenSSL's
+ // "internal" cache *and* the ATS "external" cache. The exception is the
+ // SSL_CTX_sess_misses() metric, which curiously only counts OpenSSL
+ // internal misses. Therefore, to make that metric accurate for the
+ // situation where ATS manages sessions via its own cache, which is the
+ // default configuration (see proxy.config.ssl.session_cache.value), we
+ // have to add in the misses we've counted in the
+ // TLSSessionResumptionSupport.cc callback hooks.
+
+ // We count timeouts as misses in TLSSessionResumptionSupport.cc for
+ // session_cache_miss, whereas OpenSSL tracks them separately and our
+ // user_agent_session_miss follows suit.
+ int64_t session_cache_timeouts = 0;
+ if (ssl_rsb.session_cache_timeout) {
+ session_cache_timeouts =
Metrics::Counter::load(ssl_rsb.session_cache_timeout);
+ }
+#if defined(OPENSSL_IS_BORINGSSL)
+ // On BoringSSL, all SSL_CTX_sess_*() functions always return 0 for the ATS
+ // external cache, making them unusable for monitoring. We currently address
+ // hits and misses because they are the most relevant metrics for session
+ // cache performance monitoring and should be treated as a pair.
+ if (ssl_rsb.session_cache_hit) {
+ hits = Metrics::Counter::load(ssl_rsb.session_cache_hit);
+ }
+#endif
+ if (ssl_rsb.session_cache_miss) {
+ misses = Metrics::Counter::load(ssl_rsb.session_cache_miss);
+ misses -= (session_cache_timeouts > misses) ? 0 : session_cache_timeouts;
+ }
+ }
if (certLookup) {
const unsigned ctxCount = certLookup->count();
for (size_t i = 0; i < ctxCount; i++) {
@@ -136,6 +174,9 @@ SSLPeriodicMetricsUpdate()
}
}
+ // Store cumulative session statistics as gauges. These metrics represent cumulative
+ // counters semantically but are implemented as gauges because they need to be "set"
+ // to values read from external counter sources (OpenSSL and/or ATS session cache).
Metrics::Gauge::store(ssl_rsb.user_agent_sessions, sessions);
Metrics::Gauge::store(ssl_rsb.user_agent_session_hit, hits);
Metrics::Gauge::store(ssl_rsb.user_agent_session_miss, misses);
@@ -181,11 +222,13 @@ SSLInitializeStatistics()
ssl_rsb.sni_name_set_failure =
Metrics::Counter::createPtr("proxy.process.ssl.ssl_sni_name_set_failure");
ssl_rsb.origin_session_cache_hit =
Metrics::Counter::createPtr("proxy.process.ssl.ssl_origin_session_cache_hit");
ssl_rsb.origin_session_cache_miss =
Metrics::Counter::createPtr("proxy.process.ssl.ssl_origin_session_cache_miss");
+ ssl_rsb.origin_session_cache_timeout =
Metrics::Counter::createPtr("proxy.process.ssl.ssl_origin_session_cache_timeout");
ssl_rsb.session_cache_eviction =
Metrics::Counter::createPtr("proxy.process.ssl.ssl_session_cache_eviction");
ssl_rsb.session_cache_hit =
Metrics::Counter::createPtr("proxy.process.ssl.ssl_session_cache_hit");
ssl_rsb.session_cache_lock_contention =
Metrics::Counter::createPtr("proxy.process.ssl.ssl_session_cache_lock_contention");
ssl_rsb.session_cache_miss =
Metrics::Counter::createPtr("proxy.process.ssl.ssl_session_cache_miss");
ssl_rsb.session_cache_new_session =
Metrics::Counter::createPtr("proxy.process.ssl.ssl_session_cache_new_session");
+ ssl_rsb.session_cache_timeout =
Metrics::Counter::createPtr("proxy.process.ssl.ssl_session_cache_timeout");
ssl_rsb.total_attempts_handshake_count_in =
Metrics::Counter::createPtr("proxy.process.ssl.total_attempts_handshake_count_in");
ssl_rsb.total_attempts_handshake_count_out =
Metrics::Counter::createPtr("proxy.process.ssl.total_attempts_handshake_count_out");
ssl_rsb.total_dyn_def_tls_record_count =
Metrics::Counter::createPtr("proxy.process.ssl.default_record_size_count");
diff --git a/src/iocore/net/SSLStats.h b/src/iocore/net/SSLStats.h
index 8f879487c6..9e2cdc428f 100644
--- a/src/iocore/net/SSLStats.h
+++ b/src/iocore/net/SSLStats.h
@@ -55,11 +55,13 @@ struct SSLStatsBlock {
Metrics::Counter::AtomicType *origin_server_wrong_version
= nullptr;
Metrics::Counter::AtomicType *origin_session_cache_hit
= nullptr;
Metrics::Counter::AtomicType *origin_session_cache_miss
= nullptr;
+ Metrics::Counter::AtomicType *origin_session_cache_timeout
= nullptr;
Metrics::Counter::AtomicType *origin_session_reused_count
= nullptr;
Metrics::Counter::AtomicType *session_cache_eviction
= nullptr;
Metrics::Counter::AtomicType *session_cache_hit
= nullptr;
Metrics::Counter::AtomicType *session_cache_lock_contention
= nullptr;
Metrics::Counter::AtomicType *session_cache_miss
= nullptr;
+ Metrics::Counter::AtomicType *session_cache_timeout
= nullptr;
Metrics::Counter::AtomicType *session_cache_new_session
= nullptr;
Metrics::Counter::AtomicType *sni_name_set_failure
= nullptr;
Metrics::Counter::AtomicType *total_attempts_handshake_count_in
= nullptr;
@@ -96,10 +98,17 @@ struct SSLStatsBlock {
Metrics::Counter::AtomicType *user_agent_version_too_high
= nullptr;
Metrics::Counter::AtomicType *user_agent_version_too_low
= nullptr;
Metrics::Counter::AtomicType *user_agent_wrong_version
= nullptr;
- Metrics::Gauge::AtomicType *user_agent_session_hit
= nullptr;
- Metrics::Gauge::AtomicType *user_agent_session_miss
= nullptr;
- Metrics::Gauge::AtomicType *user_agent_session_timeout
= nullptr;
- Metrics::Gauge::AtomicType *user_agent_sessions
= nullptr;
+
+ // Note: The following user_agent_session_* metrics are implemented as Gauge types
+ // even though they semantically represent cumulative counters. This is because
+ // they are periodically synchronized from external counter sources (OpenSSL's
+ // built-in session cache or ATS's session cache) and need to be "set" to specific
+ // values rather than incremented. From a monitoring perspective, these should be
+ // treated as counters for calculating rates.
+ Metrics::Gauge::AtomicType *user_agent_session_hit = nullptr;
+ Metrics::Gauge::AtomicType *user_agent_session_miss = nullptr;
+ Metrics::Gauge::AtomicType *user_agent_session_timeout = nullptr;
+ Metrics::Gauge::AtomicType *user_agent_sessions = nullptr;
};
extern SSLStatsBlock ssl_rsb;
diff --git a/src/iocore/net/TLSSessionResumptionSupport.cc
b/src/iocore/net/TLSSessionResumptionSupport.cc
index 68d8607617..1fac030a7c 100644
--- a/src/iocore/net/TLSSessionResumptionSupport.cc
+++ b/src/iocore/net/TLSSessionResumptionSupport.cc
@@ -194,6 +194,7 @@ TLSSessionResumptionSupport::getSession(SSL *ssl, const
unsigned char *id, int l
// Double check the timeout
if (is_ssl_session_timed_out(session)) {
Metrics::Counter::increment(ssl_rsb.session_cache_miss);
+ Metrics::Counter::increment(ssl_rsb.session_cache_timeout);
// Due to bug in openssl, the timeout is checked, but only removed
// from the openssl built-in hash table. The external remove cb is not called
#if 0 // This is currently eliminated, since it breaks things in odd ways (see
TS-3710)
@@ -224,6 +225,7 @@ TLSSessionResumptionSupport::getOriginSession(const
std::string &lookup_key)
// Double check the timeout
if (is_ssl_session_timed_out(shared_sess.get())) {
Metrics::Counter::increment(ssl_rsb.origin_session_cache_miss);
+ Metrics::Counter::increment(ssl_rsb.origin_session_cache_timeout);
origin_sess_cache->remove_session(lookup_key);
shared_sess.reset();
} else {