This is an automated email from the ASF dual-hosted git repository.

cmcfarlen pushed a commit to branch 10.1.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/10.1.x by this push:
     new c89cd60d29 Add per_server_conn metrics (#12269)
c89cd60d29 is described below

commit c89cd60d29253ea2dbd1e815ccc14fb00d2644f0
Author: Serris Lew <serrisn...@gmail.com>
AuthorDate: Tue Jun 3 13:27:11 2025 -0700

    Add per_server_conn metrics (#12269)
    
    Co-authored-by: Serris Lew <lser...@apple.com>
---
 include/iocore/net/ConnectionTracker.h             | 54 +++++++++++++++++-
 src/iocore/net/ConnectionTracker.cc                | 66 ++++++++++++++++++++--
 src/proxy/http/HttpSM.cc                           |  6 +-
 src/records/RecordsConfig.cc                       |  6 +-
 .../origin_connection/gold/two_200_ok.gold         |  9 +++
 .../per_server_connection_max.test.py              | 58 +++++++++++++++----
 6 files changed, 179 insertions(+), 20 deletions(-)

diff --git a/include/iocore/net/ConnectionTracker.h 
b/include/iocore/net/ConnectionTracker.h
index d272b78f8d..8ab6860410 100644
--- a/include/iocore/net/ConnectionTracker.h
+++ b/include/iocore/net/ConnectionTracker.h
@@ -43,6 +43,7 @@
 #include "swoc/TextView.h"
 #include <tscore/MgmtDefs.h>
 #include "iocore/net/SessionSharingAPIEnums.h"
+#include "tsutil/Metrics.h"
 
 /**
  * Singleton class to keep track of the number of inbound and outbound 
connections.
@@ -83,6 +84,8 @@ public:
   struct GlobalConfig {
     std::chrono::seconds client_alert_delay{60}; ///< Alert delay in seconds.
     std::chrono::seconds server_alert_delay{60}; ///< Alert delay in seconds.
+    bool                 metric_enabled{false};  ///< Enabling per server 
metrics.
+    std::string          metric_prefix;          ///< Per server metric prefix.
   };
 
   // The names of the configuration values.
@@ -93,6 +96,8 @@ public:
   static constexpr std::string_view 
CONFIG_SERVER_VAR_MIN{"proxy.config.http.per_server.connection.min"};
   static constexpr std::string_view 
CONFIG_SERVER_VAR_MATCH{"proxy.config.http.per_server.connection.match"};
   static constexpr std::string_view 
CONFIG_SERVER_VAR_ALERT_DELAY{"proxy.config.http.per_server.connection.alert_delay"};
+  static constexpr std::string_view 
CONFIG_SERVER_VAR_METRIC_ENABLED{"proxy.config.http.per_server.connection.metric_enabled"};
+  static constexpr std::string_view 
CONFIG_SERVER_VAR_METRIC_PREFIX{"proxy.config.http.per_server.connection.metric_prefix"};
 
   /// A record for the outbound connection count.
   /// These are stored per outbound session equivalence class, as determined 
by the session matching.
@@ -131,6 +136,11 @@ public:
     std::atomic<int>    _in_queue{0};   ///< # of connections queued, waiting 
for a connection.
     std::atomic<Ticker> _last_alert{0}; ///< Absolute time of the last alert.
 
+    // Recording data as metrics
+    ts::Metrics::Gauge::AtomicType   *_count_metric       = nullptr;
+    ts::Metrics::Counter::AtomicType *_count_total_metric = nullptr;
+    ts::Metrics::Counter::AtomicType *_blocked_metric     = nullptr;
+
     /** Constructor.
      * Construct from @c Key because the use cases do a table lookup first so 
the @c Key is already constructed.
      * @param key A populated @c Key structure - values are copied to the @c 
Group.
@@ -149,7 +159,8 @@ public:
     /// @return @c true if an alert should be generated, @c false otherwise.
     bool should_alert(std::time_t *lat = nullptr);
     /// Time of the last alert in epoch seconds.
-    std::time_t get_last_alert_epoch_time() const;
+    std::time_t        get_last_alert_epoch_time() const;
+    static std::string metric_name(const Key &key, std::string_view fqdn, 
std::string metric_prefix);
 
     /// Release the reference count to this group and remove it from the
     /// group table if it is no longer referenced.
@@ -322,6 +333,31 @@ ConnectionTracker::Group::hash(const Key &key)
   }
 }
 
+inline std::string
+ConnectionTracker::Group::metric_name(const Key &key, std::string_view fqdn, 
std::string metric_prefix)
+{
+  std::string metric_name = "";
+  char        buf[INET6_ADDRSTRLEN];
+
+  switch (key._match_type) {
+  case MATCH_IP:
+    metric_name += ats_ip_ntop(&key._addr.sa, buf, sizeof(buf));
+    break;
+  case MATCH_PORT:
+    metric_name += ats_ip_nptop(&key._addr.sa, buf, sizeof(buf));
+    break;
+  case MATCH_HOST:
+    metric_name += std::string(fqdn);
+    break;
+  case MATCH_BOTH:
+    metric_name += std::string(fqdn) + "." + ats_ip_nptop(&key._addr.sa, buf, 
sizeof(buf));
+    break;
+  default:
+    Warning("Invalid matching type to add to per_server.connections metrics");
+  }
+  return metric_prefix.empty() ? metric_name : metric_prefix + "." + 
metric_name;
+}
+
 inline bool
 ConnectionTracker::TxnState::is_active()
 {
@@ -332,6 +368,12 @@ inline int
 ConnectionTracker::TxnState::reserve()
 {
   _reserved_p = true;
+  // If metric enabled, use metric as count
+  if (_g->_count_metric != nullptr) {
+    ts::Metrics::Gauge::increment(_g->_count_metric);
+    ts::Metrics::Counter::increment(_g->_count_total_metric);
+    return _g->_count_metric->load();
+  }
   return ++_g->_count;
 }
 
@@ -340,7 +382,12 @@ ConnectionTracker::TxnState::release()
 {
   if (_reserved_p) {
     _reserved_p = false;
-    --_g->_count;
+    // If metric enabled, use metric as count
+    if (_g->_count_metric != nullptr) {
+      ts::Metrics::Gauge::decrement(_g->_count_metric);
+    } else {
+      --_g->_count;
+    }
   }
 }
 
@@ -372,6 +419,9 @@ ConnectionTracker::TxnState::update_max_count(int count)
 inline void
 ConnectionTracker::TxnState::blocked()
 {
+  if (_g->_blocked_metric != nullptr) {
+    ts::Metrics::Counter::increment(_g->_blocked_metric);
+  }
   ++_g->_blocked;
 }
 
diff --git a/src/iocore/net/ConnectionTracker.cc 
b/src/iocore/net/ConnectionTracker.cc
index 8401ecbf06..43323318a9 100644
--- a/src/iocore/net/ConnectionTracker.cc
+++ b/src/iocore/net/ConnectionTracker.cc
@@ -149,6 +149,30 @@ Config_Update_Conntrack_Client_Alert_Delay(const char 
*name, RecDataT dtype, Rec
   return Config_Update_Conntrack_Server_Alert_Delay_Helper(name, dtype, data, 
cookie, config->client_alert_delay);
 }
 
+bool
+Config_Update_Conntrack_Metric_Enabled(const char * /* name ATS_UNUSED */, 
RecDataT dtype, RecData data, void *cookie)
+{
+  auto config = static_cast<ConnectionTracker::GlobalConfig *>(cookie);
+
+  if (RECD_INT == dtype) {
+    config->metric_enabled = data.rec_int;
+    return true;
+  }
+  return false;
+}
+
+bool
+Config_Update_Conntrack_Metric_Prefix(const char * /* name ATS_UNUSED */, 
RecDataT dtype, RecData data, void *cookie)
+{
+  auto config = static_cast<ConnectionTracker::GlobalConfig *>(cookie);
+
+  if (RECD_STRING == dtype) {
+    config->metric_prefix = data.rec_string;
+    return true;
+  }
+  return false;
+}
+
 } // namespace
 
 void
@@ -162,6 +186,8 @@ ConnectionTracker::config_init(GlobalConfig *global, 
TxnConfig *txn, RecConfigUp
   Enable_Config_Var(CONFIG_SERVER_VAR_MAX, &Config_Update_Conntrack_Max, 
config_cb, txn);
   Enable_Config_Var(CONFIG_SERVER_VAR_MATCH, &Config_Update_Conntrack_Match, 
config_cb, txn);
   Enable_Config_Var(CONFIG_SERVER_VAR_ALERT_DELAY, 
&Config_Update_Conntrack_Server_Alert_Delay, config_cb, global);
+  Enable_Config_Var(CONFIG_SERVER_VAR_METRIC_ENABLED, 
&Config_Update_Conntrack_Metric_Enabled, config_cb, global);
+  Enable_Config_Var(CONFIG_SERVER_VAR_METRIC_PREFIX, 
&Config_Update_Conntrack_Metric_Prefix, config_cb, global);
 }
 
 ConnectionTracker::TxnState
@@ -214,6 +240,19 @@ ConnectionTracker::Group::Group(DirectionType direction, 
Key const &key, std::st
     _alert_delay{direction == DirectionType::INBOUND ? 
_global_config->client_alert_delay : _global_config->server_alert_delay}
 {
   Metrics::Gauge::increment(net_rsb.connection_tracker_table_size);
+  // only add metrics for server connections
+  if (_global_config->metric_enabled && direction == DirectionType::OUTBOUND) {
+    std::string _metric_name = metric_name(key, fqdn, 
_global_config->metric_prefix);
+    _count_metric            = 
Metrics::Gauge::createPtr("proxy.process.http.per_server.current_connection.", 
_metric_name);
+    _count_total_metric      = 
Metrics::Counter::createPtr("proxy.process.http.per_server.total_connection.", 
_metric_name);
+    _blocked_metric          = 
Metrics::Counter::createPtr("proxy.process.http.per_server.blocked_connection.",
 _metric_name);
+
+    if (dbg_ctl.on()) {
+      swoc::LocalBufferWriter<256> w;
+      w.print("Registered per_server_connection.{}\0", _metric_name);
+      DbgPrint(dbg_ctl, "%s", w.data());
+    }
+  }
   // store the host name if relevant.
   if (MATCH_HOST == _match_type || MATCH_BOTH == _match_type) {
     _fqdn.assign(fqdn);
@@ -284,7 +323,25 @@ ConnectionTracker::Group::should_alert(std::time_t *lat)
 void
 ConnectionTracker::Group::release()
 {
-  if (_count > 0) {
+  // If metric enabled, use metric as count
+  if (_count_metric != nullptr) {
+    if (_count_metric->load() > 0) {
+      ts::Metrics::Gauge::decrement(_count_metric);
+      if (_count_metric->load() == 0) {
+        TableSingleton             &table = _direction == 
DirectionType::INBOUND ? _inbound_table : _outbound_table;
+        std::lock_guard<std::mutex> lock(table._mutex); // Table lock
+        if (_count_metric->load() > 0) {
+          // Someone else grabbed the Group between our last check and taking 
the
+          // lock.
+          return;
+        }
+        table._table.erase(_key);
+      }
+    } else {
+      // A bit dubious, as there's no guarantee it's still negative, but even 
that would be interesting to know.
+      Error("Number of tracked connections should be greater than or equal to 
zero: %" PRId64, _count_metric->load());
+    }
+  } else if (_count > 0) {
     if (--_count == 0) {
       TableSingleton             &table = _direction == DirectionType::INBOUND 
? _inbound_table : _outbound_table;
       std::lock_guard<std::mutex> lock(table._mutex); // Table lock
@@ -331,8 +388,8 @@ ConnectionTracker::outbound_to_json_string()
   static const std::string_view trailer{" \n]}"};
 
   static const auto printer = [](swoc::BufferWriter &w, Group const *g) -> 
swoc::BufferWriter & {
-    w.print(item_fmt, g->_match_type, g->_addr, g->_fqdn, g->_count.load(), 
g->_count_max.load(), g->_blocked.load(),
-            g->get_last_alert_epoch_time());
+    w.print(item_fmt, g->_match_type, g->_addr, g->_fqdn, g->_count_metric != 
nullptr ? g->_count_metric->load() : g->_count.load(),
+            g->_count_max.load(), g->_blocked.load(), 
g->get_last_alert_epoch_time());
     return w;
   };
 
@@ -373,7 +430,8 @@ ConnectionTracker::dump(FILE *f)
 
     for (std::shared_ptr<Group const> g : groups) {
       swoc::LocalBufferWriter<128> w;
-      w.print("{:7} | {:5} | {:24} | {:33} | {:8} |\n", g->_count.load(), 
g->_blocked.load(), g->_addr, g->_hash, g->_match_type);
+      w.print("{:7} | {:5} | {:24} | {:33} | {:8} |\n", g->_count_metric != 
nullptr ? g->_count_metric->load() : g->_count.load(),
+              g->_blocked.load(), g->_addr, g->_hash, g->_match_type);
       fwrite(w.data(), w.size(), 1, f);
     }
 
diff --git a/src/proxy/http/HttpSM.cc b/src/proxy/http/HttpSM.cc
index bba5a4da65..4324fc334f 100644
--- a/src/proxy/http/HttpSM.cc
+++ b/src/proxy/http/HttpSM.cc
@@ -5542,7 +5542,8 @@ HttpSM::do_http_server_open(bool raw, bool only_direct)
   }
 
   // See if the outbound connection tracker data is needed. If so, get it here 
for consistency.
-  if (t_state.txn_conf->connection_tracker_config.server_max > 0 || 
t_state.txn_conf->connection_tracker_config.server_min > 0) {
+  if (t_state.txn_conf->connection_tracker_config.server_max > 0 || 
t_state.txn_conf->connection_tracker_config.server_min > 0 ||
+      
t_state.http_config_param->global_connection_tracker_config.metric_enabled) {
     t_state.outbound_conn_track_state =
       
ConnectionTracker::obtain_outbound(t_state.txn_conf->connection_tracker_config,
                                          
std::string_view{t_state.current.server->name}, 
t_state.current.server->dst_addr);
@@ -5569,6 +5570,9 @@ HttpSM::do_http_server_open(bool raw, bool only_direct)
     }
 
     ct_state.update_max_count(ccount);
+  } else if 
(t_state.http_config_param->global_connection_tracker_config.metric_enabled) {
+    auto &ct_state = t_state.outbound_conn_track_state;
+    ct_state.reserve();
   }
 
   // We did not manage to get an existing session and need to open a new 
connection
diff --git a/src/records/RecordsConfig.cc b/src/records/RecordsConfig.cc
index c612e2d438..0022e1c8df 100644
--- a/src/records/RecordsConfig.cc
+++ b/src/records/RecordsConfig.cc
@@ -367,12 +367,16 @@ static const RecordElement RecordsConfig[] =
   ,
   {RECT_CONFIG, "proxy.config.http.per_server.connection.max", RECD_INT, "0", 
RECU_DYNAMIC, RR_NULL, RECC_STR, "^[0-9]+$", RECA_NULL}
   ,
-  {RECT_CONFIG, "proxy.config.http.per_server.connection.match", RECD_STRING, 
"both", RECU_DYNAMIC, RR_NULL, RECC_STR, "^(?:ip|host|both|none)$", RECA_NULL}
+  {RECT_CONFIG, "proxy.config.http.per_server.connection.match", RECD_STRING, 
"both", RECU_DYNAMIC, RR_NULL, RECC_STR, "^(?:ip|port|host|both|none)$", 
RECA_NULL}
         ,
   {RECT_CONFIG, "proxy.config.http.per_server.connection.alert_delay", 
RECD_INT, "60", RECU_DYNAMIC, RR_NULL, RECC_STR, "^[0-9]+$", RECA_NULL}
         ,
   {RECT_CONFIG, "proxy.config.http.per_server.connection.min", RECD_INT, "0", 
RECU_DYNAMIC, RR_NULL, RECC_STR, "^[0-9]+$", RECA_NULL}
   ,
+  {RECT_CONFIG, "proxy.config.http.per_server.connection.metric_enabled", 
RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_STR, "[0-1]", RECA_NULL}
+  ,
+  {RECT_CONFIG, "proxy.config.http.per_server.connection.metric_prefix", 
RECD_STRING, "", RECU_DYNAMIC, RR_NULL, RECC_NULL, nullptr, RECA_NULL}
+  ,
   {RECT_CONFIG, "proxy.config.http.attach_server_session_to_client", RECD_INT, 
"0", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-1]", RECA_NULL}
   ,
   {RECT_CONFIG, "proxy.config.http.max_proxy_cycles", RECD_INT, "0", 
RECU_DYNAMIC, RR_NULL, RECC_STR, "^[0-9]+$", RECA_NULL}
diff --git a/tests/gold_tests/origin_connection/gold/two_200_ok.gold 
b/tests/gold_tests/origin_connection/gold/two_200_ok.gold
new file mode 100644
index 0000000000..e7833998be
--- /dev/null
+++ b/tests/gold_tests/origin_connection/gold/two_200_ok.gold
@@ -0,0 +1,9 @@
+``
+> CONNECT``
+``
+< HTTP/1.1 200 OK
+``
+> CONNECT``
+``
+< HTTP/1.1 200 OK
+``
diff --git 
a/tests/gold_tests/origin_connection/per_server_connection_max.test.py 
b/tests/gold_tests/origin_connection/per_server_connection_max.test.py
index 6c7253fdb3..998ddd9775 100644
--- a/tests/gold_tests/origin_connection/per_server_connection_max.test.py
+++ b/tests/gold_tests/origin_connection/per_server_connection_max.test.py
@@ -49,13 +49,29 @@ class PerServerConnectionMaxTest:
                 'proxy.config.dns.nameservers': 
f"127.0.0.1:{self._dns.Variables.Port}",
                 'proxy.config.dns.resolv_conf': 'NULL',
                 'proxy.config.diags.debug.enabled': 1,
-                'proxy.config.diags.debug.tags': 'http',
+                'proxy.config.diags.debug.tags': 'http|conn_track',
                 'proxy.config.http.per_server.connection.max': 
self._origin_max_connections,
+                'proxy.config.http.per_server.connection.metric_enabled': 1,
+                'proxy.config.http.per_server.connection.metric_prefix': 'foo',
+                'proxy.config.http.per_server.connection.match': 'port',
             })
         self._ts.Disk.diags_log.Content += Testers.ContainsExpression(
             f'WARNING:.*too many 
connections:.*limit={self._origin_max_connections}',
             'Verify the user is warned about the connection limit being hit.')
 
+    def _test_metrics(self) -> None:
+        """Use traffic_ctl to test metrics."""
+        tr = Test.AddTestRun("Check connection metrics")
+        tr.Processes.Default.Command = 'traffic_ctl metric match per_server'
+        tr.Processes.Default.ReturnCode = 0
+        tr.Processes.Default.Env = self._ts.Env
+        tr.Processes.Default.Streams.All = Testers.ContainsExpression(
+            
f'per_server.total_connection.foo.127.0.0.1:{self._server.Variables.http_port} 
4',
+            'incorrect statistic return, or possible error.')
+        tr.Processes.Default.Streams.All = Testers.ContainsExpression(
+            
f'per_server.blocked_connection.foo.127.0.0.1:{self._server.Variables.http_port}
 1',
+            'incorrect statistic return, or possible error.')
+
     def run(self) -> None:
         """Configure the TestRun."""
         tr = Test.AddTestRun('Verify we enforce 
proxy.config.http.per_server.connection.max')
@@ -65,18 +81,19 @@ class PerServerConnectionMaxTest:
 
         tr.AddVerifierClientProcess('client', self._replay_file, 
http_ports=[self._ts.Variables.port])
 
+        self._test_metrics()
+
 
 class ConnectMethodTest:
     """Test our max origin connection behavior with CONNECT traffic."""
 
     _client_counter: int = 0
-    _origin_max_connections: int = 3
 
-    def __init__(self) -> None:
+    def __init__(self, max_conn) -> None:
         """Configure the server processes in preparation for the TestRun."""
         self._configure_dns()
         self._configure_origin_server()
-        self._configure_trafficserver()
+        self._configure_trafficserver(max_conn)
 
     def _configure_dns(self) -> None:
         """Configure a nameserver for the test."""
@@ -86,18 +103,19 @@ class ConnectMethodTest:
         """Configure the httpbin origin server."""
         self._server = Test.MakeHttpBinServer("server2")
 
-    def _configure_trafficserver(self) -> None:
-        self._ts = Test.MakeATSProcess("ts2")
+    def _configure_trafficserver(self, max_conn) -> None:
+        self._ts = Test.MakeATSProcess("ts2_" + str(max_conn))
 
         self._ts.Disk.records_config.update(
             {
                 'proxy.config.dns.nameservers': 
f"127.0.0.1:{self._dns.Variables.Port}",
                 'proxy.config.dns.resolv_conf': 'NULL',
                 'proxy.config.diags.debug.enabled': 1,
-                'proxy.config.diags.debug.tags': 'http|dns|hostdb',
+                'proxy.config.diags.debug.tags': 'http|dns|hostdb|conn_track',
                 'proxy.config.http.server_ports': f"{self._ts.Variables.port}",
                 'proxy.config.http.connect_ports': 
f"{self._server.Variables.Port}",
-                'proxy.config.http.per_server.connection.max': 
self._origin_max_connections,
+                'proxy.config.http.per_server.connection.metric_enabled': 1,
+                'proxy.config.http.per_server.connection.max': max_conn,
             })
 
         self._ts.Disk.remap_config.AddLines([
@@ -111,7 +129,20 @@ class ConnectMethodTest:
         tr.MakeCurlCommand(f"-v --fail -s -p -x 
127.0.0.1:{self._ts.Variables.port} 'http://foo.com/delay/2'", p=p)
         return p
 
-    def run(self) -> None:
+    def _test_metrics(self, blocked) -> None:
+        """Use traffic_ctl to test metrics."""
+        tr = Test.AddTestRun("Check connection metrics")
+        tr.Processes.Default.Command = 'traffic_ctl metric match per_server'
+        tr.Processes.Default.ReturnCode = 0
+        tr.Processes.Default.Env = self._ts.Env
+        tr.Processes.Default.Streams.All = Testers.ContainsExpression(
+            
f'per_server.total_connection.www.this.origin.com.127.0.0.1:{self._server.Variables.Port}
 5',
+            'incorrect statistic return, or possible error.')
+        tr.Processes.Default.Streams.All = Testers.ContainsExpression(
+            
f'per_server.blocked_connection.www.this.origin.com.127.0.0.1:{self._server.Variables.Port}
 {blocked}',
+            'incorrect statistic return, or possible error.')
+
+    def run(self, blocked, gold_file) -> None:
         """Verify per_server.connection.max with CONNECT traffic."""
         tr = Test.AddTestRun()
         tr.Processes.Default.StartBefore(self._dns)
@@ -134,10 +165,13 @@ class ConnectMethodTest:
             f"--next -v --fail -s -p -x 127.0.0.1:{self._ts.Variables.port} 
'http://foo.com/get'")
         # Curl will have a 22 exit code if it receives a 5XX response (and we
         # expect a 503).
-        tr.Processes.Default.ReturnCode = 22
-        tr.Processes.Default.Streams.stderr = "gold/two_503_congested.gold"
+        tr.Processes.Default.ReturnCode = 22 if blocked else 0
+        tr.Processes.Default.Streams.stderr = gold_file
         tr.Processes.Default.TimeOut = 3
 
+        self._test_metrics(blocked)
+
 
 PerServerConnectionMaxTest().run()
-ConnectMethodTest().run()
+ConnectMethodTest(3).run(blocked=2, gold_file="gold/two_503_congested.gold")
+ConnectMethodTest(0).run(blocked=0, gold_file="gold/two_200_ok.gold")

Reply via email to