This is an automated email from the ASF dual-hosted git repository. cmcfarlen pushed a commit to branch 10.1.x in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/10.1.x by this push: new c89cd60d29 Add per_server_conn metrics (#12269) c89cd60d29 is described below commit c89cd60d29253ea2dbd1e815ccc14fb00d2644f0 Author: Serris Lew <serrisn...@gmail.com> AuthorDate: Tue Jun 3 13:27:11 2025 -0700 Add per_server_conn metrics (#12269) Co-authored-by: Serris Lew <lser...@apple.com> --- include/iocore/net/ConnectionTracker.h | 54 +++++++++++++++++- src/iocore/net/ConnectionTracker.cc | 66 ++++++++++++++++++++-- src/proxy/http/HttpSM.cc | 6 +- src/records/RecordsConfig.cc | 6 +- .../origin_connection/gold/two_200_ok.gold | 9 +++ .../per_server_connection_max.test.py | 58 +++++++++++++++---- 6 files changed, 179 insertions(+), 20 deletions(-) diff --git a/include/iocore/net/ConnectionTracker.h b/include/iocore/net/ConnectionTracker.h index d272b78f8d..8ab6860410 100644 --- a/include/iocore/net/ConnectionTracker.h +++ b/include/iocore/net/ConnectionTracker.h @@ -43,6 +43,7 @@ #include "swoc/TextView.h" #include <tscore/MgmtDefs.h> #include "iocore/net/SessionSharingAPIEnums.h" +#include "tsutil/Metrics.h" /** * Singleton class to keep track of the number of inbound and outbound connections. @@ -83,6 +84,8 @@ public: struct GlobalConfig { std::chrono::seconds client_alert_delay{60}; ///< Alert delay in seconds. std::chrono::seconds server_alert_delay{60}; ///< Alert delay in seconds. + bool metric_enabled{false}; ///< Enabling per server metrics. + std::string metric_prefix; ///< Per server metric prefix. }; // The names of the configuration values. @@ -93,6 +96,8 @@ public: static constexpr std::string_view CONFIG_SERVER_VAR_MIN{"proxy.config.http.per_server.connection.min"}; static constexpr std::string_view CONFIG_SERVER_VAR_MATCH{"proxy.config.http.per_server.connection.match"}; static constexpr std::string_view CONFIG_SERVER_VAR_ALERT_DELAY{"proxy.config.http.per_server.connection.alert_delay"}; + static constexpr std::string_view CONFIG_SERVER_VAR_METRIC_ENABLED{"proxy.config.http.per_server.connection.metric_enabled"}; + static constexpr std::string_view CONFIG_SERVER_VAR_METRIC_PREFIX{"proxy.config.http.per_server.connection.metric_prefix"}; /// A record for the outbound connection count. /// These are stored per outbound session equivalence class, as determined by the session matching. @@ -131,6 +136,11 @@ public: std::atomic<int> _in_queue{0}; ///< # of connections queued, waiting for a connection. std::atomic<Ticker> _last_alert{0}; ///< Absolute time of the last alert. + // Recording data as metrics + ts::Metrics::Gauge::AtomicType *_count_metric = nullptr; + ts::Metrics::Counter::AtomicType *_count_total_metric = nullptr; + ts::Metrics::Counter::AtomicType *_blocked_metric = nullptr; + /** Constructor. * Construct from @c Key because the use cases do a table lookup first so the @c Key is already constructed. * @param key A populated @c Key structure - values are copied to the @c Group. @@ -149,7 +159,8 @@ public: /// @return @c true if an alert should be generated, @c false otherwise. bool should_alert(std::time_t *lat = nullptr); /// Time of the last alert in epoch seconds. - std::time_t get_last_alert_epoch_time() const; + std::time_t get_last_alert_epoch_time() const; + static std::string metric_name(const Key &key, std::string_view fqdn, std::string metric_prefix); /// Release the reference count to this group and remove it from the /// group table if it is no longer referenced. @@ -322,6 +333,31 @@ ConnectionTracker::Group::hash(const Key &key) } } +inline std::string +ConnectionTracker::Group::metric_name(const Key &key, std::string_view fqdn, std::string metric_prefix) +{ + std::string metric_name = ""; + char buf[INET6_ADDRSTRLEN]; + + switch (key._match_type) { + case MATCH_IP: + metric_name += ats_ip_ntop(&key._addr.sa, buf, sizeof(buf)); + break; + case MATCH_PORT: + metric_name += ats_ip_nptop(&key._addr.sa, buf, sizeof(buf)); + break; + case MATCH_HOST: + metric_name += std::string(fqdn); + break; + case MATCH_BOTH: + metric_name += std::string(fqdn) + "." + ats_ip_nptop(&key._addr.sa, buf, sizeof(buf)); + break; + default: + Warning("Invalid matching type to add to per_server.connections metrics"); + } + return metric_prefix.empty() ? metric_name : metric_prefix + "." + metric_name; +} + inline bool ConnectionTracker::TxnState::is_active() { @@ -332,6 +368,12 @@ inline int ConnectionTracker::TxnState::reserve() { _reserved_p = true; + // If metric enabled, use metric as count + if (_g->_count_metric != nullptr) { + ts::Metrics::Gauge::increment(_g->_count_metric); + ts::Metrics::Counter::increment(_g->_count_total_metric); + return _g->_count_metric->load(); + } return ++_g->_count; } @@ -340,7 +382,12 @@ ConnectionTracker::TxnState::release() { if (_reserved_p) { _reserved_p = false; - --_g->_count; + // If metric enabled, use metric as count + if (_g->_count_metric != nullptr) { + ts::Metrics::Gauge::decrement(_g->_count_metric); + } else { + --_g->_count; + } } } @@ -372,6 +419,9 @@ ConnectionTracker::TxnState::update_max_count(int count) inline void ConnectionTracker::TxnState::blocked() { + if (_g->_blocked_metric != nullptr) { + ts::Metrics::Counter::increment(_g->_blocked_metric); + } ++_g->_blocked; } diff --git a/src/iocore/net/ConnectionTracker.cc b/src/iocore/net/ConnectionTracker.cc index 8401ecbf06..43323318a9 100644 --- a/src/iocore/net/ConnectionTracker.cc +++ b/src/iocore/net/ConnectionTracker.cc @@ -149,6 +149,30 @@ Config_Update_Conntrack_Client_Alert_Delay(const char *name, RecDataT dtype, Rec return Config_Update_Conntrack_Server_Alert_Delay_Helper(name, dtype, data, cookie, config->client_alert_delay); } +bool +Config_Update_Conntrack_Metric_Enabled(const char * /* name ATS_UNUSED */, RecDataT dtype, RecData data, void *cookie) +{ + auto config = static_cast<ConnectionTracker::GlobalConfig *>(cookie); + + if (RECD_INT == dtype) { + config->metric_enabled = data.rec_int; + return true; + } + return false; +} + +bool +Config_Update_Conntrack_Metric_Prefix(const char * /* name ATS_UNUSED */, RecDataT dtype, RecData data, void *cookie) +{ + auto config = static_cast<ConnectionTracker::GlobalConfig *>(cookie); + + if (RECD_STRING == dtype) { + config->metric_prefix = data.rec_string; + return true; + } + return false; +} + } // namespace void @@ -162,6 +186,8 @@ ConnectionTracker::config_init(GlobalConfig *global, TxnConfig *txn, RecConfigUp Enable_Config_Var(CONFIG_SERVER_VAR_MAX, &Config_Update_Conntrack_Max, config_cb, txn); Enable_Config_Var(CONFIG_SERVER_VAR_MATCH, &Config_Update_Conntrack_Match, config_cb, txn); Enable_Config_Var(CONFIG_SERVER_VAR_ALERT_DELAY, &Config_Update_Conntrack_Server_Alert_Delay, config_cb, global); + Enable_Config_Var(CONFIG_SERVER_VAR_METRIC_ENABLED, &Config_Update_Conntrack_Metric_Enabled, config_cb, global); + Enable_Config_Var(CONFIG_SERVER_VAR_METRIC_PREFIX, &Config_Update_Conntrack_Metric_Prefix, config_cb, global); } ConnectionTracker::TxnState @@ -214,6 +240,19 @@ ConnectionTracker::Group::Group(DirectionType direction, Key const &key, std::st _alert_delay{direction == DirectionType::INBOUND ? _global_config->client_alert_delay : _global_config->server_alert_delay} { Metrics::Gauge::increment(net_rsb.connection_tracker_table_size); + // only add metrics for server connections + if (_global_config->metric_enabled && direction == DirectionType::OUTBOUND) { + std::string _metric_name = metric_name(key, fqdn, _global_config->metric_prefix); + _count_metric = Metrics::Gauge::createPtr("proxy.process.http.per_server.current_connection.", _metric_name); + _count_total_metric = Metrics::Counter::createPtr("proxy.process.http.per_server.total_connection.", _metric_name); + _blocked_metric = Metrics::Counter::createPtr("proxy.process.http.per_server.blocked_connection.", _metric_name); + + if (dbg_ctl.on()) { + swoc::LocalBufferWriter<256> w; + w.print("Registered per_server_connection.{}\0", _metric_name); + DbgPrint(dbg_ctl, "%s", w.data()); + } + } // store the host name if relevant. if (MATCH_HOST == _match_type || MATCH_BOTH == _match_type) { _fqdn.assign(fqdn); @@ -284,7 +323,25 @@ ConnectionTracker::Group::should_alert(std::time_t *lat) void ConnectionTracker::Group::release() { - if (_count > 0) { + // If metric enabled, use metric as count + if (_count_metric != nullptr) { + if (_count_metric->load() > 0) { + ts::Metrics::Gauge::decrement(_count_metric); + if (_count_metric->load() == 0) { + TableSingleton &table = _direction == DirectionType::INBOUND ? _inbound_table : _outbound_table; + std::lock_guard<std::mutex> lock(table._mutex); // Table lock + if (_count_metric->load() > 0) { + // Someone else grabbed the Group between our last check and taking the + // lock. + return; + } + table._table.erase(_key); + } + } else { + // A bit dubious, as there's no guarantee it's still negative, but even that would be interesting to know. + Error("Number of tracked connections should be greater than or equal to zero: %" PRId64, _count_metric->load()); + } + } else if (_count > 0) { if (--_count == 0) { TableSingleton &table = _direction == DirectionType::INBOUND ? _inbound_table : _outbound_table; std::lock_guard<std::mutex> lock(table._mutex); // Table lock @@ -331,8 +388,8 @@ ConnectionTracker::outbound_to_json_string() static const std::string_view trailer{" \n]}"}; static const auto printer = [](swoc::BufferWriter &w, Group const *g) -> swoc::BufferWriter & { - w.print(item_fmt, g->_match_type, g->_addr, g->_fqdn, g->_count.load(), g->_count_max.load(), g->_blocked.load(), - g->get_last_alert_epoch_time()); + w.print(item_fmt, g->_match_type, g->_addr, g->_fqdn, g->_count_metric != nullptr ? g->_count_metric->load() : g->_count.load(), + g->_count_max.load(), g->_blocked.load(), g->get_last_alert_epoch_time()); return w; }; @@ -373,7 +430,8 @@ ConnectionTracker::dump(FILE *f) for (std::shared_ptr<Group const> g : groups) { swoc::LocalBufferWriter<128> w; - w.print("{:7} | {:5} | {:24} | {:33} | {:8} |\n", g->_count.load(), g->_blocked.load(), g->_addr, g->_hash, g->_match_type); + w.print("{:7} | {:5} | {:24} | {:33} | {:8} |\n", g->_count_metric != nullptr ? g->_count_metric->load() : g->_count.load(), + g->_blocked.load(), g->_addr, g->_hash, g->_match_type); fwrite(w.data(), w.size(), 1, f); } diff --git a/src/proxy/http/HttpSM.cc b/src/proxy/http/HttpSM.cc index bba5a4da65..4324fc334f 100644 --- a/src/proxy/http/HttpSM.cc +++ b/src/proxy/http/HttpSM.cc @@ -5542,7 +5542,8 @@ HttpSM::do_http_server_open(bool raw, bool only_direct) } // See if the outbound connection tracker data is needed. If so, get it here for consistency. - if (t_state.txn_conf->connection_tracker_config.server_max > 0 || t_state.txn_conf->connection_tracker_config.server_min > 0) { + if (t_state.txn_conf->connection_tracker_config.server_max > 0 || t_state.txn_conf->connection_tracker_config.server_min > 0 || + t_state.http_config_param->global_connection_tracker_config.metric_enabled) { t_state.outbound_conn_track_state = ConnectionTracker::obtain_outbound(t_state.txn_conf->connection_tracker_config, std::string_view{t_state.current.server->name}, t_state.current.server->dst_addr); @@ -5569,6 +5570,9 @@ HttpSM::do_http_server_open(bool raw, bool only_direct) } ct_state.update_max_count(ccount); + } else if (t_state.http_config_param->global_connection_tracker_config.metric_enabled) { + auto &ct_state = t_state.outbound_conn_track_state; + ct_state.reserve(); } // We did not manage to get an existing session and need to open a new connection diff --git a/src/records/RecordsConfig.cc b/src/records/RecordsConfig.cc index c612e2d438..0022e1c8df 100644 --- a/src/records/RecordsConfig.cc +++ b/src/records/RecordsConfig.cc @@ -367,12 +367,16 @@ static const RecordElement RecordsConfig[] = , {RECT_CONFIG, "proxy.config.http.per_server.connection.max", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_STR, "^[0-9]+$", RECA_NULL} , - {RECT_CONFIG, "proxy.config.http.per_server.connection.match", RECD_STRING, "both", RECU_DYNAMIC, RR_NULL, RECC_STR, "^(?:ip|host|both|none)$", RECA_NULL} + {RECT_CONFIG, "proxy.config.http.per_server.connection.match", RECD_STRING, "both", RECU_DYNAMIC, RR_NULL, RECC_STR, "^(?:ip|port|host|both|none)$", RECA_NULL} , {RECT_CONFIG, "proxy.config.http.per_server.connection.alert_delay", RECD_INT, "60", RECU_DYNAMIC, RR_NULL, RECC_STR, "^[0-9]+$", RECA_NULL} , {RECT_CONFIG, "proxy.config.http.per_server.connection.min", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_STR, "^[0-9]+$", RECA_NULL} , + {RECT_CONFIG, "proxy.config.http.per_server.connection.metric_enabled", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_STR, "[0-1]", RECA_NULL} + , + {RECT_CONFIG, "proxy.config.http.per_server.connection.metric_prefix", RECD_STRING, "", RECU_DYNAMIC, RR_NULL, RECC_NULL, nullptr, RECA_NULL} + , {RECT_CONFIG, "proxy.config.http.attach_server_session_to_client", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-1]", RECA_NULL} , {RECT_CONFIG, "proxy.config.http.max_proxy_cycles", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_STR, "^[0-9]+$", RECA_NULL} diff --git a/tests/gold_tests/origin_connection/gold/two_200_ok.gold b/tests/gold_tests/origin_connection/gold/two_200_ok.gold new file mode 100644 index 0000000000..e7833998be --- /dev/null +++ b/tests/gold_tests/origin_connection/gold/two_200_ok.gold @@ -0,0 +1,9 @@ +`` +> CONNECT`` +`` +< HTTP/1.1 200 OK +`` +> CONNECT`` +`` +< HTTP/1.1 200 OK +`` diff --git a/tests/gold_tests/origin_connection/per_server_connection_max.test.py b/tests/gold_tests/origin_connection/per_server_connection_max.test.py index 6c7253fdb3..998ddd9775 100644 --- a/tests/gold_tests/origin_connection/per_server_connection_max.test.py +++ b/tests/gold_tests/origin_connection/per_server_connection_max.test.py @@ -49,13 +49,29 @@ class PerServerConnectionMaxTest: 'proxy.config.dns.nameservers': f"127.0.0.1:{self._dns.Variables.Port}", 'proxy.config.dns.resolv_conf': 'NULL', 'proxy.config.diags.debug.enabled': 1, - 'proxy.config.diags.debug.tags': 'http', + 'proxy.config.diags.debug.tags': 'http|conn_track', 'proxy.config.http.per_server.connection.max': self._origin_max_connections, + 'proxy.config.http.per_server.connection.metric_enabled': 1, + 'proxy.config.http.per_server.connection.metric_prefix': 'foo', + 'proxy.config.http.per_server.connection.match': 'port', }) self._ts.Disk.diags_log.Content += Testers.ContainsExpression( f'WARNING:.*too many connections:.*limit={self._origin_max_connections}', 'Verify the user is warned about the connection limit being hit.') + def _test_metrics(self) -> None: + """Use traffic_ctl to test metrics.""" + tr = Test.AddTestRun("Check connection metrics") + tr.Processes.Default.Command = 'traffic_ctl metric match per_server' + tr.Processes.Default.ReturnCode = 0 + tr.Processes.Default.Env = self._ts.Env + tr.Processes.Default.Streams.All = Testers.ContainsExpression( + f'per_server.total_connection.foo.127.0.0.1:{self._server.Variables.http_port} 4', + 'incorrect statistic return, or possible error.') + tr.Processes.Default.Streams.All = Testers.ContainsExpression( + f'per_server.blocked_connection.foo.127.0.0.1:{self._server.Variables.http_port} 1', + 'incorrect statistic return, or possible error.') + def run(self) -> None: """Configure the TestRun.""" tr = Test.AddTestRun('Verify we enforce proxy.config.http.per_server.connection.max') @@ -65,18 +81,19 @@ class PerServerConnectionMaxTest: tr.AddVerifierClientProcess('client', self._replay_file, http_ports=[self._ts.Variables.port]) + self._test_metrics() + class ConnectMethodTest: """Test our max origin connection behavior with CONNECT traffic.""" _client_counter: int = 0 - _origin_max_connections: int = 3 - def __init__(self) -> None: + def __init__(self, max_conn) -> None: """Configure the server processes in preparation for the TestRun.""" self._configure_dns() self._configure_origin_server() - self._configure_trafficserver() + self._configure_trafficserver(max_conn) def _configure_dns(self) -> None: """Configure a nameserver for the test.""" @@ -86,18 +103,19 @@ class ConnectMethodTest: """Configure the httpbin origin server.""" self._server = Test.MakeHttpBinServer("server2") - def _configure_trafficserver(self) -> None: - self._ts = Test.MakeATSProcess("ts2") + def _configure_trafficserver(self, max_conn) -> None: + self._ts = Test.MakeATSProcess("ts2_" + str(max_conn)) self._ts.Disk.records_config.update( { 'proxy.config.dns.nameservers': f"127.0.0.1:{self._dns.Variables.Port}", 'proxy.config.dns.resolv_conf': 'NULL', 'proxy.config.diags.debug.enabled': 1, - 'proxy.config.diags.debug.tags': 'http|dns|hostdb', + 'proxy.config.diags.debug.tags': 'http|dns|hostdb|conn_track', 'proxy.config.http.server_ports': f"{self._ts.Variables.port}", 'proxy.config.http.connect_ports': f"{self._server.Variables.Port}", - 'proxy.config.http.per_server.connection.max': self._origin_max_connections, + 'proxy.config.http.per_server.connection.metric_enabled': 1, + 'proxy.config.http.per_server.connection.max': max_conn, }) self._ts.Disk.remap_config.AddLines([ @@ -111,7 +129,20 @@ class ConnectMethodTest: tr.MakeCurlCommand(f"-v --fail -s -p -x 127.0.0.1:{self._ts.Variables.port} 'http://foo.com/delay/2'", p=p) return p - def run(self) -> None: + def _test_metrics(self, blocked) -> None: + """Use traffic_ctl to test metrics.""" + tr = Test.AddTestRun("Check connection metrics") + tr.Processes.Default.Command = 'traffic_ctl metric match per_server' + tr.Processes.Default.ReturnCode = 0 + tr.Processes.Default.Env = self._ts.Env + tr.Processes.Default.Streams.All = Testers.ContainsExpression( + f'per_server.total_connection.www.this.origin.com.127.0.0.1:{self._server.Variables.Port} 5', + 'incorrect statistic return, or possible error.') + tr.Processes.Default.Streams.All = Testers.ContainsExpression( + f'per_server.blocked_connection.www.this.origin.com.127.0.0.1:{self._server.Variables.Port} {blocked}', + 'incorrect statistic return, or possible error.') + + def run(self, blocked, gold_file) -> None: """Verify per_server.connection.max with CONNECT traffic.""" tr = Test.AddTestRun() tr.Processes.Default.StartBefore(self._dns) @@ -134,10 +165,13 @@ class ConnectMethodTest: f"--next -v --fail -s -p -x 127.0.0.1:{self._ts.Variables.port} 'http://foo.com/get'") # Curl will have a 22 exit code if it receives a 5XX response (and we # expect a 503). - tr.Processes.Default.ReturnCode = 22 - tr.Processes.Default.Streams.stderr = "gold/two_503_congested.gold" + tr.Processes.Default.ReturnCode = 22 if blocked else 0 + tr.Processes.Default.Streams.stderr = gold_file tr.Processes.Default.TimeOut = 3 + self._test_metrics(blocked) + PerServerConnectionMaxTest().run() -ConnectMethodTest().run() +ConnectMethodTest(3).run(blocked=2, gold_file="gold/two_503_congested.gold") +ConnectMethodTest(0).run(blocked=0, gold_file="gold/two_200_ok.gold")