This is an automated email from the ASF dual-hosted git repository. willholley pushed a commit to branch prometheus_erlang_dist in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit eba421924bf513e77c88981e1d790426ff3283ca
Author: Will Holley <[email protected]>
AuthorDate: Wed Apr 12 18:10:26 2023 +0000

    feat (prometheus): add Erlang distribution stats

    # Why

    The _prometheus endpoint was missing the erlang distribution stats
    returned by the _system endpoint. This is useful when diagnosing
    networking issues between couchdb nodes.

    # How

    Adds a new function `couch_prometheus_server:get_distribution_stats/0`.
    This gathers the distribution stats in a similar fashion to
    `chttpd_node:get_distribution_stats/0` but formats them in a more
    prometheus-friendly way. Naming convention follows prometheus standards,
    so the type of the value is appended to the metric name and, where
    counter types are used, a "_total" suffix is added.

    For example:

    ```
    couchdb_erlang_distribution_recv_oct_bytes_total{node="[email protected]"} 30609
    couchdb_erlang_distribution_recv_oct_bytes_total{node="[email protected]"} 28392
    ```
---
 .../src/couch_prometheus_server.erl                | 110 ++++++++++++++++++++-
 1 file changed, 109 insertions(+), 1 deletion(-)

diff --git a/src/couch_prometheus/src/couch_prometheus_server.erl b/src/couch_prometheus/src/couch_prometheus_server.erl
index 884d792f0..ac90ae5fe 100644
--- a/src/couch_prometheus/src/couch_prometheus_server.erl
+++ b/src/couch_prometheus/src/couch_prometheus_server.erl
@@ -116,7 +116,8 @@ get_system_stats() ->
         get_vm_stats(),
         get_ets_stats(),
         get_internal_replication_jobs_stat(),
-        get_membership_stat()
+        get_membership_stat(),
+        get_distribution_stats()
     ]).
 
 get_uptime_stat() ->
@@ -264,6 +265,113 @@ get_run_queue_stats() ->
         )
     ].
 
+get_distribution_stats() ->
+    % Each distribution metric has a different type, so expose each
+    % as a separate metric with the erlang node as a label. This is
+    % the inverse of the structure returned by inet:getstat/1.
+
+    % this is a bit hairy. This fold accumulates
+    % a dictionary keyed on the socket stat_option (https://www.erlang.org/doc/man/inet.html#getstat-2)
+    % where the value is a list of labels/value pairs for that stat
+    % e.g.
+    % recv_oct: [{[{node="[email protected]"}], 30609}, {[{node="[email protected]"}], 28392}]
+    % recv_cnt: [{[{node="[email protected]"}], 123}, {[{node="[email protected]"}], 134}]
+    DistStats = lists:foldl(
+        fun({Node, Socket}, Acc) ->
+            try inet:getstat(Socket) of
+                {ok, Stats} ->
+                    % Append an entry for the current Node under each stat key;
+                    % the inner fold returns the updated accumulator.
+                    lists:foldl(
+                        fun({StatOption, Value}, Acc0) ->
+                            dict:append(StatOption, {[{node, Node}], Value}, Acc0)
+                        end,
+                        Acc,
+                        Stats
+                    );
+                {error, _} ->
+                    % inet:getstat/1 can also return {error, Reason}; the catch
+                    % below does not cover "of" clauses, so skip this node here
+                    Acc
+            catch
+                _:_ ->
+                    % no result so just continue
+                    Acc
+            end
+        end,
+        dict:new(),
+        erlang:system_info(dist_ctrl)
+    ),
+    [
+        to_prom(
+            erlang_distribution_recv_oct_bytes_total,
+            counter,
+            "Number of bytes received by the socket.",
+            safe_dict_fetch(recv_oct, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_recv_cnt_packets_total,
+            counter,
+            "number of packets received by the socket.",
+            safe_dict_fetch(recv_cnt, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_recv_max_bytes,
+            gauge,
+            "size of the largest packet, in bytes, received by the socket.",
+            safe_dict_fetch(recv_max, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_recv_avg_bytes,
+            gauge,
+            "average size of packets, in bytes, received by the socket.",
+            safe_dict_fetch(recv_avg, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_recv_dvi_bytes,
+            gauge,
+            "average packet size deviation, in bytes, received by the socket.",
+            safe_dict_fetch(recv_dvi, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_oct_bytes_total,
+            counter,
+            "Number of bytes sent by the socket.",
+            safe_dict_fetch(send_oct, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_cnt_packets_total,
+            counter,
+            "number of packets sent by the socket.",
+            safe_dict_fetch(send_cnt, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_max_bytes,
+            gauge,
+            "size of the largest packet, in bytes, sent by the socket.",
+            safe_dict_fetch(send_max, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_avg_bytes,
+            gauge,
+            "average size of packets, in bytes, sent by the socket.",
+            safe_dict_fetch(send_avg, DistStats)
+        ),
+        to_prom(
+            erlang_distribution_send_dvi_bytes,
+            gauge,
+            "average packet size deviation, in bytes, sent by the socket.",
+            safe_dict_fetch(send_dvi, DistStats)
+        )
+    ].
+
+% Returns the value stored under Key in Dict, or [] when Key is absent.
+safe_dict_fetch(Key, Dict) ->
+    case dict:find(Key, Dict) of
+        {ok, Val} -> Val;
+        error -> []
+    end.
+
 get_ets_stats() ->
     NumTabs = length(ets:all()),
     to_prom(erlang_ets_table, gauge, "number of ETS tables", NumTabs).
