This is an automated email from the ASF dual-hosted git repository. willholley pushed a commit to branch prometheus_erlang_dist in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit f027a8e2906f9994cc2250de49f7eae386db29de Author: Will Holley <[email protected]> AuthorDate: Wed Apr 12 07:30:06 2023 +0000 feat (prometheus): include aggregated couch/index message queues In #3860 and #3366 we added sharding to `couch_index_server` and `couch_server`. The `_system` endpoint surfaces a "fake" message queue for each of these containing the aggregated queue size across all shards. This commit adds the same for the `_prometheus` endpoint. Originally I had thought to just filter out the per-shard queue lengths as we've not found these to be useful in Cloudant, but I'll leave them in for now for consistency with the `_system` endpoint. Arguably, we should filter in both places if there's agreement that the per-shard queue lengths are just noise. --- .devcontainer/devcontainer.json | 2 +- src/chttpd/src/chttpd_node.erl | 30 ++++++++++++++-------- .../src/couch_prometheus_server.erl | 26 +++---------------- 3 files changed, 24 insertions(+), 34 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 5e577d96d..3920cd9dd 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -9,7 +9,7 @@ // apache/couchdbci-debian:bullseye-erlang-24.3.4.2 // apache/couchdbci-debian:bullseye-erlang-23.3.4.15 // - "COUCHDB_IMAGE": "apache/couchdbci-debian:bullseye-erlang-25.0.2" + "COUCHDB_IMAGE": "apache/couchdbci-debian:bullseye-erlang-24.3.4.10" } }, diff --git a/src/chttpd/src/chttpd_node.erl b/src/chttpd/src/chttpd_node.erl index a63236db7..bb3cf4798 100644 --- a/src/chttpd/src/chttpd_node.erl +++ b/src/chttpd/src/chttpd_node.erl @@ -16,7 +16,9 @@ -export([ handle_node_req/1, get_stats/0, - run_queues/0 + run_queues/0, + message_queues/0, + db_pid_stats/0 ]). -include_lib("couch/include/couch_db.hrl"). 
@@ -284,14 +286,13 @@ get_stats() -> ], {NumberOfGCs, WordsReclaimed, _} = statistics(garbage_collection), {{input, Input}, {output, Output}} = statistics(io), + {CF, CDU} = db_pid_stats(), - MessageQueues0 = [ + MessageQueuesHist = [ {couch_file, {CF}}, - {couch_db_updater, {CDU}}, - {couch_server, couch_server:aggregate_queue_len()}, - {index_server, couch_index_server:aggregate_queue_len()} + {couch_db_updater, {CDU}} ], - MessageQueues = MessageQueues0 ++ message_queues(registered()), + MessageQueues = message_queues(), {SQ, DCQ} = run_queues(), [ {uptime, couch_app:uptime() div 1000}, @@ -309,7 +310,7 @@ get_stats() -> {stale_proc_count, couch_proc_manager:get_stale_proc_count()}, {process_count, erlang:system_info(process_count)}, {process_limit, erlang:system_info(process_limit)}, - {message_queues, {MessageQueues}}, + {message_queues, {MessageQueuesHist ++ MessageQueues}}, {internal_replication_jobs, mem3_sync:get_backlog()}, {distribution, {get_distribution_stats()}} ]. @@ -385,15 +386,22 @@ get_distribution_stats() -> erlang:system_info(dist_ctrl) ). -message_queues(Registered) -> - lists:map( +-spec message_queues() -> + [{Name :: atom(), Length :: pos_integer()}]. +message_queues() -> + MessageQueuesAgg = [ + {couch_server, couch_server:aggregate_queue_len()}, + {index_server, couch_index_server:aggregate_queue_len()} + ], + MessageQueuesReg = lists:map( fun(Name) -> Type = message_queue_len, {Type, Length} = process_info(whereis(Name), Type), {Name, Length} end, - Registered - ). + registered() + ), + MessageQueuesAgg ++ MessageQueuesReg. %% Workaround for https://bugs.erlang.org/browse/ERL-1355 run_queues() -> diff --git a/src/couch_prometheus/src/couch_prometheus_server.erl b/src/couch_prometheus/src/couch_prometheus_server.erl index 05cd26265..847ad947d 100644 --- a/src/couch_prometheus/src/couch_prometheus_server.erl +++ b/src/couch_prometheus/src/couch_prometheus_server.erl @@ -198,8 +198,7 @@ get_io_stats() -> ]. 
get_message_queue_stats() -> - QFun = fun(Name) -> {Name, message_queue_len(whereis(Name))} end, - Queues = lists:map(QFun, registered()), + Queues = chttpd_node:message_queues(), QueueLens = lists:map(fun({_, Len}) -> Len end, Queues), QueueLenByLabel = lists:map(fun({Name, Len}) -> {[{queue_name, Name}], Len} end, Queues), [ @@ -221,33 +220,16 @@ get_message_queue_stats() -> to_prom(erlang_message_queue_size, gauge, "size of message queue", QueueLenByLabel) ]. -message_queue_len(undefined) -> - 0; -message_queue_len(Pid) when is_pid(Pid) -> - case erlang:process_info(Pid, message_queue_len) of - {message_queue_len, N} -> - N; - _ -> - 0 - end. - get_run_queue_stats() -> %% Workaround for https://bugs.erlang.org/browse/ERL-1355 - {Normal, Dirty} = - case erlang:system_info(dirty_cpu_schedulers) > 0 of - false -> - {statistics(run_queue), 0}; - true -> - [DCQ | SQs] = lists:reverse(statistics(run_queue_lengths)), - {lists:sum(SQs), DCQ} - end, + {SQ, DCQ} = chttpd_node:run_queues(), [ - to_prom(erlang_scheduler_queues, gauge, "the total size of all normal run queues", Normal), + to_prom(erlang_scheduler_queues, gauge, "the total size of all normal run queues", SQ), to_prom( erlang_dirty_cpu_scheduler_queues, gauge, "the total size of all dirty CPU scheduler run queues", - Dirty + DCQ ) ].
