This is an automated email from the ASF dual-hosted git repository.

willholley pushed a commit to branch prometheus_erlang_dist
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit f027a8e2906f9994cc2250de49f7eae386db29de
Author: Will Holley <[email protected]>
AuthorDate: Wed Apr 12 07:30:06 2023 +0000

    feat (prometheus): include aggregated couch/index message queues
    
    In #3860 and #3366 we added sharding to `couch_index_server` and
    `couch_server`.
    
    The `_system` endpoint surfaces a "fake" message queue for each of these
    containing the aggregated queue size across all shards. This commit
    adds the same for the `_prometheus` endpoint.
    
    Originally I had thought to just filter out the per-shard queue lengths
    as we've not found these to be useful in Cloudant, but I'll leave them
    in for now for consistency with the `_system` endpoint. Arguably, we
    should filter in both places if there's agreement that the per-shard
    queue lengths are just noise.
---
 .devcontainer/devcontainer.json                    |  2 +-
 src/chttpd/src/chttpd_node.erl                     | 30 ++++++++++++++--------
 .../src/couch_prometheus_server.erl                | 26 +++----------------
 3 files changed, 24 insertions(+), 34 deletions(-)

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 5e577d96d..3920cd9dd 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -9,7 +9,7 @@
             // apache/couchdbci-debian:bullseye-erlang-24.3.4.2
             // apache/couchdbci-debian:bullseye-erlang-23.3.4.15
             //
-            "COUCHDB_IMAGE": "apache/couchdbci-debian:bullseye-erlang-25.0.2"
+            "COUCHDB_IMAGE": 
"apache/couchdbci-debian:bullseye-erlang-24.3.4.10"
         }
     },
 
diff --git a/src/chttpd/src/chttpd_node.erl b/src/chttpd/src/chttpd_node.erl
index a63236db7..bb3cf4798 100644
--- a/src/chttpd/src/chttpd_node.erl
+++ b/src/chttpd/src/chttpd_node.erl
@@ -16,7 +16,9 @@
 -export([
     handle_node_req/1,
     get_stats/0,
-    run_queues/0
+    run_queues/0,
+    message_queues/0,
+    db_pid_stats/0
 ]).
 
 -include_lib("couch/include/couch_db.hrl").
@@ -284,14 +286,13 @@ get_stats() ->
     ],
     {NumberOfGCs, WordsReclaimed, _} = statistics(garbage_collection),
     {{input, Input}, {output, Output}} = statistics(io),
+
     {CF, CDU} = db_pid_stats(),
-    MessageQueues0 = [
+    MessageQueuesHist = [
         {couch_file, {CF}},
-        {couch_db_updater, {CDU}},
-        {couch_server, couch_server:aggregate_queue_len()},
-        {index_server, couch_index_server:aggregate_queue_len()}
+        {couch_db_updater, {CDU}}
     ],
-    MessageQueues = MessageQueues0 ++ message_queues(registered()),
+    MessageQueues = message_queues(),
     {SQ, DCQ} = run_queues(),
     [
         {uptime, couch_app:uptime() div 1000},
@@ -309,7 +310,7 @@ get_stats() ->
         {stale_proc_count, couch_proc_manager:get_stale_proc_count()},
         {process_count, erlang:system_info(process_count)},
         {process_limit, erlang:system_info(process_limit)},
-        {message_queues, {MessageQueues}},
+        {message_queues, {MessageQueuesHist ++ MessageQueues}},
         {internal_replication_jobs, mem3_sync:get_backlog()},
         {distribution, {get_distribution_stats()}}
     ].
@@ -385,15 +386,22 @@ get_distribution_stats() ->
         erlang:system_info(dist_ctrl)
     ).
 
-message_queues(Registered) ->
-    lists:map(
+-spec message_queues() ->
+    [{Name :: atom(), Length :: pos_integer()}].
+message_queues() ->
+    MessageQueuesAgg = [
+        {couch_server, couch_server:aggregate_queue_len()},
+        {index_server, couch_index_server:aggregate_queue_len()}
+    ],
+    MessageQueuesReg = lists:map(
         fun(Name) ->
             Type = message_queue_len,
             {Type, Length} = process_info(whereis(Name), Type),
             {Name, Length}
         end,
-        Registered
-    ).
+        registered()
+    ),
+    MessageQueuesAgg ++ MessageQueuesReg.
 
 %% Workaround for https://bugs.erlang.org/browse/ERL-1355
 run_queues() ->
diff --git a/src/couch_prometheus/src/couch_prometheus_server.erl 
b/src/couch_prometheus/src/couch_prometheus_server.erl
index 05cd26265..847ad947d 100644
--- a/src/couch_prometheus/src/couch_prometheus_server.erl
+++ b/src/couch_prometheus/src/couch_prometheus_server.erl
@@ -198,8 +198,7 @@ get_io_stats() ->
     ].
 
 get_message_queue_stats() ->
-    QFun = fun(Name) -> {Name, message_queue_len(whereis(Name))} end,
-    Queues = lists:map(QFun, registered()),
+    Queues = chttpd_node:message_queues(),
     QueueLens = lists:map(fun({_, Len}) -> Len end, Queues),
     QueueLenByLabel = lists:map(fun({Name, Len}) -> {[{queue_name, Name}], 
Len} end, Queues),
     [
@@ -221,33 +220,16 @@ get_message_queue_stats() ->
         to_prom(erlang_message_queue_size, gauge, "size of message queue", 
QueueLenByLabel)
     ].
 
-message_queue_len(undefined) ->
-    0;
-message_queue_len(Pid) when is_pid(Pid) ->
-    case erlang:process_info(Pid, message_queue_len) of
-        {message_queue_len, N} ->
-            N;
-        _ ->
-            0
-    end.
-
 get_run_queue_stats() ->
     %% Workaround for https://bugs.erlang.org/browse/ERL-1355
-    {Normal, Dirty} =
-        case erlang:system_info(dirty_cpu_schedulers) > 0 of
-            false ->
-                {statistics(run_queue), 0};
-            true ->
-                [DCQ | SQs] = lists:reverse(statistics(run_queue_lengths)),
-                {lists:sum(SQs), DCQ}
-        end,
+    {SQ, DCQ} = chttpd_node:run_queues(),
     [
-        to_prom(erlang_scheduler_queues, gauge, "the total size of all normal 
run queues", Normal),
+        to_prom(erlang_scheduler_queues, gauge, "the total size of all normal 
run queues", SQ),
         to_prom(
             erlang_dirty_cpu_scheduler_queues,
             gauge,
             "the total size of all dirty CPU scheduler run queues",
-            Dirty
+            DCQ
         )
     ].
 

Reply via email to