This is an automated email from the ASF dual-hosted git repository.
vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git
The following commit(s) were added to refs/heads/main by this push:
new 563d1ec21 Fix replicator scheduler total jobs metric
563d1ec21 is described below
commit 563d1ec21cec555bb8647b036f9dc4124f09d5f9
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Wed Nov 26 16:48:08 2025 -0500
Fix replicator scheduler total jobs metric
Previously, we didn't always remember to update the total job stats gauge,
so it was possible for it to become stale. The periodic scheduler refresh
updated all the other gauges but didn't update the total.
To fix it, make sure to update the stat in more places (on job removes and
adds) and, most importantly, add it to the periodic stat refresh function, so
even if we still miss an update, the gauge should eventually catch up after a
rescheduling cycle.
---
.../src/couch_replicator_scheduler.erl | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/src/couch_replicator/src/couch_replicator_scheduler.erl
b/src/couch_replicator/src/couch_replicator_scheduler.erl
index aabd7febd..14cca4a21 100644
--- a/src/couch_replicator/src/couch_replicator_scheduler.erl
+++ b/src/couch_replicator/src/couch_replicator_scheduler.erl
@@ -258,8 +258,7 @@ handle_call({add_job, Job}, _From, State) ->
true = add_job_int(Job),
ok = maybe_start_newly_added_job(Job, State),
couch_stats:increment_counter([couch_replicator, jobs, adds]),
- TotalJobs = ets:info(?MODULE, size),
- couch_stats:update_gauge([couch_replicator, jobs, total], TotalJobs),
+ update_total_jobs_stats(),
{reply, ok, State};
handle_call({remove_job, Id}, _From, State) ->
ok = maybe_remove_job_int(Id, State),
@@ -464,6 +463,7 @@ handle_crashed_job(Job, Reason, State) ->
update_running_jobs_stats(State#state.stats_pid),
ok;
false ->
+ update_total_jobs_stats(),
ok
end.
@@ -480,6 +480,7 @@ maybe_start_newly_added_job(Job, State) ->
update_running_jobs_stats(State#state.stats_pid),
ok;
false ->
+ update_total_jobs_stats(),
ok
end.
@@ -655,16 +656,13 @@ maybe_remove_job_int(JobId, State) ->
ok = stop_job_int(Job, State),
true = remove_job_int(Job),
couch_stats:increment_counter([couch_replicator, jobs, removes]),
- TotalJobs = ets:info(?MODULE, size),
- couch_stats:update_gauge(
- [couch_replicator, jobs, total],
- TotalJobs
- ),
update_running_jobs_stats(State#state.stats_pid),
ok;
{error, not_found} ->
ok
- end.
+ end,
+ update_total_jobs_stats(),
+ ok.
start_job_int(#job{pid = Pid}, _State) when Pid /= undefined ->
ok;
@@ -964,6 +962,7 @@ stats_updater_refresh() ->
couch_stats:update_gauge([couch_replicator, jobs, pending], PendingN),
couch_stats:update_gauge([couch_replicator, jobs, running], RunningN),
couch_stats:update_gauge([couch_replicator, jobs, crashed], CrashedN),
+ update_total_jobs_stats(),
ok.
-spec stats_fold(#job{}, #stats_acc{}) -> #stats_acc{}.
@@ -976,6 +975,10 @@ stats_fold(#job{pid = undefined, history = [{{crashed, _},
_} | _]}, Acc) ->
stats_fold(#job{pid = P, history = [{started, _} | _]}, Acc) when is_pid(P) ->
Acc#stats_acc{running_n = Acc#stats_acc.running_n + 1}.
+update_total_jobs_stats() ->
+ TotalJobs = ets:info(?MODULE, size),
+ couch_stats:update_gauge([couch_replicator, jobs, total], TotalJobs).
+
-spec existing_replication(#rep{}) -> boolean().
existing_replication(#rep{} = NewRep) ->
case job_by_id(NewRep#rep.id) of