This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 0c14dec7c501fa09fe4ef210e932db729b37b5d8 Author: Nick Vatamaniuc <[email protected]> AuthorDate: Wed Jan 14 17:03:23 2026 -0500 Add upgrade_in_progress config and metrics Also, add the same for maintenance_mode. When doing upgrades it's nicer to set an explicit config setting for the duration of the upgrade so some background activity can pause or change its behavior. One example may be to stop all the scanner plugins. To avoid them having to stop/start multiple times during upgrades as nodes are toggled in/out of mm mode, it might be better to just have a toggle that stays on for the duration of the upgrade. Add metric gauges so upgrades and mm toggles are visible in the metric timeline, which can also aid in investigating various issues. --- rel/overlay/etc/default.ini | 11 ++++++++ src/couch/priv/stats_descriptions.cfg | 8 ++++++ src/couch/src/couch_server.erl | 33 +++++++++++++++++++++--- src/couch/test/eunit/couch_server_tests.erl | 40 +++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 4 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index f5c409f8a..b6ab9d3ba 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -38,7 +38,18 @@ view_index_dir = {{view_index_dir}} ;default_security = admin_only ;btree_chunk_size = 1279 + +; When set to "true" the node will stop processing interactive requests and +; return "status":"maintenance_mode" from the /_up endpoint. If a load balancer +; is monitoring the /_up endpoint, it can then take the node out of rotation. ;maintenance_mode = false + +; Toggle to "true" when upgrading a cluster. Then, after upgrading all nodes, +; toggle it back to "false". When this is value enabled the scanner will pause +; running any plugins. In the future other background components may pause +; their execution as well. 
+;upgrade_in_progress = false + ;stem_interactive_updates = true ;uri_file = diff --git a/src/couch/priv/stats_descriptions.cfg b/src/couch/priv/stats_descriptions.cfg index 18f32a47a..e3bd52992 100644 --- a/src/couch/priv/stats_descriptions.cfg +++ b/src/couch/priv/stats_descriptions.cfg @@ -486,3 +486,11 @@ {type, counter}, {desc, <<"number of times bt_engine cache was full">>} ]}. +{[couchdb, maintenance_mode], [ + {type, gauge}, + {desc, <<"set to 1 when node is in maintenance mode">>} +]}. +{[couchdb, upgrade_in_progress], [ + {type, gauge}, + {desc, <<"set to 1 when upgrade_in_progress is toggled">>} +]}. diff --git a/src/couch/src/couch_server.erl b/src/couch/src/couch_server.erl index 4b66993f8..d1118bcb5 100644 --- a/src/couch/src/couch_server.erl +++ b/src/couch/src/couch_server.erl @@ -302,11 +302,17 @@ init([N]) -> "couchdb", "update_lru_on_read", false ), ok = config:listen_for_changes(?MODULE, N), - % Spawn async .deleted files recursive cleaner, but only - % for the first sharded couch_server instance + case N of - 1 -> ok = couch_file:init_delete_dir(RootDir); - _ -> ok + 1 -> + % Update mm and upgrade_in_progress stats gauges + update_maintenance_mode_gauge(), + update_upgrade_in_progress_gauge(), + % Spawn async .deleted files recursive cleaner, but only + % for the first sharded couch_server instance + ok = couch_file:init_delete_dir(RootDir); + _ -> + ok end, ets:new(couch_dbs(N), [ set, @@ -395,6 +401,12 @@ handle_config_change("httpd", "port", _, _, 1 = N) -> handle_config_change("httpd", "max_connections", _, _, 1 = N) -> couch_httpd:stop(), {ok, N}; +handle_config_change("couchdb", "maintenance_mode", _, _, 1 = N) -> + update_maintenance_mode_gauge(), + {ok, N}; +handle_config_change("couchdb", "upgrade_in_progress", _, _, 1 = N) -> + update_upgrade_in_progress_gauge(), + {ok, N}; handle_config_change(_, _, _, _, N) -> {ok, N}. 
@@ -1018,6 +1030,19 @@ try_lock(Table, DbName) when is_atom(Table), is_binary(DbName) -> unlock(Table, DbName) when is_atom(Table), is_binary(DbName) -> ets:update_element(Table, DbName, {#entry.lock, unlocked}). +update_maintenance_mode_gauge() -> + % Note, it's not necessarily a boolean, could be "nolb" as well + case config:get("couchdb", "maintenance_mode", "false") of + "false" -> couch_stats:update_gauge([couchdb, maintenance_mode], 0); + _ -> couch_stats:update_gauge([couchdb, maintenance_mode], 1) + end. + +update_upgrade_in_progress_gauge() -> + case config:get_boolean("couchdb", "upgrade_in_progress", false) of + false -> couch_stats:update_gauge([couchdb, upgrade_in_progress], 0); + true -> couch_stats:update_gauge([couchdb, upgrade_in_progress], 1) + end. + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). diff --git a/src/couch/test/eunit/couch_server_tests.erl b/src/couch/test/eunit/couch_server_tests.erl index 77ddfde21..35e1d539d 100644 --- a/src/couch/test/eunit/couch_server_tests.erl +++ b/src/couch/test/eunit/couch_server_tests.erl @@ -288,3 +288,43 @@ get_next_message() -> after 5000 -> error(timeout) end. + +get_stats_and_gagues_test_() -> + { + foreach, + fun() -> + Ctx = test_util:start_couch(), + config:set("couchdb", "maintenance_mode", "false", false), + config:set("couchdb", "upgrade_in_progress", "false", false), + Ctx + end, + fun(Ctx) -> + config:delete("couchdb", "maintenance_mode", false), + config:delete("couchdb", "upgrade_in_progress", false), + test_util:stop(Ctx) + end, + [ + ?TDEF_FE(t_maintenance_mode_metric), + ?TDEF_FE(t_upgrade_in_progress_metric), + ?TDEF_FE(t_get_stats) + ] + }. 
+ +t_maintenance_mode_metric(_) -> + ?assertEqual(0, couch_stats:sample([couchdb, maintenance_mode])), + config:set("couchdb", "maintenance_mode", "true", false), + ?assertEqual(1, couch_stats:sample([couchdb, maintenance_mode])), + config:set("couchdb", "maintenance_mode", "nolb", false), + ?assertEqual(1, couch_stats:sample([couchdb, maintenance_mode])), + config:set("couchdb", "maintenance_mode", "false", false), + ?assertEqual(0, couch_stats:sample([couchdb, maintenance_mode])). + +t_upgrade_in_progress_metric(_) -> + ?assertEqual(0, couch_stats:sample([couchdb, upgrade_in_progress])), + config:set("couchdb", "upgrade_in_progress", "true", false), + ?assertEqual(1, couch_stats:sample([couchdb, upgrade_in_progress])), + config:set("couchdb", "upgrade_in_progress", "false", false), + ?assertEqual(0, couch_stats:sample([couchdb, upgrade_in_progress])). + +t_get_stats(_) -> + ?assertMatch([{start_time, _}, {dbs_open, _}], couch_server:get_stats()).
