This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch pause-auto-purge-on-node-downs in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit d6feb8db8df3ce9babf29822bb7bd17e91cd8855 Author: Nick Vatamaniuc <[email protected]> AuthorDate: Wed Jan 14 01:05:07 2026 -0500 Pause scanner plugin if there are dead nodes We're already pausing on maintenance mode so this is in the same vein -- pause background processing if there is something unusual happening. This should especially help the auto-purge plugin, as we don't want to keep accumulating purge infos when we know for sure some internal replication purge clients won't be able to "see" and checkpoint when they are down. --- src/couch_scanner/src/couch_scanner_server.erl | 13 +++++--- .../test/eunit/couch_scanner_test.erl | 39 ++++++++++++++++++++-- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/src/couch_scanner/src/couch_scanner_server.erl b/src/couch_scanner/src/couch_scanner_server.erl index 2ec1ac540..aaf3945cd 100644 --- a/src/couch_scanner/src/couch_scanner_server.erl +++ b/src/couch_scanner/src/couch_scanner_server.erl @@ -101,7 +101,7 @@ handle_call(status, _From, #st{} = St) -> {reply, #{pids => Pids, scheduling => Scheds, stopped => Stopped}, St}; handle_call(stop, _From, #st{} = St) -> St1 = St#st{stopped = true}, - St2 = stop_in_maintenance(St1), + St2 = stop_in_maintenance_or_dead_nodes(St1), {reply, ok, St2}; handle_call(resume, _From, #st{} = St) -> St1 = start_stop(St#st{stopped = false}), @@ -167,18 +167,18 @@ subscribe_for_config(#st{} = St) -> ok = config:listen_for_changes(?MODULE, self()), St. -stop_in_maintenance(#st{pids = Pids} = St) -> +stop_in_maintenance_or_dead_nodes(#st{pids = Pids} = St) -> case map_size(Pids) > 0 of true -> - couch_log:info("~p stopping in maintenance mode", [?MODULE]), + couch_log:warning("~p stopping, mm mode or found dead nodes", [?MODULE]), lists:foldl(fun stop_plugin/2, St, maps:keys(Pids)); false -> St end. 
start_stop(#st{stopped = Stopped} = St) -> - case in_maintenance() orelse Stopped of - true -> stop_in_maintenance(St); + case in_maintenance() orelse Stopped orelse dead_nodes() of + true -> stop_in_maintenance_or_dead_nodes(St); false -> start_stop_cfg(St) end. @@ -279,6 +279,9 @@ penalize(Now, #sched{error_count = ErrorCount} = Sched) -> in_maintenance() -> "false" /= config:get("couchdb", "maintenance_mode", "false"). +dead_nodes() -> + [] =/= (mem3:nodes() -- mem3_util:live_nodes()). + tsec() -> erlang:system_time(second). diff --git a/src/couch_scanner/test/eunit/couch_scanner_test.erl b/src/couch_scanner/test/eunit/couch_scanner_test.erl index 5d6a22f38..53516ab7e 100644 --- a/src/couch_scanner/test/eunit/couch_scanner_test.erl +++ b/src/couch_scanner/test/eunit/couch_scanner_test.erl @@ -23,7 +23,9 @@ couch_scanner_test_() -> [ ?TDEF_FE(t_top_level_api), ?TDEF_FE(t_start_stop), - ?TDEF_FE(t_run_through_all_callbacks_basic, 10), + ?TDEF_FE(t_start_stop_mm_mode, 10), + ?TDEF_FE(t_start_stop_dead_nodes, 10), + ?TDEF_FE(t_run_through_all_callbacks_basic, 10), ?TDEF_FE(t_find_reporting_works, 10), ?TDEF_FE(t_ddoc_features_works, 20), ?TDEF_FE(t_conflict_finder_works, 30), @@ -53,6 +55,7 @@ setup() -> meck:new(fabric, [passthrough]), meck:new(couch_scanner_server, [passthrough]), meck:new(couch_scanner_util, [passthrough]), + meck:new(mem3, [passthrough]), Ctx = test_util:start_couch([fabric, couch_scanner]), % Run with the smallest batch size to exercise the batched % ddoc iteration @@ -100,7 +103,7 @@ setup() -> {Ctx, {DbName1, DbName2, DbName3}}.
teardown({Ctx, {DbName1, DbName2, DbName3}}) -> - config:delete("couch_scanner", "maintenance_mode", false), + config:delete("couchdb", "maintenance_mode", false), config_delete_section("couch_scanner"), config_delete_section("couch_scanner_plugins"), config_delete_section(atom_to_list(?FEATURES_PLUGIN)), @@ -139,6 +142,38 @@ t_start_stop(_) -> ?assertEqual(ok, couch_scanner_server:resume()), ?assertMatch(#{stopped := false}, couch_scanner:status()). +t_start_stop_mm_mode(_) -> + ?assertEqual(ok, couch_scanner:stop()), + Plugin = atom_to_list(?FIND_PLUGIN), + config:set("couch_scanner_plugins", Plugin, "true", false), + meck:expect(?FIND_PLUGIN, shards, fun(_, _) -> timer:sleep(10000) end), + config:set("couchdb", "maintenance_mode", "true", true), + ?assertEqual(ok, couch_scanner:resume()), + #{pids := Pids1, stopped := false} = couch_scanner:status(), + ?assertEqual(#{}, Pids1), + config:set("couchdb", "maintenance_mode", "false", true), + ?assertEqual(ok, couch_scanner:stop()), + ?assertEqual(ok, couch_scanner:resume()), + #{pids := Pids2, stopped := false} = couch_scanner:status(), + ?assertMatch(#{<<"couch_scanner_plugin_find">> := Pid} when is_pid(Pid), Pids2), + ?assertEqual(ok, couch_scanner:stop()). + +t_start_stop_dead_nodes(_) -> + ?assertEqual(ok, couch_scanner:stop()), + Plugin = atom_to_list(?FIND_PLUGIN), + config:set("couch_scanner_plugins", Plugin, "true", false), + meck:expect(?FIND_PLUGIN, shards, fun(_, _) -> timer:sleep(10000) end), + meck:expect(mem3, nodes, fun() -> ['[email protected]'] end), + ?assertEqual(ok, couch_scanner:resume()), + #{pids := Pids1, stopped := false} = couch_scanner:status(), + ?assertEqual(#{}, Pids1), + meck:unload(), + ?assertEqual(ok, couch_scanner:stop()), + ?assertEqual(ok, couch_scanner:resume()), + #{pids := Pids2, stopped := false} = couch_scanner:status(), + ?assertMatch(#{<<"couch_scanner_plugin_find">> := Pid} when is_pid(Pid), Pids2), + ?assertEqual(ok, couch_scanner:stop()). 
+ t_run_through_all_callbacks_basic({_, {DbName1, DbName2, _}}) -> % Run the "find" plugin without any regexes meck:reset(couch_scanner_server),
