This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch pause-auto-purge-on-node-downs
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit d6feb8db8df3ce9babf29822bb7bd17e91cd8855
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Wed Jan 14 01:05:07 2026 -0500

    Pause scanner plugin if there are dead nodes
    
    We're already pausing on maintenance mode so this is in the same vein -- pause
    background processing if there is something unusual happening.
    
    This should especially help the auto-purge plugin, as we don't want to keep
    accumulating purge infos when we know for sure some internal replication purge
    clients won't be able to "see" and checkpoint when they are down.
---
 src/couch_scanner/src/couch_scanner_server.erl     | 13 +++++---
 .../test/eunit/couch_scanner_test.erl              | 39 ++++++++++++++++++++--
 2 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/src/couch_scanner/src/couch_scanner_server.erl b/src/couch_scanner/src/couch_scanner_server.erl
index 2ec1ac540..aaf3945cd 100644
--- a/src/couch_scanner/src/couch_scanner_server.erl
+++ b/src/couch_scanner/src/couch_scanner_server.erl
@@ -101,7 +101,7 @@ handle_call(status, _From, #st{} = St) ->
     {reply, #{pids => Pids, scheduling => Scheds, stopped => Stopped}, St};
 handle_call(stop, _From, #st{} = St) ->
     St1 = St#st{stopped = true},
-    St2 = stop_in_maintenance(St1),
+    St2 = stop_in_maintenance_or_dead_nodes(St1),
     {reply, ok, St2};
 handle_call(resume, _From, #st{} = St) ->
     St1 = start_stop(St#st{stopped = false}),
@@ -167,18 +167,18 @@ subscribe_for_config(#st{} = St) ->
     ok = config:listen_for_changes(?MODULE, self()),
     St.
 
-stop_in_maintenance(#st{pids = Pids} = St) ->
+stop_in_maintenance_or_dead_nodes(#st{pids = Pids} = St) ->
     case map_size(Pids) > 0 of
         true ->
-            couch_log:info("~p stopping in maintenance mode", [?MODULE]),
+            couch_log:warning("~p stopping, mm mode or found dead nodes", [?MODULE]),
             lists:foldl(fun stop_plugin/2, St, maps:keys(Pids));
         false ->
             St
     end.
 
 start_stop(#st{stopped = Stopped} = St) ->
-    case in_maintenance() orelse Stopped of
-        true -> stop_in_maintenance(St);
+    case in_maintenance() orelse Stopped orelse dead_nodes() of
+        true -> stop_in_maintenance_or_dead_nodes(St);
         false -> start_stop_cfg(St)
     end.
 
@@ -279,6 +279,9 @@ penalize(Now, #sched{error_count = ErrorCount} = Sched) ->
 in_maintenance() ->
     "false" /= config:get("couchdb", "maintenance_mode", "false").
 
+dead_nodes() ->
+    [] =/= (mem3:nodes() -- mem3_util:live_nodes()).
+
 tsec() ->
     erlang:system_time(second).
 
diff --git a/src/couch_scanner/test/eunit/couch_scanner_test.erl b/src/couch_scanner/test/eunit/couch_scanner_test.erl
index 5d6a22f38..53516ab7e 100644
--- a/src/couch_scanner/test/eunit/couch_scanner_test.erl
+++ b/src/couch_scanner/test/eunit/couch_scanner_test.erl
@@ -23,7 +23,9 @@ couch_scanner_test_() ->
         [
             ?TDEF_FE(t_top_level_api),
             ?TDEF_FE(t_start_stop),
-            ?TDEF_FE(t_run_through_all_callbacks_basic, 10),
+            ?TDEF_FE(t_start_stop_mm_mode, 10),
+            ?TDEF_FE(t_start_stop_dead_nodes, 10),
+            ?TDEF_FE(t_run_through_all_callbacks_basic, 10),
             ?TDEF_FE(t_find_reporting_works, 10),
             ?TDEF_FE(t_ddoc_features_works, 20),
             ?TDEF_FE(t_conflict_finder_works, 30),
@@ -53,6 +55,7 @@ setup() ->
     meck:new(fabric, [passthrough]),
     meck:new(couch_scanner_server, [passthrough]),
     meck:new(couch_scanner_util, [passthrough]),
+    meck:new(mem3, [passthrough]),
     Ctx = test_util:start_couch([fabric, couch_scanner]),
     % Run with the smallest batch size to exercise the batched
     % ddoc iteration
@@ -100,7 +103,7 @@ setup() ->
     {Ctx, {DbName1, DbName2, DbName3}}.
 
 teardown({Ctx, {DbName1, DbName2, DbName3}}) ->
-    config:delete("couch_scanner", "maintenance_mode", false),
+    config:delete("couchdb", "maintenance_mode", false),
     config_delete_section("couch_scanner"),
     config_delete_section("couch_scanner_plugins"),
     config_delete_section(atom_to_list(?FEATURES_PLUGIN)),
@@ -139,6 +142,38 @@ t_start_stop(_) ->
     ?assertEqual(ok, couch_scanner_server:resume()),
     ?assertMatch(#{stopped := false}, couch_scanner:status()).
 
+t_start_stop_mm_mode(_) ->
+    ?assertEqual(ok, couch_scanner:stop()),
+    Plugin = atom_to_list(?FIND_PLUGIN),
+    config:set("couch_scanner_plugins", Plugin, "true", false),
+    meck:expect(?FIND_PLUGIN, shards, fun(_, _) -> timer:sleep(10000) end),
+    config:set("couchdb", "maintenance_mode", "true", true),
+    ?assertEqual(ok, couch_scanner:resume()),
+    #{pids := Pids1, stopped := false} = couch_scanner:status(),
+    ?assertEqual(#{}, Pids1),
+    config:set("couchdb", "maintenance_mode", "false", true),
+    ?assertEqual(ok, couch_scanner:stop()),
+    ?assertEqual(ok, couch_scanner:resume()),
+    #{pids := Pids2, stopped := false} = couch_scanner:status(),
+    ?assertMatch(#{<<"couch_scanner_plugin_find">> := Pid} when is_pid(Pid), Pids2),
+    ?assertEqual(ok, couch_scanner:stop()).
+
+t_start_stop_dead_nodes(_) ->
+    ?assertEqual(ok, couch_scanner:stop()),
+    Plugin = atom_to_list(?FIND_PLUGIN),
+    config:set("couch_scanner_plugins", Plugin, "true", false),
+    meck:expect(?FIND_PLUGIN, shards, fun(_, _) -> timer:sleep(10000) end),
+    meck:expect(mem3, nodes, fun() -> ['[email protected]'] end),
+    ?assertEqual(ok, couch_scanner:resume()),
+    #{pids := Pids1, stopped := false} = couch_scanner:status(),
+    ?assertEqual(#{}, Pids1),
+    meck:unload(),
+    ?assertEqual(ok, couch_scanner:stop()),
+    ?assertEqual(ok, couch_scanner:resume()),
+    #{pids := Pids2, stopped := false} = couch_scanner:status(),
+    ?assertMatch(#{<<"couch_scanner_plugin_find">> := Pid} when is_pid(Pid), Pids2),
+    ?assertEqual(ok, couch_scanner:stop()).
+
 t_run_through_all_callbacks_basic({_, {DbName1, DbName2, _}}) ->
     % Run the "find" plugin without any regexes
     meck:reset(couch_scanner_server),

Reply via email to