This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch dry-run-mode-for-auto-purge
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit c9979d977994fe30b65a164f9132e2ebedfc1049
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Thu Jan 22 14:34:31 2026 -0500

    Implement dry-run for auto-purge plugin
    
    Add a dry-run mode for the auto-purge plugin. Users can enable it and
    schedule the plugin to run to see how many deleted documents would have
    been purged for each db shard range. Users may adjust the ttl or the
    plugin schedule (to run more or less often) and get an idea of how long
    it would take to scan over all the data on the cluster.
---
 rel/overlay/etc/default.ini                        |  5 ++++
 src/couch/src/couch_auto_purge_plugin.erl          | 33 ++++++++++++++++++----
 .../test/eunit/couch_auto_purge_plugin_tests.erl   | 17 ++++++++++-
 src/docs/src/config/scanner.rst                    |  7 +++++
 4 files changed, 56 insertions(+), 6 deletions(-)

diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index b6ab9d3ba..0fc4f9ef2 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -1223,6 +1223,11 @@ url = {{nouveau_url}}
 ; Defaults to undefined, which disables auto purging.
 ;deleted_document_ttl =
 
+; When set to "true" the plugin does everything (scanning, revision processing,
+; etc.) but skips the purge step. For each db file it will also log a warning
+; with the count of revisions it would have purged for that db shard range.
+;dry_run = false
+
 [nouveau_index_upgrader]
 ; Common scanner scheduling settings
 ;after = restart
diff --git a/src/couch/src/couch_auto_purge_plugin.erl 
b/src/couch/src/couch_auto_purge_plugin.erl
index e63516d30..bd6b3b74f 100644
--- a/src/couch/src/couch_auto_purge_plugin.erl
+++ b/src/couch/src/couch_auto_purge_plugin.erl
@@ -34,7 +34,10 @@ start(ScanId, #{}) ->
             skip;
         false ->
             St = init_config(ScanId),
-            ?INFO("Starting.", [], St),
+            case dry_run() of
+                false -> ?INFO("Starting.", [], St);
+                true -> ?WARN("Starting.", [], St)
+            end,
             {ok, St}
     end.
 
@@ -45,12 +48,18 @@ resume(ScanId, #{}) ->
             skip;
         false ->
             St = init_config(ScanId),
-            ?INFO("Resuming.", [], St),
+            case dry_run() of
+                false -> ?INFO("Resuming.", [], St);
+                true -> ?WARN("Resuming.", [], St)
+            end,
             {ok, St}
     end.
 
 complete(St) ->
-    ?INFO("Completed", [], St),
+    case dry_run() of
+        false -> ?INFO("Completed.", [], St);
+        true -> ?WARN("Completed.", [], St)
+    end,
     {ok, #{}}.
 
 checkpoint(St) ->
@@ -84,7 +93,14 @@ db_opened(#{} = St, Db) ->
 
 db_closing(#{} = St, Db) ->
     St1 = #{count := Count} = flush_queue(St, Db),
-    ?INFO("purged ~B deleted documents from ~s", [Count, couch_db:name(Db)], 
meta(St1)),
+    LogMsg = "purged ~B deleted documents from ~s",
+    LogArgs = [Count, couch_db:name(Db)],
+    LogMeta = meta(St1),
+    % In a dry run log at a higher level if anything would have been purged.
+    case dry_run() andalso Count > 0 of
+        false -> ?INFO(LogMsg, LogArgs, LogMeta);
+        true -> ?WARN(LogMsg, LogArgs, LogMeta)
+    end,
     {ok, St1}.
 
 doc_fdi(#{} = St, #full_doc_info{deleted = true} = FDI, Db) ->
@@ -121,7 +137,11 @@ flush_queue(#{queue := []} = St, _Db) ->
 flush_queue(#{queue := IdRevs} = St, Db) ->
     DbName = mem3:dbname(couch_db:name(Db)),
     N = mem3:n(DbName),
-    PurgeFun = fun() -> fabric:purge_docs(DbName, IdRevs, [?ADMIN_CTX, {w, 
N}]) end,
+    PurgeFun =
+        case dry_run() of
+            false -> fun() -> fabric:purge_docs(DbName, IdRevs, [?ADMIN_CTX, 
{w, N}]) end;
+            true -> fun() -> {ok, [{ok, Revs} || {_Id, Revs} <- IdRevs]} end
+        end,
     Timeout = fabric_util:request_timeout(),
     try fabric_util:isolate(PurgeFun, Timeout) of
         {Health, Results} when Health == ok; Health == accepted ->
@@ -232,5 +252,8 @@ min_batch_size() ->
 max_batch_size() ->
     erlang:max(min_batch_size(), config:get_integer(atom_to_list(?MODULE), 
"max_batch_size", 500)).
 
+dry_run() ->
+    config:get_boolean(atom_to_list(?MODULE), "dry_run", false).
+
 dead_nodes() ->
     [] =/= (mem3:nodes() -- mem3_util:live_nodes()).
diff --git a/src/couch/test/eunit/couch_auto_purge_plugin_tests.erl 
b/src/couch/test/eunit/couch_auto_purge_plugin_tests.erl
index a42ef52c5..5c55c4f85 100644
--- a/src/couch/test/eunit/couch_auto_purge_plugin_tests.erl
+++ b/src/couch/test/eunit/couch_auto_purge_plugin_tests.erl
@@ -29,7 +29,8 @@ couch_quickjs_scanner_plugin_test_() ->
             ?TDEF_FE(t_min_batch_size_1, 10),
             ?TDEF_FE(t_min_batch_size_2, 10),
             ?TDEF_FE(t_max_batch_size_1, 10),
-            ?TDEF_FE(t_max_batch_size_2, 10)
+            ?TDEF_FE(t_max_batch_size_2, 10),
+            ?TDEF_FE(t_dry_run, 10)
         ]
     }.
 
@@ -42,6 +43,7 @@ setup() ->
     DbName = ?tempdb(),
     ok = fabric:create_db(DbName, [{q, "2"}, {n, "1"}]),
     config:set(atom_to_list(?PLUGIN), "max_batch_items", "1", false),
+    config:set(atom_to_list(?PLUGIN), "dry_run", "false", false),
     reset_stats(),
     {Ctx, DbName}.
 
@@ -87,6 +89,19 @@ t_auto_purge_after_db_ttl({_, DbName}) ->
     ?assertEqual(0, doc_del_count(DbName)),
     ok.
 
+t_dry_run({_, DbName}) ->
+    config:set(atom_to_list(?PLUGIN), "dry_run", "true", false),
+    config:set(atom_to_list(?PLUGIN), "deleted_document_ttl", "-3_hour", 
false),
+    ok = add_doc(DbName, <<"doc1">>, #{<<"_deleted">> => true}),
+    ?assertEqual(1, doc_del_count(DbName)),
+    meck:reset(couch_scanner_server),
+    meck:reset(?PLUGIN),
+    config:set("couch_scanner_plugins", atom_to_list(?PLUGIN), "true", false),
+    wait_exit(10000),
+    % didn't actually purge
+    ?assertEqual(1, doc_del_count(DbName)),
+    ok.
+
 t_min_batch_size_1({_, DbName}) ->
     meck:new(fabric, [passthrough]),
     config:set_integer(atom_to_list(?PLUGIN), "min_batch_size", 5),
diff --git a/src/docs/src/config/scanner.rst b/src/docs/src/config/scanner.rst
index 9c56891d2..d0dc4b082 100644
--- a/src/docs/src/config/scanner.rst
+++ b/src/docs/src/config/scanner.rst
@@ -263,3 +263,10 @@ settings in their ``[{plugin}]`` section.
         The database may override this setting with the
         :ref:`api/db/auto_purge` endpoint. If neither is set, the
         plugin will not purge deleted documents.
+
+    .. config:option:: dry_run
+
+        When set to ``true`` the plugin does everything (scanning, revision
+        processing, etc.) but skips the purge step. For each db file it will
+        also log a warning with the count of revisions it would have purged
+        for that db shard range.

Reply via email to