This is an automated email from the ASF dual-hosted git repository.
vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git
The following commit(s) were added to refs/heads/main by this push:
new 333f5259b Implement dry-run for auto-purge plugin
333f5259b is described below
commit 333f5259bdce15e60b0d47cd827fee03999888c4
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Thu Jan 22 14:34:31 2026 -0500
Implement dry-run for auto-purge plugin
Add a dry-run mode for the auto-purge plugin. Users can enable it and
schedule the plugin to run to see how many deleted documents would have
been purged for each db shard range. Users may adjust the ttl or the
plugin schedule (to run more or less often) and get an idea how long it
would take to scan over all the data on the cluster.
To make log entries stand out, users can additionally adjust the log level
of the start, stop and summary reporting log events.
---
rel/overlay/etc/default.ini | 11 ++++++
src/couch/src/couch_auto_purge_plugin.erl | 31 +++++++++++++----
.../test/eunit/couch_auto_purge_plugin_tests.erl | 39 +++++++++++++++++++++-
src/docs/src/config/scanner.rst | 18 ++++++++++
4 files changed, 91 insertions(+), 8 deletions(-)
diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index b6ab9d3ba..69aae2ad8 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -1215,14 +1215,25 @@ url = {{nouveau_url}}
; The fewest id/rev pairs the plugin will attempt to purge in
; one request, excepting at the end of a database scan.
;min_batch_size = 250
+
; The most id/rev pairs the plugin will attempt to purge in
; one request.
;max_batch_size = 500
+
; The default time-to-live, measured in seconds, before a
; deleted document is eligible to be purged by the plugin.
; Defaults to undefined, which disables auto purging.
;deleted_document_ttl =
+; Set the log level for starting, stopping and purge report summary log entries.
+;log_level = info
+
+; When set to "true" the plugin does everything (scanning, revision processing,
+; etc) but skips the purge step. Optionally use the "log_level" plugin setting
+; to increase the severity of log reports so it's clear when the plugin starts,
+; stops and how many revisions it found to purge.
+;dry_run = false
+
[nouveau_index_upgrader]
; Common scanner scheduling settings
;after = restart
diff --git a/src/couch/src/couch_auto_purge_plugin.erl b/src/couch/src/couch_auto_purge_plugin.erl
index e63516d30..4fd904fdd 100644
--- a/src/couch/src/couch_auto_purge_plugin.erl
+++ b/src/couch/src/couch_auto_purge_plugin.erl
@@ -30,27 +30,27 @@
start(ScanId, #{}) ->
case dead_nodes() of
true ->
- ?INFO("Not starting. Found dead nodes", [], #{sid => ScanId}),
+ ?LOG(level(), "Not starting. Found dead nodes", [], #{sid => ScanId}),
skip;
false ->
St = init_config(ScanId),
- ?INFO("Starting.", [], St),
+ ?LOG(level(), "Starting.", [], St),
{ok, St}
end.
resume(ScanId, #{}) ->
case dead_nodes() of
true ->
- ?INFO("Not resuming. Found dead nodes", [], #{sid => ScanId}),
+ ?LOG(level(), "Not resuming. Found dead nodes", [], #{sid => ScanId}),
skip;
false ->
St = init_config(ScanId),
- ?INFO("Resuming.", [], St),
+ ?LOG(level(), "Resuming.", [], St),
{ok, St}
end.
complete(St) ->
- ?INFO("Completed", [], St),
+ ?LOG(level(), "Completed.", [], St),
{ok, #{}}.
checkpoint(St) ->
@@ -84,7 +84,13 @@ db_opened(#{} = St, Db) ->
db_closing(#{} = St, Db) ->
St1 = #{count := Count} = flush_queue(St, Db),
- ?INFO("purged ~B deleted documents from ~s", [Count, couch_db:name(Db)], meta(St1)),
+ case Count > 0 of
+ true ->
+ LogArgs = [Count, couch_db:name(Db)],
+ ?LOG(level(), "purged ~B deleted documents from ~s", LogArgs, meta(St1));
+ false ->
+ ok
+ end,
{ok, St1}.
doc_fdi(#{} = St, #full_doc_info{deleted = true} = FDI, Db) ->
@@ -121,7 +127,11 @@ flush_queue(#{queue := []} = St, _Db) ->
flush_queue(#{queue := IdRevs} = St, Db) ->
DbName = mem3:dbname(couch_db:name(Db)),
N = mem3:n(DbName),
- PurgeFun = fun() -> fabric:purge_docs(DbName, IdRevs, [?ADMIN_CTX, {w, N}]) end,
+ PurgeFun =
+ case dry_run() of
+ false -> fun() -> fabric:purge_docs(DbName, IdRevs, [?ADMIN_CTX, {w, N}]) end;
+ true -> fun() -> {ok, [{ok, Revs} || {_Id, Revs} <- IdRevs]} end
+ end,
Timeout = fabric_util:request_timeout(),
try fabric_util:isolate(PurgeFun, Timeout) of
{Health, Results} when Health == ok; Health == accepted ->
@@ -232,5 +242,12 @@ min_batch_size() ->
max_batch_size() ->
erlang:max(min_batch_size(), config:get_integer(atom_to_list(?MODULE), "max_batch_size", 500)).
+dry_run() ->
+ config:get_boolean(atom_to_list(?MODULE), "dry_run", false).
+
+level() ->
+ Level = config:get(atom_to_list(?MODULE), "log_level", "info"),
+ couch_log_util:level_to_atom(Level).
+
dead_nodes() ->
[] =/= (mem3:nodes() -- mem3_util:live_nodes()).
diff --git a/src/couch/test/eunit/couch_auto_purge_plugin_tests.erl
b/src/couch/test/eunit/couch_auto_purge_plugin_tests.erl
index a42ef52c5..38b325022 100644
--- a/src/couch/test/eunit/couch_auto_purge_plugin_tests.erl
+++ b/src/couch/test/eunit/couch_auto_purge_plugin_tests.erl
@@ -29,7 +29,9 @@ couch_quickjs_scanner_plugin_test_() ->
?TDEF_FE(t_min_batch_size_1, 10),
?TDEF_FE(t_min_batch_size_2, 10),
?TDEF_FE(t_max_batch_size_1, 10),
- ?TDEF_FE(t_max_batch_size_2, 10)
+ ?TDEF_FE(t_max_batch_size_2, 10),
+ ?TDEF_FE(t_dry_run, 10),
+ ?TDEF_FE(t_dry_run_with_non_default_log_level, 10)
]
}.
@@ -42,6 +44,7 @@ setup() ->
DbName = ?tempdb(),
ok = fabric:create_db(DbName, [{q, "2"}, {n, "1"}]),
config:set(atom_to_list(?PLUGIN), "max_batch_items", "1", false),
+ config:set(atom_to_list(?PLUGIN), "dry_run", "false", false),
reset_stats(),
{Ctx, DbName}.
@@ -87,6 +90,37 @@ t_auto_purge_after_db_ttl({_, DbName}) ->
?assertEqual(0, doc_del_count(DbName)),
ok.
+t_dry_run({_, DbName}) ->
+ config:set(atom_to_list(?PLUGIN), "dry_run", "true", false),
+ config:set(atom_to_list(?PLUGIN), "deleted_document_ttl", "-3_hour", false),
+ ok = add_doc(DbName, <<"doc1">>, #{<<"_deleted">> => true}),
+ ?assertEqual(1, doc_del_count(DbName)),
+ meck:reset(couch_scanner_server),
+ meck:reset(couch_scanner_util),
+ meck:reset(?PLUGIN),
+ config:set("couch_scanner_plugins", atom_to_list(?PLUGIN), "true", false),
+ wait_exit(10000),
+ ?assertEqual(1, doc_del_count(DbName)),
+ ?assert(log_calls(info) >= 3),
+ ?assert(log_calls(warning) < 3),
+ ok.
+
+t_dry_run_with_non_default_log_level({_, DbName}) ->
+ config:set(atom_to_list(?PLUGIN), "dry_run", "true", false),
+ config:set(atom_to_list(?PLUGIN), "log_level", "warning", false),
+ config:set(atom_to_list(?PLUGIN), "deleted_document_ttl", "-3_hour", false),
+ ok = add_doc(DbName, <<"doc1">>, #{<<"_deleted">> => true}),
+ ?assertEqual(1, doc_del_count(DbName)),
+ meck:reset(couch_scanner_server),
+ meck:reset(couch_scanner_server),
+ meck:reset(?PLUGIN),
+ config:set("couch_scanner_plugins", atom_to_list(?PLUGIN), "true", false),
+ wait_exit(10000),
+ ?assertEqual(1, doc_del_count(DbName)),
+ ?assert(log_calls(warning) >= 3),
+ ?assert(log_calls(info) < 3),
+ ok.
+
t_min_batch_size_1({_, DbName}) ->
meck:new(fabric, [passthrough]),
config:set_integer(atom_to_list(?PLUGIN), "min_batch_size", 5),
@@ -210,3 +244,6 @@ wait_exit(MSec) ->
doc_del_count(DbName) ->
{ok, DbInfo} = fabric:get_db_info(DbName),
couch_util:get_value(doc_del_count, DbInfo).
+
+log_calls(Level) ->
+ meck:num_calls(couch_scanner_util, log, [Level, ?PLUGIN, '_', '_', '_']).
diff --git a/src/docs/src/config/scanner.rst b/src/docs/src/config/scanner.rst
index 9c56891d2..e03ad7006 100644
--- a/src/docs/src/config/scanner.rst
+++ b/src/docs/src/config/scanner.rst
@@ -263,3 +263,21 @@ settings in their ``[{plugin}]`` section.
The database may override this setting with the
:ref:`api/db/auto_purge` endpoint. If neither is set, the
plugin will not purge deleted documents.
+
+ .. config:option:: log_level
+
+ Set the log level for starting, stopping and purge report summary log entries. ::
+
+ [couch_auto_purge_plugin]
+ log_level = info
+
+ .. config:option:: dry_run
+
+ When set to ``true`` the plugin does everything (scanning, revision
+ processing, etc) but skips the actual purge step. Optionally use the
+ ``log_level`` plugin setting to increase the severity of log reports so
+ it's clear when the plugin starts, stops and how many revisions it found
+ to purge. ::
+
+ [couch_auto_purge_plugin]
+ dry_run = false