This is an automated email from the ASF dual-hosted git repository. rnewson pushed a commit to branch auto-delete-tseq in repository https://gitbox.apache.org/repos/asf/couchdb.git
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
%   http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

% Provenance (from the commit notification this file was extracted from):
%   commit 24cc48423252933327959156cdb50caebfcb7e5f
%   Author: Robert Newson <rnew...@apache.org>
%   AuthorDate: Tue Sep 2 15:32:21 2025 +0100
%   Subject: purge tombstones that exceed TTL
%   Path: src/couch/src/couch_tombstone_remover.erl (new file, 128 insertions)

% Scanner plugin that purges deleted documents (tombstones) from databases
% that have a positive entry in the "tombstone_ttl" config section.

-module(couch_tombstone_remover).
-behaviour(couch_scanner_plugin).

-export([
    start/2,
    resume/2,
    complete/1,
    checkpoint/1,
    db/2,
    db_opened/2,
    db_closing/2,
    doc_fdi/3
]).

-include_lib("couch_scanner/include/couch_scanner_plugin.hrl").

% Begin a new scan identified by ScanId. Returns {ok, St} with a fresh plugin
% state when should_run/0 is true, otherwise the atom skip.
start(ScanId, #{}) ->
    St = init_config(ScanId),
    case should_run() of
        true ->
            ?INFO("Starting.", [], St),
            {ok, St};
        false ->
            ?INFO("Not starting.", [], St),
            skip
    end.

% Resume a scan identified by ScanId. The checkpointed map passed as the
% second argument is not used; the state is rebuilt from scratch exactly as
% in start/2.
resume(ScanId, #{}) ->
    St = init_config(ScanId),
    case should_run() of
        true ->
            ?INFO("Resuming.", [], St),
            {ok, St};
        false ->
            ?INFO("Not resuming.", [], St),
            skip
    end.

% Log completion of the scan and return an empty map.
complete(St) ->
    ?INFO("Completed", [], St),
    {ok, #{}}.

% Return an empty map as the checkpoint value; nothing from the plugin state
% is included in it.
checkpoint(_St) ->
    {ok, #{}}.

% Decide whether DbName should be scanned.
%
% The TTL is read from the "tombstone_ttl" config section using the database
% name as the key, defaulting to 0.
%   - TTL > 0: the database is scanned and the TTL is stored in the state
%     under the ttl key.
%   - TTL = 0 (including when no entry is configured): the database is
%     skipped.
%   - TTL < 0: treated as a misconfiguration; a warning is logged and the
%     database is skipped, rather than crashing the whole scan with a
%     case_clause error.
db(St, DbName) ->
    case config:get_integer("tombstone_ttl", ?b2l(DbName), 0) of
        TTL when is_integer(TTL), TTL > 0 ->
            {ok, St#{ttl => TTL}};
        0 ->
            {skip, St};
        Negative when is_integer(Negative), Negative < 0 ->
            ?WARN(
                "Ignoring negative tombstone_ttl ~p for ~s",
                [Negative, DbName],
                meta(St)
            ),
            {skip, St}
    end.

% Called when a database handle has been opened for scanning.
%
% Computes the cutoff sequence from the database's time-seq structure and the
% configured TTL, then returns {StartSeq, ChangeOpts, St}. The start is always
% 0. When couch_time_seq:since/2 yields the atom now, no end_key is supplied;
% otherwise the scan is bounded by {end_key, SinceSeq}. The per-database purge
% counter is reset to 0 and the cutoff is remembered under since_seq.
db_opened(#{} = St, Db) ->
    #{ttl := TTL} = St,
    TimeSeq = couch_db:get_time_seq(Db),
    % NOTE(review): TTL is assumed to be in the same unit as
    % couch_time_seq:timestamp/0 -- confirm against couch_time_seq.
    Cutoff = couch_time_seq:timestamp() - TTL,
    SinceSeq = couch_time_seq:since(TimeSeq, Cutoff),
    ChangeOpts =
        case SinceSeq of
            now -> [];
            _ -> [{end_key, SinceSeq}]
        end,
    ?INFO("scanning for tombstones in ~s up to ~p", [couch_db:name(Db), SinceSeq], meta(St)),
    {0, ChangeOpts, St#{count => 0, since_seq => SinceSeq}}.

% Called when the scan of a database handle finishes. Logs how many
% tombstones were purged from it and resets the counter.
db_closing(#{} = St, Db) ->
    #{count := Purged} = St,
    ?INFO("purged ~B tombstones from ~s", [Purged, couch_db:name(Db)], meta(St)),
    {ok, St#{count := 0}}.

% Called for every #full_doc_info{} visited by the scan.
%
% Deleted documents whose update_seq is at or below the stored cutoff are
% purged; everything else is passed over with the state unchanged.
doc_fdi(#{} = St, #full_doc_info{deleted = true, update_seq = UpdateSeq} = FDI, Db) ->
    #{since_seq := SinceSeq} = St,
    % When SinceSeq is the atom now, Erlang's term order (number < atom)
    % makes this comparison true for any numeric update_seq.
    case UpdateSeq =< SinceSeq of
        true -> purge(St, Db, FDI);
        false -> {ok, St}
    end;
doc_fdi(#{} = St, #full_doc_info{}, _Db) ->
    {ok, St}.

% Build the initial plugin state for a scan.
init_config(ScanId) ->
    #{sid => ScanId}.

% The plugin runs only where couch_scanner_util:on_first_node/0 says so.
should_run() ->
    couch_scanner_util:on_first_node().

% Purge every leaf revision of one tombstone through fabric.
%
% On success the purge counter in the state is incremented. On any other
% outcome a warning is logged and the state is returned unchanged, so a single
% failed purge does not abort the scan. Always returns {ok, St}.
purge(#{} = St, Db, #full_doc_info{id = Id, rev_tree = RevTree}) ->
    DbName = mem3:dbname(couch_db:name(Db)),
    IdRevs = [{Id, leaf_revs(RevTree)}],
    case purge_in_worker(DbName, IdRevs) of
        ok ->
            #{count := Count} = St,
            {ok, St#{count => Count + 1}};
        Reason ->
            ?WARN(
                "Failed to purge tombstone ~s/~s for reason ~p",
                [DbName, Id, Reason],
                meta(St)
            ),
            {ok, St}
    end.

% String form of the revision at the head of every leaf path in RevTree.
leaf_revs(RevTree) ->
    [
        couch_doc:rev_to_str({Pos, RevId})
     || {#leaf{}, {Pos, [RevId | _]}} <- couch_key_tree:get_all_leafs(RevTree)
    ].

% Run fabric:purge_docs/3 in a monitored helper process and return that
% process's exit reason: ok when the purge returned {ok, _}, otherwise
% whatever fabric returned or the crash reason.
% NOTE(review): presumably a separate process is used to isolate the scanner
% from fabric's messages and failures -- confirm.
purge_in_worker(DbName, IdRevs) ->
    {Pid, Ref} = spawn_monitor(fun() ->
        exit(
            case fabric:purge_docs(DbName, IdRevs, [?ADMIN_CTX]) of
                {ok, _} -> ok;
                Other -> Other
            end
        )
    end),
    receive
        {'DOWN', Ref, process, Pid, Reason} -> Reason
    end.

% Logging metadata: only the scan id is taken from the plugin state.
meta(#{sid := ScanId}) ->
    #{sid => ScanId}.