This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch avoid-creating-invalid-view-purge-checkpoints in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit a2223b57bb800c3feff9fe9c386311d0a3b5dbb9 Author: Nick Vatamaniuc <[email protected]> AuthorDate: Sat Mar 7 00:21:00 2026 -0500 Avoid creating purge checkpoints for invalid views Previously, if we successfully parsed a design document into an #mrst{} record, we would create a purge checkpoint even if the view group didn't have any views, or used an invalid language. Normally, that would just be untidy, having extra unused checkpoints around, however, when we actually start purging, these checkpoints won't be advancing. So, after a while they will stall the compactor from compacting the purge sequences and advancing the minimum purge sequence forward. In other words we need to differentiate between parsable #mrst{} view groups and buildable/viable ones. Ken (our background index builder) ken was already performing the same check, so simply move it to the proc manager and the the indexing utility modules. In the proc manager, noticed that the repeated OS environment fetching and parsing was kind of time consuming (100s of microseonds) so used a persitent term. --- src/couch/src/couch_proc_manager.erl | 32 +++++++++- src/couch_index/src/couch_index_util.erl | 1 + src/couch_mrview/src/couch_mrview_index.erl | 17 +++-- src/couch_mrview/src/couch_mrview_util.erl | 35 +++++++++-- src/fabric/test/eunit/fabric_tests.erl | 96 ++++++++++++++++++++++++++++- src/ken/src/ken_server.erl | 23 +------ 6 files changed, 173 insertions(+), 31 deletions(-) diff --git a/src/couch/src/couch_proc_manager.erl b/src/couch/src/couch_proc_manager.erl index aa538e23e..e8813f93a 100644 --- a/src/couch/src/couch_proc_manager.erl +++ b/src/couch/src/couch_proc_manager.erl @@ -25,8 +25,7 @@ new_proc/1, reload/0, terminate_stale_procs/0, - get_servers_from_env/1, - native_query_server_enabled/0 + allowed_languages/0 ]). -export([ @@ -503,6 +502,18 @@ split_by_char(String, Char) -> {Key, Value}. get_servers_from_env(Spec) -> + case persistent_term:get({?MODULE, Spec}, undefined) of + undefined -> + % Save some time since os:getenv() can potentially be very large + % and we don't want to reparse it all every time. + Val = get_servers_from_env_raw(Spec), + persistent_term:put({?MODULE, Spec}, Val), + Val; + Val -> + Val + end. + +get_servers_from_env_raw(Spec) -> SpecLen = length(Spec), % loop over os:getenv(), match SPEC_ lists:filtermap( @@ -517,6 +528,23 @@ get_servers_from_env(Spec) -> os:getenv() ). +% Get the list of all the languages we know how handle. Some are built-in +% and some are configurable via config settings or env vars +% +allowed_languages() -> + % These are always available + BuiltIn = [<<"javascript">>, <<"javascript_quickjs">>, <<"query">>], + Config = + get_servers_from_env("COUCHDB_QUERY_SERVER_") ++ + get_servers_from_env("COUCHDB_NATIVE_QUERY_SERVER_"), + Allowed0 = [list_to_binary(string:to_lower(Lang)) || {Lang, _Cmd} <- Config], + Allowed = + case native_query_server_enabled() of + true -> [<<"erlang">> | Allowed0]; + _Else -> Allowed0 + end, + lists:usort(BuiltIn ++ Allowed). + configure_language_servers() -> ets:delete_all_objects(?SERVERS), % NOTE: process environment variables cannot be altered except by diff --git a/src/couch_index/src/couch_index_util.erl b/src/couch_index/src/couch_index_util.erl index 9a16d06d6..5812926af 100644 --- a/src/couch_index/src/couch_index_util.erl +++ b/src/couch_index/src/couch_index_util.erl @@ -15,6 +15,7 @@ -export([root_dir/0, index_dir/2, index_file/3]). -export([load_doc/3, sort_lib/1, hexsig/1]). -export([get_purge_checkpoints/2, cleanup_purges/3]). +-export([delete_checkpoint/2]). -include_lib("couch/include/couch_db.hrl"). diff --git a/src/couch_mrview/src/couch_mrview_index.erl b/src/couch_mrview/src/couch_mrview_index.erl index 164227b49..f0a649b77 100644 --- a/src/couch_mrview/src/couch_mrview_index.erl +++ b/src/couch_mrview/src/couch_mrview_index.erl @@ -292,13 +292,22 @@ ensure_local_purge_docs(DbName, DDocs) -> end). ensure_local_purge_doc(Db, #mrst{} = State) -> + IsValid = couch_mrview_util:mrst_has_valid_views(State), Sig = couch_index_util:hexsig(get(signature, State)), DocId = couch_mrview_util:get_local_purge_doc_id(Sig), - case couch_db:open_doc(Db, DocId, []) of - {not_found, _} -> + case {IsValid, couch_db:open_doc(Db, DocId, [])} of + {true, {not_found, _}} -> + % Is valid and doc doesn't exist => create it create_local_purge_doc(Db, State); - {ok, _} -> - ok + {false, {not_found, _}} -> + % Invalid and doc doesn't exist => do nothing + ok; + {true, {ok, _}} -> + % Is valid and doc exists already => do nothing + ok; + {false, {ok, _}} -> + % Invalid and doc exists => cleanup + couch_index_util:delete_checkpoint(Db, DocId) end. create_local_purge_doc(Db, State) -> diff --git a/src/couch_mrview/src/couch_mrview_util.erl b/src/couch_mrview/src/couch_mrview_util.erl index 494f5b3ec..48138bf76 100644 --- a/src/couch_mrview/src/couch_mrview_util.erl +++ b/src/couch_mrview/src/couch_mrview_util.erl @@ -18,6 +18,7 @@ -export([get_signatures/1, get_purge_checkpoints/1, get_index_files/1]). -export([get_signatures_from_ddocs/2]). -export([ddoc_to_mrst/2, init_state/4, reset_index/3]). +-export([mrst_has_valid_views/1]). -export([make_header/1]). -export([index_file/2, compaction_file/2, open_file/1]). -export([delete_files/2, delete_index_file/2, delete_compaction_file/2]). @@ -112,12 +113,23 @@ get_signatures(Db) -> get_signatures_from_ddocs(DbName, DDocs1). % From a list of design #doc{} records returns signatures map: #{Sig => true} -% +% This will be valid signatures of views we expect to run and build on this +% node. get_signatures_from_ddocs(DbName, DDocs) when is_list(DDocs) -> FoldFun = fun(#doc{} = Doc, Acc) -> - {ok, Mrst} = ddoc_to_mrst(DbName, Doc), - Sig = couch_util:to_hex_bin(Mrst#mrst.sig), - Acc#{Sig => true} + try ddoc_to_mrst(DbName, Doc) of + {ok, Mrst} -> + case couch_mrview_util:mrst_has_valid_views(Mrst) of + true -> + Sig = couch_util:to_hex_bin(Mrst#mrst.sig), + Acc#{Sig => true}; + false -> + Acc + end + catch + _:_ -> + Acc + end end, lists:foldl(FoldFun, #{}, DDocs). @@ -346,6 +358,21 @@ view_sig_term(BaseSig, UpdateSeq, PurgeSeq) -> view_sig_term(BaseSig, UpdateSeq, PurgeSeq, Args) -> {BaseSig, UpdateSeq, PurgeSeq, Args}. +% We can construct an MRSt from design document which may have languages this +% server doesn't support, or may have no views. That view group won't build on +% this server, and we can use this utility function to filter it out. Ken and +% purge checkpoint cleanups use this function. +% +mrst_has_valid_views(MRSt) -> + case couch_mrview_index:get(views, MRSt) of + [_ | _] -> + Language = couch_mrview_index:get(language, MRSt), + AllowedLanguages = couch_proc_manager:allowed_languages(), + lists:member(Language, AllowedLanguages); + [] -> + false + end. + init_state(Db, Fd, #mrst{views = Views} = State, nil) -> PurgeSeq = couch_db:get_purge_seq(Db), Header = #mrheader{ diff --git a/src/fabric/test/eunit/fabric_tests.erl b/src/fabric/test/eunit/fabric_tests.erl index c092f4473..daf018231 100644 --- a/src/fabric/test/eunit/fabric_tests.erl +++ b/src/fabric/test/eunit/fabric_tests.erl @@ -27,7 +27,9 @@ cleanup_index_files_test_() -> ?TDEF_FE(t_cleanup_index_files_with_view_data), ?TDEF_FE(t_cleanup_index_files_with_deleted_db), ?TDEF_FE(t_cleanup_index_file_after_ddoc_update), - ?TDEF_FE(t_cleanup_index_file_after_ddoc_delete) + ?TDEF_FE(t_cleanup_index_file_after_ddoc_delete), + ?TDEF_FE(t_cleanup_empty_view_checkpoints), + ?TDEF_FE(t_cleanup_disallowed_language_checkpoints) ] }. @@ -151,6 +153,76 @@ t_cleanup_index_file_after_ddoc_delete({_, DbName}) -> ?assertEqual([], indices(DbName)), ?assertEqual([], purges(DbName)). +t_cleanup_empty_view_checkpoints({_, DbName}) -> + ?assertEqual( + [ + "4bcdf852098ff6b0578ddf472c320e9c.view", + "da817c3d3f7413c1a610f25635a0c521.view" + ], + indices(DbName) + ), + ?assertEqual( + [ + <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">>, + <<"_local/purge-mrview-da817c3d3f7413c1a610f25635a0c521">> + ], + purges(DbName) + ), + + update_empty_ddoc(DbName, <<"_design/foo">>), + {ok, _} = fabric:get_view_group_info(DbName, <<"foo">>), + ok = fabric:cleanup_index_files_all_nodes(DbName), + + % One 4bc stays, da8 should gone. If it weren't for the check to + % empty views it would have used signature "3e823c2a4383ac0c18d4e574135a5b08" + ?assertEqual( + [ + "4bcdf852098ff6b0578ddf472c320e9c.view" + ], + indices(DbName) + ), + ?assertEqual( + [ + <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">> + ], + purges(DbName) + ). + +t_cleanup_disallowed_language_checkpoints({_, DbName}) -> + ?assertEqual( + [ + "4bcdf852098ff6b0578ddf472c320e9c.view", + "da817c3d3f7413c1a610f25635a0c521.view" + ], + indices(DbName) + ), + ?assertEqual( + [ + <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">>, + <<"_local/purge-mrview-da817c3d3f7413c1a610f25635a0c521">> + ], + purges(DbName) + ), + + update_invalid_language(DbName, <<"_design/foo">>, <<"bar">>), + {ok, _} = fabric:get_view_group_info(DbName, <<"foo">>), + ok = fabric:cleanup_index_files_all_nodes(DbName), + + % One 4bc stays, the new view uses an invalid language which can't index with + % so we don't create a checkpoint for it + ?assertEqual( + [ + "4bcdf852098ff6b0578ddf472c320e9c.view" + ], + indices(DbName) + ), + ?assertEqual( + [ + <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">> + ], + purges(DbName) + ). + shard_names(DbName) -> [mem3:name(S) || S <- mem3:local_shards(DbName)]. @@ -239,6 +311,28 @@ update_ddoc(DbName, DDocId, ViewName) -> }, fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]). +update_empty_ddoc(DbName, DDocId) -> + {ok, DDoc0} = fabric:open_doc(DbName, DDocId, [?ADMIN_CTX]), + DDoc = DDoc0#doc{body = {[]}}, + fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]). + +update_invalid_language(DbName, DDocId, ViewName) -> + {ok, DDoc0} = fabric:open_doc(DbName, DDocId, [?ADMIN_CTX]), + DDoc = DDoc0#doc{ + body = + {[ + {<<"language">>, <<"cobol">>}, + {<<"views">>, + {[ + {ViewName, + {[ + {<<"map">>, <<"MAIN-PROCEDURE. PERFORM MAP">>} + ]}} + ]}} + ]} + }, + fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]). + delete_ddoc(DbName, DDocId) -> {ok, DDoc0} = fabric:open_doc(DbName, DDocId, [?ADMIN_CTX]), DDoc = DDoc0#doc{deleted = true, body = {[]}}, diff --git a/src/ken/src/ken_server.erl b/src/ken/src/ken_server.erl index c51ae4304..26c611eae 100644 --- a/src/ken/src/ken_server.erl +++ b/src/ken/src/ken_server.erl @@ -361,15 +361,12 @@ should_update(#doc{body = {Props}}, IndexType) -> end. update_ddoc_views(Name, MRSt, Seq, State) -> - Language = couch_mrview_index:get(language, MRSt), - Allowed = lists:member(Language, allowed_languages()), - Views = couch_mrview_index:get(views, MRSt), - if - Allowed andalso Views =/= [] -> + case couch_mrview_util:mrst_has_valid_views(MRSt) of + true -> {ok, Pid} = couch_index_server:get_index(couch_mrview_index, MRSt), GroupName = couch_mrview_index:get(idx_name, MRSt), maybe_start_job({Name, GroupName}, Pid, Seq, State); - true -> + false -> ok end. @@ -531,20 +528,6 @@ prune_worker_table(State) -> ets:select_delete(ken_workers, [{MatchHead, [Guard], [true]}]), State#state{pruned_last = erlang:monotonic_time()}. -allowed_languages() -> - % These are always available - BuiltIn = [<<"javascript">>, <<"javascript_quickjs">>, <<"query">>], - Config = - couch_proc_manager:get_servers_from_env("COUCHDB_QUERY_SERVER_") ++ - couch_proc_manager:get_servers_from_env("COUCHDB_NATIVE_QUERY_SERVER_"), - Allowed0 = [list_to_binary(string:to_lower(Lang)) || {Lang, _Cmd} <- Config], - Allowed = - case couch_proc_manager:native_query_server_enabled() of - true -> [<<"erlang">> | Allowed0]; - _Else -> Allowed0 - end, - lists:usort(BuiltIn ++ Allowed). - config(Key, Default) -> config:get("ken", Key, Default).
