This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch use-maps-in-couch-lru in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 1109ea3f697da7fb4760ef82149112f23143af67 Author: Nick Vatamaniuc <[email protected]> AuthorDate: Sun Aug 4 01:02:52 2024 -0400 Update couch_lru to use maps Maps are more ergonomic and usually more performant. Also add some tests for couch_lru and a few specs with an improved comment indicating what the close function actually doesn't. Previously, it mentions only one of the things: closing oldest idle, however it also bump busy entries, and also clears deleted (or locked) handles from the cache. This change was originally part of another PR that's now closed: #5165. Investigtion into couch_lru started with issue #5166 --- src/couch/src/couch_lru.erl | 201 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 186 insertions(+), 15 deletions(-) diff --git a/src/couch/src/couch_lru.erl b/src/couch/src/couch_lru.erl index 005e50615..d7833c239 100644 --- a/src/couch/src/couch_lru.erl +++ b/src/couch/src/couch_lru.erl @@ -15,34 +15,44 @@ -include("couch_server_int.hrl"). +-type cache() :: {any(), #{binary() => pos_integer()}}. + +-spec new() -> cache(). new() -> - {gb_trees:empty(), dict:new()}. + % {gb_trees(UniqueInt -> DbName), #{DbName => UniqueInt}} + {gb_trees:empty(), #{}}. -insert(DbName, {Tree0, Dict0}) -> +-spec insert(binary(), cache()) -> cache(). +insert(DbName, {Tree0, #{} = Map0}) -> Lru = couch_util:unique_monotonic_integer(), - {gb_trees:insert(Lru, DbName, Tree0), dict:store(DbName, Lru, Dict0)}. + {gb_trees:insert(Lru, DbName, Tree0), Map0#{DbName => Lru}}. -update(DbName, {Tree0, Dict0}) -> - case dict:find(DbName, Dict0) of - {ok, Old} -> +-spec update(binary(), cache()) -> cache(). +update(DbName, {Tree0, #{} = Map0}) -> + case Map0 of + #{DbName := Old} -> New = couch_util:unique_monotonic_integer(), Tree = gb_trees:insert(New, DbName, gb_trees:delete(Old, Tree0)), - Dict = dict:store(DbName, New, Dict0), - {Tree, Dict}; - error -> + {Tree, Map0#{DbName := New}}; + #{} -> % We closed this database before processing the update. Ignore - {Tree0, Dict0} + {Tree0, Map0} end. -%% Attempt to close the oldest idle database. +%% Attempt to close the oldest idle database. This function also cleans deleted +%% and locked entries from the Lru and also bumps busy entries until the first +%% idle entry is found. If not entry is found it returns `false`. In that case +%% all bumped entries are lost as heap garbage. + +-spec close(cache()) -> {true, cache()} | false. close({Tree, _} = Cache) -> close_int(gb_trees:next(gb_trees:iterator(Tree)), Cache). %% internals -close_int(none, _) -> +close_int(none, {_Tree, #{}}) -> false; -close_int({Lru, DbName, Iter}, {Tree, Dict} = Cache) -> +close_int({Lru, DbName, Iter}, {Tree, #{} = Map} = Cache) -> CouchDbs = couch_server:couch_dbs(DbName), CouchDbsPidToName = couch_server:couch_dbs_pid_to_name(DbName), @@ -53,7 +63,7 @@ close_int({Lru, DbName, Iter}, {Tree, Dict} = Cache) -> true = ets:delete(CouchDbs, DbName), true = ets:delete(CouchDbsPidToName, Pid), exit(Pid, kill), - {true, {gb_trees:delete(Lru, Tree), dict:erase(DbName, Dict)}}; + {true, {gb_trees:delete(Lru, Tree), maps:remove(DbName, Map)}}; false -> true = couch_server:unlock(CouchDbs, DbName), couch_stats:increment_counter([couchdb, couch_server, lru_skip]), @@ -62,5 +72,166 @@ close_int({Lru, DbName, Iter}, {Tree, Dict} = Cache) -> false -> NewTree = gb_trees:delete(Lru, Tree), NewIter = gb_trees:iterator(NewTree), - close_int(gb_trees:next(NewIter), {NewTree, dict:erase(DbName, Dict)}) + close_int(gb_trees:next(NewIter), {NewTree, maps:remove(DbName, Map)}) end. + +-ifdef(TEST). + +-include_lib("couch/include/couch_eunit.hrl"). + +-define(DB1, <<"db1">>). +-define(DB2, <<"db2">>). + +couch_lru_test_() -> + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(t_new), + ?TDEF_FE(t_insert), + ?TDEF_FE(t_insert_duplicate), + ?TDEF_FE(t_update), + ?TDEF_FE(t_close_empty), + ?TDEF_FE(t_close_unlocked_idle), + ?TDEF_FE(t_close_bump_busy_all), + ?TDEF_FE(t_close_bump_busy_one), + ?TDEF_FE(t_close_entry_one_is_missing) + ] + }. + +t_new(_) -> + Cache = new(), + ?assertMatch({_, _}, Cache), + {Tree, Map} = Cache, + ?assert(gb_trees:is_empty(Tree)), + ?assert(is_map(Map) andalso map_size(Map) == 0). + +t_insert(_) -> + {Tree, Map} = insert(?DB1, new()), + ?assertEqual(1, gb_trees:size(Tree)), + ?assertEqual(1, map_size(Map)), + ?assertMatch(#{?DB1 := _}, Map), + #{?DB1 := Int} = Map, + ?assert(is_integer(Int)), + ?assert(Int > 0), + ?assertEqual([{Int, ?DB1}], gb_trees:to_list(Tree)). + +t_insert_duplicate(_) -> + % We technically allow this, but is this right? Should we always use update + % instead which would reap the old LRU entry + % + {Tree, Map} = insert(?DB1, insert(?DB1, new())), + ?assertEqual(2, gb_trees:size(Tree)), + ?assertEqual(1, map_size(Map)), + ?assertMatch(#{?DB1 := _}, Map), + ?assertMatch([{_, ?DB1}, {_, ?DB1}], gb_trees:to_list(Tree)). + +t_update(_) -> + % Insert followed by update. + {Tree, Map} = update(?DB1, insert(?DB1, new())), + ?assertEqual(1, gb_trees:size(Tree)), + ?assertEqual(1, map_size(Map)), + ?assertMatch(#{?DB1 := _}, Map), + #{?DB1 := Int} = Map, + ?assertEqual([{Int, ?DB1}], gb_trees:to_list(Tree)). + +t_close_empty(_) -> + ?assertEqual(false, close(new())). + +t_close_unlocked_idle({Dbs, DbsPids, [Pid1, _]}) -> + % There is one db handle and it's idle. It should get closed. + Cache = insert(?DB1, new()), + Res = close(Cache), + ?assertMatch({true, {_, #{}}}, Res), + {true, {Tree, Map}} = Res, + ?assert(gb_trees:is_empty(Tree)), + ?assert(is_map(Map) andalso map_size(Map) == 0), + ?assertNot(is_process_alive(Pid1)), + ?assertEqual([], ets:lookup(Dbs, ?DB1)), + ?assertEqual([], ets:lookup(DbsPids, Pid1)). + +t_close_bump_busy_all({_Dbs, _DbsPids, [Pid1, _]}) -> + % There is one db handle and it's busy. We should stay up. + Cache = insert(?DB1, new()), + meck:expect(couch_db, is_idle, 1, false), + % Yeah, it is odd that we're throwing away the updated cache by returning + % just false if we don't end up finding an idle Db. + meck:reset(couch_stats), + ?assertEqual(false, close(Cache)), + ?assertEqual(1, meck:num_calls(couch_stats, increment_counter, 1)), + ?assert(is_process_alive(Pid1)). + +t_close_bump_busy_one({Dbs, DbsPids, [Pid1, Pid2]}) -> + % One busy handle gets bumped, the idle one closed. + {_, Map} = Cache = insert(?DB2, insert(?DB1, new())), + meck:expect(couch_db, is_idle, fun + (?DB1) -> false; + (?DB2) -> true + end), + meck:reset(couch_stats), + {true, {Tree1, Map1}} = close(Cache), + ?assert(is_process_alive(Pid1)), + ?assertNot(is_process_alive(Pid2)), + ?assertEqual(1, ets:info(Dbs, size)), + ?assertEqual(1, ets:info(DbsPids, size)), + ?assertEqual(1, meck:num_calls(couch_stats, increment_counter, 1)), + ?assert(is_process_alive(Pid1)), + ?assertEqual(1, gb_trees:size(Tree1)), + ?assertEqual(1, map_size(Map1)), + % The ?DB1 entry should have been bumped + #{?DB1 := OldInt} = Map, + #{?DB1 := NewInt} = Map1, + ?assert(NewInt > OldInt), + ?assertEqual([{NewInt, ?DB1}], gb_trees:to_list(Tree1)). + +t_close_entry_one_is_missing({Dbs, _, [_Pid1, Pid2]}) -> + % Two entries but one is missing from db ets + % it should be auto-removed from LRU + Cache = insert(?DB2, insert(?DB1, new())), + ets:delete(Dbs, ?DB1), + {true, {Tree1, Map1}} = close(Cache), + % One entry was removed, one was closed. There should be + % nothing left in the LRU. + ?assert(gb_trees:is_empty(Tree1)), + ?assert(is_map(Map1)), + ?assertEqual(0, map_size(Map1)), + ?assertNot(is_process_alive(Pid2)). + +setup() -> + Pid1 = spawn(fun() -> + receive + something -> ok + end + end), + Pid2 = spawn(fun() -> + receive + something -> ok + end + end), + Dbs = ets:new(unique_name(), [named_table, public, {keypos, #entry.name}]), + DbsPids = ets:new(unique_name(), [named_table, public]), + ets:insert(Dbs, #entry{name = ?DB1, db = ?DB1, pid = Pid1, lock = unlocked}), + ets:insert(Dbs, #entry{name = ?DB2, db = ?DB2, pid = Pid2, lock = unlocked}), + ets:insert(DbsPids, {Pid1, ?DB1}), + ets:insert(DbsPids, {Pid2, ?DB2}), + meck:new(couch_server, [passthrough]), + meck:expect(couch_server, couch_dbs, fun(_) -> Dbs end), + meck:expect(couch_server, couch_dbs_pid_to_name, fun(_) -> DbsPids end), + meck:expect(couch_db, is_idle, 1, true), + meck:expect(couch_stats, increment_counter, 1, ok), + {Dbs, DbsPids, [Pid1, Pid2]}. + +teardown({Dbs, DbsPids, [Pid1, Pid2]}) -> + ets:delete(Dbs), + ets:delete(DbsPids), + exit(Pid1, kill), + exit(Pid2, kill), + meck:unload(). + +unique_name() -> + Mod = atom_to_list(?MODULE), + Unique = erlang:unique_integer([positive, monotonic]), + list_to_atom(Mod ++ "_" ++ integer_to_list(Unique)). + +-endif.
