This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch optimize-dbname-function in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 19c638dbc009099aeda78d2e1ee2003f87088fea Author: Nick Vatamaniuc <[email protected]> AuthorDate: Tue May 16 18:42:30 2023 -0400 Optimize mem3:dbname/1 function `mem3:dbname/1` with a `<<"shard/...">>` binary is called quite a few times as seen when profiling with fprof: https://gist.github.com/nickva/38760462c1545bf55d98f4898ae1983d In that case `mem3:dbname` is removing the timestamp suffix. However, because it uses `filename:rootname/1` which handles cases pertaining to file system paths and such, it ends up being a bit more expensive than necessary. To optimize it, try avoid translating it a list and parse the extension starting at the end of the binary. To avoid surprises, attempt to emulate exact behavior of the previously used filename:rootname/1 calls for various edge cases (`.`, `/.` etc). Most of those should be asserted in eunit test. A quick speed comparison test shows a 4x speedup or so: ``` speed_compare_test() -> DbName = <<"fabricbenchdb-1684269710714470.1684269710">>, Ids = lists:seq(1, 1000), {LocalUSec, ok} = timer:tc(fun() -> lists:foreach(fun(_) -> strip_suffix(DbName) end, Ids) end), {FilenameUSec, ok} = timer:tc(fun() -> lists:foreach(fun(_) -> list_to_binary(filename:rootname(binary_to_list(DbName))) end, Ids) end), > mem3:speed_compare_test(). {0.327,1.324} ``` --- src/mem3/src/mem3.erl | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/src/mem3/src/mem3.erl b/src/mem3/src/mem3.erl index 7b4a8eca2..d71cdffdf 100644 --- a/src/mem3/src/mem3.erl +++ b/src/mem3/src/mem3.erl @@ -462,6 +462,33 @@ db_is_current(Name) when is_binary(Name) -> % for unit tests that either test or use mem3_rep logic couch_server:exists(Name). +strip_suffix(<<>>) -> + <<>>; +strip_suffix(DbName) when is_binary(DbName) -> + Pos = byte_size(DbName) - 1, + strip_suffix(DbName, binary:at(DbName, Pos), Pos). + +strip_suffix(_, $., 0) -> + <<>>; +strip_suffix(DbName, $., Pos) when Pos > 0 -> + % The first . from the end. Return everything from start until Pos, + % effectively stripping out the extension, unless it is preceeded by / to + % preserve the previously used filename:rootname/1 behavior. + case binary:at(DbName, Pos - 1) of + $/ -> DbName; + _ -> binary_part(DbName, 0, Pos) + end; +strip_suffix(DbName, $/, Pos) when Pos >= 0 -> + % Found the first / from the end, we stop here to be compatible with the + % previously used filename:rootname/1 behavior. + DbName; +strip_suffix(DbName, _, 0) -> + % Reached the front of the string + DbName; +strip_suffix(DbName, _, Pos) when Pos > 0 -> + Pos1 = Pos - 1, + strip_suffix(DbName, binary:at(DbName, Pos1), Pos1). + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). @@ -505,4 +532,68 @@ rotate_rand_distribution_test() -> Cases = [rotate_rand([1, 2, 3]) || _ <- lists:seq(1, 100)], ?assertEqual(3, length(lists:usort(Cases))). +strip_suffix_test() -> + % Assert that strip_suffix function matches the behavior of + % the previously used filename:rootname/1 + + ?assertEqual(<<>>, strip_suffix(<<>>)), + ?assertEqual(strip_suffix(<<>>), filename:rootname(<<>>)), + + ?assertEqual(<<"a">>, strip_suffix(<<"a">>)), + ?assertEqual(strip_suffix(<<"a">>), filename:rootname(<<"a">>)), + + ?assertEqual(<<"abc">>, strip_suffix(<<"abc">>)), + ?assertEqual(strip_suffix(<<"abc">>), filename:rootname(<<"abc">>)), + + ?assertEqual(<<"/">>, strip_suffix(<<"/">>)), + ?assertEqual(strip_suffix(<<"/">>), filename:rootname(<<"/">>)), + + ?assertEqual(<<"//">>, strip_suffix(<<"//">>)), + ?assertEqual(strip_suffix(<<"//">>), filename:rootname(<<"//">>)), + + ?assertEqual(<<"/a">>, strip_suffix(<<"/a">>)), + ?assertEqual(strip_suffix(<<"/a">>), filename:rootname(<<"/a">>)), + + ?assertEqual(<<"a/">>, strip_suffix(<<"a/">>)), + ?assertEqual(strip_suffix(<<"a/">>), filename:rootname(<<"a/">>)), + + ?assertEqual(<<>>, strip_suffix(<<".">>)), + ?assertEqual(strip_suffix(<<".">>), filename:rootname(<<".">>)), + + ?assertEqual(<<".">>, strip_suffix(<<"..">>)), + ?assertEqual(strip_suffix(<<"..">>), filename:rootname(<<"..">>)), + + ?assertEqual(<<"/.">>, strip_suffix(<<"/.">>)), + ?assertEqual(strip_suffix(<<"/.">>), filename:rootname(<<"/.">>)), + + ?assertEqual(<<"./">>, strip_suffix(<<"./">>)), + ?assertEqual(strip_suffix(<<"./">>), filename:rootname(<<"./">>)), + + ?assertEqual(<<>>, strip_suffix(<<".a">>)), + ?assertEqual(strip_suffix(<<".a">>), filename:rootname(<<".a">>)), + + ?assertEqual(<<>>, strip_suffix(<<".ab">>)), + ?assertEqual(strip_suffix(<<".ab">>), filename:rootname(<<".ab">>)), + + ?assertEqual(<<"a">>, strip_suffix(<<"a.b">>)), + ?assertEqual(strip_suffix(<<"a.b">>), filename:rootname(<<"a.b">>)), + + ?assertEqual(<<"a">>, strip_suffix(<<"a.bc">>)), + ?assertEqual(strip_suffix(<<"a.bc">>), filename:rootname(<<"a.bc">>)), + + ?assertEqual(<<"a./b">>, strip_suffix(<<"a./b">>)), + ?assertEqual(strip_suffix(<<"a./b">>), filename:rootname(<<"a./b">>)), + + ?assertEqual(<<"a/.b">>, strip_suffix(<<"a/.b">>)), + ?assertEqual(strip_suffix(<<"a/.b">>), filename:rootname(<<"a/.b">>)), + + ?assertEqual(<<"a/.">>, strip_suffix(<<"a/..b">>)), + ?assertEqual(strip_suffix(<<"a/..b">>), filename:rootname(<<"a/..b">>)), + + ?assertEqual(<<"/a/b">>, strip_suffix(<<"/a/b">>)), + ?assertEqual(strip_suffix(<<"/a/b">>), filename:rootname(<<"/a/b">>)), + + ?assertEqual(<<"/a/b">>, strip_suffix(<<"/a/b.c">>)), + ?assertEqual(strip_suffix(<<"/a/b.c">>), filename:rootname(<<"/a/b.c">>)). + -endif.
