This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch add-stem-memory-blowup-test in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 3b05f7ebc82742e5708db4f44646f4ba083d7785 Author: Nick Vatamaniuc <[email protected]> AuthorDate: Thu Mar 24 18:48:05 2022 -0400 Add test to ensure rev tree stemming doesn't take too much memory `couch_key_tree:stem/2`, as seen in https://github.com/apache/couchdb/pull/3963, has a potential to consume quite a bit of memory. Replacing sets with maps helped in that case however, since stemming has a non-tail recursive section, there is a chance in future versions of Erlang to experience the same behavior again. As a safeguard, add a memory limit test by stemming a larger conflicted rev tree while limiting the maximum process heap size. For that, use the nifty `max_heap_size` process flag, which ensures a process is killed if it starts using too much memory. To reduce the flakiness, use a determistic tree shape by using a hard-coded seed value, and leave a decent margin for error for the limit. Ref: https://github.com/apache/couchdb/pull/3963 --- src/couch/test/eunit/couch_key_tree_tests.erl | 46 ++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/src/couch/test/eunit/couch_key_tree_tests.erl b/src/couch/test/eunit/couch_key_tree_tests.erl index f571139..33bd70a 100644 --- a/src/couch/test/eunit/couch_key_tree_tests.erl +++ b/src/couch/test/eunit/couch_key_tree_tests.erl @@ -96,7 +96,8 @@ key_tree_stemming_test_() -> [ should_have_no_effect_for_stemming_more_levels_than_exists(), should_return_one_deepest_node(), - should_return_two_deepest_nodes() + should_return_two_deepest_nodes(), + should_not_use_excessive_memory_when_stemming() ] }. @@ -528,3 +529,46 @@ should_return_two_deepest_nodes() -> merge_and_stem(RevTree, Tree) -> {Merged, Result} = couch_key_tree:merge(RevTree, Tree), {couch_key_tree:stem(Merged, ?DEPTH), Result}. + +should_not_use_excessive_memory_when_stemming() -> + ?_test(begin + Seed = {1647,841737,351137}, % This is to preserve determinism + Tree = generate_rev_tree(1000, 0.006, Seed), + % Without the optimization #91de482fd66f4773b3b8583039c6bcaf1c5727ec + % stemming would consume about 18_000_000 words. With it, it consumes + % 6_000_000. So, use 13_000_000 as a threshold. + SpawnOpts = [ + monitor, + {max_heap_size, #{ + size => 13000000, + error_logger => false, + kill => true + }} + ], + {_Pid, Ref} = spawn_opt(fun() -> + couch_key_tree:stem(Tree, 1000) + end, SpawnOpts), + % When it uses too much memory exit would be `killed` + Exit = receive {'DOWN', Ref, _, _, Res} -> Res end, + ?assertEqual(normal, Exit) + end). + +generate_rev_tree(Depth, BranchChance, Seed) -> + rand:seed(exrop, Seed), + [{1, revnode(Depth, BranchChance)}]. + +revnode(0, _) -> + {rev(), x, []}; +revnode(Depth, BranchChance) -> + case rand:uniform() < BranchChance of + true -> + {rev(), x, [ + revnode(Depth - 1, BranchChance), + revnode(Depth - 1, BranchChance) + ]}; + false -> + {rev(), x, [revnode(Depth - 1, BranchChance)]} + end. + +rev() -> + crypto:strong_rand_bytes(16).
