This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch add-stem-memory-blowup-test
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 3b05f7ebc82742e5708db4f44646f4ba083d7785
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Thu Mar 24 18:48:05 2022 -0400

    Add test to ensure rev tree stemming doesn't take too much memory
    
    `couch_key_tree:stem/2`, as seen in
    https://github.com/apache/couchdb/pull/3963, has a potential to consume 
quite a
    bit of memory. Replacing sets with maps helped in that case however, since
    stemming has a non-tail recursive section, there is a chance in future 
versions
    of Erlang to experience the same behavior again.
    
    As a safeguard, add a memory limit test by stemming a larger conflicted rev
    tree while limiting the maximum process heap size. For that, use the nifty
    `max_heap_size` process flag, which ensures a process is killed if it starts
    using too much memory.
    
    To reduce the flakiness, use a determistic tree shape by using a hard-coded
    seed value, and leave a decent margin for error for the limit.
    
    Ref: https://github.com/apache/couchdb/pull/3963
---
 src/couch/test/eunit/couch_key_tree_tests.erl | 46 ++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/src/couch/test/eunit/couch_key_tree_tests.erl 
b/src/couch/test/eunit/couch_key_tree_tests.erl
index f571139..33bd70a 100644
--- a/src/couch/test/eunit/couch_key_tree_tests.erl
+++ b/src/couch/test/eunit/couch_key_tree_tests.erl
@@ -96,7 +96,8 @@ key_tree_stemming_test_() ->
         [
             should_have_no_effect_for_stemming_more_levels_than_exists(),
             should_return_one_deepest_node(),
-            should_return_two_deepest_nodes()
+            should_return_two_deepest_nodes(),
+            should_not_use_excessive_memory_when_stemming()
         ]
     }.
 
@@ -528,3 +529,46 @@ should_return_two_deepest_nodes() ->
 merge_and_stem(RevTree, Tree) ->
     {Merged, Result} = couch_key_tree:merge(RevTree, Tree),
     {couch_key_tree:stem(Merged, ?DEPTH), Result}.
+
+should_not_use_excessive_memory_when_stemming() ->
+    ?_test(begin
+        Seed = {1647,841737,351137}, % This is to preserve determinism
+        Tree = generate_rev_tree(1000, 0.006, Seed),
+        % Without the optimization #91de482fd66f4773b3b8583039c6bcaf1c5727ec
+        % stemming would consume about 18_000_000 words. With it, it consumes
+        % 6_000_000. So, use 13_000_000 as a threshold.
+        SpawnOpts = [
+            monitor,
+            {max_heap_size, #{
+                size => 13000000,
+                error_logger => false,
+                kill => true
+            }}
+        ],
+        {_Pid, Ref} = spawn_opt(fun() ->
+            couch_key_tree:stem(Tree, 1000)
+        end, SpawnOpts),
+        % When it uses too much memory exit would be `killed`
+        Exit = receive {'DOWN', Ref, _, _, Res} ->  Res end,
+        ?assertEqual(normal, Exit)
+    end).
+
+generate_rev_tree(Depth, BranchChance, Seed) ->
+    rand:seed(exrop, Seed),
+    [{1, revnode(Depth, BranchChance)}].
+
+revnode(0, _) ->
+    {rev(), x, []};
+revnode(Depth, BranchChance) ->
+    case rand:uniform() < BranchChance of
+        true ->
+            {rev(), x, [
+                revnode(Depth - 1, BranchChance),
+                revnode(Depth - 1, BranchChance)
+            ]};
+        false ->
+            {rev(), x, [revnode(Depth - 1, BranchChance)]}
+    end.
+
+rev() ->
+    crypto:strong_rand_bytes(16).

Reply via email to