This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch property-tests-for-couch-key-tree
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 64ac90b708ddfac1a8efc5c1487db56d420abe93
Author: Nick Vatamaniuc <vatam...@apache.org>
AuthorDate: Fri Feb 2 12:08:08 2018 -0500

    Key tree property tests
    
    Key tree module is a candidate to use property tests on as it mostly deals 
with
    manipulating a single data structure and functions are referentially
    transparent, that is, they aren't many side-effects like IO generated.
    
    Property tests are currently run as part of the EUnit testing framework. In
    the future it might be interesting to have a longer or more thourough runs
    to perhaps catch more edge cases.
    
    To run the tests used Triq. It is an Apache licensed property testing tool. 
It
    is not as full features as the commerical and the other, GPL-licensed one, 
but
    seems to have the basics and should be easy to integrate with CouchDB's 
code.
    
    To run the test:
    
    make eunit apps=couch suites=couch_key_tree_prop_tests
    
    To play with the code in the interpreter can try this script:
    
    ```
    set +e
    make > /dev/null
    exec erl \
     -pa src/couch/ebin \
     -pa src/triq/ebin/ \
     -pa src/config/ebin \
     -eval "application:start(config), code:load_file(couch_key_tree), 
code:load_file(triq)"
    ```
---
 .gitignore                                   |   1 +
 Makefile                                     |   2 +-
 Makefile.win                                 |   2 +-
 rebar.config.script                          |   5 +-
 src/couch/test/couch_key_tree_prop_tests.erl | 391 +++++++++++++++++++++++++++
 5 files changed, 397 insertions(+), 4 deletions(-)

diff --git a/.gitignore b/.gitignore
index 3e22192..625c011 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,6 +51,7 @@ src/oauth/
 src/rebar/
 src/setup/
 src/snappy/
+src/triq/
 tmp/
 
 src/couch/*.o
diff --git a/Makefile b/Makefile
index bd3b8ac..4d9d2ea 100644
--- a/Makefile
+++ b/Makefile
@@ -21,7 +21,7 @@ DESTDIR=
 
 # Rebar options
 apps=
-skip_deps=folsom,meck,mochiweb,proper,snappy
+skip_deps=folsom,meck,mochiweb,triq,snappy
 suites=
 tests=
 
diff --git a/Makefile.win b/Makefile.win
index 7ff0ab5..01efc5a 100644
--- a/Makefile.win
+++ b/Makefile.win
@@ -27,7 +27,7 @@ DESTDIR=
 
 # Rebar options
 apps=
-skip_deps=folsom,meck,mochiweb,proper,snappy
+skip_deps=folsom,meck,mochiweb,triq,snappy
 suites=
 tests=
 
diff --git a/rebar.config.script b/rebar.config.script
index 60d2e31..69ec709 100644
--- a/rebar.config.script
+++ b/rebar.config.script
@@ -64,8 +64,9 @@ DepDescs = [
 {ibrowse,          "ibrowse",          {tag, "CouchDB-4.0.1"}},
 {jiffy,            "jiffy",            {tag, "CouchDB-0.14.11-2"}},
 {mochiweb,         "mochiweb",         {tag, "CouchDB-2.12.0-1"}},
-{meck,             "meck",             {tag, "0.8.8"}}
-
+{meck,             "meck",             {tag, "0.8.8"}},
+{triq,             {url, "https://github.com/triqng/triq.git"},
+                   "1a9b022cc0432c9314f6f06c61dc375e474b5573"}
 ],
 
 
diff --git a/src/couch/test/couch_key_tree_prop_tests.erl 
b/src/couch/test/couch_key_tree_prop_tests.erl
new file mode 100644
index 0000000..d8d5ee0
--- /dev/null
+++ b/src/couch/test/couch_key_tree_prop_tests.erl
@@ -0,0 +1,391 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_key_tree_prop_tests).
+
+-include_lib("triq/include/triq.hrl").
+-triq(eunit).
+-include_lib("eunit/include/eunit.hrl").
+
+-define(SIZE_REDUCTION, 3).  % How much to reduce size with tree depth.
+-define(MAX_BRANCHES, 5).  % Maximum number of branches.
+-define(RAND_SIZE, 1 bsl 60).
+
+
+%
+% Properties
+%
+
+
+% Merge random paths from a revtree into itself. Check that no revisions have
+% been lost in the process and that result is one of the 3 expected values.
+%
+prop_revtree_merge_with_subset_of_own_nodes() ->
+    ?FORALL(Revs, g_revs(),
+        ?FORALL({RevTree, Branch}, {g_revtree(Revs), g_revtree(Revs, 1)},
+            ?IMPLIES(length(Branch) > 0,
+                begin
+                    [Tree | _ ] = Branch,
+                    %?debugFmt("RevTree:~p Tree:~p", [RevTree, Tree]),
+                    {Merged, Result} = couch_key_tree:merge(RevTree, Tree),
+                    RepeatingRevs = [], % repeating_revs(levels(Merged)),
+                    MergedKeys = lists:usort(keylist(Merged)),
+                    InputKeys = lists:usort(keylist(RevTree) ++ keylist(Tree)),
+                    lists:member(Result, [new_leaf, new_branch, internal_node])
+                        andalso MergedKeys == InputKeys
+                        andalso RepeatingRevs == []
+                end
+            )
+        )
+    ).
+
+
+% Stem deeper than the current max level. Expect no changes to the revtree
+%
+prop_no_change_stemming_deeper_than_current_depth() ->
+    ?FORALL(RevTree, g_revtree(),
+        begin
+            StemDepth = depth(RevTree) + 1,
+            %?debugFmt("RevTree:~p StemDepth:~p", [RevTree, StemDepth]),
+            Stemmed = couch_key_tree:stem(RevTree, StemDepth),
+            StemmedKeys = lists:usort(keylist(Stemmed)),
+            InputKeys = lists:usort(keylist(RevTree)),
+            StemmedKeys == InputKeys
+        end
+    ).
+
+
+% Stem at a random small depth, make sure that resulting tree has
+% unique revisions and the same number or less revisions than input
+%
+prop_stemming_results_in_same_or_less_total_revs() ->
+    ?FORALL({RevTree, StemDepth}, {g_revtree(), choose(1, 20)},
+        begin
+            Stemmed = couch_key_tree:stem(RevTree, StemDepth),
+            StemmedKeys = keylist(Stemmed),
+            UniqueStemmedKeys = lists:usort(StemmedKeys),
+            UniqueInputKeys = lists:usort(keylist(RevTree)),
+            length(StemmedKeys) == length(UniqueStemmedKeys)
+                andalso length(UniqueStemmedKeys) =< length(UniqueInputKeys)
+        end
+    ).
+
+
+% Generate a longer path (revtree with no branches) then stem it.
+% Always expect it to shrink to stemmed depth.
+prop_stem_path_expect_size_to_get_smaller() ->
+    ?FORALL({RevTree, StemDepth},
+        {
+           ?SIZED(Size, resize(Size * 10, g_revtree([], 1))),
+           choose(1,5)
+        },
+        ?IMPLIES(real_depth(RevTree) > 5,
+                begin
+                    Stemmed = couch_key_tree:stem(RevTree, StemDepth),
+                    StemmedKeys = lists:usort(keylist(Stemmed)),
+                    InputKeys = lists:usort(keylist(RevTree)),
+                    length(InputKeys) > length(StemmedKeys)
+                        andalso real_depth(Stemmed) == StemDepth
+                end
+        )
+    ).
+
+
+
+%
+% Generators
+%
+
+% Generate a full rev tree. Most of the forms are just there to set up default
+% parameters, _revtree/3 does all heavy lifting.
+%
+
+g_revtree() ->
+    ?SIZED(Size, g_revtree(Size)).
+
+
+g_revtree(Size) when is_integer(Size) ->
+    g_revtree(Size, [], ?MAX_BRANCHES);
+g_revtree(Revs) when is_list(Revs) ->
+    ?SIZED(Size, g_revtree(Size, Revs, ?MAX_BRANCHES)).
+
+
+g_revtree(Size, Revs) when is_integer(Size), is_list(Revs) ->
+    g_revtree(Size, Revs, ?MAX_BRANCHES);
+g_revtree(Revs, MaxBranches) when is_list(Revs), is_integer(MaxBranches) ->
+    ?SIZED(Size, g_revtree(Size, Revs, MaxBranches)).
+
+
+g_revtree(0, _Revs, _MaxBranches) ->
+    [];
+g_revtree(Size, ERevs, MaxBranches) ->
+    ?LET({Depth, Revs}, {g_stem_depth(Size), g_revs(Size, ERevs)},
+         begin
+             SmallerSize = Size div ?SIZE_REDUCTION,
+             ?DELAY([{Depth, g_treenode(SmallerSize, Revs, MaxBranches)}])
+         end
+    ).
+
+
+% Generate a revtree with a the tree that is a path.
+g_revpath() ->
+    ?SIZED(Size, resize(Size * 50, g_revtree([], 1))).
+
+
+
+% Generate a tree node and then recursively generate its children.
+%
+g_treenode(0, Revs, _) ->
+    {elements(Revs), x, []};
+g_treenode(Size, Revs, MaxBranches) ->
+    ?DELAY(?LET(N, int(0, MaxBranches),
+        begin
+            %?debugFmt("----- g_treenode S:~p N:~p",[Size,N]),
+            [Rev | ChildRevs] = Revs,
+            {Rev, x, g_nodes(Size div ?SIZE_REDUCTION, N, ChildRevs, 
MaxBranches)}
+        end
+    )).
+
+
+% Generate a list of child nodes. Depending on how many children there are
+% the pre-generarated revision list is split into that many sublists.
+%
+g_nodes(0, _N, _Revs, _MaxBranches) ->
+    [];
+g_nodes(_Size, 0, _Revs, _MaxBranches) ->
+    [];
+g_nodes(Size, ChildCount, Revs, MaxBranches) ->
+    ?LETSHRINK(
+        ChildNodes,
+        begin
+            ChildRevList = child_revs(ChildCount, Revs, Size, MaxBranches),
+            [g_treenode(Size, ChildRevs, MaxBranches) || ChildRevs <- 
ChildRevList]
+        end,
+        ordered_nodes(ChildNodes)
+    ).
+
+
+% Generate each subtree's stem depth
+%
+
+
+g_stem_depth(Size) ->
+    choose(0,  4 * expected_height(Size, ?SIZE_REDUCTION)).
+
+
+% Uses the shuffle/1 function to shuffle the input list. Unshuffled list is
+% used as the shrink value.
+%
+g_shuffle(L) when is_list(L) ->
+    triq_dom:domain(g_shuffle,
+        fun(Self, _Size) -> {Self, shuffle(L)} end,
+        fun(Self, _Val) -> {Self, L} end
+     ).
+
+
+% Wrapper to make a list shuffling generator that doesn't shrink
+%
+g_shuffle_noshrink(L) when is_list(L) ->
+    triq_dom:noshrink(g_shuffle(L)).
+
+
+% Generate shuffled sublists up to N items long from a list.
+%
+g_shuffled_sublists(L, N) ->
+    ?LET(Shuffled, g_shuffle_noshrink(L), lists:sublist(Shuffled, N)).
+
+
+% Generate revision lists.
+%
+g_revs() ->
+    ?SIZED(Size, g_revs(Size)).
+
+
+g_revs(Size) when is_integer(Size) ->
+    g_revs(Size, []).
+
+
+g_revs(Size, Existing) when is_integer(Size), is_list(Existing) ->
+    Expected = keys_needed(Size, ?SIZE_REDUCTION, ?MAX_BRANCHES),
+    Revs = revs(Expected, Existing),
+    case length(Revs) > Expected of
+        true -> % have extra, try various sublists
+            g_shuffled_sublists(Revs, Expected);
+        false ->
+            triq_dom:return(Revs)
+    end.
+
+
+%
+% Helper functions
+%
+
+
+% Shufle a list of items. Tag each item with a random number then sort
+% the list and remove the tags.
+%
+shuffle(L) ->
+    Tagged = [{triq_rnd:uniform(), X} || X <- L],
+    [X || {_, X} <- lists:sort(Tagged)].
+
+
+% Generate list of relateively unique large random numbers
+rand_list(N) when N =< 0 ->
+    [];
+rand_list(N) ->
+    [triq_rnd:uniform(?RAND_SIZE) || _ <- lists:seq(1, N)].
+
+
+% Generate a list of revisions to be used as key in revision trees. Expected
+% must the number of maximum expected nodes in a revision tree. Existing is an
+% optional list revisions which must be included in the result. The output list
+% is sorted.
+revs(0, _Existing) ->
+    [];
+revs(Expected, Existing) when is_integer(Expected), is_list(Existing) ->
+    Need = Expected - length(Existing),
+    lists:usort(lists:append(Existing, rand_list(Need))).
+
+
+% Get the list of all the keys in a revision tree. The input can also be a
+% an individual tree (tagged with the depth to virtual root) or a node.
+% Yes, this is not tail recursive but the idea is to keep it simple.
+%
+keylist({_D, Node}) when is_tuple(Node) ->
+    keylist(Node);
+keylist({K, _V, Nodes}) ->
+    [K | keylist(Nodes)];
+keylist(Nodes) ->
+    lists:append([keylist(Node) || Node <- Nodes]).
+
+
+% Get lists of all the keys at each depth level. Result is an orddict that
+% looks like [{depth, [key]}]. The depth used here is the "virtual" depth as
+% indicated by the stemmed depth tag that goes with every top level subtree.
+%
+levels([]) ->
+    orddict:new();
+levels(RevTree) when is_list(RevTree) ->
+    lists:foldl(fun(T, Dict) -> levels(T, Dict) end, orddict:new(), RevTree).
+
+
+levels({Depth, Node}, Dict) when is_tuple(Node) ->
+    levels(Node, Depth, Dict).
+
+
+levels({K, _V, Nodes}, Depth, Dict) ->
+    Dict1 = case orddict:is_key(Depth, Dict) of
+        true -> orddict:append(Depth, K, Dict);
+        false -> orddict:store(Depth, [K], Dict)
+    end,
+    levels(Nodes, Depth + 1, Dict1);
+levels(Nodes, Depth, Dict) ->
+    lists:foldl(fun(Node, AccDict) ->
+        levels(Node, Depth, AccDict)
+    end, Dict, Nodes).
+
+
+repeating_revs(Dict) ->
+    orddict:filter(fun(_Depth, Revs) ->
+        length(lists:usort(Revs)) =/= length(Revs)
+    end, Dict).
+
+
+% Get the maximum depth of a revtree. The depth is "virtual" as it takes into
+% account the distance to the now stemmed root node as indicated by the top
+% level subtrees.
+%
+depth([]) ->
+    0;
+depth(RevTree) when is_list(RevTree) ->
+    lists:max([depth(T) || T <- RevTree]);
+depth({Depth, Node}) when is_tuple(Node) ->
+    depth(Node, Depth - 1).
+
+
+depth({_K, _V, Nodes}, Depth) ->
+    depth(Nodes, Depth + 1);
+depth([], Depth) ->
+    Depth;
+depth(Nodes, Depth) ->
+    lists:max([depth(Node, Depth) || Node <- Nodes]).
+
+
+% Get the "real" tree depth, not the virtual one. As revtrees gets stemmed they
+% will keep their virtual depth but the actual number of nodes in the tree
+% could be reduced.
+%
+real_depth([]) ->
+    0;
+real_depth(RevTree) when is_list(RevTree) ->
+    lists:max([real_depth(T) || T <- RevTree]);
+real_depth({_Depth, Node}) when is_tuple(Node) ->
+    depth(Node, 0).  % Note from here on use the depth/3 function
+
+
+% Return an ordered list of revtree nodes. When sorting only immediate keys
+% (revisions) are looked at and comparison doesn't descent into the treee.
+%
+ordered_nodes(Nodes) ->
+    lists:sort(fun({K1, _, _}, {K2, _, _}) -> K1 =< K2 end, Nodes).
+
+
+% Calculate a maximum number of rev tree nodes needed for a tree of a given
+% height and branchiness. Height is derived from Size and LevelReductionFactor,
+% that is how big the sample should be and quickly the size parameter would
+% shrink on each level. The formula used then is:
+%
+% N = (B ^ (H + 1) - 1) / (B - 1)
+keys_needed(0, _, _) ->
+    0;
+keys_needed(Size, LevelReductionFactor, 1) ->
+    expected_height(Size, LevelReductionFactor);
+keys_needed(Size, LevelReductionFactor, Branches) ->
+    Height =  expected_height(Size, LevelReductionFactor),
+    ceil((math:pow(Branches, Height + 1) - 1) / (Branches - 1)).
+
+
+% Calculate expected tree height for a given sample size and branchiness.
+% At each step the size is divided by the reduction factor.
+expected_height(Size, LevelReductionFactor) ->
+    ceil(log(LevelReductionFactor, Size)).
+
+
+log(B, X) ->
+    math:log(X) / math:log(B).
+
+
+% Distribute items in a list into roughly equal chunks of a given size.
+%
+distribute(_ChunkSize, []) ->
+    [];
+distribute(ChunkSize, L) when ChunkSize >= length(L) ->
+    [L];
+distribute(ChunkSize, L) ->
+    {L1, L2} = lists:split(ChunkSize, L),
+    [L1 | distribute(ChunkSize, L2)].
+
+
+% Split a single (parent) revision list into chunks (sub-lists), one for each
+% child. Also, for safety, double check that at this point in the process the
+% list of revisions is sufficiently large. If it isn't something went wrong and
+% a specific exception is thrown ({not_enough_revisions, Got, Needed}).
+%
+child_revs(ChildCount, Revs, Size, MaxBranches) ->
+    NeedKeys = keys_needed(Size, ?SIZE_REDUCTION, MaxBranches),
+    case length(Revs) >= NeedKeys of
+        true ->
+            ChunkSize = ceil(length(Revs) / ChildCount),
+            distribute(ChunkSize, Revs);
+        false ->
+            throw({not_enough_revisions, length(Revs), NeedKeys})
+    end.

-- 
To stop receiving notification emails like this one, please contact
vatam...@apache.org.

Reply via email to