This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch property-tests-for-couch-key-tree
in repository https://gitbox.apache.org/repos/asf/couchdb.git


The following commit(s) were added to 
refs/heads/property-tests-for-couch-key-tree by this push:
     new 9aa4499  Key tree property tests
9aa4499 is described below

commit 9aa44995fa0b5a132d323c2429723ced6d11c20d
Author: Nick Vatamaniuc <vatam...@apache.org>
AuthorDate: Fri Feb 2 12:08:08 2018 -0500

    Key tree property tests
    
    Key tree module is a candidate to use property tests on as it mostly deals 
with
    manipulating a single data structure and functions are referentially
    transparent, that is, they aren't many side-effects like IO generated.
    
    Property tests are currently run as part of the EUnit testing framework. In
    the future it might be interesting to have a longer or more thourough runs
    to perhaps catch more edge cases.
    
    To run the tests used Triq. It is an Apache licensed property testing tool. 
It
    is not as full features as the commerical and the other, GPL-licensed one, 
but
    seems to have the basics and should be easy to integrate with CouchDB's 
code.
    
    To run the test:
    
    make eunit apps=couch suites=couch_key_tree_prop_tests
    
    To play with the code in the interpreter can try this script:
    
    ```
    set +e
    make > /dev/null
    exec erl \
     -pa src/couch/ebin \
     -pa src/triq/ebin/ \
     -pa src/config/ebin \
     -eval "application:start(config), code:load_file(couch_key_tree), 
code:load_file(triq)"
    ```
---
 Makefile                                     |   2 +-
 Makefile.win                                 |   2 +-
 rebar.config.script                          |   5 +-
 src/couch/test/couch_key_tree_prop_tests.erl | 344 +++++++++++++++++++++++++++
 4 files changed, 349 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index bd3b8ac..4d9d2ea 100644
--- a/Makefile
+++ b/Makefile
@@ -21,7 +21,7 @@ DESTDIR=
 
 # Rebar options
 apps=
-skip_deps=folsom,meck,mochiweb,proper,snappy
+skip_deps=folsom,meck,mochiweb,triq,snappy
 suites=
 tests=
 
diff --git a/Makefile.win b/Makefile.win
index 7ff0ab5..01efc5a 100644
--- a/Makefile.win
+++ b/Makefile.win
@@ -27,7 +27,7 @@ DESTDIR=
 
 # Rebar options
 apps=
-skip_deps=folsom,meck,mochiweb,proper,snappy
+skip_deps=folsom,meck,mochiweb,triq,snappy
 suites=
 tests=
 
diff --git a/rebar.config.script b/rebar.config.script
index 60d2e31..69ec709 100644
--- a/rebar.config.script
+++ b/rebar.config.script
@@ -64,8 +64,9 @@ DepDescs = [
 {ibrowse,          "ibrowse",          {tag, "CouchDB-4.0.1"}},
 {jiffy,            "jiffy",            {tag, "CouchDB-0.14.11-2"}},
 {mochiweb,         "mochiweb",         {tag, "CouchDB-2.12.0-1"}},
-{meck,             "meck",             {tag, "0.8.8"}}
-
+{meck,             "meck",             {tag, "0.8.8"}},
+{triq,             {url, "https://github.com/triqng/triq.git"},
+                   "1a9b022cc0432c9314f6f06c61dc375e474b5573"}
 ],
 
 
diff --git a/src/couch/test/couch_key_tree_prop_tests.erl 
b/src/couch/test/couch_key_tree_prop_tests.erl
new file mode 100644
index 0000000..4dcee1c
--- /dev/null
+++ b/src/couch/test/couch_key_tree_prop_tests.erl
@@ -0,0 +1,344 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+
+-module(couch_key_tree_prop_tests).
+
+-include_lib("triq/include/triq.hrl").
+-triq(eunit).
+-include_lib("eunit/include/eunit.hrl").
+
+-define(SIZE_REDUCTION, 2).  % How much to reduce size with tree depth.
+-define(MAX_BRANCHES, 5).  % Maximum number of branches.
+-define(RAND_SIZE, 1 bsl 54).
+
+
+%
+% Generators
+%
+
+% Generate a full rev tree. Most of the forms are just there to set up default
+% parameters, _revtree/3 does all heavy lifting.
+%
+g_revtree() ->
+    ?SIZED(Size, g_revtree(Size)).
+
+
+g_revtree(Size) when is_integer(Size) ->
+    g_revtree(Size, [], ?MAX_BRANCHES);
+g_revtree(Revs) when is_list(Revs) ->
+    ?SIZED(Size, g_revtree(Size, Revs, ?MAX_BRANCHES)).
+
+
+g_revtree(Size, Revs) when is_integer(Size), is_list(Revs) ->
+    g_revtree(Size, Revs, ?MAX_BRANCHES);
+g_revtree(Revs, MaxBranches) when is_list(Revs), is_integer(MaxBranches) ->
+    ?SIZED(Size, g_revtree(Size, Revs, MaxBranches)).
+
+
+g_revtree(0, _Revs, _MaxBranches) ->
+    [];
+g_revtree(Size, ERevs, MaxBranches) ->
+    ?LET({Branches, Depth, Revs}, {g_branch_count(MaxBranches), 
g_stem_depth(Size), g_revs(Size, ERevs)},
+            begin
+            %?debugFmt(" rev rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr D:~p N:~p 
R:~p", [Depth, Branches, Revs]),
+            case Branches == 0 of
+                true ->
+                    [];
+                false ->
+                    SmallerSize = Size div ?SIZE_REDUCTION,
+                    ?DELAY([{Depth, g_nodes(SmallerSize, Branches, Revs, 
MaxBranches)}])
+            end
+            end
+
+    ).
+
+
+g_non_empty_revtree(ERevs) ->
+    ?SUCHTHAT(RTree, g_revtree(ERevs), length(RTree) > 0).
+
+
+g_tree(ERevs) ->
+    ?LET(RTree, g_non_empty_revtree(ERevs), hd(RTree)).
+
+
+
+% Generate a tree node and then recursively generate its children.
+%
+g_treenode(0, Revs, _) ->
+    {elements(Revs), x, []};
+g_treenode(Size, Revs, MaxBranches) ->
+    ?DELAY(?LET(N, int(0, MaxBranches),
+        begin
+            %?debugFmt("----- g_treenode S:~p N:~p",[Size,N]),
+            [Rev | ChildRevs] = Revs,
+            {Rev, x, g_nodes(Size div ?SIZE_REDUCTION, N, ChildRevs, 
MaxBranches)}
+        end
+    )).
+
+
+
+% Generate a list of child nodes. Depending on how many children there are
+% the pre-generarated revision list is split into that many sublists.
+%
+g_nodes(0, _N, _Revs, _MaxBranches) ->
+    %?debugFmt("..................... g_nodes Size=0 N:~p", [_N]),
+    [];
+g_nodes(_Size, 0, _Revs, _MaxBranches) ->
+    %?debugFmt("++++++++++++++++++++++ g_nodes N=0 Size:~p", [_Size]),
+    [];
+g_nodes(Size, ChildCount, Revs, MaxBranches) ->
+    ?LETSHRINK(
+        ChildNodes,
+        begin
+            ChildRevList = child_revs(ChildCount, Revs, Size, MaxBranches),
+            %?debugFmt("******* g_nodes ChildRevList Size:~p ChildCount:~p 
RevList:~p",[Size, ChildCount, length(ChildRevList)]),
+            [g_treenode(Size, ChildRevs, MaxBranches) || ChildRevs <- 
ChildRevList]
+        end,
+        ordered_nodes(ChildNodes)
+    ).
+
+
+% Generate each subtree's stem depth
+%
+g_stem_depth(Size) ->
+    choose(0,  4 * expected_height(Size, ?SIZE_REDUCTION)).
+
+
+% Generate number of branches
+g_branch_count(MaxBranches) ->
+    int(0, MaxBranches).
+
+
+% Uses the shuffle/1 function to shuffle the input list. Unshuffled list is
+% used as the shrink value.
+%
+g_shuffle(L) when is_list(L) ->
+    triq_dom:domain(g_shuffle,
+        fun(Self, _Size) -> {Self, shuffle(L)} end,
+        fun(Self, _Val) -> {Self, L} end
+     ).
+
+
+% Wrapper to make a list shuffling generator that doesn't shrink
+%
+g_shuffle_noshrink(L) when is_list(L) ->
+    triq_dom:noshrink(g_shuffle(L)).
+
+
+% Generate shuffled sublists up to N items long from a list.
+%
+g_shuffled_sublists(L, N) ->
+    ?LET(Shuffled, g_shuffle_noshrink(L), lists:sublist(Shuffled, N)).
+
+
+% Generate revision lists.
+%
+g_revs() ->
+    ?SIZED(Size, g_revs(Size)).
+
+
+g_revs(Size) when is_integer(Size) ->
+    g_revs(Size, []).
+
+
+g_revs(Size, Existing) when is_integer(Size), is_list(Existing) ->
+    Expected = keys_needed(Size, ?SIZE_REDUCTION, ?MAX_BRANCHES),
+    Revs = revs(Expected, Existing),
+    case length(Revs) > Expected of
+        true -> % have extra, try various sublists
+            g_shuffled_sublists(Revs, Expected);
+        false ->
+            triq_dom:return(Revs)
+    end.
+
+
+%
+% Properties
+%
+
+
+prop_revtree() ->
+  ?FORALL(Revs, g_revs(),
+    ?FORALL({RevTree, Branch}, {g_revtree(Revs), g_revtree(Revs, 1)},
+         ?IMPLIES(length(Branch) > 0,
+            begin
+                [Tree | _ ] = Branch,
+                ?debugFmt(" Revs:~p RevTree:~p Tree:~p", [Revs, RevTree, 
Tree]),
+                %io:format(standard_error, "Xs:~p~n", [Xs]),
+                {Merged, Result} = couch_key_tree:merge(RevTree, Tree, 10),
+                ?debugFmt(" Res ~p ~p", [Merged, Result]),
+                true
+            end
+         )
+    )
+  ).
+
+
+
+
+
+%
+% Helper functions
+%
+
+
+% Shufle a list of items. Tag each item with a random number then sort
+% the list and remove the tags.
+%
+shuffle(L) ->
+    Tagged = [{triq_rnd:uniform(), X} || X <- L],
+    [X || {_, X} <- lists:sort(Tagged)].
+
+
+% Generate list of relateively unique large random numbers
+rand_list(N) when N =< 0 ->
+    [];
+rand_list(N) ->
+    [triq_rnd:uniform(?RAND_SIZE) || _ <- lists:seq(1, N)].
+
+
+% Generate a list of revisions to be used as key in revision trees. Expected
+% must the number of maximum expected nodes in a revision tree. Existing is an
+% optional list revisions which must be included in the result. The output list
+% is sorted.
+revs(0, _Existing) ->
+    [];
+revs(Expected, Existing) when is_integer(Expected), is_list(Existing) ->
+    Need = Expected - length(Existing),
+    lists:usort(lists:append(Existing, rand_list(Need))).
+
+
+% Get the list of all the keys in a revision tree. The input can also be a
+% an individual tree (tagged with the depth to virtual root) or a node.
+% Yes, this is not tail recursive but the idea is to keep it simple.
+%
+keylist({_D, Nodes}) when is_list(Nodes) ->
+    keylist(Nodes);
+keylist({K, _V, Nodes}) ->
+    [K | keylist(Nodes)];
+keylist(Nodes) ->
+    lists:append([keylist(Node) || Node <- Nodes]).
+
+
+% Get lists of all the keys at each depth level. Result is an orddict that
+% looks like [{depth, [key]}]. The depth used here is the "virtual" depth as
+% indicated by the stemmed depth tag that goes with every top level subtree.
+%
+levels([]) ->
+    orddict:new();
+levels(RevTree) when is_list(RevTree) ->
+    lists:foldl(fun(T, Dict) -> levels(T, Dict) end, orddict:new(), RevTree).
+
+
+levels({Depth, Nodes}, Dict) when is_list(Nodes) ->
+    levels(Nodes, Depth, Dict).
+
+
+levels({K, _V, Nodes}, Depth, Dict) ->
+    Dict1 = case orddict:is_key(Depth, Dict) of
+        true -> orddict:append(Depth, K, Dict);
+        false -> orddict:store(Depth, [K], Dict)
+    end,
+    levels(Nodes, Depth + 1, Dict1);
+levels(Nodes, Depth, Dict) ->
+    lists:foldl(fun(Node, AccDict) ->
+        levels(Node, Depth, AccDict)
+    end, Dict, Nodes).
+
+
+% Get the maximum depth of a revtree. The depth is "virtual" as it takes into
+% account the distance to the now stemmed root node as indicated by the top
+% level subtrees.
+%
+depth([]) ->
+    0;
+depth(RevTree) when is_list(RevTree) ->
+    lists:max([depth(T) || T <- RevTree]);
+depth({Depth, Nodes}) ->
+    depth(Nodes, Depth - 1).
+
+
+depth({_K, _V, Nodes}, Depth) ->
+    depth(Nodes, Depth + 1);
+depth([], Depth) ->
+    Depth;
+depth(Nodes, Depth) ->
+    lists:max([depth(Node, Depth) || Node <- Nodes]).
+
+
+% Return an ordered list of revtree nodes. When sorting only immediate keys
+% (revisions) are looked at and comparison doesn't descent into the treee.
+%
+ordered_nodes(Nodes) ->
+    lists:sort(fun({K1, _, _}, {K2, _, _}) -> K1 =< K2 end, Nodes).
+
+
+% Calculate a maximum number of rev tree nodes needed for a tree of a given
+% height and branchiness. Height is derived from Size and LevelReductionFactor,
+% that is how big the sample should be and quickly the size parameter would
+% shrink on each level. The formula used then is:
+%
+% N = (B ^ (H + 1) - 1) / (B - 1)
+keys_needed(0, _, _) ->
+    0;
+keys_needed(Size, LevelReductionFactor, 1) ->
+    expected_height(Size, LevelReductionFactor);
+keys_needed(Size, LevelReductionFactor, Branches) ->
+    Height =  expected_height(Size, LevelReductionFactor),
+    ceil((math:pow(Branches, Height + 1) - 1) / (Branches - 1)).
+
+
+% Calculate expected tree height for a given sample size and branchiness.
+% At each step the size is divided by the reduction factor.
+expected_height(Size, LevelReductionFactor) ->
+    ceil(log(LevelReductionFactor, Size)).
+
+
+log(B, X) ->
+    math:log(X) / math:log(B).
+
+
+% Checks if revisions in a revtree are unique. The observation is if
+% the length of the list is the same as the length of the list with
+% duplicates removed, then the list has not duplicates.
+%
+is_unique(Nodes) ->
+    Keys = keylist(Nodes),
+    length(Keys) == length(lists:usort(Keys)).
+
+
+% Distribute items in a list into roughly equal chunks of a given size.
+%
+distribute(_ChunkSize, []) ->
+    [];
+distribute(ChunkSize, L) when ChunkSize >= length(L) ->
+    [L];
+distribute(ChunkSize, L) ->
+    {L1, L2} = lists:split(ChunkSize, L),
+    [L1 | distribute(ChunkSize, L2)].
+
+
+% Split a single (parent) revision list into chunks (sub-lists), one for each
+% child. Also, for safety, double check that at this point in the process the
+% list of revisions is sufficiently large. If it isn't something went wrong and
+% a specific exception is thrown ({not_enough_revisions, Got, Needed}).
+%
+child_revs(ChildCount, Revs, Size, MaxBranches) ->
+    NeedKeys = keys_needed(Size, ?SIZE_REDUCTION, MaxBranches),
+    case length(Revs) >= NeedKeys of
+        true ->
+            ChunkSize = ceil(length(Revs) / ChildCount),
+            distribute(ChunkSize, Revs);
+        false ->
+            throw({not_enough_revisions, length(Revs), NeedKeys})
+    end.

-- 
To stop receiving notification emails like this one, please contact
vatam...@apache.org.

Reply via email to