This is an automated email from the ASF dual-hosted git repository. davisp pushed a commit to branch optimize-doc-updates in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 737a95290c0cf3335e48eb28f9354bcc96294944 Author: Paul J. Davis <[email protected]> AuthorDate: Wed Nov 15 12:03:28 2017 -0600 Automatically repair revision trees --- src/couch/src/couch_db_updater.erl | 9 ++---- src/couch/src/couch_key_tree.erl | 62 ++++++++++++++++++++++---------------- 2 files changed, 39 insertions(+), 32 deletions(-) diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl index f437426..bcddbe0 100644 --- a/src/couch/src/couch_db_updater.erl +++ b/src/couch/src/couch_db_updater.erl @@ -876,11 +876,8 @@ stem_full_doc_info(#full_doc_info{rev_tree = Tree} = Info, Limit) -> Stemmed = couch_key_tree:stem(Tree, Limit), Info#full_doc_info{rev_tree = Stemmed}. -full_stem_full_doc_infos(#db{revs_limit=Limit}, DocInfos) -> - lists:map(fun(#full_doc_info{rev_tree=Tree}=FDI) -> - Stemmed = couch_key_tree:full_stem(Tree, Limit), - FDI#full_doc_info{rev_tree=Stemmed} - end, DocInfos). +stem_full_doc_infos(#db{revs_limit=Limit}, DocInfos) -> + lists:map(fun(FDI) -> stem_full_doc_info(FDI, Limit) end, DocInfos). update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) -> #db{ @@ -1128,7 +1125,7 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) -> } end, NewInfos0), - NewInfos = full_stem_full_doc_infos(Db, NewInfos1), + NewInfos = stem_full_doc_infos(Db, NewInfos1), RemoveSeqs = case Retry of nil -> diff --git a/src/couch/src/couch_key_tree.erl b/src/couch/src/couch_key_tree.erl index d18ecbc..14c136f 100644 --- a/src/couch/src/couch_key_tree.erl +++ b/src/couch/src/couch_key_tree.erl @@ -63,8 +63,7 @@ multi_merge/2, merge/3, merge/2, remove_leafs/2, -stem/2, -full_stem/2 +stem/2 ]). -include_lib("couch/include/couch_db.hrl"). @@ -480,53 +479,64 @@ map_leafs_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree]) -> stem(Trees, Limit) -> - lists:sort(lists:flatmap(fun(Tree) -> - stem_tree(Tree, Limit) - end, Trees)). + try + {_, Branches} = lists:foldl(fun(Tree, {Seen, TreeAcc}) -> + {NewSeen, NewBranches} = stem_tree(Tree, Limit, Seen), + {NewSeen, NewBranches ++ TreeAcc} + end, {sets:new(), []}, Trees), + lists:sort(Branches) + catch throw:dupe_keys -> + repair_tree(Trees, Limit) + end. -stem_tree({Depth, Child}, Limit) -> - case stem_tree(Depth, Child, Limit) of - {_, NewChild, NewBranches} -> - [{Depth, NewChild} | NewBranches]; - {_, NewBranches} -> - NewBranches +stem_tree({Depth, Child}, Limit, Seen) -> + case stem_tree(Depth, Child, Limit, Seen) of + {NewSeen, _, NewChild, NewBranches} -> + {NewSeen, [{Depth, NewChild} | NewBranches]}; + {NewSeen, _, NewBranches} -> + {NewSeen, NewBranches} end. -stem_tree(_Depth, {_Key, _Val, []} = Leaf, Limit) -> - {Limit - 1, Leaf, []}; +stem_tree(_Depth, {_Key, _Val, []} = Leaf, Limit, Seen) -> + {Seen, Limit - 1, Leaf, []}; -stem_tree(Depth, {Key, Val, Children}, Limit) -> - FinalAcc = lists:foldl(fun(Child, {LimitPosAcc, ChildAcc, BranchAcc}) -> - case stem_tree(Depth + 1, Child, Limit) of - {LimitPos, NewChild, NewBranches} -> +stem_tree(Depth, {Key, Val, Children}, Limit, Seen0) -> + Seen1 = case sets:is_element(Key, Seen0) of + true -> throw(dupe_keys); + false -> sets:add_element(Key, Seen0) + end, + FinalAcc = lists:foldl(fun(Child, Acc) -> + {SeenAcc, LimitPosAcc, ChildAcc, BranchAcc} = Acc, + case stem_tree(Depth + 1, Child, Limit, SeenAcc) of + {NewSeenAcc, LimitPos, NewChild, NewBranches} -> NewLimitPosAcc = erlang:max(LimitPos, LimitPosAcc), NewChildAcc = [NewChild | ChildAcc], NewBranchAcc = NewBranches ++ BranchAcc, - {NewLimitPosAcc, NewChildAcc, NewBranchAcc}; - {LimitPos, NewBranches} -> + {NewSeenAcc, NewLimitPosAcc, NewChildAcc, NewBranchAcc}; + {NewSeenAcc, LimitPos, NewBranches} -> NewLimitPosAcc = erlang:max(LimitPos, LimitPosAcc), NewBranchAcc = NewBranches ++ BranchAcc, - {NewLimitPosAcc, ChildAcc, NewBranchAcc} + {NewSeenAcc, NewLimitPosAcc, ChildAcc, NewBranchAcc} end - end, {-1, [], []}, Children), - {FinalLimitPos, FinalChildren, FinalBranches} = FinalAcc, + end, {Seen1, -1, [], []}, Children), + {FinalSeen, FinalLimitPos, FinalChildren, FinalBranches} = FinalAcc, case FinalLimitPos of N when N > 0, length(FinalChildren) > 0 -> FinalNode = {Key, Val, lists:reverse(FinalChildren)}, - {FinalLimitPos - 1, FinalNode, FinalBranches}; + {FinalSeen, FinalLimitPos - 1, FinalNode, FinalBranches}; 0 when length(FinalChildren) > 0 -> NewBranches = lists:map(fun(Child) -> {Depth + 1, Child} end, lists:reverse(FinalChildren)), - {-1, NewBranches ++ FinalBranches}; + {FinalSeen, -1, NewBranches ++ FinalBranches}; N when N < 0, length(FinalChildren) == 0 -> - {FinalLimitPos - 1, FinalBranches} + {FinalSeen, FinalLimitPos - 1, FinalBranches} end. -full_stem(Trees, Limit) -> +repair_tree(Trees, Limit) -> % flatten each branch in a tree into a tree path, sort by starting rev # Paths = lists:sort(lists:map(fun({Pos, Path}) -> StemmedPath = lists:sublist(Path, Limit), -- To stop receiving notification emails like this one, please contact "[email protected]" <[email protected]>.
