This is an automated email from the ASF dual-hosted git repository. davisp pushed a commit to branch optimize-doc-updates in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 047ebe2a1a5b82d32667741db3865ef3c8052e3d
Author: Paul J. Davis <[email protected]>
AuthorDate: Wed Nov 15 12:03:28 2017 -0600

    Automatically repair revision trees
---
 src/couch/src/couch_db_updater.erl |  9 +++------
 src/couch/src/couch_key_tree.erl   | 30 ++++++++++++++++++++----------
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl
index f437426..bcddbe0 100644
--- a/src/couch/src/couch_db_updater.erl
+++ b/src/couch/src/couch_db_updater.erl
@@ -876,11 +876,8 @@ stem_full_doc_info(#full_doc_info{rev_tree = Tree} = Info, Limit) ->
     Stemmed = couch_key_tree:stem(Tree, Limit),
     Info#full_doc_info{rev_tree = Stemmed}.
 
-full_stem_full_doc_infos(#db{revs_limit=Limit}, DocInfos) ->
-    lists:map(fun(#full_doc_info{rev_tree=Tree}=FDI) ->
-        Stemmed = couch_key_tree:full_stem(Tree, Limit),
-        FDI#full_doc_info{rev_tree=Stemmed}
-    end, DocInfos).
+stem_full_doc_infos(#db{revs_limit=Limit}, DocInfos) ->
+    lists:map(fun(FDI) -> stem_full_doc_info(FDI, Limit) end, DocInfos).
 
 update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) ->
     #db{
@@ -1128,7 +1125,7 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) ->
         }
     end, NewInfos0),
 
-    NewInfos = full_stem_full_doc_infos(Db, NewInfos1),
+    NewInfos = stem_full_doc_infos(Db, NewInfos1),
     RemoveSeqs =
     case Retry of
     nil ->
diff --git a/src/couch/src/couch_key_tree.erl b/src/couch/src/couch_key_tree.erl
index d18ecbc..6208cd0 100644
--- a/src/couch/src/couch_key_tree.erl
+++ b/src/couch/src/couch_key_tree.erl
@@ -63,8 +63,7 @@ multi_merge/2,
 merge/3,
 merge/2,
 remove_leafs/2,
-stem/2,
-full_stem/2
+stem/2
 ]).
 
 -include_lib("couch/include/couch_db.hrl").
@@ -480,13 +479,18 @@ map_leafs_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree]) ->
 
 
 stem(Trees, Limit) ->
-    lists:sort(lists:flatmap(fun(Tree) ->
-        stem_tree(Tree, Limit)
-    end, Trees)).
+    Seen = khash:new(),
+    try
+        lists:sort(lists:flatmap(fun(Tree) ->
+            stem_tree(Tree, Limit, Seen)
+        end, Trees))
+    catch throw:dupe_keys ->
+        repair_tree(Trees, Limit)
+    end.
 
 
-stem_tree({Depth, Child}, Limit) ->
-    case stem_tree(Depth, Child, Limit) of
+stem_tree({Depth, Child}, Limit, Seen) ->
+    case stem_tree(Depth, Child, Limit, Seen) of
         {_, NewChild, NewBranches} ->
             [{Depth, NewChild} | NewBranches];
         {_, NewBranches} ->
@@ -494,10 +498,16 @@ stem_tree({Depth, Child}, Limit) ->
     end.
 
 
-stem_tree(_Depth, {_Key, _Val, []} = Leaf, Limit) ->
+stem_tree(_Depth, {_Key, _Val, []} = Leaf, Limit, _Seen) ->
     {Limit - 1, Leaf, []};
 
-stem_tree(Depth, {Key, Val, Children}, Limit) ->
+stem_tree(Depth, {Key, Val, Children}, Limit, Seen) ->
+    case khash:lookup(Seen, Key) of
+        not_found ->
+            khash:put(Key, seen);
+        _ ->
+            throw(dupe_keys)
+    end,
     FinalAcc = lists:foldl(fun(Child, {LimitPosAcc, ChildAcc, BranchAcc}) ->
         case stem_tree(Depth + 1, Child, Limit) of
             {LimitPos, NewChild, NewBranches} ->
@@ -526,7 +536,7 @@ stem_tree(Depth, {Key, Val, Children}, Limit) ->
     end.
 
 
-full_stem(Trees, Limit) ->
+repair_tree(Trees, Limit) ->
     % flatten each branch in a tree into a tree path, sort by starting rev #
     Paths = lists:sort(lists:map(fun({Pos, Path}) ->
         StemmedPath = lists:sublist(Path, Limit),

-- 
To stop receiving notification emails like this one, please contact
"[email protected]" <[email protected]>.
