Repository: couchdb-mem3 Updated Branches: refs/heads/master 252467cb4 -> c3c542918
Chunk missing revisions before attempting to save on target In cases with pathological document revision patterns (e.g., 10000 open conflicts and tree depth of 300000 on a single document), attempting to replicate the full revision tree in one batch causes the system to crash by attempting to send an oversized message. We've observed messages of > 4GB in the wild. This patch divides the set of revisions-to-replicate for a single document into chunks of a configurable size, thereby allowing operators to keep the system stable when attempting to replicate these troublesome documents. BugzID: 37676 Project: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/repo Commit: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/commit/c4da61c8 Tree: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/tree/c4da61c8 Diff: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/diff/c4da61c8 Branch: refs/heads/master Commit: c4da61c8eb98cedd3cf7a28c293cb1f6d3ec8571 Parents: 252467c Author: Benjamin Anderson <b...@banjiewen.net> Authored: Wed Oct 29 12:52:30 2014 -0700 Committer: Eric Avdey <e...@eiri.ca> Committed: Thu Nov 24 13:55:18 2016 -0400 ---------------------------------------------------------------------- src/mem3_rep.erl | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/couchdb-mem3/blob/c4da61c8/src/mem3_rep.erl ---------------------------------------------------------------------- diff --git a/src/mem3_rep.erl b/src/mem3_rep.erl index 938260d..ad7ac55 100644 --- a/src/mem3_rep.erl +++ b/src/mem3_rep.erl @@ -252,8 +252,10 @@ replicate_batch(#acc{target = #shard{node=Node, name=Name}} = Acc) -> [] -> ok; Missing -> - Docs = open_docs(Acc, Missing), - ok = save_on_target(Node, Name, Docs) + lists:map(fun(Chunk) -> + Docs = open_docs(Acc, Chunk), + ok = save_on_target(Node, Name, Docs) + end, chunk_revs(Missing)) end, 
update_locals(Acc), {ok, Acc#acc{revcount=0, infos=[]}}. @@ -271,6 +273,32 @@ find_missing_revs(Acc) -> ]). +chunk_revs(Revs) -> + Limit = list_to_integer(config:get("mem3", "rev_chunk_size", "5000")), + chunk_revs(Revs, Limit). + +chunk_revs(Revs, Limit) -> + chunk_revs(Revs, {0, []}, [], Limit). + +chunk_revs([], {_Count, Chunk}, Chunks, _Limit) -> + [Chunk|Chunks]; +chunk_revs([{Id, R, A}|Revs], {Count, Chunk}, Chunks, Limit) when length(R) =< Limit - Count -> + chunk_revs( + Revs, + {Count + length(R), [{Id, R, A}|Chunk]}, + Chunks, + Limit + ); +chunk_revs([{Id, R, A}|Revs], {Count, Chunk}, Chunks, Limit) -> + {This, Next} = lists:split(Limit - Count, R), + chunk_revs( + [{Id, Next, A}|Revs], + {0, []}, + [[{Id, This, A}|Chunk]|Chunks], + Limit + ). + + open_docs(#acc{source=Source, infos=Infos}, Missing) -> lists:flatmap(fun({Id, Revs, _}) -> FDI = lists:keyfind(Id, #full_doc_info.id, Infos),