Repository: couchdb-fabric Updated Branches: refs/heads/master 998cf2d66 -> ec2235196
In open_revs, do not count errors in quorum threshold calculation Previously quorum check looked just at the number of replies and decided quorum was met even if it only received errors. For example, if 2 nodes are in maintance mode it might receive this sequence of replies: `rexi_EXIT, rexi_EXIT, ok` In that case after the first two it would decide quorum (r=2) was met, return what it had so far ([]) and kill the remaining worker, who was about to return a valid revision. The fix is to keep track of error replies and subtract them when deciding if quorum was met. COUCHDB-3271 Project: http://git-wip-us.apache.org/repos/asf/couchdb-fabric/repo Commit: http://git-wip-us.apache.org/repos/asf/couchdb-fabric/commit/ec223519 Tree: http://git-wip-us.apache.org/repos/asf/couchdb-fabric/tree/ec223519 Diff: http://git-wip-us.apache.org/repos/asf/couchdb-fabric/diff/ec223519 Branch: refs/heads/master Commit: ec2235196d7195afab59cedc2d61a02b11596ab4 Parents: 6fa8d3c Author: Nick Vatamaniuc <[email protected]> Authored: Thu Jan 12 13:50:20 2017 -0500 Committer: Nick Vatamaniuc <[email protected]> Committed: Fri Jan 20 10:56:14 2017 -0500 ---------------------------------------------------------------------- src/fabric_doc_open_revs.erl | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/couchdb-fabric/blob/ec223519/src/fabric_doc_open_revs.erl ---------------------------------------------------------------------- diff --git a/src/fabric_doc_open_revs.erl b/src/fabric_doc_open_revs.erl index 573cda7..5393f92 100644 --- a/src/fabric_doc_open_revs.erl +++ b/src/fabric_doc_open_revs.erl @@ -24,6 +24,7 @@ worker_count, workers, reply_count = 0, + reply_error_count = 0, r, revs, latest, @@ -60,13 +61,15 @@ go(DbName, Id, Revs, Options) -> handle_message({rexi_DOWN, _, {_,NodeRef},_}, _Worker, #state{workers=Workers}=State) -> NewState = State#state{ - workers = lists:keydelete(NodeRef, #shard.node, Workers) + workers = lists:keydelete(NodeRef, #shard.node, Workers), + reply_error_count = State#state.reply_error_count + 1 }, handle_message({ok, []}, nil, NewState); handle_message({rexi_EXIT, _}, Worker, #state{workers=Workers}=State) -> NewState = State#state{ - workers = lists:delete(Worker, Workers) + workers = lists:delete(Worker, Workers), + reply_error_count = State#state.reply_error_count + 1 }, handle_message({ok, []}, nil, NewState); @@ -80,18 +83,22 @@ handle_message({ok, RawReplies}, Worker, State) -> r = R, revs = Revs, latest = Latest, - repair = InRepair + repair = InRepair, + reply_error_count = ReplyErrorCount } = State, IsTree = Revs == all orelse Latest, + % Do not count error replies when checking quorum + QuorumReplies = ReplyCount + 1 - ReplyErrorCount >= R, + {NewReplies, QuorumMet, Repair} = case IsTree of true -> {NewReplies0, AllInternal, Repair0} = tree_replies(PrevReplies, tree_sort(RawReplies)), NumLeafs = couch_key_tree:count_leafs(PrevReplies), SameNumRevs = length(RawReplies) == NumLeafs, - QMet = AllInternal andalso SameNumRevs andalso ReplyCount + 1 >= R, + QMet = AllInternal andalso SameNumRevs andalso QuorumReplies, {NewReplies0, QMet, Repair0}; false -> {NewReplies0, MinCount} = dict_replies(PrevReplies, RawReplies), @@ -306,6 +313,7 @@ open_doc_revs_test_() -> check_ancestor_counted_in_quorum(), check_not_found_counts_for_descendant(), check_worker_error_skipped(), + check_quorum_only_counts_valid_responses(), check_not_found_replies_are_removed_when_doc_found(), check_not_found_returned_when_one_of_docs_not_found(), check_not_found_returned_when_doc_not_found() @@ -469,6 +477,20 @@ check_worker_error_skipped() -> end). +check_quorum_only_counts_valid_responses() -> + ?_test(begin + S0 = state0(revs(), true), + Msg1 = {rexi_EXIT, reason}, + Msg2 = {rexi_EXIT, reason}, + Msg3 = {ok, [foo1(), bar1(), baz1()]}, + Expect = {stop, [bar1(), baz1(), foo1()]}, + + {ok, S1} = handle_message(Msg1, w1, S0), + {ok, S2} = handle_message(Msg2, w2, S1), + ?assertEqual(Expect, handle_message(Msg3, w3, S2)) + end). + + check_not_found_replies_are_removed_when_doc_found() -> ?_test(begin Replies = replies_to_dict([foo1(), bar1(), fooNF()]),
