nickva commented on a change in pull request #1972: Introduce Shard Splitting To CouchDB URL: https://github.com/apache/couchdb/pull/1972#discussion_r269588434
########## File path: src/fabric/src/fabric_db_doc_count.erl ########## @@ -34,38 +34,29 @@ go(DbName) -> rexi_monitor:stop(RexiMon) end. -handle_message({rexi_DOWN, _, {_,NodeRef},_}, _Shard, {Counters, Acc}) -> - case fabric_util:remove_down_workers(Counters, NodeRef) of - {ok, NewCounters} -> - {ok, {NewCounters, Acc}}; - error -> - {error, {nodedown, <<"progress not possible">>}} +handle_message({rexi_DOWN, _, {_,NodeRef},_}, _Shard, {Counters, Resps}) -> + case fabric_ring:node_down(NodeRef, Counters, Resps) of + {ok, Counters1} -> {ok, {Counters1, Resps}}; + error -> {error, {nodedown, <<"progress not possible">>}} end; -handle_message({rexi_EXIT, Reason}, Shard, {Counters, Acc}) -> - NewCounters = lists:keydelete(Shard, #shard.ref, Counters), - case fabric_view:is_progress_possible(NewCounters) of - true -> - {ok, {NewCounters, Acc}}; - false -> - {error, Reason} +handle_message({rexi_EXIT, Reason}, Shard, {Counters, Resps}) -> + case fabric_ring:handle_error(Shard, Counters, Resps) of + {ok, Counters1} -> {ok, {Counters1, Resps}}; + error -> {error, Reason} end; -handle_message({ok, Count}, Shard, {Counters, Acc}) -> - case fabric_dict:lookup_element(Shard, Counters) of - undefined -> - % already heard from someone else in this range - {ok, {Counters, Acc}}; - nil -> - C1 = fabric_dict:store(Shard, ok, Counters), - C2 = fabric_view:remove_overlapping_shards(Shard, C1), - case fabric_dict:any(nil, C2) of - true -> - {ok, {C2, Count+Acc}}; - false -> - {stop, Count+Acc} - end +handle_message({ok, Count}, Shard, {Counters, Resps}) -> + case fabric_ring:handle_response(Shard, Count, Counters, Resps) of + {ok, {Counters1, Resps1}} -> + {ok, {Counters1, Resps1}}; + {stop, Resps1} -> + Total = fabric_dict:fold(fun(_, C, A) -> A + C end, 0, Resps1), + {stop, Total} end; -handle_message(_, _, Acc) -> Review comment: There is a generic handler above it: https://github.com/apache/couchdb/blob/reshard/src/fabric/src/fabric_db_doc_count.erl#L58 That handles any 
unexpected messages but also keeps track of whether we have full ring coverage (worker ranges). If we have the full ring (progress is possible), it ignores the message. But if we don't have a full ring, we immediately fail with that error, as opposed to previously timing out in `fabric_util:recv(...)`. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services