This is an automated email from the ASF dual-hosted git repository. davisp pushed a commit to branch ensure-view-build-progress in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 6baf57452e2ccb9869c50da93880f0742a11a97b Author: Paul J. Davis <[email protected]> AuthorDate: Fri Nov 13 14:11:00 2020 -0600 Minimize conflicts while building views If a client is continuously updating a database, the view engine would fail to make progress because the last update sequence is continuously changing and the unbounded changes read would create a conflict on the maximum versionstamp value used to delineate the end of the changes key space. This commit makes two changes to avoid this. First, the current update_seq is read via snapshot so that it doesn't become a read conflict if there's an update. This part is fairly straightforward. The second change reads the changes feed in a snapshot and then manually adds the read conflict range for the keys that were actually read. This solves the issue for large view builds while a database is being updated. However, this does not address the issue when it's a single document being updated repeatedly. Perhaps if we keep track of the last sequence read and apply that as a limit when updating the view on a conflict retry? 
--- src/couch_views/src/couch_views_indexer.erl | 37 ++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 2735f66..9fdbdb2 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -264,13 +264,15 @@ get_update_start_state(TxDb, Mrst, #{db_seq := undefined} = State) -> view_seq := ViewSeq } = couch_views_fdb:get_view_state(TxDb, Mrst), - State#{ - tx_db := TxDb, - db_seq := fabric2_db:get_update_seq(TxDb), - view_vs := ViewVS, - view_seq := ViewSeq, - last_seq := ViewSeq - }; + fabric2_fdb:with_snapshot(TxDb, fun(SSDb) -> + State#{ + tx_db := TxDb, + db_seq := fabric2_db:get_update_seq(SSDb), + view_vs := ViewVS, + view_seq := ViewSeq, + last_seq := ViewSeq + } + end); get_update_start_state(TxDb, _Idx, State) -> State#{ @@ -287,7 +289,16 @@ fold_changes(State) -> Fun = fun process_changes/2, Opts = [{limit, Limit}, {restart_tx, false}], - fabric2_db:fold_changes(TxDb, SinceSeq, Fun, State, Opts). + Result = fabric2_fdb:with_snapshot(TxDb, fun(SSDb) -> + fabric2_db:fold_changes(SSDb, SinceSeq, Fun, State, Opts) + end), + case Result of + {ok, #{last_seq := LastSeq}} -> + set_changes_conflict(TxDb, SinceSeq, fabric2_fdb:seq_to_vs(LastSeq)); + _ -> + ok + end, + Result. process_changes(Change, Acc) -> @@ -597,6 +608,16 @@ fail_job(Job, Data, Error, Reason) -> exit(normal). +set_changes_conflict(TxDb, StartVS, EndVS) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + StartKey = erlfdb_tuple:pack({?DB_CHANGES, StartVS}, DbPrefix), + EndKey = erlfdb_tuple:pack({?DB_CHANGES, EndVS}, DbPrefix), + erlfdb:add_read_conflict_range(Tx, StartKey, EndKey). + + retry_limit() -> config:get_integer("couch_views", "retry_limit", 3).
