This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch ensure-view-build-progress
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 6baf57452e2ccb9869c50da93880f0742a11a97b
Author: Paul J. Davis <[email protected]>
AuthorDate: Fri Nov 13 14:11:00 2020 -0600

    Minimize conflicts while building views
    
    If a client is continuously updating a database the view engine would
    fail to make progress because the last update sequence is continuously
    changing, and the unbounded changes read would create a conflict with the
    maximum versionstamp value used to delineate the end of the changes key
    space.
    
    This commit makes two changes to avoid this. First, the current
    update_seq is read via snapshot so that it doesn't become a read
    conflict if there's an update. This part is fairly straightforward.
    
    The second change reads the changes feed in a snapshot and then manually
    adds the read conflict range for the keys that were actually read. This
    solves the issue for large view builds while a database is being
    updated. However, this does not address the issue when it's a single
    document being updated repeatedly. Perhaps if we keep track of the last
    sequence read and apply that as a limit when updating the view on a
    conflict retry?
---
 src/couch_views/src/couch_views_indexer.erl | 37 ++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl
index 2735f66..9fdbdb2 100644
--- a/src/couch_views/src/couch_views_indexer.erl
+++ b/src/couch_views/src/couch_views_indexer.erl
@@ -264,13 +264,15 @@ get_update_start_state(TxDb, Mrst, #{db_seq := undefined} = State) ->
         view_seq := ViewSeq
     } = couch_views_fdb:get_view_state(TxDb, Mrst),
 
-    State#{
-        tx_db := TxDb,
-        db_seq := fabric2_db:get_update_seq(TxDb),
-        view_vs := ViewVS,
-        view_seq := ViewSeq,
-        last_seq := ViewSeq
-    };
+    fabric2_fdb:with_snapshot(TxDb, fun(SSDb) ->
+        State#{
+            tx_db := TxDb,
+            db_seq := fabric2_db:get_update_seq(SSDb),
+            view_vs := ViewVS,
+            view_seq := ViewSeq,
+            last_seq := ViewSeq
+        }
+    end);
 
 get_update_start_state(TxDb, _Idx, State) ->
     State#{
@@ -287,7 +289,16 @@ fold_changes(State) ->
 
     Fun = fun process_changes/2,
     Opts = [{limit, Limit}, {restart_tx, false}],
-    fabric2_db:fold_changes(TxDb, SinceSeq, Fun, State, Opts).
+    Result = fabric2_fdb:with_snapshot(TxDb, fun(SSDb) ->
+        fabric2_db:fold_changes(SSDb, SinceSeq, Fun, State, Opts)
+    end),
+    case Result of
+        {ok, #{last_seq := LastSeq}} ->
+            set_changes_conflict(TxDb, SinceSeq, 
fabric2_fdb:seq_to_vs(LastSeq));
+        _ ->
+            ok
+    end,
+    Result.
 
 
 process_changes(Change, Acc) ->
@@ -597,6 +608,16 @@ fail_job(Job, Data, Error, Reason) ->
     exit(normal).
 
 
+set_changes_conflict(TxDb, StartVS, EndVS) ->
+    #{
+        tx := Tx,
+        db_prefix := DbPrefix
+    } = TxDb,
+    StartKey = erlfdb_tuple:pack({?DB_CHANGES, StartVS}, DbPrefix),
+    EndKey = erlfdb_tuple:pack({?DB_CHANGES, EndVS}, DbPrefix),
+    erlfdb:add_read_conflict_range(Tx, StartKey, EndKey).
+
+
 retry_limit() ->
     config:get_integer("couch_views", "retry_limit", 3).
 

Reply via email to