This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch 3.4.2-prep in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 4b5b1944ee1abad526c3f503cbb75d3df201ce46 Author: Nick Vatamaniuc <[email protected]> AuthorDate: Tue Oct 1 15:42:23 2024 -0400 Do not generate conflicts from the replicator app Previously, when the replicator updated replication documents and the user overrode `couch_db:check_is_admin(Db)` such that the replicator app was not admin, the replicator would end up creating conflicts in the updated replicator docs. This was the result of running the after_doc_read/2 [1] callback, which would strip the credentials and then create a random RevId to replace the real RevId. To fix it, make sure `after_doc_read/2` is skipped when it's the replicator application itself doing the doc reads. This is similar to what we do in the `before_doc_update/3` callback, so we just mirror the same behavior in `after_doc_read/2`. Moreover, in order to prevent users from pretending to be the replicator app by using the <<"_replicator">> role, switch the replicator role to be an atom -- `replicator`. It's impossible for users to create atom roles, so this should ensure only the replicator application can set that. However, just checking the `replicator` role is not enough to prevent conflicts. Since we're updating the docs on shard files directly (even for clustered databases), the internal replicator or the read_repair process may still read the documents via the `after_doc_read/2` callback and then proceed to update the patched doc body across the other copies. Thus, we skip the `after_doc_read/2` callback if it's called by the internal replicator. In the case of read_repair, there is nothing we can do to detect if the document we just read (interactively) may have been passed through the `after_doc_read/2` callback, so we opt to use only the internal replicator to spread `_replicator` changes between copies. [1] The `after_doc_read/2` callback is from a time when it was impossible to have multiple `_replicator` dbs and everyone was sharing a single db instance. 
In that case we added a feature so users cannot see each other's replication credentials unless they were admin or the "owner" of the replication doc. Today users may create their own `$prefix/_replicator` db; however, for compatibility we also maintain the `after_doc_read/2` callback (probably until CouchDB 4.0). --- src/couch_replicator/src/couch_replicator_docs.erl | 110 ++++++++++++++++++++- src/fabric/src/fabric_doc_open.erl | 47 +++++++-- src/fabric/src/fabric_doc_open_revs.erl | 27 ++++- 3 files changed, 169 insertions(+), 15 deletions(-) diff --git a/src/couch_replicator/src/couch_replicator_docs.erl b/src/couch_replicator/src/couch_replicator_docs.erl index a041e274a..6b324c97a 100644 --- a/src/couch_replicator/src/couch_replicator_docs.erl +++ b/src/couch_replicator/src/couch_replicator_docs.erl @@ -31,7 +31,11 @@ % to delete it. At some point in the future, remove this logic altogether. -define(REP_DESIGN_DOC, <<"_design/_replicator">>). -define(OWNER, <<"owner">>). --define(CTX, {user_ctx, #user_ctx{roles = [<<"_admin">>, <<"_replicator">>]}}). +% Use an atom as the replicator application role. Users cannot create atom +% roles, only binaries. This way we ensure nobody can pretend to be the +% replicator app. +-define(REPLICATOR_ROLE, replicator). +-define(CTX, {user_ctx, #user_ctx{roles = [<<"_admin">>, ?REPLICATOR_ROLE]}}). -define(replace(L, K, V), lists:keystore(K, 1, L, {K, V})). 
remove_state_fields(DbName, DocId) -> @@ -232,7 +236,7 @@ before_doc_update(#doc{} = Doc, _Db, ?REPLICATED_CHANGES) -> Doc; before_doc_update(#doc{body = {Body}} = Doc, Db, _UpdateType) -> #user_ctx{roles = Roles, name = Name} = couch_db:get_user_ctx(Db), - IsReplicator = lists:member(<<"_replicator">>, Roles), + IsReplicator = lists:member(?REPLICATOR_ROLE, Roles), Doc1 = case IsReplicator of true -> Doc; @@ -271,7 +275,10 @@ before_doc_update_owner(Other, Name, Db, #doc{body = {Body}} = Doc) -> after_doc_read(#doc{id = <<?DESIGN_DOC_PREFIX, _/binary>>} = Doc, _Db) -> Doc; after_doc_read(#doc{body = {Body}} = Doc, Db) -> - #user_ctx{name = Name} = couch_db:get_user_ctx(Db), + Ctx = #user_ctx{roles = Roles, name = Name} = couch_db:get_user_ctx(Db), + IsReplicator = lists:member(?REPLICATOR_ROLE, Roles), + % Internal replicator opens dbs with the ?ADMIN_CTX macro + IsInternalRepl = (Ctx == ?ADMIN_USER), case (catch couch_db:check_is_admin(Db)) of ok -> Doc; @@ -279,7 +286,15 @@ after_doc_read(#doc{body = {Body}} = Doc, Db) -> case get_value(?OWNER, Body) of Name -> Doc; + _Other when (IsReplicator orelse IsInternalRepl) -> + % When the replicator itself updates the doc don't patch it with a fake + % rev and strip the creds + Doc; _Other -> + % Not admin, user name doesn't match, and it's not the + % replicator app: this is another user reading it. 
Strip + % the creds and make up a fake "rev" since we're returning + % a technically non-existent document body Source = strip_credentials(get_value(<<"source">>, Body)), Target = strip_credentials(get_value(<<"target">>, Body)), NewBody0 = ?replace(Body, <<"source">>, Source), @@ -430,7 +445,12 @@ replicator_can_update_docs_test_() -> ?TDEF_FE(t_update_doc_completed), ?TDEF_FE(t_update_failed), ?TDEF_FE(t_update_triggered), - ?TDEF_FE(t_update_error) + ?TDEF_FE(t_update_error), + ?TDEF_FE(t_after_doc_read_as_admin), + ?TDEF_FE(t_after_doc_read_as_replicator), + ?TDEF_FE(t_after_doc_read_internal_replicator), + ?TDEF_FE(t_after_doc_read_matching_owner), + ?TDEF_FE(t_after_doc_read_not_matching_owner) ] } }. @@ -521,4 +541,86 @@ t_update_error(DbName) -> RepId = get_value(<<"_replication_id">>, Props), ?assertEqual(null, RepId). +t_after_doc_read_as_admin(DbName) -> + DocId = <<"doc1">>, + Map = #{ + <<"source">> => <<"https://user1:pass1@localhost:5984/db1">>, + <<"target">> => <<"https://user2:pass2@localhost:5984/db2">>, + ?OWNER => <<"o1">> + }, + {ok, _} = write_doc(DbName, DocId, Map), + Ctx = {user_ctx, #user_ctx{name = <<"o2">>, roles = [<<"_admin">>, <<"potato">>]}}, + Map1 = read_doc(DbName, DocId, Ctx), + ?assertEqual(Map, Map1). + +t_after_doc_read_as_replicator(DbName) -> + DocId = <<"doc1">>, + Map = #{ + <<"source">> => <<"https://user1:pass1@localhost:5984/db1">>, + <<"target">> => <<"https://user2:pass2@localhost:5984/db2">>, + ?OWNER => <<"o1">> + }, + {ok, _} = write_doc(DbName, DocId, Map), + Ctx = {user_ctx, #user_ctx{name = <<"o2">>, roles = [?REPLICATOR_ROLE, <<"potato">>]}}, + Map1 = read_doc(DbName, DocId, Ctx), + ?assertEqual(Map, Map1). 
+ +t_after_doc_read_internal_replicator(DbName) -> + DocId = <<"doc1">>, + Map = #{ + <<"source">> => <<"https://user1:pass1@localhost:5984/db1">>, + <<"target">> => <<"https://user2:pass2@localhost:5984/db2">>, + ?OWNER => <<"o1">> + }, + {ok, _} = write_doc(DbName, DocId, Map), + Map1 = read_doc(DbName, DocId, ?ADMIN_CTX), + ?assertEqual(Map, Map1). + +t_after_doc_read_matching_owner(DbName) -> + DocId = <<"doc1">>, + Map = #{ + <<"source">> => <<"https://user1:pass1@localhost:5984/db1">>, + <<"target">> => <<"https://user2:pass2@localhost:5984/db2">>, + ?OWNER => <<"o1">> + }, + {ok, _} = write_doc(DbName, DocId, Map), + Ctx = {user_ctx, #user_ctx{name = <<"o1">>, roles = [<<"tomato">>, <<"potato">>]}}, + Map1 = read_doc(DbName, DocId, Ctx), + ?assertEqual(Map, Map1). + +t_after_doc_read_not_matching_owner(DbName) -> + DocId = <<"doc1">>, + Map = #{ + <<"source">> => <<"https://user1:pass1@localhost:5984/db1">>, + <<"target">> => <<"https://user2:pass2@localhost:5984/db2">>, + ?OWNER => <<"o1">> + }, + {ok, _} = write_doc(DbName, DocId, Map), + Ctx = {user_ctx, #user_ctx{name = <<"o2">>, roles = [<<"tomato">>, <<"potato">>]}}, + Map1 = read_doc(DbName, DocId, Ctx), + StrippedMap = #{ + <<"source">> => <<"https://localhost:5984/db1">>, + <<"target">> => <<"https://localhost:5984/db2">>, + ?OWNER => <<"o1">> + }, + ?assertEqual(StrippedMap, Map1). + +ejson_from_map(#{} = Map) -> + ?JSON_DECODE(?JSON_ENCODE(Map)). + +ejson_to_map({L} = EJson) when is_list(L) -> + ?JSON_DECODE(?JSON_ENCODE(EJson), [return_maps]). + +write_doc(DbName, DocId, #{} = Map) -> + Doc = #doc{id = DocId, body = ejson_from_map(Map)}, + couch_util:with_db(DbName, fun(Db) -> + couch_db:update_doc(Db, Doc, []) + end). + +read_doc(DbName, DocId, Ctx) -> + {ok, Db} = couch_db:open(DbName, [Ctx]), + {ok, Doc} = couch_db:open_doc(Db, DocId), + couch_db:close(Db), + ejson_to_map(Doc#doc.body). + -endif. 
diff --git a/src/fabric/src/fabric_doc_open.erl b/src/fabric/src/fabric_doc_open.erl index 70d90b03e..3ad019039 100644 --- a/src/fabric/src/fabric_doc_open.erl +++ b/src/fabric/src/fabric_doc_open.erl @@ -14,7 +14,6 @@ -export([go/3]). --include_lib("fabric/include/fabric.hrl"). -include_lib("mem3/include/mem3.hrl"). -include_lib("couch/include/couch_db.hrl"). @@ -140,13 +139,22 @@ read_repair(#acc{dbname = DbName, replies = Replies, node_revs = NodeRevs}) -> choose_reply(Docs); [#doc{id = Id} | _] -> Opts = [?ADMIN_CTX, ?REPLICATED_CHANGES, {read_repair, NodeRevs}], - Res = fabric:update_docs(DbName, Docs, Opts), - case Res of - {ok, []} -> - couch_stats:increment_counter([fabric, read_repairs, success]); - _ -> - couch_stats:increment_counter([fabric, read_repairs, failure]), - couch_log:notice("read_repair ~s ~s ~p", [DbName, Id, Res]) + case fabric_util:is_replicator_db(DbName) of + true -> + % Skip read_repair for _replicator shards. We might have + % gotten the result of after_doc_read/2 with a fake patched + % revision and stripped creds. Let the internal replicator + % handle all that + ok; + false -> + Res = fabric:update_docs(DbName, Docs, Opts), + case Res of + {ok, []} -> + couch_stats:increment_counter([fabric, read_repairs, success]); + _ -> + couch_stats:increment_counter([fabric, read_repairs, failure]), + couch_log:notice("read_repair ~s ~s ~p", [DbName, Id, Res]) + end end, choose_reply(Docs); [] -> @@ -228,6 +236,7 @@ open_doc_test_() -> t_handle_message_reply(), t_store_node_revs(), t_read_repair(), + t_no_read_repair_for_replicator(), t_handle_response_quorum_met(), t_get_doc_info() ] @@ -517,6 +526,24 @@ t_read_repair() -> ?assertEqual(bar, read_repair(Acc4)) end). 
+t_no_read_repair_for_replicator() -> + Foo1 = {ok, #doc{revs = {1, [<<"foo">>]}}}, + + ?_test(begin + meck:expect(couch_log, notice, fun(_, _) -> ok end), + meck:expect(couch_stats, increment_counter, fun(_) -> ok end), + + % Test no read_repair took place + meck:reset(fabric), + Acc0 = #acc{ + dbname = <<"dbname1/_replicator">>, + replies = [fabric_util:kv(Foo1, 1)] + }, + ?assertEqual(Foo1, read_repair(Acc0)), + timer:sleep(100), + ?assertNot(meck:called(fabric, update_docs, ['_', '_', '_'])) + end). + t_handle_response_quorum_met() -> Foo1 = {ok, #doc{revs = {1, [<<"foo">>]}}}, Foo2 = {ok, #doc{revs = {2, [<<"foo2">>, <<"foo">>]}}}, @@ -528,6 +555,7 @@ t_handle_response_quorum_met() -> meck:expect(couch_stats, increment_counter, fun(_) -> ok end), BasicOkAcc = #acc{ + dbname = <<"dbname1">>, state = r_met, replies = [fabric_util:kv(Foo1, 2)], q_reply = Foo1 @@ -535,6 +563,7 @@ t_handle_response_quorum_met() -> ?assertEqual(Foo1, handle_response(BasicOkAcc)), WithAncestorsAcc = #acc{ + dbname = <<"dbname1">>, state = r_met, replies = [fabric_util:kv(Foo1, 1), fabric_util:kv(Foo2, 2)], q_reply = Foo2 @@ -544,6 +573,7 @@ t_handle_response_quorum_met() -> % This also checks when the quorum isn't the most recent % revision. DeeperWinsAcc = #acc{ + dbname = <<"dbname1">>, state = r_met, replies = [fabric_util:kv(Foo1, 2), fabric_util:kv(Foo2, 1)], q_reply = Foo1 @@ -553,6 +583,7 @@ t_handle_response_quorum_met() -> % Check that we return the proper doc based on rev % (ie, pos is equal) BiggerRevWinsAcc = #acc{ + dbname = <<"dbname1">>, state = r_met, replies = [fabric_util:kv(Foo1, 1), fabric_util:kv(Bar1, 2)], q_reply = Bar1 diff --git a/src/fabric/src/fabric_doc_open_revs.erl b/src/fabric/src/fabric_doc_open_revs.erl index 754cc8b3c..ff1948735 100644 --- a/src/fabric/src/fabric_doc_open_revs.erl +++ b/src/fabric/src/fabric_doc_open_revs.erl @@ -200,10 +200,14 @@ dict_replies(Dict, [Reply | Rest]) -> dict_replies(NewDict, Rest). 
maybe_read_repair(Db, IsTree, Replies, NodeRevs, ReplyCount, DoRepair) -> + % Do not read repair replicator docs because after_doc_read/2 callback + % might have patched them with a fake rev and we wouldn't want to write + % those back to disk. Let the internal replicator sync the shards instead. Docs = - case IsTree of - true -> tree_repair_docs(Replies, DoRepair); - false -> dict_repair_docs(Replies, ReplyCount) + case {fabric_util:is_replicator_db(Db), IsTree} of + {true, _} -> []; + {_, true} -> tree_repair_docs(Replies, DoRepair); + {_, false} -> dict_repair_docs(Replies, ReplyCount) end, case Docs of [] -> @@ -347,6 +351,7 @@ teardown(_) -> state0(Revs, Latest) -> #state{ + dbname = <<"dbname">>, worker_count = 3, workers = [#shard{node = 'node1'}, #shard{node = 'node2'}, #shard{node = 'node3'}], @@ -381,6 +386,7 @@ open_doc_revs_test_() -> ?TDEF_FE(check_basic_response), ?TDEF_FE(check_finish_quorum), ?TDEF_FE(check_finish_quorum_newer), + ?TDEF_FE(check_finish_quorum_replicator), ?TDEF_FE(check_no_quorum_on_second), ?TDEF_FE(check_done_on_third), ?TDEF_FE(check_specific_revs_first_msg), @@ -458,6 +464,21 @@ check_finish_quorum_newer(_) -> meck:history(fabric) ). +check_finish_quorum_replicator(_) -> + % We count a descendant of a revision for quorum so + % foo1 should count for foo2 which means we're finished. + % Since we use a replicator shard read_repair doesn't trigger + W1 = #shard{node = 'node1'}, + W2 = #shard{node = 'node2'}, + S0 = state0(all, false), + S1 = S0#state{dbname = <<"foo/_replicator">>}, + {ok, S2} = handle_message({ok, [foo1(), bar1()]}, W1, S1), + Expect = {stop, [bar1(), foo2()]}, + ok = meck:reset(fabric), + ?assertEqual(Expect, handle_message({ok, [foo2(), bar1()]}, W2, S2)), + timer:sleep(100), + ?assertNot(meck:called(fabric, update_docs, ['_', '_', '_'])). + check_no_quorum_on_second(_) -> % Quorum not yet met for the foo revision so we % would wait for w3
