This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch do-not-generate-conflicts-from-replictor
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit d0d660aff2e2479f8ba40433c3736015fbe03da1
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Tue Oct 1 15:42:23 2024 -0400

    Do not generate conflicts from the replicator app
    
    Previously, when replicator updated replication documents and the user 
overrode
    `couch_db:check_is_admin(Db)` such that the replicator app was not admin, 
the
    replicator would end up creating conflicts in the updated replicator docs. 
This
    was the result of running the after_doc_read/2 [1] callback, which would 
strip
    the credentials and then create a random RevId to replace the real RevId.
    
    To fix it, make sure `after_doc_read/2` is skipped when it's the replicator
    application itself doing the doc reads. This is similar to what we do in the
    `before_doc_update/3` callback, so we just mirror the same behavior in
    `after_doc_read/2`.
    
    Moreover, in order to prevent users from pretending to be the replicator 
app by
    using the <<"_replicator">> role, switch the replicator role to be an atom 
--
    `replicator`. It's impossible for users to create atom roles, so this should
    ensure only replicator application can set that.
    
    [1] The `after_doc_read/2` callback is from a time when it was impossible to
    have multiple `_replicator` db and everyone was sharing a single db 
instance.
    In that case we added a feature so users cannot see each others' replication
    credentials unless they were admin or the "owner" of the replication doc. 
Today
    users may create their own `$prefix/_replicator` db, however, for 
compatibility
    we also maintain the `after_doc_read/2` callback (probably until CouchDB 
4.0).
---
 src/couch_replicator/src/couch_replicator_docs.erl | 98 ++++++++++++++++++++--
 1 file changed, 93 insertions(+), 5 deletions(-)

diff --git a/src/couch_replicator/src/couch_replicator_docs.erl 
b/src/couch_replicator/src/couch_replicator_docs.erl
index a041e274a..8ae32ade5 100644
--- a/src/couch_replicator/src/couch_replicator_docs.erl
+++ b/src/couch_replicator/src/couch_replicator_docs.erl
@@ -31,7 +31,11 @@
 % to delete it. At some point in the future, remove this logic altogether.
 -define(REP_DESIGN_DOC, <<"_design/_replicator">>).
 -define(OWNER, <<"owner">>).
--define(CTX, {user_ctx, #user_ctx{roles = [<<"_admin">>, <<"_replicator">>]}}).
+% Use an atom as the replicator application role. Users cannot create atom
+% roles, only binaries. This way we ensure nobody can pretened to be the
+% replicator app.
+-define(REPLICATOR_ROLE, replicator).
+-define(CTX, {user_ctx, #user_ctx{roles = [<<"_admin">>, ?REPLICATOR_ROLE]}}).
 -define(replace(L, K, V), lists:keystore(K, 1, L, {K, V})).
 
 remove_state_fields(DbName, DocId) ->
@@ -232,7 +236,7 @@ before_doc_update(#doc{} = Doc, _Db, ?REPLICATED_CHANGES) ->
     Doc;
 before_doc_update(#doc{body = {Body}} = Doc, Db, _UpdateType) ->
     #user_ctx{roles = Roles, name = Name} = couch_db:get_user_ctx(Db),
-    IsReplicator = lists:member(<<"_replicator">>, Roles),
+    IsReplicator = lists:member(?REPLICATOR_ROLE, Roles),
     Doc1 =
         case IsReplicator of
             true -> Doc;
@@ -271,7 +275,8 @@ before_doc_update_owner(Other, Name, Db, #doc{body = 
{Body}} = Doc) ->
 after_doc_read(#doc{id = <<?DESIGN_DOC_PREFIX, _/binary>>} = Doc, _Db) ->
     Doc;
 after_doc_read(#doc{body = {Body}} = Doc, Db) ->
-    #user_ctx{name = Name} = couch_db:get_user_ctx(Db),
+    #user_ctx{roles = Roles, name = Name} = couch_db:get_user_ctx(Db),
+    IsReplicator = lists:member(?REPLICATOR_ROLE, Roles),
     case (catch couch_db:check_is_admin(Db)) of
         ok ->
             Doc;
@@ -279,7 +284,15 @@ after_doc_read(#doc{body = {Body}} = Doc, Db) ->
             case get_value(?OWNER, Body) of
                 Name ->
                     Doc;
-                _Other ->
+                _Other when IsReplicator ->
+                    % When the replicator itself updates the doc don't patch 
it with a fake
+                    % rev and strip the creds
+                    Doc;
+                _Other when not IsReplicator ->
+                    % Not admin, user name doesn't match, and it's not the
+                    % replicator app: this is another user reading it. Strip
+                    % the creds and make up a fake "rev" since we're returning
+                    % a technically non-existent document body
                     Source = strip_credentials(get_value(<<"source">>, Body)),
                     Target = strip_credentials(get_value(<<"target">>, Body)),
                     NewBody0 = ?replace(Body, <<"source">>, Source),
@@ -430,7 +443,11 @@ replicator_can_update_docs_test_() ->
                 ?TDEF_FE(t_update_doc_completed),
                 ?TDEF_FE(t_update_failed),
                 ?TDEF_FE(t_update_triggered),
-                ?TDEF_FE(t_update_error)
+                ?TDEF_FE(t_update_error),
+                ?TDEF_FE(t_after_doc_read_as_admin),
+                ?TDEF_FE(t_after_doc_read_as_replicator),
+                ?TDEF_FE(t_after_doc_read_matching_owner),
+                ?TDEF_FE(t_after_doc_read_not_matching_owner)
             ]
         }
     }.
@@ -521,4 +538,75 @@ t_update_error(DbName) ->
     RepId = get_value(<<"_replication_id">>, Props),
     ?assertEqual(null, RepId).
 
+t_after_doc_read_as_admin(DbName) ->
+    DocId = <<"doc1">>,
+    Map = #{
+        <<"source">> => <<"https://user1:pass1@localhost:5984/db1";>>,
+        <<"target">> => <<"https://user2:pass2@localhost:5984/db2";>>,
+        ?OWNER => <<"o1">>
+    },
+    {ok, _} = write_doc(DbName, DocId, Map),
+    Ctx = {user_ctx, #user_ctx{name = <<"o2">>, roles = [<<"_admin">>, 
<<"potato">>]}},
+    Map1 = read_doc(DbName, DocId, Ctx),
+    ?assertEqual(Map, Map1).
+
+t_after_doc_read_as_replicator(DbName) ->
+    DocId = <<"doc1">>,
+    Map = #{
+        <<"source">> => <<"https://user1:pass1@localhost:5984/db1";>>,
+        <<"target">> => <<"https://user2:pass2@localhost:5984/db2";>>,
+        ?OWNER => <<"o1">>
+    },
+    {ok, _} = write_doc(DbName, DocId, Map),
+    Ctx = {user_ctx, #user_ctx{name = <<"o2">>, roles = [?REPLICATOR_ROLE, 
<<"potato">>]}},
+    Map1 = read_doc(DbName, DocId, Ctx),
+    ?assertEqual(Map, Map1).
+
+t_after_doc_read_matching_owner(DbName) ->
+    DocId = <<"doc1">>,
+    Map = #{
+        <<"source">> => <<"https://user1:pass1@localhost:5984/db1";>>,
+        <<"target">> => <<"https://user2:pass2@localhost:5984/db2";>>,
+        ?OWNER => <<"o1">>
+    },
+    {ok, _} = write_doc(DbName, DocId, Map),
+    Ctx = {user_ctx, #user_ctx{name = <<"o1">>, roles = [<<"tomato">>, 
<<"potato">>]}},
+    Map1 = read_doc(DbName, DocId, Ctx),
+    ?assertEqual(Map, Map1).
+
+t_after_doc_read_not_matching_owner(DbName) ->
+    DocId = <<"doc1">>,
+    Map = #{
+        <<"source">> => <<"https://user1:pass1@localhost:5984/db1";>>,
+        <<"target">> => <<"https://user2:pass2@localhost:5984/db2";>>,
+        ?OWNER => <<"o1">>
+    },
+    {ok, _} = write_doc(DbName, DocId, Map),
+    Ctx = {user_ctx, #user_ctx{name = <<"o2">>, roles = [<<"tomato">>, 
<<"potato">>]}},
+    Map1 = read_doc(DbName, DocId, Ctx),
+    StrippedMap = #{
+        <<"source">> => <<"https://localhost:5984/db1";>>,
+        <<"target">> => <<"https://localhost:5984/db2";>>,
+        ?OWNER => <<"o1">>
+    },
+    ?assertEqual(StrippedMap, Map1).
+
+ejson_from_map(#{} = Map) ->
+    ?JSON_DECODE(?JSON_ENCODE(Map)).
+
+ejson_to_map({L} = EJson) when is_list(L) ->
+    ?JSON_DECODE(?JSON_ENCODE(EJson), [return_maps]).
+
+write_doc(DbName, DocId, #{} = Map) ->
+    Doc = #doc{id = DocId, body = ejson_from_map(Map)},
+    couch_util:with_db(DbName, fun(Db) ->
+        couch_db:update_doc(Db, Doc, [])
+    end).
+
+read_doc(DbName, DocId, Ctx) ->
+    {ok, Db} = couch_db:open(DbName, [Ctx]),
+    {ok, Doc} = couch_db:open_doc(Db, DocId),
+    couch_db:close(Db),
+    ejson_to_map(Doc#doc.body).
+
 -endif.

Reply via email to