This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch do-not-generate-conflicts-from-replictor in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit d0d660aff2e2479f8ba40433c3736015fbe03da1 Author: Nick Vatamaniuc <[email protected]> AuthorDate: Tue Oct 1 15:42:23 2024 -0400 Do not generate conflicts from the replicator app Previously, when replicator updated replication documents and the user overrode `couch_db:check_is_admin(Db)` such that the replicator app was not admin, the replicator would end up creating conflicts in the updated replicator docs. This was the result of running the after_doc_read/2 [1] callback, which would strip the credentials and then create a random RevId to replace the real RevId. To fix it, make sure `after_doc_read/2` is skipped when it's the replicator application itself doing the doc reads. This is similar to what we do in the `before_doc_update/3` callback, so we just mirror the same behavior in `after_doc_read/2`. Moreover, in order to prevent users from pretending to be the replicator app by using the <<"_replicator">> role, switch the replicator role to be an atom -- `replicator`. It's impossible for users to create atom roles, so this should ensure only replicator application can set that. [1] The `after_doc_read/2` callback is from a time when it was impossible to have multiple `_replicator` db and everyone was sharing a single db instance. In that case we added a feature so users cannot see each others' replication credentials unless they were admin or the "owner" of the replication doc. Today users may create their own `$prefix/_replicator` db, however, for compatibility we also maintain the `after_doc_read/2` callback (probably until CouchDB 4.0). --- src/couch_replicator/src/couch_replicator_docs.erl | 98 ++++++++++++++++++++-- 1 file changed, 93 insertions(+), 5 deletions(-) diff --git a/src/couch_replicator/src/couch_replicator_docs.erl b/src/couch_replicator/src/couch_replicator_docs.erl index a041e274a..8ae32ade5 100644 --- a/src/couch_replicator/src/couch_replicator_docs.erl +++ b/src/couch_replicator/src/couch_replicator_docs.erl @@ -31,7 +31,11 @@ % to delete it. At some point in the future, remove this logic altogether. -define(REP_DESIGN_DOC, <<"_design/_replicator">>). -define(OWNER, <<"owner">>). --define(CTX, {user_ctx, #user_ctx{roles = [<<"_admin">>, <<"_replicator">>]}}). +% Use an atom as the replicator application role. Users cannot create atom +% roles, only binaries. This way we ensure nobody can pretened to be the +% replicator app. +-define(REPLICATOR_ROLE, replicator). +-define(CTX, {user_ctx, #user_ctx{roles = [<<"_admin">>, ?REPLICATOR_ROLE]}}). -define(replace(L, K, V), lists:keystore(K, 1, L, {K, V})). remove_state_fields(DbName, DocId) -> @@ -232,7 +236,7 @@ before_doc_update(#doc{} = Doc, _Db, ?REPLICATED_CHANGES) -> Doc; before_doc_update(#doc{body = {Body}} = Doc, Db, _UpdateType) -> #user_ctx{roles = Roles, name = Name} = couch_db:get_user_ctx(Db), - IsReplicator = lists:member(<<"_replicator">>, Roles), + IsReplicator = lists:member(?REPLICATOR_ROLE, Roles), Doc1 = case IsReplicator of true -> Doc; @@ -271,7 +275,8 @@ before_doc_update_owner(Other, Name, Db, #doc{body = {Body}} = Doc) -> after_doc_read(#doc{id = <<?DESIGN_DOC_PREFIX, _/binary>>} = Doc, _Db) -> Doc; after_doc_read(#doc{body = {Body}} = Doc, Db) -> - #user_ctx{name = Name} = couch_db:get_user_ctx(Db), + #user_ctx{roles = Roles, name = Name} = couch_db:get_user_ctx(Db), + IsReplicator = lists:member(?REPLICATOR_ROLE, Roles), case (catch couch_db:check_is_admin(Db)) of ok -> Doc; @@ -279,7 +284,15 @@ after_doc_read(#doc{body = {Body}} = Doc, Db) -> case get_value(?OWNER, Body) of Name -> Doc; - _Other -> + _Other when IsReplicator -> + % When the replicator itself updates the doc don't patch it with a fake + % rev and strip the creds + Doc; + _Other when not IsReplicator -> + % Not admin, user name doesn't match, and it's not the + % replicator app: this is another user reading it. Strip + % the creds and make up a fake "rev" since we're returning + % a technically non-existent document body Source = strip_credentials(get_value(<<"source">>, Body)), Target = strip_credentials(get_value(<<"target">>, Body)), NewBody0 = ?replace(Body, <<"source">>, Source), @@ -430,7 +443,11 @@ replicator_can_update_docs_test_() -> ?TDEF_FE(t_update_doc_completed), ?TDEF_FE(t_update_failed), ?TDEF_FE(t_update_triggered), - ?TDEF_FE(t_update_error) + ?TDEF_FE(t_update_error), + ?TDEF_FE(t_after_doc_read_as_admin), + ?TDEF_FE(t_after_doc_read_as_replicator), + ?TDEF_FE(t_after_doc_read_matching_owner), + ?TDEF_FE(t_after_doc_read_not_matching_owner) ] } }. @@ -521,4 +538,75 @@ t_update_error(DbName) -> RepId = get_value(<<"_replication_id">>, Props), ?assertEqual(null, RepId). +t_after_doc_read_as_admin(DbName) -> + DocId = <<"doc1">>, + Map = #{ + <<"source">> => <<"https://user1:pass1@localhost:5984/db1">>, + <<"target">> => <<"https://user2:pass2@localhost:5984/db2">>, + ?OWNER => <<"o1">> + }, + {ok, _} = write_doc(DbName, DocId, Map), + Ctx = {user_ctx, #user_ctx{name = <<"o2">>, roles = [<<"_admin">>, <<"potato">>]}}, + Map1 = read_doc(DbName, DocId, Ctx), + ?assertEqual(Map, Map1). + +t_after_doc_read_as_replicator(DbName) -> + DocId = <<"doc1">>, + Map = #{ + <<"source">> => <<"https://user1:pass1@localhost:5984/db1">>, + <<"target">> => <<"https://user2:pass2@localhost:5984/db2">>, + ?OWNER => <<"o1">> + }, + {ok, _} = write_doc(DbName, DocId, Map), + Ctx = {user_ctx, #user_ctx{name = <<"o2">>, roles = [?REPLICATOR_ROLE, <<"potato">>]}}, + Map1 = read_doc(DbName, DocId, Ctx), + ?assertEqual(Map, Map1). + +t_after_doc_read_matching_owner(DbName) -> + DocId = <<"doc1">>, + Map = #{ + <<"source">> => <<"https://user1:pass1@localhost:5984/db1">>, + <<"target">> => <<"https://user2:pass2@localhost:5984/db2">>, + ?OWNER => <<"o1">> + }, + {ok, _} = write_doc(DbName, DocId, Map), + Ctx = {user_ctx, #user_ctx{name = <<"o1">>, roles = [<<"tomato">>, <<"potato">>]}}, + Map1 = read_doc(DbName, DocId, Ctx), + ?assertEqual(Map, Map1). + +t_after_doc_read_not_matching_owner(DbName) -> + DocId = <<"doc1">>, + Map = #{ + <<"source">> => <<"https://user1:pass1@localhost:5984/db1">>, + <<"target">> => <<"https://user2:pass2@localhost:5984/db2">>, + ?OWNER => <<"o1">> + }, + {ok, _} = write_doc(DbName, DocId, Map), + Ctx = {user_ctx, #user_ctx{name = <<"o2">>, roles = [<<"tomato">>, <<"potato">>]}}, + Map1 = read_doc(DbName, DocId, Ctx), + StrippedMap = #{ + <<"source">> => <<"https://localhost:5984/db1">>, + <<"target">> => <<"https://localhost:5984/db2">>, + ?OWNER => <<"o1">> + }, + ?assertEqual(StrippedMap, Map1). + +ejson_from_map(#{} = Map) -> + ?JSON_DECODE(?JSON_ENCODE(Map)). + +ejson_to_map({L} = EJson) when is_list(L) -> + ?JSON_DECODE(?JSON_ENCODE(EJson), [return_maps]). + +write_doc(DbName, DocId, #{} = Map) -> + Doc = #doc{id = DocId, body = ejson_from_map(Map)}, + couch_util:with_db(DbName, fun(Db) -> + couch_db:update_doc(Db, Doc, []) + end). + +read_doc(DbName, DocId, Ctx) -> + {ok, Db} = couch_db:open(DbName, [Ctx]), + {ok, Doc} = couch_db:open_doc(Db, DocId), + couch_db:close(Db), + ejson_to_map(Doc#doc.body). + -endif.
