This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch 3.4.2-prep in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 4b5b1944ee1abad526c3f503cbb75d3df201ce46 Author: Nick Vatamaniuc <[email protected]> AuthorDate: Tue Oct 1 15:42:23 2024 -0400 Do not generate conflicts from the replicator app Previously, when the replicator updated replication documents and the user overrode `couch_db:check_is_admin(Db)` such that the replicator app was not admin, the replicator would end up creating conflicts in the updated replicator docs. This was the result of running the after_doc_read/2 [1] callback, which would strip the credentials and then create a random RevId to replace the real RevId. To fix it, make sure `after_doc_read/2` is skipped when it's the replicator application itself doing the doc reads. This is similar to what we do in the `before_doc_update/3` callback, so we just mirror the same behavior in `after_doc_read/2`. Moreover, in order to prevent users from pretending to be the replicator app by using the <<"_replicator">> role, switch the replicator role to be an atom -- `replicator`. It's impossible for users to create atom roles, so this should ensure only the replicator application can set that. However, just checking the `replicator` role is not enough to prevent conflicts. Since we're updating the docs on shard files directly (even for clustered databases), the internal replicator or the read_repair process may still read the documents via the `after_doc_read/2` callback and then proceed to update the patched doc body across the other copies. Thus, we skip the `after_doc_read/2` callback if it's called by the internal replicator. In the case of read_repair, there is nothing we can do to detect if the document we just read (interactively) may have been passed through the `after_doc_read/2` callback, so we opt to use only the internal replicator to spread `_replicator` changes between copies. [1] The `after_doc_read/2` callback is from a time when it was impossible to have multiple `_replicator` dbs and everyone was sharing a single db instance. 
In that case we added a feature so users cannot see each other's replication credentials unless they were admin or the "owner" of the replication doc. Today users may create their own `$prefix/_replicator` db; however, for compatibility we also maintain the `after_doc_read/2` callback (probably until CouchDB 4.0). --- src/couch_replicator/src/couch_replicator_docs.erl | 110 ++++++++++++++++++++- src/fabric/src/fabric_doc_open.erl | 47 +++++++-- src/fabric/src/fabric_doc_open_revs.erl | 27 ++++- 3 files changed, 169 insertions(+), 15 deletions(-) diff --git a/src/couch_replicator/src/couch_replicator_docs.erl b/src/couch_replicator/src/couch_replicator_docs.erl index a041e274a..6b324c97a 100644 --- a/src/couch_replicator/src/couch_replicator_docs.erl +++ b/src/couch_replicator/src/couch_replicator_docs.erl @@ -31,7 +31,11 @@ % to delete it. At some point in the future, remove this logic altogether. -define(REP_DESIGN_DOC, <<"_design/_replicator">>). -define(OWNER, <<"owner">>). --define(CTX, {user_ctx, #user_ctx{roles = [<<"_admin">>, <<"_replicator">>]}}). +% Use an atom as the replicator application role. Users cannot create atom +% roles, only binaries. This way we ensure nobody can pretend to be the +% replicator app. +-define(REPLICATOR_ROLE, replicator). +-define(CTX, {user_ctx, #user_ctx{roles = [<<"_admin">>, ?REPLICATOR_ROLE]}}). -define(replace(L, K, V), lists:keystore(K, 1, L, {K, V})). 
remove_state_fields(DbName, DocId) -> @@ -232,7 +236,7 @@ before_doc_update(#doc{} = Doc, _Db, ?REPLICATED_CHANGES) -> Doc; before_doc_update(#doc{body = {Body}} = Doc, Db, _UpdateType) -> #user_ctx{roles = Roles, name = Name} = couch_db:get_user_ctx(Db), - IsReplicator = lists:member(<<"_replicator">>, Roles), + IsReplicator = lists:member(?REPLICATOR_ROLE, Roles), Doc1 = case IsReplicator of true -> Doc; @@ -271,7 +275,10 @@ before_doc_update_owner(Other, Name, Db, #doc{body = {Body}} = Doc) -> after_doc_read(#doc{id = <<?DESIGN_DOC_PREFIX, _/binary>>} = Doc, _Db) -> Doc; after_doc_read(#doc{body = {Body}} = Doc, Db) -> - #user_ctx{name = Name} = couch_db:get_user_ctx(Db), + Ctx = #user_ctx{roles = Roles, name = Name} = couch_db:get_user_ctx(Db), + IsReplicator = lists:member(?REPLICATOR_ROLE, Roles), + % Internal replicator opens dbs with the ?ADMIN_CTX macro + IsInternalRepl = (Ctx == ?ADMIN_USER), case (catch couch_db:check_is_admin(Db)) of ok -> Doc; @@ -279,7 +286,15 @@ after_doc_read(#doc{body = {Body}} = Doc, Db) -> case get_value(?OWNER, Body) of Name -> Doc; + _Other when (IsReplicator orelse IsInternalRepl) -> + % When the replicator itself updates the doc don't patch it with a fake + % rev and strip the creds + Doc; _Other -> + % Not admin, user name doesn't match, and it's not the + % replicator app: this is another user reading it. 
Strip + % the creds and make up a fake "rev" since we're returning + % a technically non-existent document body Source = strip_credentials(get_value(<<"source">>, Body)), Target = strip_credentials(get_value(<<"target">>, Body)), NewBody0 = ?replace(Body, <<"source">>, Source), @@ -430,7 +445,12 @@ replicator_can_update_docs_test_() -> ?TDEF_FE(t_update_doc_completed), ?TDEF_FE(t_update_failed), ?TDEF_FE(t_update_triggered), - ?TDEF_FE(t_update_error) + ?TDEF_FE(t_update_error), + ?TDEF_FE(t_after_doc_read_as_admin), + ?TDEF_FE(t_after_doc_read_as_replicator), + ?TDEF_FE(t_after_doc_read_internal_replicator), + ?TDEF_FE(t_after_doc_read_matching_owner), + ?TDEF_FE(t_after_doc_read_not_matching_owner) ] } }. @@ -521,4 +541,86 @@ t_update_error(DbName) -> RepId = get_value(<<"_replication_id">>, Props), ?assertEqual(null, RepId). +t_after_doc_read_as_admin(DbName) -> + DocId = <<"doc1">>, + Map = #{ + <<"source">> => <<"https://user1:pass1@localhost:5984/db1">>, + <<"target">> => <<"https://user2:pass2@localhost:5984/db2">>, + ?OWNER => <<"o1">> + }, + {ok, _} = write_doc(DbName, DocId, Map), + Ctx = {user_ctx, #user_ctx{name = <<"o2">>, roles = [<<"_admin">>, <<"potato">>]}}, + Map1 = read_doc(DbName, DocId, Ctx), + ?assertEqual(Map, Map1). + +t_after_doc_read_as_replicator(DbName) -> + DocId = <<"doc1">>, + Map = #{ + <<"source">> => <<"https://user1:pass1@localhost:5984/db1">>, + <<"target">> => <<"https://user2:pass2@localhost:5984/db2">>, + ?OWNER => <<"o1">> + }, + {ok, _} = write_doc(DbName, DocId, Map), + Ctx = {user_ctx, #user_ctx{name = <<"o2">>, roles = [?REPLICATOR_ROLE, <<"potato">>]}}, + Map1 = read_doc(DbName, DocId, Ctx), + ?assertEqual(Map, Map1). 
+ +t_after_doc_read_internal_replicator(DbName) -> + DocId = <<"doc1">>, + Map = #{ + <<"source">> => <<"https://user1:pass1@localhost:5984/db1">>, + <<"target">> => <<"https://user2:pass2@localhost:5984/db2">>, + ?OWNER => <<"o1">> + }, + {ok, _} = write_doc(DbName, DocId, Map), + Map1 = read_doc(DbName, DocId, ?ADMIN_CTX), + ?assertEqual(Map, Map1). + +t_after_doc_read_matching_owner(DbName) -> + DocId = <<"doc1">>, + Map = #{ + <<"source">> => <<"https://user1:pass1@localhost:5984/db1">>, + <<"target">> => <<"https://user2:pass2@localhost:5984/db2">>, + ?OWNER => <<"o1">> + }, + {ok, _} = write_doc(DbName, DocId, Map), + Ctx = {user_ctx, #user_ctx{name = <<"o1">>, roles = [<<"tomato">>, <<"potato">>]}}, + Map1 = read_doc(DbName, DocId, Ctx), + ?assertEqual(Map, Map1). + +t_after_doc_read_not_matching_owner(DbName) -> + DocId = <<"doc1">>, + Map = #{ + <<"source">> => <<"https://user1:pass1@localhost:5984/db1">>, + <<"target">> => <<"https://user2:pass2@localhost:5984/db2">>, + ?OWNER => <<"o1">> + }, + {ok, _} = write_doc(DbName, DocId, Map), + Ctx = {user_ctx, #user_ctx{name = <<"o2">>, roles = [<<"tomato">>, <<"potato">>]}}, + Map1 = read_doc(DbName, DocId, Ctx), + StrippedMap = #{ + <<"source">> => <<"https://localhost:5984/db1">>, + <<"target">> => <<"https://localhost:5984/db2">>, + ?OWNER => <<"o1">> + }, + ?assertEqual(StrippedMap, Map1). + +ejson_from_map(#{} = Map) -> + ?JSON_DECODE(?JSON_ENCODE(Map)). + +ejson_to_map({L} = EJson) when is_list(L) -> + ?JSON_DECODE(?JSON_ENCODE(EJson), [return_maps]). + +write_doc(DbName, DocId, #{} = Map) -> + Doc = #doc{id = DocId, body = ejson_from_map(Map)}, + couch_util:with_db(DbName, fun(Db) -> + couch_db:update_doc(Db, Doc, []) + end). + +read_doc(DbName, DocId, Ctx) -> + {ok, Db} = couch_db:open(DbName, [Ctx]), + {ok, Doc} = couch_db:open_doc(Db, DocId), + couch_db:close(Db), + ejson_to_map(Doc#doc.body). + -endif. 
diff --git a/src/fabric/src/fabric_doc_open.erl b/src/fabric/src/fabric_doc_open.erl index 70d90b03e..3ad019039 100644 --- a/src/fabric/src/fabric_doc_open.erl +++ b/src/fabric/src/fabric_doc_open.erl @@ -14,7 +14,6 @@ -export([go/3]). --include_lib("fabric/include/fabric.hrl"). -include_lib("mem3/include/mem3.hrl"). -include_lib("couch/include/couch_db.hrl"). @@ -140,13 +139,22 @@ read_repair(#acc{dbname = DbName, replies = Replies, node_revs = NodeRevs}) -> choose_reply(Docs); [#doc{id = Id} | _] -> Opts = [?ADMIN_CTX, ?REPLICATED_CHANGES, {read_repair, NodeRevs}], - Res = fabric:update_docs(DbName, Docs, Opts), - case Res of - {ok, []} -> - couch_stats:increment_counter([fabric, read_repairs, success]); - _ -> - couch_stats:increment_counter([fabric, read_repairs, failure]), - couch_log:notice("read_repair ~s ~s ~p", [DbName, Id, Res]) + case fabric_util:is_replicator_db(DbName) of + true -> + % Skip read_repair for _replicator shards. We might have + % gotten the result of after_doc_read/2 with a fake patched + % revision and stripped creds. Let the internal replicator + % handle all that + ok; + false -> + Res = fabric:update_docs(DbName, Docs, Opts), + case Res of + {ok, []} -> + couch_stats:increment_counter([fabric, read_repairs, success]); + _ -> + couch_stats:increment_counter([fabric, read_repairs, failure]), + couch_log:notice("read_repair ~s ~s ~p", [DbName, Id, Res]) + end end, choose_reply(Docs); [] -> @@ -228,6 +236,7 @@ open_doc_test_() -> t_handle_message_reply(), t_store_node_revs(), t_read_repair(), + t_no_read_repair_for_replicator(), t_handle_response_quorum_met(), t_get_doc_info() ] @@ -517,6 +526,24 @@ t_read_repair() -> ?assertEqual(bar, read_repair(Acc4)) end). 
+t_no_read_repair_for_replicator() -> + Foo1 = {ok, #doc{revs = {1, [<<"foo">>]}}}, + + ?_test(begin + meck:expect(couch_log, notice, fun(_, _) -> ok end), + meck:expect(couch_stats, increment_counter, fun(_) -> ok end), + + % Test no read_repair took place + meck:reset(fabric), + Acc0 = #acc{ + dbname = <<"dbname1/_replicator">>, + replies = [fabric_util:kv(Foo1, 1)] + }, + ?assertEqual(Foo1, read_repair(Acc0)), + timer:sleep(100), + ?assertNot(meck:called(fabric, update_docs, ['_', '_', '_'])) + end). + t_handle_response_quorum_met() -> Foo1 = {ok, #doc{revs = {1, [<<"foo">>]}}}, Foo2 = {ok, #doc{revs = {2, [<<"foo2">>, <<"foo">>]}}}, @@ -528,6 +555,7 @@ t_handle_response_quorum_met() -> meck:expect(couch_stats, increment_counter, fun(_) -> ok end), BasicOkAcc = #acc{ + dbname = <<"dbname1">>, state = r_met, replies = [fabric_util:kv(Foo1, 2)], q_reply = Foo1 @@ -535,6 +563,7 @@ t_handle_response_quorum_met() -> ?assertEqual(Foo1, handle_response(BasicOkAcc)), WithAncestorsAcc = #acc{ + dbname = <<"dbname1">>, state = r_met, replies = [fabric_util:kv(Foo1, 1), fabric_util:kv(Foo2, 2)], q_reply = Foo2 @@ -544,6 +573,7 @@ t_handle_response_quorum_met() -> % This also checks when the quorum isn't the most recent % revision. DeeperWinsAcc = #acc{ + dbname = <<"dbname1">>, state = r_met, replies = [fabric_util:kv(Foo1, 2), fabric_util:kv(Foo2, 1)], q_reply = Foo1 @@ -553,6 +583,7 @@ t_handle_response_quorum_met() -> % Check that we return the proper doc based on rev % (ie, pos is equal) BiggerRevWinsAcc = #acc{ + dbname = <<"dbname1">>, state = r_met, replies = [fabric_util:kv(Foo1, 1), fabric_util:kv(Bar1, 2)], q_reply = Bar1 diff --git a/src/fabric/src/fabric_doc_open_revs.erl b/src/fabric/src/fabric_doc_open_revs.erl index 754cc8b3c..ff1948735 100644 --- a/src/fabric/src/fabric_doc_open_revs.erl +++ b/src/fabric/src/fabric_doc_open_revs.erl @@ -200,10 +200,14 @@ dict_replies(Dict, [Reply | Rest]) -> dict_replies(NewDict, Rest). 
maybe_read_repair(Db, IsTree, Replies, NodeRevs, ReplyCount, DoRepair) -> + % Do not read repair replicator docs because after_doc_read/2 callback + % might have patched them with a fake rev and we wouldn't want to write + % those back to disk. Let the internal replicator sync the shards instead. Docs = - case IsTree of - true -> tree_repair_docs(Replies, DoRepair); - false -> dict_repair_docs(Replies, ReplyCount) + case {fabric_util:is_replicator_db(Db), IsTree} of + {true, _} -> []; + {_, true} -> tree_repair_docs(Replies, DoRepair); + {_, false} -> dict_repair_docs(Replies, ReplyCount) end, case Docs of [] -> @@ -347,6 +351,7 @@ teardown(_) -> state0(Revs, Latest) -> #state{ + dbname = <<"dbname">>, worker_count = 3, workers = [#shard{node = 'node1'}, #shard{node = 'node2'}, #shard{node = 'node3'}], @@ -381,6 +386,7 @@ open_doc_revs_test_() -> ?TDEF_FE(check_basic_response), ?TDEF_FE(check_finish_quorum), ?TDEF_FE(check_finish_quorum_newer), + ?TDEF_FE(check_finish_quorum_replicator), ?TDEF_FE(check_no_quorum_on_second), ?TDEF_FE(check_done_on_third), ?TDEF_FE(check_specific_revs_first_msg), @@ -458,6 +464,21 @@ check_finish_quorum_newer(_) -> meck:history(fabric) ). +check_finish_quorum_replicator(_) -> + % We count a descendant of a revision for quorum so + % foo1 should count for foo2 which means we're finished. + % Since we use a replicator shard read_repair doesn't trigger + W1 = #shard{node = 'node1'}, + W2 = #shard{node = 'node2'}, + S0 = state0(all, false), + S1 = S0#state{dbname = <<"foo/_replicator">>}, + {ok, S2} = handle_message({ok, [foo1(), bar1()]}, W1, S1), + Expect = {stop, [bar1(), foo2()]}, + ok = meck:reset(fabric), + ?assertEqual(Expect, handle_message({ok, [foo2(), bar1()]}, W2, S2)), + timer:sleep(100), + ?assertNot(meck:called(fabric, update_docs, ['_', '_', '_'])). + check_no_quorum_on_second(_) -> % Quorum not yet met for the foo revision so we % would wait for w3
