This is an automated email from the ASF dual-hosted git repository. jiahuili430 pushed a commit to branch log-rep-domain in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 7e1a1be69a8079183d81b1b1644083a35ee8f2d2 Author: Jiahui Li <[email protected]> AuthorDate: Mon Feb 23 15:11:17 2026 -0600 Add optional logging when using disallowed domains for replication --- rel/overlay/etc/default.ini | 10 +++ .../src/couch_replicator_scheduler.erl | 3 +- .../src/couch_replicator_utils.erl | 93 ++++++++++++++++++++++ 3 files changed, 105 insertions(+), 1 deletion(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 6fbc99f58..922588938 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -715,6 +715,16 @@ partitioned||* = true ; options may be found at https://www.erlang.org/doc/man/inet.html#setopts-2. ;valid_socket_options = buffer,keepalive,nodelay,priority,recbuf,sndbuf +; Valid replication endpoint domains. +; - []: Don't verify the replication jobs. +; - ["domain1", "domain2", ...]: When `verify_endpoint_domain_log` is enabled, +; generate additional logs for replication jobs whose endpoint URLs are not in +; this domain list. +;valid_endpoint_domain = [] + +; When enabled, CouchDB will log any replication that uses disallowed domains. +;verify_endpoint_domain_log = false + ; Valid replication endpoint protocols. Replication jobs with endpoint urls not ; in this list will fail to run. 
;valid_endpoint_protocols = http,https diff --git a/src/couch_replicator/src/couch_replicator_scheduler.erl b/src/couch_replicator/src/couch_replicator_scheduler.erl index 9eb0b4723..ca9738b88 100644 --- a/src/couch_replicator/src/couch_replicator_scheduler.erl +++ b/src/couch_replicator/src/couch_replicator_scheduler.erl @@ -665,7 +665,8 @@ maybe_remove_job_int(JobId, State) -> start_job_int(#job{pid = Pid}, _State) when Pid /= undefined -> ok; -start_job_int(#job{} = Job0, State) -> +start_job_int(#job{rep = Rep} = Job0, State) -> + ok = couch_replicator_utils:verify_endpoint_domain_log(Rep), Job = maybe_optimize_job_for_rate_limiting(Job0), case couch_replicator_scheduler_job:start_link(Job#job.rep) of {ok, Child} -> diff --git a/src/couch_replicator/src/couch_replicator_utils.erl b/src/couch_replicator/src/couch_replicator_utils.erl index 5e8187200..c924d9512 100644 --- a/src/couch_replicator/src/couch_replicator_utils.erl +++ b/src/couch_replicator/src/couch_replicator_utils.erl @@ -29,6 +29,7 @@ remove_basic_auth_creds/1, normalize_basic_auth/1, seq_encode/1, + verify_endpoint_domain_log/1, valid_endpoint_protocols_log/1, verify_ssl_certificates_log/1, cacert_get/0 @@ -297,6 +298,40 @@ seq_encode(Seq) -> % object. We are being maximally compatible here. ?JSON_ENCODE(Seq). +%% Log uses of disallowed domain +verify_endpoint_domain_log(#rep{source = undefined, target = undefined}) -> + % When we cancel continuous transient replications (with a POST to _replicate) + % source and target will be undefined + ok; +verify_endpoint_domain_log(#rep{} = Rep) -> + VerifyEnabled = config:get_boolean("replicator", "verify_endpoint_domain_log", false), + case VerifyEnabled of + true -> + AllowedDomainCfg = config:get("replicator", "valid_endpoint_domain", "[]"), + {ok, AllowedDomain} = couch_util:parse_term(AllowedDomainCfg), + ok = check_endpoint_domain(Rep, source, AllowedDomain), + ok = check_endpoint_domain(Rep, target, AllowedDomain); + false -> + ok + end. 
+ +check_endpoint_domain(#rep{}, _, []) -> + ok; +check_endpoint_domain(#rep{} = Rep, Type, AllowedDomain) -> + Url = url_from_type(Rep, Type), + #url{host = Host} = ibrowse_lib:parse_url(Url), + case lists:member(Host, AllowedDomain) of + true -> + ok; + false -> + couch_log:warning( + "**disallowed domain** replication ~s used disallowed domain ~s at ~s", [ + rep_principal(Rep), Type, Url + ] + ), + ok + end. + %% Log uses of http protocol valid_endpoint_protocols_log(#rep{source = undefined, target = undefined}) -> % When we cancel continuous transient replications (with a POST to _replicate) @@ -811,6 +846,64 @@ t_allow_canceling_transient_jobs(_) -> ?assertEqual(ok, valid_endpoint_protocols_log(#rep{})), ?assertEqual(0, meck:num_calls(couch_log, warning, 2)). +verify_endpoint_domain_log_setup() -> + Ctx = test_util:start_couch(), + config:set_boolean("replicator", "verify_endpoint_domain_log", true, false), + meck:new(couch_log, [passthrough]), + Ctx. + +verify_endpoint_domain_log_teardown(Ctx) -> + meck:unload(), + config:delete("replicator", "verify_endpoint_domain_log", false), + test_util:stop_couch(Ctx). + +verify_endpoint_domain_log_test_() -> + { + foreach, + fun verify_endpoint_domain_log_setup/0, + fun verify_endpoint_domain_log_teardown/1, + [ + ?TDEF_FE(t_dont_warn_when_valid_endpoint_domain_is_empty), + ?TDEF_FE(t_warn_when_replicate_with_invalid_endpoint_domain) + ] + }. + +t_dont_warn_when_valid_endpoint_domain_is_empty(_) -> + set_allowed_domain("[]"), + Rep = #rep{ + source = #httpdb{url = "https://foo.local"}, + target = #httpdb{url = "https://127.0.0.2"} + }, + meck:reset(couch_log), + ?assertEqual(ok, verify_endpoint_domain_log(Rep)), + ?assertEqual(0, meck:num_calls(couch_log, warning, 2)), + reset_allowed_domain(). 
+ +t_warn_when_replicate_with_invalid_endpoint_domain(_) -> + set_allowed_domain("[\"example.com\", \"127.0.0.1\"]"), + Rep1 = #rep{ + source = #httpdb{url = "https://foo.local"}, + target = #httpdb{url = "https://127.0.0.1"} + }, + meck:reset(couch_log), + ?assertEqual(ok, verify_endpoint_domain_log(Rep1)), + ?assertEqual(1, meck:num_calls(couch_log, warning, 2)), + + meck:reset(couch_log), + Rep2 = #rep{ + source = #httpdb{url = "https://foo.local"}, + target = #httpdb{url = "https://127.0.0.2"} + }, + ?assertEqual(ok, verify_endpoint_domain_log(Rep2)), + ?assertEqual(2, meck:num_calls(couch_log, warning, 2)), + reset_allowed_domain(). + +set_allowed_domain(Domains) -> + config:set("replicator", "valid_endpoint_domain", Domains, false). + +reset_allowed_domain() -> + config:delete("replicator", "valid_endpoint_domain", false). + cacert_test() -> Old = ?CACERT_DEFAULT_TIMESTAMP, Now = erlang:monotonic_time(second),
