This is an automated email from the ASF dual-hosted git repository. jiahuili430 pushed a commit to branch log-rep-domain in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 4bb9e2bfd8e3e71759945a4572e1557ab6cbd1e4 Author: Jiahui Li <[email protected]> AuthorDate: Mon Feb 23 15:11:17 2026 -0600 Add optional logging when using disallowed domains for replication --- rel/overlay/etc/default.ini | 10 +++ .../src/couch_replicator_scheduler.erl | 3 +- .../src/couch_replicator_utils.erl | 93 ++++++++++++++++++++++ 3 files changed, 105 insertions(+), 1 deletion(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 6fbc99f58..897de8285 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -722,6 +722,16 @@ partitioned||* = true ; When enabled CouchDB will log any replication that uses the insecure http protocol. ;valid_endpoint_protocols_log = false +; When enabled CouchDB will log any replication that uses disallowed domains. +;verify_endpoint_domain_log = false + +; Valid replication endpoint domains. +; - []: Don't verify the endpoint domains of replication jobs. +; - ["domain1", "domain2", ...]: When `verify_endpoint_domain_log` is enabled, +; log a warning for each replication job whose endpoint URL host is not in +; this domain list. +;valid_endpoint_domain = [] + ; When enabled CouchDB will check the validity of the TLS certificates of source and target. 
;verify_ssl_certificates_log = false diff --git a/src/couch_replicator/src/couch_replicator_scheduler.erl b/src/couch_replicator/src/couch_replicator_scheduler.erl index 9eb0b4723..ca9738b88 100644 --- a/src/couch_replicator/src/couch_replicator_scheduler.erl +++ b/src/couch_replicator/src/couch_replicator_scheduler.erl @@ -665,7 +665,8 @@ maybe_remove_job_int(JobId, State) -> start_job_int(#job{pid = Pid}, _State) when Pid /= undefined -> ok; -start_job_int(#job{} = Job0, State) -> +start_job_int(#job{rep = Rep} = Job0, State) -> + ok = couch_replicator_utils:verify_endpoint_domain_log(Rep), Job = maybe_optimize_job_for_rate_limiting(Job0), case couch_replicator_scheduler_job:start_link(Job#job.rep) of {ok, Child} -> diff --git a/src/couch_replicator/src/couch_replicator_utils.erl b/src/couch_replicator/src/couch_replicator_utils.erl index 5e8187200..3b709dfab 100644 --- a/src/couch_replicator/src/couch_replicator_utils.erl +++ b/src/couch_replicator/src/couch_replicator_utils.erl @@ -29,6 +29,7 @@ remove_basic_auth_creds/1, normalize_basic_auth/1, seq_encode/1, + verify_endpoint_domain_log/1, valid_endpoint_protocols_log/1, verify_ssl_certificates_log/1, cacert_get/0 @@ -330,6 +331,40 @@ url_from_type(#rep{} = Rep, source) -> url_from_type(#rep{} = Rep, target) -> Rep#rep.target#httpdb.url. 
+%% Log uses of disallowed domain +verify_endpoint_domain_log(#rep{source = undefined, target = undefined}) -> + % When we cancel continuous transient replications (with a POST to _replicate) + % source and target will be undefined + ok; +verify_endpoint_domain_log(#rep{} = Rep) -> + VerifyEnabled = config:get_boolean("replicator", "verify_endpoint_domain_log", false), + case VerifyEnabled of + true -> + AllowedDomainCfg = config:get("replicator", "valid_endpoint_domain", "[]"), + {ok, AllowedDomain} = couch_util:parse_term(AllowedDomainCfg), + ok = check_endpoint_domain(Rep, source, AllowedDomain), + ok = check_endpoint_domain(Rep, target, AllowedDomain); + false -> + ok + end. + +check_endpoint_domain(#rep{}, _, []) -> + ok; +check_endpoint_domain(#rep{} = Rep, Type, AllowedDomain) -> + Url = url_from_type(Rep, Type), + #url{host = Host} = ibrowse_lib:parse_url(Url), + case lists:member(Host, AllowedDomain) of + true -> + ok; + false -> + couch_log:warning( + "**disallowed domain** replication ~s used disallowed domain ~s at ~s", [ + rep_principal(Rep), Type, Url + ] + ), + ok + end. + %% log uses of https protocol where verify_peer would fail. verify_ssl_certificates_log(#rep{} = Rep) -> ok = check_ssl_certificates(Rep, source), @@ -811,6 +846,64 @@ t_allow_canceling_transient_jobs(_) -> ?assertEqual(ok, valid_endpoint_protocols_log(#rep{})), ?assertEqual(0, meck:num_calls(couch_log, warning, 2)). +verify_endpoint_domain_log_setup() -> + Ctx = test_util:start_couch(), + config:set_boolean("replicator", "verify_endpoint_domain_log", true, false), + meck:new(couch_log, [passthrough]), + Ctx. + +verify_endpoint_domain_log_teardown(Ctx) -> + meck:unload(), + config:delete("replicator", "verify_endpoint_domain_log", false), + test_util:stop_couch(Ctx). 
+ +verify_endpoint_domain_log_test_() -> + { + foreach, + fun verify_endpoint_domain_log_setup/0, + fun verify_endpoint_domain_log_teardown/1, + [ + ?TDEF_FE(t_dont_warn_when_valid_endpoint_domain_is_empty), + ?TDEF_FE(t_warn_when_replicate_with_invalid_endpoint_domain) + ] + }. + +t_dont_warn_when_valid_endpoint_domain_is_empty(_) -> + set_allowed_domain("[]"), + Rep = #rep{ + source = #httpdb{url = "https://foo.local"}, + target = #httpdb{url = "https://127.0.0.2"} + }, + meck:reset(couch_log), + ?assertEqual(ok, verify_endpoint_domain_log(Rep)), + ?assertEqual(0, meck:num_calls(couch_log, warning, 2)), + reset_allowed_domain(). + +t_warn_when_replicate_with_invalid_endpoint_domain(_) -> + set_allowed_domain("[\"example.com\", \"127.0.0.1\"]"), + Rep1 = #rep{ + source = #httpdb{url = "https://foo.local"}, + target = #httpdb{url = "https://127.0.0.1"} + }, + meck:reset(couch_log), + ?assertEqual(ok, verify_endpoint_domain_log(Rep1)), + ?assertEqual(1, meck:num_calls(couch_log, warning, 2)), + + meck:reset(couch_log), + Rep2 = #rep{ + source = #httpdb{url = "https://foo.local"}, + target = #httpdb{url = "https://127.0.0.2"} + }, + ?assertEqual(ok, verify_endpoint_domain_log(Rep2)), + ?assertEqual(2, meck:num_calls(couch_log, warning, 2)), + reset_allowed_domain(). + +set_allowed_domain(Domains) -> + config:set("replicator", "valid_endpoint_domain", Domains, false). + +reset_allowed_domain() -> + config:delete("replicator", "valid_endpoint_domain", false). + cacert_test() -> Old = ?CACERT_DEFAULT_TIMESTAMP, Now = erlang:monotonic_time(second),
