This is an automated email from the ASF dual-hosted git repository.

willholley pushed a commit to branch wh/connect_to
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 7595f91e48f06a90eb8b5a5c20e8fe85fdc9ceab
Author: Will Holley <[email protected]>
AuthorDate: Mon Apr 27 13:53:28 2026 +0100

    feat: Add connect_to override support for outbound requests.
    
    This adds a feature to the CouchDB replicator
    to override the connection target for specific host
    patterns (including wildcards) when making
    outbound requests. This is similar to the
    `--connect-to` option in curl.
    
    One use case is when requests need
    to be routed via a transparent SNI proxy e.g.
    for network egress monitoring and specifying
    overrides in /etc/hosts or similar isn't sufficient
    / possible (e.g. due to lack of wildcard support).
    
    It adds a new configuration option to
    specify the overrides:
    
    ```
    [replicator]
    connect_to = pattern:port:target:targetport,...
    ```
    
    The replicator resolves the configured host patterns
    to the alternative connection targets while
    preserving the request URL host (applies to
    regular requests and session-auth requests)
    and rewriting the port as necessary.
    
    If using https, the SNI header is added for the
    original Hostname.
    
    The `pattern` can be a hostname, including leading
    wildcards e.g. `*.example.com`. Targets must be
    IP addresses. IPv6 addresses are supported using
    bracketed notation e.g. `[2001:db8::1]`.
---
 rel/overlay/etc/default.ini                        |   8 +
 src/couch_replicator/priv/stats_descriptions.cfg   |   4 +
 .../src/couch_replicator_auth_session.erl          |   9 +-
 .../src/couch_replicator_connect.erl               | 267 +++++++++++++++++++++
 .../src/couch_replicator_httpc.erl                 |  11 +-
 .../src/couch_replicator_scheduler.erl             |   4 +
 .../couch_replicator_connect_override_tests.erl    |  74 ++++++
 .../test/eunit/couch_replicator_connect_tests.erl  | 195 +++++++++++++++
 src/docs/src/config/replicator.rst                 |  25 ++
 9 files changed, 593 insertions(+), 4 deletions(-)

diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index 79026dea3..ff148b271 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -716,6 +716,14 @@ partitioned||* = true
 ; Checkpoint interval
 ;checkpoint_interval = 30000
 
+; Connect_to override for replicator outbound requests
+; Format: pattern:port:target:targetport[,pattern:port:target:targetport,...]
+; Examples:
+;   *.example.com:443:proxy.internal:8443
+;   api.example.com:443:127.0.0.1:443
+;   *.example.com:443:[2001:db8::1]:443
+;connect_to =
+
 ; Some socket options that might boost performance in some scenarios:
 ;       {nodelay, boolean()}
 ;       {sndbuf, integer()}
diff --git a/src/couch_replicator/priv/stats_descriptions.cfg 
b/src/couch_replicator/priv/stats_descriptions.cfg
index 7edc87941..10821d885 100644
--- a/src/couch_replicator/priv/stats_descriptions.cfg
+++ b/src/couch_replicator/priv/stats_descriptions.cfg
@@ -142,3 +142,7 @@
     {type, counter},
     {desc, <<"number of times a worker is gracefully shut down">>}
 ]}.
+{[couch_replicator, connect_to_applied], [
+    {type, counter},
+    {desc, <<"number of times connect_to overrides were applied to replication requests">>}
+]}.
diff --git a/src/couch_replicator/src/couch_replicator_auth_session.erl 
b/src/couch_replicator/src/couch_replicator_auth_session.erl
index 93ddc834f..5d6e8792a 100644
--- a/src/couch_replicator/src/couch_replicator_auth_session.erl
+++ b/src/couch_replicator/src/couch_replicator_auth_session.erl
@@ -66,6 +66,7 @@
 ]).
 
 -include_lib("couch_replicator/include/couch_replicator_api_wrap.hrl").
+-include_lib("ibrowse/include/ibrowse.hrl").
 
 -type headers() :: [{string(), string()}].
 -type code() :: non_neg_integer().
@@ -311,20 +312,24 @@ refresh(#state{session_url = Url, user = User, pass = 
Pass} = State) ->
     {ok, string(), headers(), binary()} | {error, term()}.
 http_request(#state{httpdb_pool = Pool} = State, Url, Headers, Method, Body) ->
     Timeout = State#state.httpdb_timeout,
+
     Opts = [
         {response_format, binary},
         {inactivity_timeout, Timeout}
         | State#state.httpdb_ibrowse_options
     ],
+
+    {Url1, Opts1} = couch_replicator_connect:apply_connect_to(Url, Opts),
+
     {ok, Wrk} = couch_replicator_httpc_pool:get_worker(Pool),
     try
         Result = ibrowse:send_req_direct(
             Wrk,
-            Url,
+            Url1,
             Headers,
             Method,
             Body,
-            Opts,
+            Opts1,
             Timeout
         ),
         case Result of
diff --git a/src/couch_replicator/src/couch_replicator_connect.erl 
b/src/couch_replicator/src/couch_replicator_connect.erl
new file mode 100644
index 000000000..5a91b188d
--- /dev/null
+++ b/src/couch_replicator/src/couch_replicator_connect.erl
@@ -0,0 +1,267 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+%% This module implements the connect_to configuration option, which allows
+%% routing replication requests through proxies or rewriting connection ports.
+%% Similar to curl's --connect-to option.
+%%
+%% Flow:
+%% 1. init/0 - Called at startup to parse and cache connect_to config
+%% 2. apply_connect_to/2 - Called for each replication request:
+%%    a. Parse URL to extract host and port
+%%    b. resolve_connection/2 - Check if host:port matches any override pattern
+%%    c. If match found:
+%%       - Reconstruct URL with target port
+%%       - Add ibrowse connect_to option (target host)
+%%       - Add SNI option for HTTPS (original host)
+%%    d. Return modified URL and options
+%%
+%% Configuration format: host:port:target_host:target_port
+%% Example: *.example.com:443:proxy.internal:8443
+%%
+%% Pattern matching:
+%% - Exact hostnames: foo.example.com
+%% - Leading wildcards: *.example.com (matches sub.example.com, not example.com)
+%% - Case-insensitive
+%% - Port must match exactly
+
+-module(couch_replicator_connect).
+
+-include_lib("ibrowse/include/ibrowse.hrl").
+
+-export([
+    init/0,
+    apply_connect_to/2
+]).
+
+-ifdef(TEST).
+-export([
+    parse_config/1,
+    match_host_pattern/2,
+    get_overrides/0,
+    resolve_connection/2,
+    is_ip_address/1
+]).
+-endif.
+
+-type connect_to_override() :: {
+    PatternHost :: binary(),
+    PatternPort :: integer(),
+    TargetHost :: binary(),
+    TargetPort :: integer()
+}.
+
+-define(CONNECT_TO_KEY, {?MODULE, connect_to}).
+
+%% Read the [replicator] connect_to setting, parse it and cache the resulting
+%% overrides in persistent_term under ?CONNECT_TO_KEY. An unset option caches
+%% an empty list. Safe to call again to refresh the cache.
+-spec init() -> ok.
+init() ->
+    Configured = config:get("replicator", "connect_to", undefined),
+    ok = persistent_term:put(?CONNECT_TO_KEY, overrides_from_config(Configured)).
+
+%% Turn the raw config value into a list of overrides.
+overrides_from_config(undefined) -> [];
+overrides_from_config(ConfigStr) -> parse_config(ConfigStr).
+
+%% Look up the connection override for a Host:Port pair.
+%%
+%% Host is taken as a string (apply_connect_to/2 passes the host of the parsed
+%% URL) and converted to a binary, because overrides are stored as binaries.
+%% On a match the target host is converted back to a string and returned as
+%% {TargetHost, TargetPort, OriginalHost}; otherwise not_found is returned.
+-spec resolve_connection(string(), integer()) ->
+    {string(), integer(), string()} | not_found.
+resolve_connection(Host, Port) ->
+    HostBin = list_to_binary(Host),
+    to_resolution(find_override(HostBin, Port, get_overrides()), Host).
+
+%% Shape the find_override/3 result for callers.
+to_resolution(not_found, _OriginalHost) ->
+    not_found;
+to_resolution({ok, {TargetHost, TargetPort}}, OriginalHost) ->
+    {binary_to_list(TargetHost), TargetPort, OriginalHost}.
+
+%% Return the overrides cached by init/0. When the cache has not been
+%% populated yet, the config value is parsed on the fly (and not cached).
+-spec get_overrides() -> [connect_to_override()].
+get_overrides() ->
+    cached_or_config(persistent_term:get(?CONNECT_TO_KEY, not_initialized)).
+
+cached_or_config(not_initialized) ->
+    % init/0 has not run yet, fall back to reading config
+    case config:get("replicator", "connect_to", undefined) of
+        undefined -> [];
+        ConfigStr -> parse_config(ConfigStr)
+    end;
+cached_or_config(Overrides) ->
+    Overrides.
+
+%% Split a comma separated connect_to config string into entries and keep
+%% only those that parse_entry/1 accepts.
+-spec parse_config(string()) -> [connect_to_override()].
+parse_config(ConfigStr) ->
+    Entries = binary:split(list_to_binary(ConfigStr), <<",">>, [global, trim]),
+    [Override || Entry <- Entries, {true, Override} <- [parse_entry(Entry)]].
+
+% Parse a single entry of the form HOST:PORT:TARGET:TARGET_PORT (the same
+% layout as curl --connect-to), for example:
+%   *.example.com:443:192.168.1.1:8443
+%   *.example.com:443:[2001:db8::1]:8443
+% An IPv6 target must be wrapped in brackets. Returns {true, Override} for a
+% valid entry and false otherwise, which is the shape lists:filtermap/2
+% expects. Invalid non-empty entries are logged through invalid_entry/2.
+parse_entry(<<>>) ->
+    false;
+parse_entry(Entry0) ->
+    Entry = string:trim(Entry0),
+    % HOST may not contain ':' or '[' (so IPv6 patterns are rejected), TARGET
+    % is either colon-free or a bracketed group, and all captures are non-empty
+    Regex = "^([^:\\[]+):([0-9]+):([^:]+|\\[[^\\]]+\\]):([0-9]+)$",
+    Captured = re:run(Entry, Regex, [{capture, all_but_first, binary}]),
+    entry_result(Entry, Captured).
+
+% Map the regex outcome for Entry to the filtermap-style result.
+entry_result(Entry, nomatch) ->
+    invalid_entry(Entry, "expected format HOST:PORT:TARGET:TARGET_PORT");
+entry_result(Entry, {match, [PatternHost, PatternPortBin, TargetHost, TargetPortBin]}) ->
+    case parse_four_part(PatternHost, PatternPortBin, TargetHost, TargetPortBin) of
+        {error, Reason} ->
+            invalid_entry(Entry, Reason);
+        {ok, Override} ->
+            {true, Override}
+    end.
+
+%% Convert the four captured fields of an entry into an override tuple.
+%%
+%% The caller's regex guarantees non-empty hosts and all-digit ports, but not
+%% that the ports are usable, so both are checked against the range 1..65535.
+%% Returns {ok, Override} or {error, Reason}, where Reason is a string that
+%% the caller logs.
+-spec parse_four_part(binary(), binary(), binary(), binary()) ->
+    {ok, connect_to_override()} | {error, string()}.
+parse_four_part(PatternHost, PatternPortBin, TargetHost0, TargetPortBin) ->
+    PatternPort = binary_to_integer(PatternPortBin),
+    TargetPort = binary_to_integer(TargetPortBin),
+    case {is_valid_port(PatternPort), is_valid_port(TargetPort)} of
+        {true, true} ->
+            % Strip brackets from IPv6 addresses in targets
+            TargetHost = string:trim(TargetHost0, both, "[]"),
+            {ok, {PatternHost, PatternPort, TargetHost, TargetPort}};
+        {false, _} ->
+            {error, "pattern port must be between 1 and 65535"};
+        {true, false} ->
+            {error, "target port must be between 1 and 65535"}
+    end.
+
+%% True when Port is a usable TCP port number.
+is_valid_port(Port) ->
+    Port >= 1 andalso Port =< 65535.
+
+% Log a warning about a rejected connect_to entry and return false so the
+% caller can hand the result straight back to lists:filtermap/2.
+invalid_entry(Entry, Reason) ->
+    Format = "Invalid connect_to entry: ~ts (~s)",
+    couch_log:warning(Format, [Entry, Reason]),
+    false.
+
+%% Return the target of the first override whose port equals Port and whose
+%% host pattern matches Host, or not_found when no override applies.
+%% Overrides are tried in list order, so earlier entries win.
+-spec find_override(binary(), integer(), [connect_to_override()]) ->
+    {ok, {binary(), integer()}} | not_found.
+find_override(_Host, _Port, []) ->
+    not_found;
+find_override(Host, Port, [{Pattern, PatternPort, Target, TargetPort} | Rest]) ->
+    % the port is compared first so the host is only matched when needed
+    case PatternPort =:= Port andalso match_host_pattern(Host, Pattern) of
+        true ->
+            {ok, {Target, TargetPort}};
+        false ->
+            find_override(Host, Port, Rest)
+    end;
+find_override(Host, Port, [_Other | Rest]) ->
+    find_override(Host, Port, Rest).
+
+% Host Pattern Matching
+%
+% Compares Host with Pattern case-insensitively. A pattern is either an exact
+% hostname or starts with a single leading wildcard:
+%   - *.example.com matches any.subdomain.example.com
+%   - *.example.com does NOT match example.com or .example.com: the wildcard
+%     has to stand for at least one character
+%
+% Not supported:
+%   - middle wildcards: sub.*.example.com
+%   - trailing wildcards: example.*
+%   - multiple wildcards: *.*.example.com
+-spec match_host_pattern(binary(), binary()) -> boolean().
+match_host_pattern(Host, Pattern) when is_binary(Host), is_binary(Pattern) ->
+    % DNS names are case-insensitive
+    match_host_pattern_impl(string:lowercase(Host), string:lowercase(Pattern)).
+
+match_host_pattern_impl(Host, <<"*", Suffix/binary>>) ->
+    % PrefixSize is the number of bytes the wildcard would have to cover.
+    % It must be positive, which also guarantees that binary:part/3 below
+    % stays within Host.
+    SuffixSize = byte_size(Suffix),
+    PrefixSize = byte_size(Host) - SuffixSize,
+    PrefixSize > 0 andalso binary:part(Host, PrefixSize, SuffixSize) =:= Suffix;
+match_host_pattern_impl(Host, Pattern) ->
+    Host =:= Pattern.
+
+%% True when Host, with any surrounding square brackets removed, is accepted
+%% by inet:parse_address/1 as an IPv4 or IPv6 address.
+-spec is_ip_address(string()) -> boolean().
+is_ip_address(Host) when is_list(Host) ->
+    Bare = string:trim(Host, both, "[]"),
+    parsed_ok(inet:parse_address(Bare)).
+
+parsed_ok({ok, _Address}) -> true;
+parsed_ok(_NotAnAddress) -> false.
+
+%% Apply a matching connect_to override to a request.
+%%
+%% Url is parsed with ibrowse_lib:parse_url/1. If it cannot be parsed, or no
+%% override matches its host and port, Url and IbrowseOptions are returned
+%% untouched. Otherwise the URL is rebuilt with the target port and the
+%% options gain the connect_to and SNI settings.
+-spec apply_connect_to(string(), list()) -> {string(), list()}.
+apply_connect_to(Url, IbrowseOptions) ->
+    case ibrowse_lib:parse_url(Url) of
+        #url{host = Host, port = Port} = ParsedUrl ->
+            Resolved = resolve_connection(Host, Port),
+            override_request(Resolved, ParsedUrl, Url, IbrowseOptions);
+        {error, _} ->
+            {Url, IbrowseOptions}
+    end.
+
+%% Build the {Url, Options} pair for a resolve_connection/2 result.
+override_request(not_found, _ParsedUrl, Url, IbrowseOptions) ->
+    {Url, IbrowseOptions};
+override_request({TargetHost, TargetPort, OriginalHost}, ParsedUrl, _Url, IbrowseOptions) ->
+    NewUrl = reconstruct_url(ParsedUrl, TargetPort),
+    NewOptions = apply_override_options(
+        IbrowseOptions,
+        ParsedUrl#url.protocol,
+        TargetHost,
+        OriginalHost
+    ),
+    {NewUrl, NewOptions}.
+
+%% Rebuild a URL string from a parsed #url{} record, replacing its port with
+%% NewPort. The scheme, host and path come from the record unchanged, except
+%% that a host containing ':' (an IPv6 literal stored without brackets) is
+%% wrapped in brackets again so the port separator stays unambiguous.
+%%
+%% NOTE(review): this relies on #url.path carrying the query string as well
+%% as the path, and on credentials already being absent from the URL (they
+%% are not re-added here) - confirm against ibrowse_lib:parse_url/1 and the
+%% callers.
+-spec reconstruct_url(#url{}, integer()) -> string().
+reconstruct_url(#url{protocol = Protocol, host = Host, path = Path}, NewPort) ->
+    Scheme = atom_to_list(Protocol),
+    PortStr = ":" ++ integer_to_list(NewPort),
+    Scheme ++ "://" ++ url_host(Host) ++ PortStr ++ Path.
+
+%% Format a host for use in a URL authority: bracket bare IPv6 literals.
+url_host([$[ | _] = Host) ->
+    % already bracketed
+    Host;
+url_host(Host) ->
+    case lists:member($:, Host) of
+        true -> "[" ++ Host ++ "]";
+        false -> Host
+    end.
+
+%% Extend the ibrowse options for an overridden connection.
+%%
+%% Logs the override at debug level, bumps the connect_to_applied counter,
+%% prepends {connect_to, TargetHost} and sets server_name_indication: the
+%% original host for https requests to a hostname, 'disable' in every other
+%% case (non-https protocols or an IP address as original host).
+-spec apply_override_options(list(), atom(), string(), string()) -> list().
+apply_override_options(Opts, Protocol, TargetHost, OriginalHost) ->
+    couch_log:debug(
+        "connect_to override (~p): ~s -> ~s",
+        [Protocol, OriginalHost, TargetHost]
+    ),
+    couch_stats:increment_counter([couch_replicator, connect_to_applied]),
+    WithTarget = [{connect_to, TargetHost} | Opts],
+    OriginalIsIp = is_ip_address(OriginalHost),
+    add_sni_option(WithTarget, sni_host(Protocol, OriginalIsIp, OriginalHost)).
+
+%% Pick the server_name_indication value for the request.
+sni_host(https, false, OriginalHost) -> OriginalHost;
+sni_host(_Protocol, _OriginalIsIp, _OriginalHost) -> disable.
+
+%% Set server_name_indication to Host inside the ssl_options entry of
+%% IbrowseOpts, replacing any previous value and keeping the other ssl
+%% options. An ssl_options entry is added when none exists.
+-spec add_sni_option(list(), string() | disable) -> list().
+add_sni_option(IbrowseOpts, Host) ->
+    Existing = proplists:get_value(ssl_options, IbrowseOpts, []),
+    WithoutSni = proplists:delete(server_name_indication, Existing),
+    Updated = {ssl_options, [{server_name_indication, Host} | WithoutSni]},
+    lists:keystore(ssl_options, 1, IbrowseOpts, Updated).
diff --git a/src/couch_replicator/src/couch_replicator_httpc.erl 
b/src/couch_replicator/src/couch_replicator_httpc.erl
index fe81e65ea..7f4f43afd 100644
--- a/src/couch_replicator/src/couch_replicator_httpc.erl
+++ b/src/couch_replicator/src/couch_replicator_httpc.erl
@@ -111,7 +111,7 @@ send_ibrowse_req(#httpdb{headers = BaseHeaders} = HttpDb0, 
Params) ->
     UserHeaders = get_value(headers, Params, []),
     Headers1 = merge_headers(BaseHeaders, UserHeaders),
     {Headers2, HttpDb} = couch_replicator_auth:update_headers(HttpDb0, 
Headers1),
-    Url = full_url(HttpDb, Params),
+    Url0 = full_url(HttpDb, Params),
     Body = get_value(body, Params, []),
     case get_value(path, Params) == "_changes" of
         true ->
@@ -131,7 +131,7 @@ send_ibrowse_req(#httpdb{headers = BaseHeaders} = HttpDb0, 
Params) ->
             {User, Pass} when is_list(User), is_list(Pass) ->
                 [{basic_auth, {User, Pass}}]
         end,
-    IbrowseOptions =
+    IbrowseOptions0 =
         BasicAuthOpts ++
             [
                 {response_format, binary},
@@ -142,6 +142,13 @@ send_ibrowse_req(#httpdb{headers = BaseHeaders} = HttpDb0, 
Params) ->
                     HttpDb#httpdb.ibrowse_options
                 )
             ],
+
+    % Apply connect_to override and SNI configuration
+    {Url, IbrowseOptions} = couch_replicator_connect:apply_connect_to(
+        Url0,
+        IbrowseOptions0
+    ),
+
     backoff_before_request(Worker, HttpDb, Params),
     Response = ibrowse:send_req_direct(
         Worker, Url, Headers2, Method, Body, IbrowseOptions, Timeout
diff --git a/src/couch_replicator/src/couch_replicator_scheduler.erl 
b/src/couch_replicator/src/couch_replicator_scheduler.erl
index 9eb0b4723..5a414ce3a 100644
--- a/src/couch_replicator/src/couch_replicator_scheduler.erl
+++ b/src/couch_replicator/src/couch_replicator_scheduler.erl
@@ -229,6 +229,7 @@ init(_) ->
     ],
     ?MODULE = ets:new(?MODULE, EtsOpts),
     ok = couch_replicator_share:init(),
+    ok = couch_replicator_connect:init(),
     ok = config:listen_for_changes(?MODULE, nil),
     Interval = get_interval_msec(),
     MaxJobs = config:get_integer("replicator", "max_jobs", ?DEFAULT_MAX_JOBS),
@@ -385,6 +386,9 @@ handle_config_change("replicator", "interval", V, _, S) ->
 handle_config_change("replicator", "max_history", V, _, S) ->
     ok = gen_server:cast(?MODULE, {set_max_history, list_to_integer(V)}),
     {ok, S};
+handle_config_change("replicator", "connect_to", _, _, S) ->
+    ok = couch_replicator_connect:init(),
+    {ok, S};
 handle_config_change("replicator.shares", Key, deleted, _, S) ->
     ok = gen_server:cast(?MODULE, {reset_shares, list_to_binary(Key)}),
     {ok, S};
diff --git 
a/src/couch_replicator/test/eunit/couch_replicator_connect_override_tests.erl 
b/src/couch_replicator/test/eunit/couch_replicator_connect_override_tests.erl
new file mode 100644
index 000000000..787557560
--- /dev/null
+++ 
b/src/couch_replicator/test/eunit/couch_replicator_connect_override_tests.erl
@@ -0,0 +1,74 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_replicator_connect_override_tests).
+
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("ibrowse/include/ibrowse.hrl").
+
+% EUnit generator: runs each connect_to replication test inside its own
+% setup/teardown fixture.
+connect_to_replication_test_() ->
+    Tests = [
+        ?TDEF_FE(should_replicate_with_connect_to_override)
+    ],
+    Fixture = {foreach, fun setup/0, fun teardown/1, Tests},
+    {"connect_to override replication tests", Fixture}.
+
+% Fixture setup: delegates to the shared replicator test helper and returns
+% its context, which the test destructures as {Ctx, {Source, Target}}.
+setup() ->
+    couch_replicator_test_helper:test_setup().
+
+% Fixture teardown: removes the connect_to override set by the test, then
+% hands the context back to the shared helper.
+teardown(Ctx) ->
+    config:delete("replicator", "connect_to", false),
+    % The test filled the override cache by calling init/0 explicitly, so
+    % refresh it here as well instead of leaving the stale override cached.
+    % NOTE(review): assumes config:delete/3 has taken effect by the time it
+    % returns - confirm.
+    couch_replicator_connect:init(),
+    couch_replicator_test_helper:test_teardown(Ctx).
+
+% Replicates from a source URL whose host has been swapped for example.com
+% and checks that a connect_to override pointing example.com back at the
+% real source host makes the replication work.
+should_replicate_with_connect_to_override({_Ctx, {Source, Target}}) ->
+    create_doc(Source),
+
+    SourceUrl = db_url(Source),
+    #url{host = SourceHost, port = SourcePort} =
+        ibrowse_lib:parse_url(binary_to_list(SourceUrl)),
+
+    % configure connect_to override: example.com:port -> actual source host:port
+    OverrideConfig =
+        "example.com:" ++ integer_to_list(SourcePort) ++ ":" ++
+            SourceHost ++ ":" ++ integer_to_list(SourcePort),
+    config:set("replicator", "connect_to", OverrideConfig, false),
+
+    % reinitialize connect_to cache to pick up the new config
+    couch_replicator_connect:init(),
+
+    % replace source host with example.com; the host is substituted as a
+    % literal (first occurrence only) so that characters such as "." are not
+    % interpreted as regular expression syntax
+    OverrideUrl = binary:replace(
+        SourceUrl, list_to_binary(SourceHost), <<"example.com">>
+    ),
+
+    % replicate using overridden URL
+    replicate(OverrideUrl, db_url(Target)),
+
+    % verify replication succeeded by comparing source and target
+    ?assertEqual(ok, compare(Source, Target)).
+
+% Store a single document with id "test-doc" in DbName as admin; crashes
+% unless the update returns {ok, _}.
+create_doc(DbName) ->
+    Props = [{<<"_id">>, <<"test-doc">>}, {<<"value">>, 42}],
+    Doc = couch_doc:from_json_obj({Props}),
+    {ok, _} = fabric:update_doc(DbName, Doc, [?ADMIN_CTX]).
+
+% URL of DbName as built by the shared test helper (used as a binary by the
+% test above).
+db_url(DbName) ->
+    couch_replicator_test_helper:cluster_db_url(DbName).
+
+% Compare the Source and Target databases through the shared test helper;
+% the test expects ok when they agree.
+compare(Source, Target) ->
+    couch_replicator_test_helper:cluster_compare_dbs(Source, Target).
+
+% Run a replication from SourceUrl to TargetUrl through the shared test
+% helper.
+replicate(SourceUrl, TargetUrl) ->
+    couch_replicator_test_helper:replicate(SourceUrl, TargetUrl).
diff --git a/src/couch_replicator/test/eunit/couch_replicator_connect_tests.erl 
b/src/couch_replicator/test/eunit/couch_replicator_connect_tests.erl
new file mode 100644
index 000000000..1331d7c9d
--- /dev/null
+++ b/src/couch_replicator/test/eunit/couch_replicator_connect_tests.erl
@@ -0,0 +1,195 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_replicator_connect_tests).
+
+-include_lib("couch/include/couch_eunit.hrl").
+
+% Host pattern matching: wildcard, exact and case-insensitive comparisons.
+match_pattern_test_() ->
+    Match = fun couch_replicator_connect:match_host_pattern/2,
+    [
+        % wildcard matching
+        ?_assert(Match(<<"account.example.test">>, <<"*.example.test">>)),
+        ?_assertNot(Match(<<"example.test">>, <<"*.example.test">>)),
+        % exact matching
+        ?_assert(Match(<<"exact.example.test">>, <<"exact.example.test">>)),
+        ?_assertNot(Match(<<"other.example.test">>, <<"exact.example.test">>)),
+        % case insensitive, for wildcard and exact patterns alike
+        ?_assert(Match(<<"account.example.test">>, <<"*.Example.Test">>)),
+        ?_assert(Match(<<"ACCOUNT.EXAMPLE.TEST">>, <<"*.example.test">>)),
+        ?_assert(Match(<<"Exact.Example.Test">>, <<"exact.example.test">>)),
+        ?_assert(Match(<<"exact.example.test">>, <<"Exact.Example.Test">>))
+    ].
+
+% Config parsing: accepted entry formats and the entries that are rejected.
+parse_config_test_() ->
+    Parse = fun couch_replicator_connect:parse_config/1,
+    [
+        % valid 4-part format
+        ?_assertEqual(
+            [{<<"*.example.test">>, 443, <<"proxy.internal">>, 8443}],
+            Parse("*.example.test:443:proxy.internal:8443")
+        ),
+        % multiple entries
+        ?_assertEqual(
+            2,
+            length(
+                Parse(
+                    "*.example.test:443:proxy.internal:8443, exact.example.test:5984:127.0.0.1:5984"
+                )
+            )
+        ),
+        % empty config
+        ?_assertEqual([], Parse("")),
+        % IPv6 targets with brackets
+        ?_assertEqual(
+            [{<<"*.example.test">>, 443, <<"2001:db8::1">>, 8443}],
+            Parse("*.example.test:443:[2001:db8::1]:8443")
+        ),
+        ?_assertEqual(
+            [{<<"*.example.test">>, 5984, <<"::1">>, 5984}],
+            Parse("*.example.test:5984:[::1]:5984")
+        ),
+        % invalid: too few parts
+        ?_assertEqual([], Parse("*.example.test:443:proxy.internal")),
+        % invalid: IPv6 patterns are rejected
+        ?_assertEqual([], Parse("[2001:db8::1]:443:proxy.internal:8443")),
+        % invalid: non-numeric port
+        ?_assertEqual([], Parse("*.example.test:abc:proxy.internal:8443")),
+        % invalid: empty pattern
+        ?_assertEqual([], Parse(":443:proxy.internal:8443")),
+        % invalid: empty target
+        ?_assertEqual([], Parse("*.example.test:443::8443"))
+    ].
+
+% Override resolution against a mocked [replicator] connect_to setting.
+% Setup mocks config:get/3 and fills the override cache via init/0; cleanup
+% erases the cached overrides again so they cannot leak into other tests,
+% then unloads the mock.
+resolve_connection_test_() ->
+    {setup,
+        fun() ->
+            meck:new(config, [passthrough]),
+            meck:expect(config, get, fun
+                ("replicator", "connect_to", _) ->
+                    "ipv6.example.test:443:[2001:db8::1]:8443,"
+                    "foo.bar.com:5984:127.0.0.1:5984,"
+                    "*.example.test:443:proxy.internal:8443";
+                (_, _, Default) ->
+                    Default
+            end),
+            couch_replicator_connect:init()
+        end,
+        fun(_) ->
+            % same key as ?CONNECT_TO_KEY in couch_replicator_connect
+            persistent_term:erase({couch_replicator_connect, connect_to}),
+            meck:unload(config)
+        end,
+        [
+            % wildcard pattern matches with correct port
+            ?_assertEqual(
+                {"proxy.internal", 8443, "account.example.test"},
+                couch_replicator_connect:resolve_connection("account.example.test", 443)
+            ),
+            ?_assertEqual(
+                {"proxy.internal", 8443, "sub.domain.example.test"},
+                couch_replicator_connect:resolve_connection("sub.domain.example.test", 443)
+            ),
+            % wildcard pattern does not match with wrong port
+            ?_assertEqual(
+                not_found,
+                couch_replicator_connect:resolve_connection("account.example.test", 80)
+            ),
+            % exact match
+            ?_assertEqual(
+                {"127.0.0.1", 5984, "foo.bar.com"},
+                couch_replicator_connect:resolve_connection("foo.bar.com", 5984)
+            ),
+            % IPv6 target
+            ?_assertEqual(
+                {"2001:db8::1", 8443, "ipv6.example.test"},
+                couch_replicator_connect:resolve_connection("ipv6.example.test", 443)
+            ),
+            % no match - wrong host
+            ?_assertEqual(
+                not_found,
+                couch_replicator_connect:resolve_connection("other.test", 443)
+            ),
+            % no match - wrong port
+            ?_assertEqual(
+                not_found,
+                couch_replicator_connect:resolve_connection("foo.bar.com", 443)
+            ),
+            % case insensitive matching
+            ?_assertEqual(
+                {"proxy.internal", 8443, "Account.Example.Test"},
+                couch_replicator_connect:resolve_connection("Account.Example.Test", 443)
+            )
+        ]}.
+
+% is_ip_address/1 accepts IPv4 and IPv6 literals and rejects hostnames.
+is_ip_address_test_() ->
+    Addresses = [
+        % IPv4
+        "127.0.0.1",
+        "192.168.1.1",
+        "10.0.0.1",
+        % IPv6
+        "::1",
+        "2001:db8::1",
+        "fe80::1"
+    ],
+    Hostnames = ["localhost", "example.com", "sub.example.com"],
+    [?_assert(couch_replicator_connect:is_ip_address(A)) || A <- Addresses] ++
+        [?_assertNot(couch_replicator_connect:is_ip_address(H)) || H <- Hostnames].
diff --git a/src/docs/src/config/replicator.rst 
b/src/docs/src/config/replicator.rst
index bca107ae3..b34264fc6 100644
--- a/src/docs/src/config/replicator.rst
+++ b/src/docs/src/config/replicator.rst
@@ -279,6 +279,31 @@ Replicator Database Configuration
         on error; however, in some cases it may be useful to prevent spending
         time attempting to call ``_bulk_get`` altogether.
 
+    .. config:option:: connect_to :: Connection routing overrides for replication requests
+
+        Comma delimited ``host:port:target_host:target_port`` mappings to use for
+        replicator requests. This is useful for cases where outbound HTTP requests
+        must be made through a transparent proxy or when port rewriting is needed.
+        
+        This feature is similar to curl's `--connect-to`_ option.
+
+        ``host`` may be either an exact hostname such as ``foo.bar.com`` or a
+        leading wildcard for subdomains such as ``*.example.test``. Wildcards
+        are supported only at the start of the pattern.
+
+        ``port`` is the port number in the source URL (e.g., 443 for HTTPS).
+
+        ``target_host`` may be an IPv4 address, a hostname, or a bracketed IPv6
+        address. Brackets avoid ambiguity with the ``:`` separator.
+
+        ``target_port`` is the port number to connect to on the target host.
+
+        This allows both host and port rewriting. For example, to route HTTPS
+        requests on port 443 to a local proxy on port 9443::
+
+            [replicator]
+            connect_to = foo.bar.com:443:proxy.internal:8443,*.example.test:443:127.0.0.1:9443,*.test_ip6.local:443:[2001:db8::20]:8443
+
     .. config:option:: cert_file :: Path to user PEM certificate file
 
         Path to a file containing the user's certificate::

Reply via email to