This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch speed-up-internal-replicator in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 827d220ba4ed898502067be6ffd650c065cefdfe Author: Nick Vatamaniuc <[email protected]> AuthorDate: Thu May 11 14:58:24 2023 -0400 Speed up internal replicator Increase internal replicator default batch size and batch count. On systems with slower (remote) disks, or a slower dist protocol, the internal replicator can easily fall behind during a high rate of bulk_docs ingestion. For each batch of 100 it had to sync security properties, make an rpc call to fetch remote target sync checkpoint, open handles, fetch revs diff, etc. If there are changes to sync it would also incur the commit (fsync) delay. It makes sense to operate on slightly larger batches to increase performance. I picked 500 as that's the default for the (external) replicator. It also helps to keep replicating more than one batch once we've brought the source and target data into the page cache, so I opted to make it do 5 batches per job run at most. A survey of other batch sizes already in use by the internal replicator: * Shard splitting uses a batch of 2000 [1]. * "Seed" system dbs replication uses 1000 [2]. There is some danger in creating too large of a rev list for highly conflicted documents. In that case we already have chunking for max revs [3] to keep everything under 5000 revs per batch. To be on the safe side both values are now configurable and can be adjusted at runtime. To validate how this affects performance I used a simple benchmarking utility: https://gist.github.com/nickva/9a2a3665702a876ec06d3d720aa19b0a With defaults: ``` fabric_bench:go(). ... *** DB fabric-bench-1683835787725432000 [{q,4},{n,3}] created. Inserting 100000 docs * Add 100000 docs small, bs=1000 (Hz): 420 --- mem3_sync backlog: 76992 --- mem3_sync backlog: 82792 --- mem3_sync backlog: 107592 ... snipped a few minutes of waiting for backlog to clear ... --- mem3_sync backlog: 1500 --- mem3_sync backlog: 0 ... ok ``` With this PR ``` ([email protected])3> fabric_bench:go(). ... 
*** DB fabric-bench-1683834758071419000 [{q,4},{n,3}] created. Inserting 100000 docs * Add 100000 docs small, bs=1000 (Hz): 600 --- mem3_sync backlog: 0 ... ok ``` 100000 doc insertion rate improved from 420 docs/sec to 600 with no minutes long sync backlog left over. [1] https://github.com/apache/couchdb/blob/a854625d74a5b3847b99c6f536187723821d0aae/src/mem3/src/mem3_reshard_job.erl#L52 [2] https://github.com/apache/couchdb/blob/a854625d74a5b3847b99c6f536187723821d0aae/src/mem3/src/mem3_rpc.erl#L181 [3] https://github.com/apache/couchdb/blob/a854625d74a5b3847b99c6f536187723821d0aae/src/mem3/src/mem3_rep.erl#L609 --- src/mem3/src/mem3_sync.erl | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/mem3/src/mem3_sync.erl b/src/mem3/src/mem3_sync.erl index 179435965..f6997860d 100644 --- a/src/mem3/src/mem3_sync.erl +++ b/src/mem3/src/mem3_sync.erl @@ -45,10 +45,14 @@ -include_lib("mem3/include/mem3.hrl"). -include_lib("couch/include/couch_db.hrl"). +-define(DEFAULT_CONCURRENCY, 10). +-define(DEFAULT_BATCH_SIZE, 500). +-define(DEFAULT_BATCH_COUNT, 5). + -record(state, { active = [], count = 0, - limit, + limit = ?DEFAULT_CONCURRENCY, dict = dict:new(), waiting = queue:new() }). @@ -87,10 +91,10 @@ remove_shard(Shard) -> init([]) -> process_flag(trap_exit, true), - Concurrency = config:get("mem3", "sync_concurrency", "10"), + Concurrency = config:get_integer("mem3", "sync_concurrency", ?DEFAULT_CONCURRENCY), gen_event:add_handler(mem3_events, mem3_sync_event, []), initial_sync(), - {ok, #state{limit = list_to_integer(Concurrency)}}. + {ok, #state{limit = Concurrency}}. 
handle_call({push, Job}, From, State) -> handle_cast({push, Job#job{pid = From}}, State); @@ -236,7 +240,10 @@ start_push_replication(#job{name = Name, node = Node, pid = From}) -> true -> ok end, spawn_link(fun() -> - case mem3_rep:go(Name, maybe_redirect(Node)) of + BatchSize = config:get_integer("mem3", "sync_batch_size", ?DEFAULT_BATCH_SIZE), + BatchCount = config:get_integer("mem3", "sync_batch_count", ?DEFAULT_BATCH_COUNT), + Opts = [{batch_size, BatchSize}, {batch_count, BatchCount}], + case mem3_rep:go(Name, maybe_redirect(Node), Opts) of {ok, Pending} when Pending > 0 -> exit({pending_changes, Pending}); _ ->
