This is an automated email from the ASF dual-hosted git repository.
vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git
The following commit(s) were added to refs/heads/main by this push:
new 85e1fa791 Speed up internal replicator
85e1fa791 is described below
commit 85e1fa7913b5a564c1731ad86fbba294a9d9a16c
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Thu May 11 14:58:24 2023 -0400
Speed up internal replicator
Increase internal replicator default batch size and batch count. On systems
with slower (remote) disks, or a slower dist protocol, the internal replicator
can easily fall behind during a high rate of bulk_docs ingestion. For each
batch of 100 it had to sync security properties, make an rpc call to fetch
remote target sync checkpoint, open handles, fetch revs diff, etc. If there
are
changes to sync it would also incur the commit (fsync) delay as well. It
makes
sense to operate on slightly larger batches to increase performance. I
picked
500 as that's the default for the (external) replicator.
It also helps to keep replicating more than one batch once we've brought the
source and target data into the page cache, so opted to make it do 5 batches
per job run at most.
A survey of other batch sizes already in use by the internal replicator:
* Shard splitting uses a batch of 2000 [1].
* "Seed" system dbs replication uses 1000 [2].
There is some danger in creating too large of a rev list for highly
conflicted
documents. In that case we already have chunking for max revs [3] to keep
everything under 5000 revs per batch.
To be on the safe side both values are now configurable and can be adjusted
at
runtime.
To validate how this affects performance, I used a simple benchmarking utility:
https://gist.github.com/nickva/9a2a3665702a876ec06d3d720aa19b0a
With defaults:
```
fabric_bench:go().
...
*** DB fabric-bench-1683835787725432000 [{q,4},{n,3}] created. Inserting
100000 docs
* Add 100000 docs small, bs=1000 (Hz): 420
--- mem3_sync backlog: 76992
--- mem3_sync backlog: 82792
--- mem3_sync backlog: 107592
... snipped a few minutes of waiting for backlog to clear ...
--- mem3_sync backlog: 1500
--- mem3_sync backlog: 0
...
ok
```
With this PR
```
([email protected])3> fabric_bench:go().
...
*** DB fabric-bench-1683834758071419000 [{q,4},{n,3}] created. Inserting
100000 docs
* Add 100000 docs small, bs=1000 (Hz): 600
--- mem3_sync backlog: 0
...
ok
```
100000 doc insertion rate improved from 420 docs/sec to 600 with no
minutes-long sync backlog left over.
[1]
https://github.com/apache/couchdb/blob/a854625d74a5b3847b99c6f536187723821d0aae/src/mem3/src/mem3_reshard_job.erl#L52
[2]
https://github.com/apache/couchdb/blob/a854625d74a5b3847b99c6f536187723821d0aae/src/mem3/src/mem3_rpc.erl#L181
[3]
https://github.com/apache/couchdb/blob/a854625d74a5b3847b99c6f536187723821d0aae/src/mem3/src/mem3_rep.erl#L609
---
src/mem3/src/mem3_sync.erl | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/src/mem3/src/mem3_sync.erl b/src/mem3/src/mem3_sync.erl
index 179435965..f6997860d 100644
--- a/src/mem3/src/mem3_sync.erl
+++ b/src/mem3/src/mem3_sync.erl
@@ -45,10 +45,14 @@
-include_lib("mem3/include/mem3.hrl").
-include_lib("couch/include/couch_db.hrl").
+-define(DEFAULT_CONCURRENCY, 10).
+-define(DEFAULT_BATCH_SIZE, 500).
+-define(DEFAULT_BATCH_COUNT, 5).
+
-record(state, {
active = [],
count = 0,
- limit,
+ limit = ?DEFAULT_CONCURRENCY,
dict = dict:new(),
waiting = queue:new()
}).
@@ -87,10 +91,10 @@ remove_shard(Shard) ->
init([]) ->
process_flag(trap_exit, true),
- Concurrency = config:get("mem3", "sync_concurrency", "10"),
+ Concurrency = config:get_integer("mem3", "sync_concurrency",
?DEFAULT_CONCURRENCY),
gen_event:add_handler(mem3_events, mem3_sync_event, []),
initial_sync(),
- {ok, #state{limit = list_to_integer(Concurrency)}}.
+ {ok, #state{limit = Concurrency}}.
handle_call({push, Job}, From, State) ->
handle_cast({push, Job#job{pid = From}}, State);
@@ -236,7 +240,10 @@ start_push_replication(#job{name = Name, node = Node, pid
= From}) ->
true -> ok
end,
spawn_link(fun() ->
- case mem3_rep:go(Name, maybe_redirect(Node)) of
+ BatchSize = config:get_integer("mem3", "sync_batch_size",
?DEFAULT_BATCH_SIZE),
+ BatchCount = config:get_integer("mem3", "sync_batch_count",
?DEFAULT_BATCH_COUNT),
+ Opts = [{batch_size, BatchSize}, {batch_count, BatchCount}],
+ case mem3_rep:go(Name, maybe_redirect(Node), Opts) of
{ok, Pending} when Pending > 0 ->
exit({pending_changes, Pending});
_ ->