New appctl command 'cluster/set-backlog-threshold' to configure thresholds on the backlog of raft jsonrpc connections. It could be used, for example, in some extreme conditions where the size of a database is expected to be very large, i.e. comparable with the default 4GB threshold.
Acked-by: Dumitru Ceara <dce...@redhat.com> Signed-off-by: Ilya Maximets <i.maxim...@ovn.org> --- NEWS | 1 + ovsdb/ovsdb-server.1.in | 5 ++++ ovsdb/raft.c | 55 +++++++++++++++++++++++++++++++++++++---- 3 files changed, 56 insertions(+), 5 deletions(-) diff --git a/NEWS b/NEWS index ebdf8758b..c0819bf93 100644 --- a/NEWS +++ b/NEWS @@ -8,6 +8,7 @@ Post-v2.14.0 after every DB compaction back to OS. Disabled by default. * Maximum backlog on RAFT connections limited to 500 messages or 4GB. Once threshold reached, connection is dropped (and re-established). + Use the 'cluster/set-backlog-threshold' command to change limits. - DPDK: * Removed support for vhost-user dequeue zero-copy. - The environment variable OVS_UNBOUND_CONF, if set, is now used diff --git a/ovsdb/ovsdb-server.1.in b/ovsdb/ovsdb-server.1.in index 07a36cc7d..5a7f3ba13 100644 --- a/ovsdb/ovsdb-server.1.in +++ b/ovsdb/ovsdb-server.1.in @@ -381,6 +381,11 @@ This command must be executed on the leader. It initiates the change to the cluster. To see if the change takes effect (committed), use \fBcluster/status\fR to show the current setting. Once a change is committed, it persists at server restarts. +.IP "\fBcluster/set\-backlog\-threshold \fIdb\fR \fIn_msgs\fR \fIn_bytes\fR" +Sets the backlog limits for \fIdb\fR's RAFT connections to a maximum of +\fIn_msgs\fR messages or \fIn_bytes\fR bytes. If the backlog on one of the +connections reaches the limit, it will be disconnected (and re-established). +Values are checked only if the backlog contains more than 50 messages. . .so lib/vlog-unixctl.man .so lib/memory-unixctl.man diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 67c714ff4..760dfca6d 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -305,6 +305,12 @@ struct raft { bool ever_had_leader; /* There has been leader elected since the raft is initialized, meaning it is ever connected. */ + + /* Connection backlog limits. 
*/ +#define DEFAULT_MAX_BACKLOG_N_MSGS 500 +#define DEFAULT_MAX_BACKLOG_N_BYTES UINT32_MAX + size_t conn_backlog_max_n_msgs; /* Number of messages. */ + size_t conn_backlog_max_n_bytes; /* Number of bytes. */ }; /* All Raft structures. */ @@ -412,6 +418,9 @@ raft_alloc(void) raft->election_timer = ELECTION_BASE_MSEC; + raft->conn_backlog_max_n_msgs = DEFAULT_MAX_BACKLOG_N_MSGS; + raft->conn_backlog_max_n_bytes = DEFAULT_MAX_BACKLOG_N_BYTES; + return raft; } @@ -925,9 +934,6 @@ raft_reset_ping_timer(struct raft *raft) raft->ping_timeout = time_msec() + raft->election_timer / 3; } -#define RAFT_MAX_BACKLOG_N_MSGS 500 -#define RAFT_MAX_BACKLOG_BYTES UINT32_MAX - static void raft_add_conn(struct raft *raft, struct jsonrpc_session *js, const struct uuid *sid, bool incoming) @@ -943,8 +949,8 @@ raft_add_conn(struct raft *raft, struct jsonrpc_session *js, conn->incoming = incoming; conn->js_seqno = jsonrpc_session_get_seqno(conn->js); jsonrpc_session_set_probe_interval(js, 0); - jsonrpc_session_set_backlog_threshold(js, RAFT_MAX_BACKLOG_N_MSGS, - RAFT_MAX_BACKLOG_BYTES); + jsonrpc_session_set_backlog_threshold(js, raft->conn_backlog_max_n_msgs, + raft->conn_backlog_max_n_bytes); } /* Starts the local server in an existing Raft cluster, using the local copy of @@ -4717,6 +4723,42 @@ raft_unixctl_change_election_timer(struct unixctl_conn *conn, unixctl_command_reply(conn, "change of election timer initiated."); } +static void +raft_unixctl_set_backlog_threshold(struct unixctl_conn *conn, + int argc OVS_UNUSED, const char *argv[], + void *aux OVS_UNUSED) +{ + const char *cluster_name = argv[1]; + unsigned long long n_msgs, n_bytes; + struct raft_conn *r_conn; + + struct raft *raft = raft_lookup_by_name(cluster_name); + if (!raft) { + unixctl_command_reply_error(conn, "unknown cluster"); + return; + } + + if (!str_to_ullong(argv[2], 10, &n_msgs) + || !str_to_ullong(argv[3], 10, &n_bytes)) { + unixctl_command_reply_error(conn, "invalid argument"); + return; + } + + if (n_msgs 
< 50 || n_msgs > SIZE_MAX || n_bytes > SIZE_MAX) { + unixctl_command_reply_error(conn, "values out of range"); + return; + } + + raft->conn_backlog_max_n_msgs = n_msgs; + raft->conn_backlog_max_n_bytes = n_bytes; + + LIST_FOR_EACH (r_conn, list_node, &raft->conns) { + jsonrpc_session_set_backlog_threshold(r_conn->js, n_msgs, n_bytes); + } + + unixctl_command_reply(conn, NULL); +} + static void raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED, const char *argv[], @@ -4777,6 +4819,9 @@ raft_init(void) raft_unixctl_kick, NULL); unixctl_command_register("cluster/change-election-timer", "DB TIME", 2, 2, raft_unixctl_change_election_timer, NULL); + unixctl_command_register("cluster/set-backlog-threshold", + "DB N_MSGS N_BYTES", 3, 3, + raft_unixctl_set_backlog_threshold, NULL); unixctl_command_register("cluster/failure-test", "FAILURE SCENARIO", 1, 1, raft_unixctl_failure_test, NULL); ovsthread_once_done(&once); -- 2.25.4 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev