After creating the new cluster database, write a raft entry that sets the desired election timer. This allows CMSes to set the election timer at cluster start and avoid the error-prone process of modifying the election timer after the cluster is up.
Reported-at: https://bugzilla.redhat.com/1831778 Signed-off-by: Dan Williams <d...@redhat.com> --- v2: - Address Ben's comments; add --help and manpage docs - Write raft record directly instead of using private raft.c functions NEWS | 3 ++ ovsdb/ovsdb-tool.1.in | 12 ++++++++ ovsdb/ovsdb-tool.c | 64 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+) diff --git a/NEWS b/NEWS index 402ce59690478..8cf88acf861f2 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,9 @@ Post-v2.15.0 * New option '--no-record-hostname' to disable hostname configuration in ovsdb on startup. * New command 'record-hostname-if-not-set' to update hostname in ovsdb. + - ovsdb-tool: + * New command 'db-set-election-timer' to change a newly created clustered + database election timer before a cluster has started. - DPDK: * OVS validated with DPDK 20.11.1. It is recommended to use this version until further releases. diff --git a/ovsdb/ovsdb-tool.1.in b/ovsdb/ovsdb-tool.1.in index 3bdda85062b23..1d6d45666a984 100644 --- a/ovsdb/ovsdb-tool.1.in +++ b/ovsdb/ovsdb-tool.1.in @@ -25,6 +25,8 @@ ovsdb\-tool \- Open vSwitch database management utility .br \fBovsdb\-tool \fR[\fIoptions\fR] \fBdb\-cksum \fR[\fIdb\fR] .br +\fBovsdb\-tool \fR[\fIoptions\fR] \fBdb\-set\-election\-timer\fI db ms\fR +.br \fBovsdb\-tool \fR[\fIoptions\fR] \fBschema\-cksum \fR[\fIschema\fR] .br \fBovsdb\-tool \fR[\fIoptions\fR] \fBcompare-versions\fI a op b\fR @@ -221,6 +223,16 @@ The \fBdb\-cksum\fR command is for standalone and active-backup databases only. For clustered databases, use \fBovsdb\-client\fR's \fBschema\-cksum\fR command instead. . +.IP "\fBdb\-set\-election\-timer\fI db ms\fR" +Sets the leader election timeout base value for a newly created clustered +database \fIdb\fR. +.IP +Leader election will be initiated by a follower if there is no heartbeat +received from the leader within this time plus a random time within 1 second. 
+.IP
+The default value is 1000, if not changed with this command. The value must be
+between 100ms and 600000ms (10 minutes) inclusive.
+.
 .IP "\fBcompare-versions\fI a op b\fR"
 Compares \fIa\fR and \fIb\fR according to \fIop\fR. Both \fIa\fR and
 \fIb\fR must be OVSDB schema version numbers in the form
diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c
index b8560f850cd08..e0aaffb0608b0 100644
--- a/ovsdb/ovsdb-tool.c
+++ b/ovsdb/ovsdb-tool.c
@@ -167,6 +167,8 @@ usage(void)
            " db-local-address DB report local address of clustered DB\n"
            " db-is-clustered DB test whether DB is clustered\n"
            " db-is-standalone DB test whether DB is standalone\n"
+           " db-set-election-timer DB ms sets the leader election timer for\n"
+           " newly created clustered database DB to ms milliseconds\n"
            " schema-name [SCHEMA] report SCHEMA's name\n"
            " schema-version [SCHEMA] report SCHEMA's schema version\n"
            " schema-cksum [SCHEMA] report SCHEMA's checksum\n"
@@ -584,6 +586,88 @@ do_db_is_standalone(struct ovs_cmdl_context *ctx)
     do_db_has_magic(ctx, OVSDB_MAGIC);
 }
 
+/* Implements "db-set-election-timer DB MS".
+ *
+ * Validates MS as an election timer in milliseconds, checks that the log in
+ * file DB holds a raft header record and nothing after it, and then appends
+ * one raft entry record carrying the requested election timer.  Every
+ * validation failure is reported through ovs_fatal(). */
+static void
+do_db_set_election_timer(struct ovs_cmdl_context *ctx)
+{
+    const char *db_file_name = ctx->argv[1];
+    const char *timer_ms = ctx->argv[2];
+
+    /* Accept only a plain decimal number.  Requiring a leading digit rejects
+     * the whitespace and sign that strtoull() would otherwise skip (a negated
+     * value wraps around and could land inside the valid range), and
+     * requiring that the whole argument is consumed rejects trailing garbage
+     * such as "500abc". */
+    char *tail = NULL;
+    unsigned long long int election_timer = 0;
+    if (timer_ms[0] >= '0' && timer_ms[0] <= '9') {
+        election_timer = strtoull(timer_ms, &tail, 10);
+    }
+
+    /* Election timer smaller than 100ms or bigger than 10min doesn't make
+     * sense.  An argument too large for strtoull() comes back as ULLONG_MAX,
+     * which this range check rejects as well. */
+    if (!tail || *tail != '\0'
+        || election_timer < 100 || election_timer > 600000) {
+        ovs_fatal(0, "election timer must be between 100 and 600000, "
+                     "in msec.");
+        return;
+    }
+
+    struct ovsdb_log *log = NULL;
+    check_ovsdb_error(ovsdb_log_open(db_file_name, RAFT_MAGIC,
+                                     OVSDB_LOG_READ_WRITE, -1, &log));
+    struct json *json = NULL;
+    check_ovsdb_error(ovsdb_log_read(log, &json));
+    if (!json) {
+        ovs_fatal(0, "failed to find first RAFT record in database.");
+        return;
+    }
+
+    /* Minimally verify the header.  The parsed header is released only when
+     * parsing succeeded, so a failed parse never hands a header in an
+     * unknown state to raft_header_uninit(). */
+    struct raft_header h;
+    struct ovsdb_error *error = raft_header_from_json(&h, json);
+    if (!error) {
+        raft_header_uninit(&h);
+    }
+    json_destroy(json);
+    if (error) {
+        ovs_fatal(0, "failed to read RAFT header: %s",
+                  ovsdb_error_to_string(error));
+        return;
+    }
+
+    /* Ensure there is no second record yet. */
+    json = NULL;
+    check_ovsdb_error(ovsdb_log_read(log, &json));
+    if (json) {
+        json_destroy(json);
+        ovs_fatal(0, "election timer can only be changed for new databases.");
+        return;
+    }
+
+    struct raft_record r = {
+        .type = RAFT_REC_ENTRY,
+        .term = 1, /* New databases always start at term 1 */
+        .entry = {
+            .index = 2, /* First log entry after header is index 2 */
+            .data = NULL,
+            .servers = NULL,
+            .election_timer = election_timer,
+            .eid = UUID_ZERO,
+        },
+    };
+    check_ovsdb_error(ovsdb_log_write_and_free(log, raft_record_to_json(&r)));
+    ovsdb_log_close(log);
+}
+
 static void
 do_schema_name(struct ovs_cmdl_context *ctx)
 {
@@ -1689,6 +1773,8 @@ static const struct ovs_cmdl_command all_commands[] = {
     { "db-local-address", "db", 1, 1, do_db_local_address, OVS_RO },
     { "db-is-clustered", "db", 1, 1, do_db_is_clustered, OVS_RO },
     { "db-is-standalone", "db", 1, 1, do_db_is_standalone, OVS_RO },
+    { "db-set-election-timer", "db ms", 2, 2,
+      do_db_set_election_timer, OVS_RW },
     { "schema-name", "[schema]", 0, 1, do_schema_name, OVS_RO },
     { "schema-version", "[schema]", 0, 1, do_schema_version, OVS_RO },
     { "schema-cksum", "[schema]", 0, 1, do_schema_cksum, OVS_RO },
-- 
2.31.1
_______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev