From: Han Zhou <[email protected]>

Add option --leader-timeout to ovsdb-server.

Signed-off-by: Han Zhou <[email protected]>
---

Notes:
    This patch is on top of:
    https://mail.openvswitch.org/pipermail/ovs-dev/2019-March/357339.html

 ovsdb/ovsdb-server.1.in | 12 ++++++++++++
 ovsdb/ovsdb-server.c    | 23 +++++++++++++++++++----
 ovsdb/raft.c            | 31 ++++++++++++++++++-------------
 ovsdb/raft.h            |  3 ++-
 ovsdb/storage.c         | 18 ++++++++++++++----
 ovsdb/storage.h         |  2 ++
 6 files changed, 67 insertions(+), 22 deletions(-)

diff --git a/ovsdb/ovsdb-server.1.in b/ovsdb/ovsdb-server.1.in
index 9f78e87..5bacb12 100644
--- a/ovsdb/ovsdb-server.1.in
+++ b/ovsdb/ovsdb-server.1.in
@@ -19,6 +19,8 @@ ovsdb\-server \- Open vSwitch database server
 [\fB\-\-sync\-from=\fIserver\fR]
 
[\fB\-\-sync\-exclude-tables=\fIdb\fB:\fItable\fR[\fB,\fIdb\fB:\fItable\fR]...\fR]
 [\fB\-\-active\fR]
+.IP "Clustered mode options:"
+[\fB\-\-leader\-timeout=\fItimeout\fR]
 .so lib/ssl-syn.man
 .so lib/ssl-bootstrap-syn.man
 .so lib/ssl-peer-ca-cert-syn.man
@@ -171,6 +173,16 @@ allow the syncing options to be specified using command 
line options,
 yet start the server, as the default, active server.  To switch the
 running server to backup mode, use \fBovs-appctl(1)\fR to execute the
 \fBovsdb\-server/connect\-active\-ovsdb\-server\fR command.
+.SS "Clustered Mode Options"
+These options apply only to databases working in clustered mode:
+.TP
+\fB\-\-leader\-timeout=\fIserver\fR
+Set the cluster leader election timeout value in miniseconds.  This option
+should be set to the same value across all nodes in the cluster.  Leader
+re-election will be triggered if a follower haven't heard from current leader
+within this interval.  The default timeout is one second.  Increasing this
+value reduces the chance of leader re-election during transient overload
+situations but increases the delay of reacting to real failures, too.
 .SS "Public Key Infrastructure Options"
 The options described below for configuring the SSL public key
 infrastructure accept a special syntax for obtaining their
diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c
index 9dc1d57..80f6f96 100644
--- a/ovsdb/ovsdb-server.c
+++ b/ovsdb/ovsdb-server.c
@@ -97,6 +97,7 @@ struct server_config {
     char **sync_from;
     char **sync_exclude;
     bool *is_backup;
+    unsigned int *leader_timeout;
     struct ovsdb_jsonrpc_server *jsonrpc;
 };
 static unixctl_cb_func ovsdb_server_add_remote;
@@ -119,7 +120,7 @@ static void parse_options(int argc, char *argvp[],
                           struct sset *db_filenames, struct sset *remotes,
                           char **unixctl_pathp, char **run_command,
                           char **sync_from, char **sync_exclude,
-                          bool *is_backup);
+                          bool *is_backup, unsigned int *leader_timeout);
 OVS_NO_RETURN static void usage(void);
 
 static char *reconfigure_remotes(struct ovsdb_jsonrpc_server *,
@@ -312,8 +313,10 @@ main(int argc, char *argv[])
     process_init();
 
     bool active = false;
+    unsigned int leader_timeout = 0;
     parse_options(argc, argv, &db_filenames, &remotes, &unixctl_path,
-                  &run_command, &sync_from, &sync_exclude, &active);
+                  &run_command, &sync_from, &sync_exclude, &active,
+                  &leader_timeout);
     is_backup = sync_from && !active;
 
     daemon_become_new_user(false);
@@ -351,6 +354,7 @@ main(int argc, char *argv[])
     server_config.sync_from = &sync_from;
     server_config.sync_exclude = &sync_exclude;
     server_config.is_backup = &is_backup;
+    server_config.leader_timeout = &leader_timeout;
 
     perf_counters_init();
 
@@ -639,7 +643,8 @@ open_db(struct server_config *config, const char *filename)
 
     struct ovsdb_storage *storage;
     struct ovsdb_error *error;
-    error = ovsdb_storage_open(filename, true, &storage);
+    error = ovsdb_storage_open(filename, true, *config->leader_timeout,
+                               &storage);
     if (error) {
         return error;
     }
@@ -1664,7 +1669,8 @@ static void
 parse_options(int argc, char *argv[],
               struct sset *db_filenames, struct sset *remotes,
               char **unixctl_pathp, char **run_command,
-              char **sync_from, char **sync_exclude, bool *active)
+              char **sync_from, char **sync_exclude, bool *active,
+              unsigned int *leader_timeout)
 {
     enum {
         OPT_REMOTE = UCHAR_MAX + 1,
@@ -1675,6 +1681,7 @@ parse_options(int argc, char *argv[],
         OPT_SYNC_FROM,
         OPT_SYNC_EXCLUDE,
         OPT_ACTIVE,
+        OPT_LEADER_TIMEOUT,
         OPT_NO_DBS,
         VLOG_OPTION_ENUMS,
         DAEMON_OPTION_ENUMS,
@@ -1697,6 +1704,7 @@ parse_options(int argc, char *argv[],
         {"sync-from",   required_argument, NULL, OPT_SYNC_FROM},
         {"sync-exclude-tables", required_argument, NULL, OPT_SYNC_EXCLUDE},
         {"active", no_argument, NULL, OPT_ACTIVE},
+        {"leader-timeout", required_argument, NULL, OPT_LEADER_TIMEOUT},
         {"no-dbs", no_argument, NULL, OPT_NO_DBS},
         {NULL, 0, NULL, 0},
     };
@@ -1784,6 +1792,12 @@ parse_options(int argc, char *argv[],
             *active = true;
             break;
 
+        case OPT_LEADER_TIMEOUT:
+            if (!str_to_uint(optarg, 10, leader_timeout)) {
+                ovs_fatal(0, "leader-timeout must be a non-negative integer.");
+            }
+            break;
+
         case OPT_NO_DBS:
             add_default_db = false;
             break;
@@ -1822,6 +1836,7 @@ usage(void)
     daemon_usage();
     vlog_usage();
     replication_usage();
+    storage_usage();
     printf("\nOther options:\n"
            "  --run COMMAND           run COMMAND as subprocess then exit\n"
            "  --unixctl=SOCKET        override default control socket name\n"
diff --git a/ovsdb/raft.c b/ovsdb/raft.c
index 31e9e72..a789626 100644
--- a/ovsdb/raft.c
+++ b/ovsdb/raft.c
@@ -238,8 +238,11 @@ struct raft {
     uint64_t last_applied;      /* Max log index applied to state machine. */
     struct uuid leader_sid;     /* Server ID of leader (zero, if unknown). */
 
+#define ELECTION_DEFAULT 1024   /* Default value of election_interval. */
+    unsigned int election_interval; /* election_interval +
+                                       random(ELECTION_RANGE_MSEC) is the time
+                                       to wait before starting an election. */
     /* Followers and candidates only. */
-#define ELECTION_BASE_MSEC 1024
 #define ELECTION_RANGE_MSEC 1024
     long long int election_base;    /* Time of last heartbeat from leader. */
     long long int election_timeout; /* Time at which we start an election. */
@@ -269,7 +272,6 @@ struct raft {
     struct hmap add_servers;    /* Contains "struct raft_server"s to add. */
     struct raft_server *remove_server; /* Server being removed. */
     struct hmap commands;       /* Contains "struct raft_command"s. */
-#define PING_TIME_MSEC (ELECTION_BASE_MSEC / 3)
     long long int ping_timeout; /* Time at which to send a heartbeat */
 
     /* Candidates only.  Reinitialized at start of election. */
@@ -360,7 +362,7 @@ raft_make_address_passive(const char *address_)
 }
 
 static struct raft *
-raft_alloc(void)
+raft_alloc(unsigned int election_interval)
 {
     raft_init();
 
@@ -377,6 +379,8 @@ raft_alloc(void)
     hmap_init(&raft->add_servers);
     hmap_init(&raft->commands);
 
+    raft->election_interval = election_interval ? election_interval :
+        ELECTION_DEFAULT;
     raft_reset_ping_timer(raft);
     raft_reset_election_timer(raft);
 
@@ -541,7 +545,7 @@ raft_join_cluster(const char *file_name,
 struct ovsdb_error * OVS_WARN_UNUSED_RESULT
 raft_read_metadata(struct ovsdb_log *log, struct raft_metadata *md)
 {
-    struct raft *raft = raft_alloc();
+    struct raft *raft = raft_alloc(0);
     raft->log = log;
 
     struct ovsdb_error *error = raft_read_header(raft);
@@ -868,7 +872,7 @@ raft_read_log(struct raft *raft)
 static void
 raft_reset_election_timer(struct raft *raft)
 {
-    unsigned int duration = (ELECTION_BASE_MSEC
+    unsigned int duration = (raft->election_interval
                              + random_range(ELECTION_RANGE_MSEC));
     raft->election_base = time_msec();
     raft->election_timeout = raft->election_base + duration;
@@ -877,7 +881,7 @@ raft_reset_election_timer(struct raft *raft)
 static void
 raft_reset_ping_timer(struct raft *raft)
 {
-    raft->ping_timeout = time_msec() + PING_TIME_MSEC;
+    raft->ping_timeout = time_msec() + raft->election_interval / 3;
 }
 
 static void
@@ -900,9 +904,10 @@ raft_add_conn(struct raft *raft, struct jsonrpc_session 
*js,
  * the cluster's log in 'file_name'.  Takes ownership of 'log', whether
  * successful or not. */
 struct ovsdb_error * OVS_WARN_UNUSED_RESULT
-raft_open(struct ovsdb_log *log, struct raft **raftp)
+raft_open(struct ovsdb_log *log, unsigned int election_interval,
+          struct raft **raftp)
 {
-    struct raft *raft = raft_alloc();
+    struct raft *raft = raft_alloc(election_interval);
     raft->log = log;
 
     struct ovsdb_error *error = raft_read_header(raft);
@@ -1113,7 +1118,7 @@ raft_send_remove_server_requests(struct raft *raft)
         }
     }
 
-    raft->leave_timeout = time_msec() + ELECTION_BASE_MSEC;
+    raft->leave_timeout = time_msec() + raft->election_interval;
 }
 
 /* Attempts to start 'raft' leaving its cluster.  The caller can check progress
@@ -1130,7 +1135,7 @@ raft_leave(struct raft *raft)
     raft_transfer_leadership(raft, "this server is leaving the cluster");
     raft_become_follower(raft);
     raft_send_remove_server_requests(raft);
-    raft->leave_timeout = time_msec() + ELECTION_BASE_MSEC;
+    raft->leave_timeout = time_msec() + raft->election_interval;
 }
 
 /* Returns true if 'raft' is currently attempting to leave its cluster. */
@@ -1785,7 +1790,7 @@ raft_run(struct raft *raft)
             struct raft_command *cmd, *next_cmd;
             HMAP_FOR_EACH_SAFE (cmd, next_cmd, hmap_node, &raft->commands) {
                 if (cmd->timestamp
-                    && now - cmd->timestamp > ELECTION_BASE_MSEC) {
+                    && now - cmd->timestamp > raft->election_interval) {
                     raft_command_complete(raft, cmd, RAFT_CMD_TIMEOUT);
                 }
             }
@@ -3231,10 +3236,10 @@ raft_should_suppress_disruptive_server(struct raft 
*raft,
         return true;
 
     case RAFT_FOLLOWER:
-        if (now < raft->election_base + ELECTION_BASE_MSEC) {
+        if (now < raft->election_base + raft->election_interval) {
             VLOG_WARN_RL(&rl, "ignoring vote request received after only "
                          "%lld ms (minimum election time is %d ms)",
-                         now - raft->election_base, ELECTION_BASE_MSEC);
+                         now - raft->election_base, raft->election_interval);
             return true;
         }
         return false;
diff --git a/ovsdb/raft.h b/ovsdb/raft.h
index 3d44899..6b719ba 100644
--- a/ovsdb/raft.h
+++ b/ovsdb/raft.h
@@ -100,7 +100,8 @@ struct ovsdb_error *raft_read_metadata(struct ovsdb_log *,
 void raft_metadata_destroy(struct raft_metadata *);
 
 /* Starting up or shutting down a server within a cluster. */
-struct ovsdb_error *raft_open(struct ovsdb_log *, struct raft **)
+struct ovsdb_error *raft_open(struct ovsdb_log *,
+                              unsigned int election_interval, struct raft **)
     OVS_WARN_UNUSED_RESULT;
 void raft_close(struct raft *);
 
diff --git a/ovsdb/storage.c b/ovsdb/storage.c
index e26252b..b9ad401 100644
--- a/ovsdb/storage.c
+++ b/ovsdb/storage.c
@@ -58,6 +58,7 @@ static void schedule_next_snapshot(struct ovsdb_storage *, 
bool quick);
 
 static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
 ovsdb_storage_open__(const char *filename, bool rw, bool allow_clustered,
+                     unsigned int leader_timeout,
                      struct ovsdb_storage **storagep)
 {
     *storagep = NULL;
@@ -78,7 +79,7 @@ ovsdb_storage_open__(const char *filename, bool rw, bool 
allow_clustered,
             return ovsdb_error(NULL, "%s: cannot apply this operation to "
                                "clustered database file", filename);
         }
-        error = raft_open(log, &raft);
+        error = raft_open(log, leader_timeout, &raft);
         log = NULL;
         if (error) {
             return error;
@@ -101,10 +102,10 @@ ovsdb_storage_open__(const char *filename, bool rw, bool 
allow_clustered,
  * The returned storage might be clustered or standalone, depending on what the
  * disk file contains. */
 struct ovsdb_error * OVS_WARN_UNUSED_RESULT
-ovsdb_storage_open(const char *filename, bool rw,
+ovsdb_storage_open(const char *filename, bool rw, unsigned int leader_timeout,
                    struct ovsdb_storage **storagep)
 {
-    return ovsdb_storage_open__(filename, rw, true, storagep);
+    return ovsdb_storage_open__(filename, rw, true, leader_timeout, storagep);
 }
 
 struct ovsdb_storage *
@@ -112,7 +113,7 @@ ovsdb_storage_open_standalone(const char *filename, bool rw)
 {
     struct ovsdb_storage *storage;
     struct ovsdb_error *error = ovsdb_storage_open__(filename, rw, false,
-                                                     &storage);
+                                                     0, &storage);
     if (error) {
         ovs_fatal(0, "%s", ovsdb_error_to_string_free(error));
     }
@@ -610,3 +611,12 @@ ovsdb_storage_peek_last_eid(struct ovsdb_storage *storage)
     }
     return raft_current_eid(storage->raft);
 }
+
+void
+storage_usage(void)
+{
+    printf("\n\
+Clustered mode options:\n\
+  --leader-timeout=TIMEOUT\n\
+                          set cluster leader election timeout in ms.\n");
+}
diff --git a/ovsdb/storage.h b/ovsdb/storage.h
index 8a9bbab..06cb695 100644
--- a/ovsdb/storage.h
+++ b/ovsdb/storage.h
@@ -26,6 +26,7 @@ struct ovsdb_storage;
 struct uuid;
 
 struct ovsdb_error *ovsdb_storage_open(const char *filename, bool rw,
+                                       unsigned int leader_timeout,
                                        struct ovsdb_storage **)
     OVS_WARN_UNUSED_RESULT;
 struct ovsdb_storage *ovsdb_storage_create_unbacked(void);
@@ -93,4 +94,5 @@ struct ovsdb_schema *ovsdb_storage_read_schema(struct 
ovsdb_storage *);
 
 const struct uuid *ovsdb_storage_peek_last_eid(struct ovsdb_storage *);
 
+void storage_usage(void);
 #endif /* ovsdb/storage.h */
-- 
2.1.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to