Section 4.2.3 "Disruptive Servers" says:

    ...if a server receives a RequestVote request within the minimum
    election timeout of hearing from a current leader, it does not
    update its term or grant its vote.  It can either drop the request,
    reply with a vote denial, or delay the request; the result is
    essentially the same...

However, while logic in the code does make it skip the term update, it
still proceeds to reply with the vote, because when the suppression
check returns true, we'll not enter the 'if' block and will proceed to
the RPC 'switch' instead.

It will reply with a vote for the candidate it already voted for on
this term for an actual vote and it will vote for the requester on
the pre-vote.  This effectively bypasses the pre-vote scheme as the
disruptive server will win the pre-vote and proceed to the regular
election.  And while the disruptor will not win the actual vote, it
has a term +1 and will reply with a "stale term" to the next append
request, forcing the current leader to step down.

Fix that by actually not responding to the disruptive vote requests,
i.e. by taking the "drop the request" route.

The test is updated with a new failure model where vote requests are
actually getting sent out.  There is a value in testing the full RPC
stop as well, so keeping the current checks as they are.

The FT_FORCE_ELECTION enum entry is not actually needed, but the code
feels awkward without it.

Also removing the duplicate term check in the vote reply handler,
as it is now clear that the term check will not be skipped.

Fixes: 1b1d2e6daa56 ("ovsdb: Introduce experimental support for clustered 
databases.")
Signed-off-by: Ilya Maximets <[email protected]>
---
 ovsdb/raft.c           | 23 ++++++++++++++++-------
 tests/ovsdb-cluster.at | 12 ++++++++++++
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/ovsdb/raft.c b/ovsdb/raft.c
index 32349c8af..a7e631b2c 100644
--- a/ovsdb/raft.c
+++ b/ovsdb/raft.c
@@ -80,6 +80,7 @@ enum raft_failure_test {
     FT_CRASH_BEFORE_SEND_SNAPSHOT_REP,
     FT_DELAY_ELECTION,
     FT_DONT_SEND_VOTE_REQUEST,
+    FT_FORCE_ELECTION,
     FT_STOP_RAFT_RPC,
     FT_TRANSFER_LEADERSHIP,
     FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ,
@@ -3975,10 +3976,6 @@ static void
 raft_handle_vote_reply(struct raft *raft,
                        const struct raft_vote_reply *rpy)
 {
-    if (!raft_receive_term__(raft, &rpy->common, rpy->term)) {
-        return;
-    }
-
     if (raft->role != RAFT_CANDIDATE) {
         return;
     }
@@ -4740,10 +4737,12 @@ raft_handle_rpc(struct raft *raft, const union raft_rpc 
*rpc)
         s->last_msg_ts = time_msec();
     }
 
+    if (raft_should_suppress_disruptive_server(raft, rpc)) {
+        return;
+    }
+
     uint64_t term = raft_rpc_get_term(rpc);
-    if (term
-        && !raft_should_suppress_disruptive_server(raft, rpc)
-        && !raft_receive_term__(raft, &rpc->common, term)) {
+    if (term && !raft_receive_term__(raft, &rpc->common, term)) {
         if (rpc->type == RAFT_RPC_APPEND_REQUEST) {
             /* Section 3.3: "If a server receives a request with a stale term
              * number, it rejects the request." */
@@ -5267,6 +5266,7 @@ raft_unixctl_failure_test(struct unixctl_conn *conn 
OVS_UNUSED,
         failure_test = FT_CRASH_BEFORE_SEND_SNAPSHOT_REP;
     } else if (!strcmp(test, "delay-election")) {
         failure_test = FT_DELAY_ELECTION;
+
         struct raft *raft;
         HMAP_FOR_EACH (raft, hmap_node, &all_rafts) {
             if (raft->role == RAFT_FOLLOWER) {
@@ -5275,6 +5275,15 @@ raft_unixctl_failure_test(struct unixctl_conn *conn 
OVS_UNUSED,
         }
     } else if (!strcmp(test, "dont-send-vote-request")) {
         failure_test = FT_DONT_SEND_VOTE_REQUEST;
+    } else if (!strcmp(test, "force-election")) {
+        failure_test = FT_FORCE_ELECTION;
+
+        struct raft *raft;
+        HMAP_FOR_EACH (raft, hmap_node, &all_rafts) {
+            if (raft->role != RAFT_LEADER) {
+                raft_start_election(raft, true, false);
+            }
+        }
     } else if (!strcmp(test, "stop-raft-rpc")) {
         failure_test = FT_STOP_RAFT_RPC;
     } else if (!strcmp(test,
diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at
index 91a76cb81..ff18ec556 100644
--- a/tests/ovsdb-cluster.at
+++ b/tests/ovsdb-cluster.at
@@ -1005,6 +1005,18 @@ for i in $(seq $n); do
     AT_CHECK([ovs-appctl -t $(pwd)/s$i cluster/status $schema_name | grep 
"Term: 1"], [0], [ignore])
 done
 
+# Now force election without pausing RPC, so the requests are actually sent.
+AT_CHECK([ovs-appctl -t $(pwd)/s2 cluster/failure-test force-election], [0], 
[ignore])
+
+# The server transitions into a candidate role while engaging the test
+# and shortly after it should step back to be a follower.
+OVS_WAIT_UNTIL([ovs-appctl -t $(pwd)/s2 cluster/status $schema_name | grep 
"Role: follower"])
+
+# No term change.
+for i in $(seq $n); do
+    AT_CHECK([ovs-appctl -t $(pwd)/s$i cluster/status $schema_name | grep 
"Term: 1"], [0], [ignore])
+done
+
 for i in $(seq $n); do
     OVS_APP_EXIT_AND_WAIT_BY_TARGET([$(pwd)/s$i], [s$i.pid])
 done
-- 
2.52.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to