On 9/3/20 12:10 PM, Lorenzo Bianconi wrote:
> Introduce the following info useful for cluster debugging to
> cluster/status command:
> - time elapsed from last start/complete election
> - election trigger (e.g. timeout)
> - number of disconnections
> - time elapsed from last raft messaged received
>
> Signed-off-by: Lorenzo Bianconi <[email protected]>
> ---
Hi Lorenzo,
> ovsdb/raft-private.h | 2 ++
> ovsdb/raft.c | 34 ++++++++++++++++++++++++++++++++++
> 2 files changed, 36 insertions(+)
>
> diff --git a/ovsdb/raft-private.h b/ovsdb/raft-private.h
> index 1f366b4ab..61452df0f 100644
> --- a/ovsdb/raft-private.h
> +++ b/ovsdb/raft-private.h
> @@ -90,6 +90,8 @@ struct raft_server {
> /* For use in adding and removing servers: */
> struct uuid requester_sid; /* Nonzero if requested via RPC. */
> struct unixctl_conn *requester_conn; /* Only if requested via unixctl. */
> +
> + long long int last_msg_ts; /* last received msg timestamp in ms */
Nit: Comments should start with capital letter and end with '.'.
Also, it seems to me that there are some more functions that handle
incoming messages where we don't store last_msg_ts:
raft_handle_vote_request__()
raft_handle_append_entries()
raft_handle_add_server_request()
raft_handle_add_server_reply()
raft_handle_remove_server_request()
..
I think we should instead set 'last_msg_ts' in raft_handle_rpc() before
calling the RPC specific handler. That would mean that we'd have to
change a single place in the code + one more time when the server is
created in raft_handle_add_server_request().
Would that work?
> };
>
> void raft_server_destroy(struct raft_server *);
> diff --git a/ovsdb/raft.c b/ovsdb/raft.c
> index 708b0624c..7d8fc098d 100644
> --- a/ovsdb/raft.c
> +++ b/ovsdb/raft.c
> @@ -264,6 +264,12 @@ struct raft {
> long long int election_base; /* Time of last heartbeat from leader. */
> long long int election_timeout; /* Time at which we start an election. */
>
> + long long int election_start; /* Start election time */
> + long long int election_complete;/* Time of election completion */
Nit: Missing '.' at the end of the comments.
Nit: Missing space after ';'.
> + bool leadership_transfer;
A comment might be good here. Maybe:
/* Was the leadership transferred? */
> +
> + unsigned int n_disconnections;
> +
> /* Used for joining a cluster. */
> bool joining; /* Attempting to join the cluster? */
> struct sset remote_addresses; /* Addresses to try to find other servers.
> */
> @@ -1687,6 +1693,9 @@ raft_start_election(struct raft *raft, bool
> leadership_transfer)
>
> raft->n_votes = 0;
>
> + raft->election_start = time_now();
Why do we store election_start and election_complete in seconds and
last_msg_ts in milliseconds? Why not store them both in milliseconds?
We should also set raft->election_complete = 0 here so we don't report
it as completed in "cluster/status" if it's called before the election
actually completed.
> + raft->leadership_transfer = leadership_transfer;
> +
> static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> if (!VLOG_DROP_INFO(&rl)) {
> long long int now = time_msec();
> @@ -1836,6 +1845,7 @@ raft_run(struct raft *raft)
> struct raft_conn *next;
> LIST_FOR_EACH_SAFE (conn, next, list_node, &raft->conns) {
> if (!raft_conn_should_stay_open(raft, conn)) {
> + raft->n_disconnections++;
> raft_conn_close(conn);
> }
> }
> @@ -2575,6 +2585,7 @@ raft_become_leader(struct raft *raft)
>
> ovs_assert(raft->role != RAFT_LEADER);
> raft->role = RAFT_LEADER;
> + raft->election_complete = time_now();
So, from what I see, if a server loses the election it will always have
raft->election_start set but raft->election_complete is only set when a
server becomes leader. Would it make more sense to call it 'election_won'?
> raft_set_leader(raft, &raft->sid);
> raft_reset_election_timer(raft);
> raft_reset_ping_timer(raft);
> @@ -3015,6 +3026,11 @@ static void
> raft_handle_append_request(struct raft *raft,
> const struct raft_append_request *rq)
> {
> + struct raft_server *s = raft_find_server(raft, &rq->common.sid);
> + if (s) {
> + s->last_msg_ts = time_msec();
> + }
> +
> /* We do not check whether the server that sent the request is part of
> the
> * cluster. As section 4.1 says, "A server accepts AppendEntries
> requests
> * from a leader that is not part of the server’s latest configuration.
> @@ -3358,6 +3374,8 @@ raft_handle_append_reply(struct raft *raft,
> }
> }
>
> + s->last_msg_ts = time_msec();
> +
> s->replied = true;
> if (rpy->result == RAFT_APPEND_OK) {
> /* Figure 3.1: "If successful, update nextIndex and matchIndex for
> @@ -4473,6 +4491,16 @@ raft_unixctl_status(struct unixctl_conn *conn,
> raft_put_sid("Vote", &raft->vote, raft, &s);
> ds_put_char(&s, '\n');
>
> + if (raft->election_start) {
> + ds_put_format(&s, "Election started: %"PRIu64"s reason: %s\n",
Maybe it's more clear if we say:
"Last Election started x milliseconds ago, reason: ..."
> + (uint64_t) (time_now() - raft->election_start),
> + raft->leadership_transfer ?
> + "leadership_transfer" : "timeout");
> + }
> + if (raft->election_complete) {
> + ds_put_format(&s, "Election completed: %"PRIu64"s\n",
> + (uint64_t) (time_now() - raft->election_complete));
Same here.
Thanks,
Dumitru
> + }
> ds_put_format(&s, "Election timer: %"PRIu64, raft->election_timer);
> if (raft->role == RAFT_LEADER && raft->election_timer_new) {
> ds_put_format(&s, " (changing to %"PRIu64")",
> @@ -4500,6 +4528,8 @@ raft_unixctl_status(struct unixctl_conn *conn,
> }
> ds_put_char(&s, '\n');
>
> + ds_put_format(&s, "Disconnections: %u\n", raft->n_disconnections);
> +
> ds_put_cstr(&s, "Servers:\n");
> struct raft_server *server;
> HMAP_FOR_EACH (server, hmap_node, &raft->servers) {
> @@ -4524,6 +4554,10 @@ raft_unixctl_status(struct unixctl_conn *conn,
> ds_put_format(&s, " next_index=%"PRIu64" match_index=%"PRIu64,
> server->next_index, server->match_index);
> }
> + if (server->last_msg_ts) {
> + ds_put_format(&s, " last msg %"PRIu64" ms",
> + (uint64_t) (time_msec() - server->last_msg_ts));
> + }
> ds_put_char(&s, '\n');
> }
>
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev