On 9/3/20 12:10 PM, Lorenzo Bianconi wrote:
> Introduce the following info useful for cluster debugging to
> cluster/status command:
> - time elapsed from last start/complete election
> - election trigger (e.g. timeout)
> - number of disconnections
> - time elapsed from last raft messaged received
> 
> Signed-off-by: Lorenzo Bianconi <[email protected]>
> ---

Hi Lorenzo,

>  ovsdb/raft-private.h |  2 ++
>  ovsdb/raft.c         | 34 ++++++++++++++++++++++++++++++++++
>  2 files changed, 36 insertions(+)
> 
> diff --git a/ovsdb/raft-private.h b/ovsdb/raft-private.h
> index 1f366b4ab..61452df0f 100644
> --- a/ovsdb/raft-private.h
> +++ b/ovsdb/raft-private.h
> @@ -90,6 +90,8 @@ struct raft_server {
>      /* For use in adding and removing servers: */
>      struct uuid requester_sid;  /* Nonzero if requested via RPC. */
>      struct unixctl_conn *requester_conn; /* Only if requested via unixctl. */
> +
> +    long long int last_msg_ts; /* last received msg timestamp in ms */

Nit: Comments should start with capital letter and end with '.'.

Also, it seems to me that there are some more functions that handle
incoming messages where we don't store last_msg_ts:

raft_handle_vote_request__()
raft_handle_append_entries()
raft_handle_add_server_request()
raft_handle_add_server_reply()
raft_handle_remove_server_request()
..

I think we should instead set 'last_msg_ts' in raft_handle_rpc() before
calling the RPC specific handler.  That would mean that we'd have to
change a single place in the code + one more time when the server is
created in raft_handle_add_server_request().

Would that work?

>  };
>  
>  void raft_server_destroy(struct raft_server *);
> diff --git a/ovsdb/raft.c b/ovsdb/raft.c
> index 708b0624c..7d8fc098d 100644
> --- a/ovsdb/raft.c
> +++ b/ovsdb/raft.c
> @@ -264,6 +264,12 @@ struct raft {
>      long long int election_base;    /* Time of last heartbeat from leader. */
>      long long int election_timeout; /* Time at which we start an election. */
>  
> +    long long int election_start;   /* Start election time */
> +    long long int election_complete;/* Time of election completion */

Nit: Missing '.' at the end of the comments.
Nit: Missing space after ';'.

> +    bool leadership_transfer;

A comment might be good here.  Maybe:

/* Was the leadership transferred? */

> +
> +    unsigned int n_disconnections;
> +
>      /* Used for joining a cluster. */
>      bool joining;                 /* Attempting to join the cluster? */
>      struct sset remote_addresses; /* Addresses to try to find other servers. 
> */
> @@ -1687,6 +1693,9 @@ raft_start_election(struct raft *raft, bool 
> leadership_transfer)
>  
>      raft->n_votes = 0;
>  
> +    raft->election_start = time_now();

Why do we store election_start and election_complete in seconds and
last_msg_ts in milliseconds?  Why not store them both in milliseconds?

We should also set raft->election_complete = 0 here so we don't report
it as completed in "cluster/status" if it's called before the election
actually completed.

> +    raft->leadership_transfer = leadership_transfer;
> +
>      static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>      if (!VLOG_DROP_INFO(&rl)) {
>          long long int now = time_msec();
> @@ -1836,6 +1845,7 @@ raft_run(struct raft *raft)
>      struct raft_conn *next;
>      LIST_FOR_EACH_SAFE (conn, next, list_node, &raft->conns) {
>          if (!raft_conn_should_stay_open(raft, conn)) {
> +            raft->n_disconnections++;
>              raft_conn_close(conn);
>          }
>      }
> @@ -2575,6 +2585,7 @@ raft_become_leader(struct raft *raft)
>  
>      ovs_assert(raft->role != RAFT_LEADER);
>      raft->role = RAFT_LEADER;
> +    raft->election_complete = time_now();

So, from what I see, if a server loses the election it will always have
raft->election_start set but raft->election_complete is only set when a
server becomes leader.  Would it make more sense to call it 'election_won'?

>      raft_set_leader(raft, &raft->sid);
>      raft_reset_election_timer(raft);
>      raft_reset_ping_timer(raft);
> @@ -3015,6 +3026,11 @@ static void
>  raft_handle_append_request(struct raft *raft,
>                             const struct raft_append_request *rq)
>  {
> +    struct raft_server *s = raft_find_server(raft, &rq->common.sid);
> +    if (s) {
> +        s->last_msg_ts = time_msec();
> +    }
> +
>      /* We do not check whether the server that sent the request is part of 
> the
>       * cluster.  As section 4.1 says, "A server accepts AppendEntries 
> requests
>       * from a leader that is not part of the server’s latest configuration.
> @@ -3358,6 +3374,8 @@ raft_handle_append_reply(struct raft *raft,
>          }
>      }
>  
> +    s->last_msg_ts = time_msec();
> +
>      s->replied = true;
>      if (rpy->result == RAFT_APPEND_OK) {
>          /* Figure 3.1: "If successful, update nextIndex and matchIndex for
> @@ -4473,6 +4491,16 @@ raft_unixctl_status(struct unixctl_conn *conn,
>      raft_put_sid("Vote", &raft->vote, raft, &s);
>      ds_put_char(&s, '\n');
>  
> +    if (raft->election_start) {
> +        ds_put_format(&s, "Election started: %"PRIu64"s reason: %s\n",

Maybe it's more clear if we say:
"Last Election started x milliseconds ago, reason: ..."

> +                      (uint64_t) (time_now() - raft->election_start),
> +                      raft->leadership_transfer ?
> +                      "leadership_transfer" : "timeout");
> +    }
> +    if (raft->election_complete) {
> +        ds_put_format(&s, "Election completed: %"PRIu64"s\n",
> +                      (uint64_t) (time_now() - raft->election_complete));

Same here.

Thanks,
Dumitru

> +    }
>      ds_put_format(&s, "Election timer: %"PRIu64, raft->election_timer);
>      if (raft->role == RAFT_LEADER && raft->election_timer_new) {
>          ds_put_format(&s, " (changing to %"PRIu64")",
> @@ -4500,6 +4528,8 @@ raft_unixctl_status(struct unixctl_conn *conn,
>      }
>      ds_put_char(&s, '\n');
>  
> +    ds_put_format(&s, "Disconnections: %u\n", raft->n_disconnections);
> +
>      ds_put_cstr(&s, "Servers:\n");
>      struct raft_server *server;
>      HMAP_FOR_EACH (server, hmap_node, &raft->servers) {
> @@ -4524,6 +4554,10 @@ raft_unixctl_status(struct unixctl_conn *conn,
>              ds_put_format(&s, " next_index=%"PRIu64" match_index=%"PRIu64,
>                            server->next_index, server->match_index);
>          }
> +        if (server->last_msg_ts) {
> +            ds_put_format(&s, " last msg %"PRIu64" ms",
> +                          (uint64_t) (time_msec() - server->last_msg_ts));
> +        }
>          ds_put_char(&s, '\n');
>      }
>  
> 

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to