On 12/1/25 4:05 PM, Felix Moebius via dev wrote:
> When an ovsdb raft cluster in kubernetes is restarted the individual DBs
> may get assigned different ips. The corresponding DNS entries may take a
> moment to update, such that other cluster members will still see the old
> ip address for some time.
> During that time an ovsdb server from another cluster or another server
> from the same cluster may appear under the old ip address such that the
> remaining cluster members reconnect to this ovsdb server.
> Currently they will notice that either the server id or cluster id does
> not match what they expect, but will keep the connection alive and just
> ignore messages from that connection.
> 
> Fix this by terminating connections for which we notice a mismatched
> cluster or server id to force a reconnect such that we will eventually
> reconnect to the right server once the DNS entries have caught up.
> 
> Suggested-by: Felix Huettner <[email protected]>
> Signed-off-by: Felix Moebius <[email protected]>
> ---
>  ovsdb/raft-rpc.c | 38 ++++++++++----------------------------
>  ovsdb/raft-rpc.h |  2 +-
>  ovsdb/raft.c     | 35 ++++++++++++++++++++++++++++++++---
>  3 files changed, 43 insertions(+), 32 deletions(-)

Hi, Felix and Felix.  Thanks for the patch!

This is indeed an interesting idea on what to do with servers trying
to connect to a wrong cluster.  And the solution seems reasonable at
the first glance.

See some comments below.


> 
> diff --git a/ovsdb/raft-rpc.c b/ovsdb/raft-rpc.c
> index e898a8b55..6e8b3274b 100644
> --- a/ovsdb/raft-rpc.c
> +++ b/ovsdb/raft-rpc.c
> @@ -858,16 +858,16 @@ raft_rpc_to_jsonrpc(const struct uuid *cid,
>                                   json_array_create_1(args));
>  }
>  
> -/* Parses 'msg' as a Raft message directed to 'sid' and initializes 'rpc'
> - * appropriately.  On success, returns NULL and the caller owns the contents 
> of
> - * 'rpc' and must eventually uninitialize it with raft_rpc_uninit().  On
> - * failure, returns an error that the caller must eventually free.
> +/* Parses 'msg' as a Raft message and initializes 'rpc' appropriately.
> + * On success, returns NULL and the caller owns the contents of 'rpc'
> + * and must eventually uninitialize it with raft_rpc_uninit().
> + * On failure, returns an error that the caller must eventually free.
>   *
> - * 'cidp' must point to the Raft cluster's ID.  If the cluster ID isn't yet
> - * known, then '*cidp' must be UUID_ZERO and this function will attempt to
> - * initialize it based on 'msg'. */
> + * 'cid' and 'sid' will be set to the cluster id and destination server id
> + * if the corresponding fields in the Raft message are set. Otherwise they
> + * are set to UUID_ZERO. */
>  struct ovsdb_error * OVS_WARN_UNUSED_RESULT
> -raft_rpc_from_jsonrpc(struct uuid *cidp, const struct uuid *sid,
> +raft_rpc_from_jsonrpc(struct uuid *cid, struct uuid *sid,
>                        const struct jsonrpc_msg *msg, union raft_rpc *rpc)
>  {
>      memset(rpc, 0, sizeof *rpc);
> @@ -893,26 +893,8 @@ raft_rpc_from_jsonrpc(struct uuid *cidp, const struct 
> uuid *sid,
>      bool is_hello = rpc->type == RAFT_RPC_HELLO_REQUEST;
>      bool is_add = rpc->type == RAFT_RPC_ADD_SERVER_REQUEST;
>  
> -    struct uuid cid;
> -    if (raft_parse_uuid(&p, "cluster", is_add, &cid)
> -        && !uuid_equals(&cid, cidp)) {
> -        if (uuid_is_zero(cidp)) {
> -            *cidp = cid;
> -            VLOG_INFO("learned cluster ID "CID_FMT, CID_ARGS(&cid));
> -        } else {
> -            ovsdb_parser_raise_error(&p, "wrong cluster "CID_FMT" "
> -                                     "(expected "CID_FMT")",
> -                                     CID_ARGS(&cid), CID_ARGS(cidp));
> -        }
> -    }
> -
> -    struct uuid to_sid;
> -    if (raft_parse_uuid(&p, "to", is_add || is_hello, &to_sid)
> -        && !uuid_equals(&to_sid, sid)) {
> -        ovsdb_parser_raise_error(&p, "misrouted message (addressed to "
> -                                 SID_FMT" but we're "SID_FMT")",
> -                                 SID_ARGS(&to_sid), SID_ARGS(sid));
> -    }
> +    raft_parse_uuid(&p, "cluster", is_add, cid);
> +    raft_parse_uuid(&p, "to", is_add || is_hello, sid);
>  
>      rpc->common.sid = raft_parse_required_uuid(&p, "from");
>      rpc->common.comment = nullable_xstrdup(
> diff --git a/ovsdb/raft-rpc.h b/ovsdb/raft-rpc.h
> index 7677c35b4..1af181671 100644
> --- a/ovsdb/raft-rpc.h
> +++ b/ovsdb/raft-rpc.h
> @@ -289,7 +289,7 @@ struct jsonrpc_msg *raft_rpc_to_jsonrpc(const struct uuid 
> *cid,
>                                          const struct uuid *sid,
>                                          const union raft_rpc *);
>  struct ovsdb_error *raft_rpc_from_jsonrpc(struct uuid *cid,
> -                                          const struct uuid *sid,
> +                                          struct uuid *sid,
>                                            const struct jsonrpc_msg *,
>                                            union raft_rpc *)
>      OVS_WARN_UNUSED_RESULT;
> diff --git a/ovsdb/raft.c b/ovsdb/raft.c
> index 32349c8af..370dfe24c 100644
> --- a/ovsdb/raft.c
> +++ b/ovsdb/raft.c
> @@ -1534,8 +1534,8 @@ raft_conn_receive(struct raft *raft, struct raft_conn 
> *conn,
>          return false;
>      }
>  
> -    struct ovsdb_error *error = raft_rpc_from_jsonrpc(&raft->cid, &raft->sid,
> -                                                      msg, rpc);
> +    struct uuid cid, sid;
> +    struct ovsdb_error *error = raft_rpc_from_jsonrpc(&cid, &sid, msg, rpc);
>      jsonrpc_msg_destroy(msg);
>      if (error) {
>          char *s = ovsdb_error_to_string_free(error);
> @@ -1544,6 +1544,34 @@ raft_conn_receive(struct raft *raft, struct raft_conn 
> *conn,
>          return false;

Should we also consider reconnection if we got a general parsing error
here?  If we do, maybe we can simplify the code a little and avoid moving
the checks from the rpc module here.

>      }
>  
> +    if (!uuid_is_zero(&cid)) {
> +        if (uuid_is_zero(&raft->cid)) {
> +            raft->cid = cid;
> +            VLOG_INFO("%s: learned cluster ID "CID_FMT,
> +                      jsonrpc_session_get_name(conn->js), CID_ARGS(&cid));
> +        } else if (!uuid_equals(&raft->cid, &cid)) {
> +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> +            VLOG_WARN_RL(&rl, "%s: ignoring message to unexpected cluster ID 
> "
> +                         "(addressed to "CID_FMT" but we are "CID_FMT")",
> +                         jsonrpc_session_get_name(conn->js),
> +                         CID_ARGS(&cid), CID_ARGS(&raft->cid));
> +            raft_rpc_uninit(rpc);
> +            jsonrpc_session_force_reconnect(conn->js);
> +            return false;
> +        }
> +    }
> +
> +    if (!uuid_is_zero(&sid) && !uuid_equals(&raft->sid, &sid)) {
> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> +        VLOG_WARN_RL(&rl, "%s: ignoring message to unexpected server ID "
> +                     "(addressed to "SID_FMT" but we are "SID_FMT")",
> +                     jsonrpc_session_get_name(conn->js),
> +                     SID_ARGS(&sid), SID_ARGS(&raft->sid));
> +        raft_rpc_uninit(rpc);
> +        jsonrpc_session_force_reconnect(conn->js);
> +        return false;
> +    }
> +
>      if (uuid_is_zero(&conn->sid)) {
>          static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(50, 50);
>          conn->sid = rpc->common.sid;
> @@ -1551,11 +1579,12 @@ raft_conn_receive(struct raft *raft, struct raft_conn 
> *conn,
>                       jsonrpc_session_get_name(conn->js), 
> SID_ARGS(&conn->sid));
>      } else if (!uuid_equals(&conn->sid, &rpc->common.sid)) {
>          static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> -        VLOG_WARN_RL(&rl, "%s: ignoring message with unexpected server ID "
> +        VLOG_WARN_RL(&rl, "%s: ignoring message from unexpected server ID "

I'm not sure we should change the logs.  Some users including me may be used to
the current wording in the logs, or may have some automation looking for them.
New logs don't seem to be much more clear than the current ones.  This applies
to all 3 of them.

Best regards, Ilya Maximets.
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to