On Sat Dec 20, 2025 at 12:38 AM CET, Ilya Maximets wrote:
> On 12/1/25 4:05 PM, Felix Moebius via dev wrote:
>> When an ovsdb raft cluster in kubernetes is restarted the individual DBs
>> may get assigned different ips. The corresponding DNS entries may take a
>> moment to update, such that other cluster members will still see the old
>> ip address for some time.
>> During that time an ovsdb server from another cluster or another server
>> from the same cluster may appear under the old ip address such that the
>> remaining cluster members reconnect to this ovsdb server.
>> Currently they will notice that either the server id or cluster id does
>> not match what they expect, but will keep the connection alive and just
>> ignore messages from that connection.
>> 
>> Fix this by terminating connections for which we notice a mismatched
>> cluster or server id to force a reconnect such that we will eventually
>> reconnect to the right server once the DNS entries have caught up.
>> 
>> Suggested-by: Felix Huettner <[email protected]>
>> Signed-off-by: Felix Moebius <[email protected]>
>> ---
>>  ovsdb/raft-rpc.c | 38 ++++++++++----------------------------
>>  ovsdb/raft-rpc.h |  2 +-
>>  ovsdb/raft.c     | 35 ++++++++++++++++++++++++++++++++---
>>  3 files changed, 43 insertions(+), 32 deletions(-)
>
> Hi, Felix and Felix.  Thanks for the patch!
>
> This is indeed an interesting idea on what to do with servers trying
> to connect to a wrong cluster.  And the solution seems reasonable at
> the first glance.
>
> See some comments below.
>

Hi Ilya, thanks for taking the time to review.

>
>> 
>> diff --git a/ovsdb/raft-rpc.c b/ovsdb/raft-rpc.c
>> index e898a8b55..6e8b3274b 100644
>> --- a/ovsdb/raft-rpc.c
>> +++ b/ovsdb/raft-rpc.c
>> @@ -858,16 +858,16 @@ raft_rpc_to_jsonrpc(const struct uuid *cid,
>>                                   json_array_create_1(args));
>>  }
>>  
>> -/* Parses 'msg' as a Raft message directed to 'sid' and initializes 'rpc'
>> - * appropriately.  On success, returns NULL and the caller owns the 
>> contents of
>> - * 'rpc' and must eventually uninitialize it with raft_rpc_uninit().  On
>> - * failure, returns an error that the caller must eventually free.
>> +/* Parses 'msg' as a Raft message and initializes 'rpc' appropriately.
>> + * On success, returns NULL and the caller owns the contents of 'rpc'
>> + * and must eventually uninitialize it with raft_rpc_uninit().
>> + * On failure, returns an error that the caller must eventually free.
>>   *
>> - * 'cidp' must point to the Raft cluster's ID.  If the cluster ID isn't yet
>> - * known, then '*cidp' must be UUID_ZERO and this function will attempt to
>> - * initialize it based on 'msg'. */
>> + * 'cid' and 'sid' will be set to the cluster id and destination server id
>> + * if the corresponding fields in the Raft message are set. Otherwise they
>> + * are set to UUID_ZERO. */
>>  struct ovsdb_error * OVS_WARN_UNUSED_RESULT
>> -raft_rpc_from_jsonrpc(struct uuid *cidp, const struct uuid *sid,
>> +raft_rpc_from_jsonrpc(struct uuid *cid, struct uuid *sid,
>>                        const struct jsonrpc_msg *msg, union raft_rpc *rpc)
>>  {
>>      memset(rpc, 0, sizeof *rpc);
>> @@ -893,26 +893,8 @@ raft_rpc_from_jsonrpc(struct uuid *cidp, const struct 
>> uuid *sid,
>>      bool is_hello = rpc->type == RAFT_RPC_HELLO_REQUEST;
>>      bool is_add = rpc->type == RAFT_RPC_ADD_SERVER_REQUEST;
>>  
>> -    struct uuid cid;
>> -    if (raft_parse_uuid(&p, "cluster", is_add, &cid)
>> -        && !uuid_equals(&cid, cidp)) {
>> -        if (uuid_is_zero(cidp)) {
>> -            *cidp = cid;
>> -            VLOG_INFO("learned cluster ID "CID_FMT, CID_ARGS(&cid));
>> -        } else {
>> -            ovsdb_parser_raise_error(&p, "wrong cluster "CID_FMT" "
>> -                                     "(expected "CID_FMT")",
>> -                                     CID_ARGS(&cid), CID_ARGS(cidp));
>> -        }
>> -    }
>> -
>> -    struct uuid to_sid;
>> -    if (raft_parse_uuid(&p, "to", is_add || is_hello, &to_sid)
>> -        && !uuid_equals(&to_sid, sid)) {
>> -        ovsdb_parser_raise_error(&p, "misrouted message (addressed to "
>> -                                 SID_FMT" but we're "SID_FMT")",
>> -                                 SID_ARGS(&to_sid), SID_ARGS(sid));
>> -    }
>> +    raft_parse_uuid(&p, "cluster", is_add, cid);
>> +    raft_parse_uuid(&p, "to", is_add || is_hello, sid);
>>  
>>      rpc->common.sid = raft_parse_required_uuid(&p, "from");
>>      rpc->common.comment = nullable_xstrdup(
>> diff --git a/ovsdb/raft-rpc.h b/ovsdb/raft-rpc.h
>> index 7677c35b4..1af181671 100644
>> --- a/ovsdb/raft-rpc.h
>> +++ b/ovsdb/raft-rpc.h
>> @@ -289,7 +289,7 @@ struct jsonrpc_msg *raft_rpc_to_jsonrpc(const struct 
>> uuid *cid,
>>                                          const struct uuid *sid,
>>                                          const union raft_rpc *);
>>  struct ovsdb_error *raft_rpc_from_jsonrpc(struct uuid *cid,
>> -                                          const struct uuid *sid,
>> +                                          struct uuid *sid,
>>                                            const struct jsonrpc_msg *,
>>                                            union raft_rpc *)
>>      OVS_WARN_UNUSED_RESULT;
>> diff --git a/ovsdb/raft.c b/ovsdb/raft.c
>> index 32349c8af..370dfe24c 100644
>> --- a/ovsdb/raft.c
>> +++ b/ovsdb/raft.c
>> @@ -1534,8 +1534,8 @@ raft_conn_receive(struct raft *raft, struct raft_conn 
>> *conn,
>>          return false;
>>      }
>>  
>> -    struct ovsdb_error *error = raft_rpc_from_jsonrpc(&raft->cid, 
>> &raft->sid,
>> -                                                      msg, rpc);
>> +    struct uuid cid, sid;
>> +    struct ovsdb_error *error = raft_rpc_from_jsonrpc(&cid, &sid, msg, rpc);
>>      jsonrpc_msg_destroy(msg);
>>      if (error) {
>>          char *s = ovsdb_error_to_string_free(error);
>> @@ -1544,6 +1544,34 @@ raft_conn_receive(struct raft *raft, struct raft_conn 
>> *conn,
>>          return false;
>
> Should we also consider reconnection if we got a general parsing error
> here?  If we do, maybe we can simplify the code a little and avoid moving
> the checks from the rpc module here.

I thought about this as well, but was concerned that it might have some
unintended side effects. On further thought, I cannot really come up with
a concrete problem that this could cause, so I think this would be neither
helpful nor harmful to do.
Then however, the patch can be simplified to just triggering a connection
reset once in case of a parse error and once more for the case where the
peer sid does not match down below.
Would you prefer that over the current approach?

>
>>      }
>>  
>> +    if (!uuid_is_zero(&cid)) {
>> +        if (uuid_is_zero(&raft->cid)) {
>> +            raft->cid = cid;
>> +            VLOG_INFO("%s: learned cluster ID "CID_FMT,
>> +                      jsonrpc_session_get_name(conn->js), CID_ARGS(&cid));
>> +        } else if (!uuid_equals(&raft->cid, &cid)) {
>> +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>> +            VLOG_WARN_RL(&rl, "%s: ignoring message to unexpected cluster 
>> ID "
>> +                         "(addressed to "CID_FMT" but we are "CID_FMT")",
>> +                         jsonrpc_session_get_name(conn->js),
>> +                         CID_ARGS(&cid), CID_ARGS(&raft->cid));
>> +            raft_rpc_uninit(rpc);
>> +            jsonrpc_session_force_reconnect(conn->js);
>> +            return false;
>> +        }
>> +    }
>> +
>> +    if (!uuid_is_zero(&sid) && !uuid_equals(&raft->sid, &sid)) {
>> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>> +        VLOG_WARN_RL(&rl, "%s: ignoring message to unexpected server ID "
>> +                     "(addressed to "SID_FMT" but we are "SID_FMT")",
>> +                     jsonrpc_session_get_name(conn->js),
>> +                     SID_ARGS(&sid), SID_ARGS(&raft->sid));
>> +        raft_rpc_uninit(rpc);
>> +        jsonrpc_session_force_reconnect(conn->js);
>> +        return false;
>> +    }
>> +
>>      if (uuid_is_zero(&conn->sid)) {
>>          static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(50, 50);
>>          conn->sid = rpc->common.sid;
>> @@ -1551,11 +1579,12 @@ raft_conn_receive(struct raft *raft, struct 
>> raft_conn *conn,
>>                       jsonrpc_session_get_name(conn->js), 
>> SID_ARGS(&conn->sid));
>>      } else if (!uuid_equals(&conn->sid, &rpc->common.sid)) {
>>          static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>> -        VLOG_WARN_RL(&rl, "%s: ignoring message with unexpected server ID "
>> +        VLOG_WARN_RL(&rl, "%s: ignoring message from unexpected server ID "
>
> I'm not sure we should change the logs.  Some users including me may be used 
> to
> the current wording in the logs, or may have some automation looking for them.
> New logs don't seem to be much more clear than the current ones.  This applies
> to all 3 of them.
>
> Best regards, Ilya Maximets.

Yes, I aggree, we shouldn't change the logs. I had not thought about it
like that.

Kind regards,
Felix

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to