On 12/23/25 9:55 AM, Felix Moebius wrote:
> On Sat Dec 20, 2025 at 12:38 AM CET, Ilya Maximets wrote:
>> On 12/1/25 4:05 PM, Felix Moebius via dev wrote:
>>> When an ovsdb raft cluster in kubernetes is restarted the individual DBs
>>> may get assigned different ips. The corresponding DNS entries may take a
>>> moment to update, such that other cluster members will still see the old
>>> ip address for some time.
>>> During that time an ovsdb server from another cluster or another server
>>> from the same cluster may appear under the old ip address such that the
>>> remaining cluster members reconnect to this ovsdb server.
>>> Currently they will notice that either the server id or cluster id does
>>> not match what they expect, but will keep the connection alive and just
>>> ignore messages from that connection.
>>>
>>> Fix this by terminating connections for which we notice a mismatched
>>> cluster or server id to force a reconnect such that we will eventually
>>> reconnect to the right server once the DNS entries have caught up.
>>>
>>> Suggested-by: Felix Huettner <[email protected]>
>>> Signed-off-by: Felix Moebius <[email protected]>
>>> ---
>>>  ovsdb/raft-rpc.c | 38 ++++++++++----------------------------
>>>  ovsdb/raft-rpc.h |  2 +-
>>>  ovsdb/raft.c     | 35 ++++++++++++++++++++++++++++++++---
>>>  3 files changed, 43 insertions(+), 32 deletions(-)
>>
>> Hi, Felix and Felix.  Thanks for the patch!
>>
>> This is indeed an interesting idea on what to do with servers trying
>> to connect to a wrong cluster.  And the solution seems reasonable at
>> the first glance.
>>
>> See some comments below.
>>
> 
> Hi Ilya, thanks for taking the time to review.
> 
>>
>>>
>>> diff --git a/ovsdb/raft-rpc.c b/ovsdb/raft-rpc.c
>>> index e898a8b55..6e8b3274b 100644
>>> --- a/ovsdb/raft-rpc.c
>>> +++ b/ovsdb/raft-rpc.c
>>> @@ -858,16 +858,16 @@ raft_rpc_to_jsonrpc(const struct uuid *cid,
>>>                                   json_array_create_1(args));
>>>  }
>>>  
>>> -/* Parses 'msg' as a Raft message directed to 'sid' and initializes 'rpc'
>>> - * appropriately.  On success, returns NULL and the caller owns the 
>>> contents of
>>> - * 'rpc' and must eventually uninitialize it with raft_rpc_uninit().  On
>>> - * failure, returns an error that the caller must eventually free.
>>> +/* Parses 'msg' as a Raft message and initializes 'rpc' appropriately.
>>> + * On success, returns NULL and the caller owns the contents of 'rpc'
>>> + * and must eventually uninitialize it with raft_rpc_uninit().
>>> + * On failure, returns an error that the caller must eventually free.
>>>   *
>>> - * 'cidp' must point to the Raft cluster's ID.  If the cluster ID isn't yet
>>> - * known, then '*cidp' must be UUID_ZERO and this function will attempt to
>>> - * initialize it based on 'msg'. */
>>> + * 'cid' and 'sid' will be set to the cluster id and destination server id
>>> + * if the corresponding fields in the Raft message are set. Otherwise they
>>> + * are set to UUID_ZERO. */
>>>  struct ovsdb_error * OVS_WARN_UNUSED_RESULT
>>> -raft_rpc_from_jsonrpc(struct uuid *cidp, const struct uuid *sid,
>>> +raft_rpc_from_jsonrpc(struct uuid *cid, struct uuid *sid,
>>>                        const struct jsonrpc_msg *msg, union raft_rpc *rpc)
>>>  {
>>>      memset(rpc, 0, sizeof *rpc);
>>> @@ -893,26 +893,8 @@ raft_rpc_from_jsonrpc(struct uuid *cidp, const struct 
>>> uuid *sid,
>>>      bool is_hello = rpc->type == RAFT_RPC_HELLO_REQUEST;
>>>      bool is_add = rpc->type == RAFT_RPC_ADD_SERVER_REQUEST;
>>>  
>>> -    struct uuid cid;
>>> -    if (raft_parse_uuid(&p, "cluster", is_add, &cid)
>>> -        && !uuid_equals(&cid, cidp)) {
>>> -        if (uuid_is_zero(cidp)) {
>>> -            *cidp = cid;
>>> -            VLOG_INFO("learned cluster ID "CID_FMT, CID_ARGS(&cid));
>>> -        } else {
>>> -            ovsdb_parser_raise_error(&p, "wrong cluster "CID_FMT" "
>>> -                                     "(expected "CID_FMT")",
>>> -                                     CID_ARGS(&cid), CID_ARGS(cidp));
>>> -        }
>>> -    }
>>> -
>>> -    struct uuid to_sid;
>>> -    if (raft_parse_uuid(&p, "to", is_add || is_hello, &to_sid)
>>> -        && !uuid_equals(&to_sid, sid)) {
>>> -        ovsdb_parser_raise_error(&p, "misrouted message (addressed to "
>>> -                                 SID_FMT" but we're "SID_FMT")",
>>> -                                 SID_ARGS(&to_sid), SID_ARGS(sid));
>>> -    }
>>> +    raft_parse_uuid(&p, "cluster", is_add, cid);
>>> +    raft_parse_uuid(&p, "to", is_add || is_hello, sid);
>>>  
>>>      rpc->common.sid = raft_parse_required_uuid(&p, "from");
>>>      rpc->common.comment = nullable_xstrdup(
>>> diff --git a/ovsdb/raft-rpc.h b/ovsdb/raft-rpc.h
>>> index 7677c35b4..1af181671 100644
>>> --- a/ovsdb/raft-rpc.h
>>> +++ b/ovsdb/raft-rpc.h
>>> @@ -289,7 +289,7 @@ struct jsonrpc_msg *raft_rpc_to_jsonrpc(const struct 
>>> uuid *cid,
>>>                                          const struct uuid *sid,
>>>                                          const union raft_rpc *);
>>>  struct ovsdb_error *raft_rpc_from_jsonrpc(struct uuid *cid,
>>> -                                          const struct uuid *sid,
>>> +                                          struct uuid *sid,
>>>                                            const struct jsonrpc_msg *,
>>>                                            union raft_rpc *)
>>>      OVS_WARN_UNUSED_RESULT;
>>> diff --git a/ovsdb/raft.c b/ovsdb/raft.c
>>> index 32349c8af..370dfe24c 100644
>>> --- a/ovsdb/raft.c
>>> +++ b/ovsdb/raft.c
>>> @@ -1534,8 +1534,8 @@ raft_conn_receive(struct raft *raft, struct raft_conn 
>>> *conn,
>>>          return false;
>>>      }
>>>  
>>> -    struct ovsdb_error *error = raft_rpc_from_jsonrpc(&raft->cid, 
>>> &raft->sid,
>>> -                                                      msg, rpc);
>>> +    struct uuid cid, sid;
>>> +    struct ovsdb_error *error = raft_rpc_from_jsonrpc(&cid, &sid, msg, 
>>> rpc);
>>>      jsonrpc_msg_destroy(msg);
>>>      if (error) {
>>>          char *s = ovsdb_error_to_string_free(error);
>>> @@ -1544,6 +1544,34 @@ raft_conn_receive(struct raft *raft, struct 
>>> raft_conn *conn,
>>>          return false;
>>
>> Should we also consider reconnection if we got a general parsing error
>> here?  If we do, maybe we can simplify the code a little and avoid moving
>> the checks from the rpc module here.
> 
> I thought about this as well, but was concerned that it might have some
> unintended side effects. On further thought, I cannot really come up with
> a concrete problem that this could cause, so I think this would be neither
> helpful nor harmful to do.
> Then however, the patch can be simplified to just triggering a connection
> reset once in case of a parse error and once more for the case where the
> peer sid does not match down below.
> Would you prefer that over the current approach?

Yeah, I think it will be simpler if we avoid moving the code around.

Best regards, Ilya Maximets.

> 
>>
>>>      }
>>>  
>>> +    if (!uuid_is_zero(&cid)) {
>>> +        if (uuid_is_zero(&raft->cid)) {
>>> +            raft->cid = cid;
>>> +            VLOG_INFO("%s: learned cluster ID "CID_FMT,
>>> +                      jsonrpc_session_get_name(conn->js), CID_ARGS(&cid));
>>> +        } else if (!uuid_equals(&raft->cid, &cid)) {
>>> +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>>> +            VLOG_WARN_RL(&rl, "%s: ignoring message to unexpected cluster 
>>> ID "
>>> +                         "(addressed to "CID_FMT" but we are "CID_FMT")",
>>> +                         jsonrpc_session_get_name(conn->js),
>>> +                         CID_ARGS(&cid), CID_ARGS(&raft->cid));
>>> +            raft_rpc_uninit(rpc);
>>> +            jsonrpc_session_force_reconnect(conn->js);
>>> +            return false;
>>> +        }
>>> +    }
>>> +
>>> +    if (!uuid_is_zero(&sid) && !uuid_equals(&raft->sid, &sid)) {
>>> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>>> +        VLOG_WARN_RL(&rl, "%s: ignoring message to unexpected server ID "
>>> +                     "(addressed to "SID_FMT" but we are "SID_FMT")",
>>> +                     jsonrpc_session_get_name(conn->js),
>>> +                     SID_ARGS(&sid), SID_ARGS(&raft->sid));
>>> +        raft_rpc_uninit(rpc);
>>> +        jsonrpc_session_force_reconnect(conn->js);
>>> +        return false;
>>> +    }
>>> +
>>>      if (uuid_is_zero(&conn->sid)) {
>>>          static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(50, 50);
>>>          conn->sid = rpc->common.sid;
>>> @@ -1551,11 +1579,12 @@ raft_conn_receive(struct raft *raft, struct 
>>> raft_conn *conn,
>>>                       jsonrpc_session_get_name(conn->js), 
>>> SID_ARGS(&conn->sid));
>>>      } else if (!uuid_equals(&conn->sid, &rpc->common.sid)) {
>>>          static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>>> -        VLOG_WARN_RL(&rl, "%s: ignoring message with unexpected server ID "
>>> +        VLOG_WARN_RL(&rl, "%s: ignoring message from unexpected server ID "
>>
>> I'm not sure we should change the logs.  Some users including me may be used 
>> to
>> the current wording in the logs, or may have some automation looking for 
>> them.
>> New logs don't seem to be much more clear than the current ones.  This 
>> applies
>> to all 3 of them.
>>
>> Best regards, Ilya Maximets.
> 
> Yes, I aggree, we shouldn't change the logs. I had not thought about it
> like that.
> 
> Kind regards,
> Felix
> 

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to