Re: [ovs-dev] [PATCH ovn v2] controller: Fix bfd up too early after unexpected reboot.
On 3/26/26 6:29 PM, Xavier Simonart wrote:
> Hi Dumitru
>
Hi Xavier,
> On Thu, Mar 26, 2026 at 5:50 PM Dumitru Ceara wrote:
>
>> On 3/26/26 5:26 PM, Xavier Simonart wrote:
>>> Hi Dumitru
>>>
>>
>> Hi Xavier,
>>
>>
>>> I also have one comment on one of your (no so) nits below.
>>> Thanks
>>> Xavier
>>>
>>
>> [...]
>>
>> @@ -3271,22 +3308,44 @@ dump_statistics() {
>> ch1_rep=$(grep -c "ICMP echo reply" ch1.tcpdump)
>> ch2_req=$(grep -c "ICMP echo request" ch2.tcpdump)
>> ch2_rep=$(grep -c "ICMP echo reply" ch2.tcpdump)
>> +ch3_req=$(grep -c "ICMP echo request" ch3.tcpdump)
>> +ch3_rep=$(grep -c "ICMP echo reply" ch3.tcpdump)
>> gw1_req=$(grep -c "ICMP echo request" gw1.tcpdump)
>> gw1_rep=$(grep -c "ICMP echo reply" gw1.tcpdump)
>> gw2_req=$(grep -c "ICMP echo request" gw2.tcpdump)
>> gw2_rep=$(grep -c "ICMP echo reply" gw2.tcpdump)
>> gw3_req=$(grep -c "ICMP echo request" gw3.tcpdump)
>> gw3_rep=$(grep -c "ICMP echo reply" gw3.tcpdump)
>> -echo "$n1 claims in gw1, $n2 in gw2 and $n3 on gw3"
>> -echo "ch2_request=$ch2_req gw1_request=$gw1_req
gw2_request=$gw2_req gw3_request=$gw3_req ch1_request=$ch1_req
ch1_reply=$ch1_rep gw1_reply=$gw1_rep gw2_reply=$gw2_rep
>> gw3_reply=$gw3_rep
ch2_reply=$ch2_rep"
>> +echo "$n1 claims in gw1, $n2 in gw2 and $n3 on gw3" >&2
>> +echo "ch3_req=$ch3_req gw_req=($gw1_req + $gw2_req +$gw3_req)
ch1_req=$ch1_req ch1_rep=$ch1_rep gw_rep=($gw1_rep + $gw2_rep +
>> $gw3_rep)
ch3_rep=$ch3_rep ch2=($ch2_req+$ch2_rep)" >&2
>> +echo "$((ch3_req - ch3_rep))"
>> }
>>
>> -check_migration_between_gw1_and_gw2() {
>> -action=$1
>> -send_background_packets
>> +add_port() {
>> +bridge=$1
>> +interface=$2
>> +address=$3
>> +echo "Adding $bridge $interface $address"
>> +
>> +pid=$(podman inspect -f '{{.State.Pid}}' ovn-gw-1)
>> +ln -sf /proc/$pid/ns/net /var/run/netns/$pid
>> +port=$(OVS_RUNDIR= ovs-vsctl --data=bare --no-heading
--columns=name find interface \
>> + external_ids:container_id=ovn-gw-1
external_ids:container_iface="$interface")
>> +port="${port:0:13}"
>> +ip link add "${port}_l" type veth peer name "${port}_c"
>> +ip link set "${port}_l" up
>> +ip link set "${port}_c" netns $pid
>> +ip netns exec $pid ip link set dev "${port}_c" name "$interface"
>> +ip netns exec $pid ip link set "$interface" up
>> +if [[ -n "$address" ]]; then
>> +ip netns exec $pid ip addr add "$address" dev "$interface"
>> +fi
I might be wrong but I think nobody cleans up any of these ports created
in the ovn-gw-1 container. Do we need some on_exit() calls here?
>>> I do not think so. We stop a container, and ports on host were deleted
>> as a
>>> consequence.
>>> add_port just "repairs" the container, after restarting it, adding back
>> the
>>> ports which
>>> ovn-fake-multinode initially added.
>>> We could debate whether add_port should itself be added in on_exit, right
>>> after
>>> podman stop the container. But it looked overkill to me (we do add_port
>>> right after
>>> podman stop/start, so on_exit would only be useful if podman stop/start
>>> failed (in which case cluster
>>> is anyhow in bad state) or if e.g. we ctrl-c just between podman stop and
>>> add_port
>>> WDYT?
>>>
>>
>> Maybe I'm misunderstanding stuff but we don't stop/start containers
>> during one execution of the multinode.at testsuite. At least not in
>> GitHub CI. What we do is:
>>
> I think there is a misunderstanding: the multinode test now stops and
> starts the container within the test.
> multinode.at calls podman stop and podman start.
> When the ovn-gw-1 container is stopped by the multinode test (test 18 so
> far), the (host related) ports are deleted.
> So the container (ovn-gw-1) lacks those ports when restarted (as those
> ports were initially added by ovn-fake-multinode).
> So, we add them back here to ensure the cluster is in the same state as
> before the test run.
> I've added a small comment in the test of patch v3 regarding this.
>
Ah, I completely misunderstood that part, thanks for the explanation.
I'll have a closer look at v3.
Regards,
Dumitru
>>
>> - start the fake-multinode cluster (this does podman run under the hood):
>> CHASSIS_COUNT=4 GW_COUNT=4 ./ovn_cluster.sh start
>>
>> - run "make check-multinode": this only runs "podman exec" commands
>> inside the containers started above
>>
>> - collect logs
>>
>> - stop cluster:
>> ./ovn_cluster.sh stop
>>
>> So what I meant was, if you run the full test suite, when this specific
>> test that called "add_ports()" ends we still have the veth pair
>> configured on the system, at least until the full test suite ends.
>>
>> That may cause issues as tests that happen to run after this sp
Re: [ovs-dev] [PATCH ovn v2] controller: Fix bfd up too early after unexpected reboot.
Hi Dumitru
On Thu, Mar 26, 2026 at 5:50 PM Dumitru Ceara wrote:
> On 3/26/26 5:26 PM, Xavier Simonart wrote:
> > Hi Dumitru
> >
>
> Hi Xavier,
>
>
> > I also have one comment on one of your (no so) nits below.
> > Thanks
> > Xavier
> >
>
> [...]
>
> @@ -3271,22 +3308,44 @@ dump_statistics() {
> ch1_rep=$(grep -c "ICMP echo reply" ch1.tcpdump)
> ch2_req=$(grep -c "ICMP echo request" ch2.tcpdump)
> ch2_rep=$(grep -c "ICMP echo reply" ch2.tcpdump)
> +ch3_req=$(grep -c "ICMP echo request" ch3.tcpdump)
> +ch3_rep=$(grep -c "ICMP echo reply" ch3.tcpdump)
> gw1_req=$(grep -c "ICMP echo request" gw1.tcpdump)
> gw1_rep=$(grep -c "ICMP echo reply" gw1.tcpdump)
> gw2_req=$(grep -c "ICMP echo request" gw2.tcpdump)
> gw2_rep=$(grep -c "ICMP echo reply" gw2.tcpdump)
> gw3_req=$(grep -c "ICMP echo request" gw3.tcpdump)
> gw3_rep=$(grep -c "ICMP echo reply" gw3.tcpdump)
> -echo "$n1 claims in gw1, $n2 in gw2 and $n3 on gw3"
> -echo "ch2_request=$ch2_req gw1_request=$gw1_req
> >> gw2_request=$gw2_req gw3_request=$gw3_req ch1_request=$ch1_req
> >> ch1_reply=$ch1_rep gw1_reply=$gw1_rep gw2_reply=$gw2_rep
> gw3_reply=$gw3_rep
> >> ch2_reply=$ch2_rep"
> +echo "$n1 claims in gw1, $n2 in gw2 and $n3 on gw3" >&2
> +echo "ch3_req=$ch3_req gw_req=($gw1_req + $gw2_req +$gw3_req)
> >> ch1_req=$ch1_req ch1_rep=$ch1_rep gw_rep=($gw1_rep + $gw2_rep +
> $gw3_rep)
> >> ch3_rep=$ch3_rep ch2=($ch2_req+$ch2_rep)" >&2
> +echo "$((ch3_req - ch3_rep))"
> }
>
> -check_migration_between_gw1_and_gw2() {
> -action=$1
> -send_background_packets
> +add_port() {
> +bridge=$1
> +interface=$2
> +address=$3
> +echo "Adding $bridge $interface $address"
> +
> +pid=$(podman inspect -f '{{.State.Pid}}' ovn-gw-1)
> +ln -sf /proc/$pid/ns/net /var/run/netns/$pid
> +port=$(OVS_RUNDIR= ovs-vsctl --data=bare --no-heading
> >> --columns=name find interface \
> + external_ids:container_id=ovn-gw-1
> >> external_ids:container_iface="$interface")
> +port="${port:0:13}"
> +ip link add "${port}_l" type veth peer name "${port}_c"
> +ip link set "${port}_l" up
> +ip link set "${port}_c" netns $pid
> +ip netns exec $pid ip link set dev "${port}_c" name "$interface"
> +ip netns exec $pid ip link set "$interface" up
> +if [[ -n "$address" ]]; then
> +ip netns exec $pid ip addr add "$address" dev "$interface"
> +fi
> >>
> >> I might be wrong but I think nobody cleans up any of these ports created
> >> in the ovn-gw-1 container. Do we need some on_exit() calls here?
> >>
> > I do not think so. We stop a container, and ports on host were deleted
> as a
> > consequence.
> > add_port just "repairs" the container, after restarting it, adding back
> the
> > ports which
> > ovn-fake-multinode initially added.
> > We could debate whether add_port should itself be added in on_exit, right
> > after
> > podman stop the container. But it looked overkill to me (we do add_port
> > right after
> > podman stop/start, so on_exit would only be useful if podman stop/start
> > failed (in which case cluster
> > is anyhow in bad state) or if e.g. we ctrl-c just between podman stop and
> > add_port
> > WDYT?
> >
>
> Maybe I'm misunderstanding stuff but we don't stop/start containers
> during one execution of the multinode.at testsuite. At least not in
> GitHub CI. What we do is:
>
I think there is a misunderstanding: the multinode test now stops and
starts the container within the test.
multinode.at calls podman stop and podman start.
When the ovn-gw-1 container is stopped by the multinode test (test 18 so
far), the (host related) ports are deleted.
So the container (ovn-gw-1) lacks those ports when restarted (as those
ports were initially added by ovn-fake-multinode).
So, we add them back here to ensure the cluster is in the same state as
before the test run.
I've added a small comment in the test of patch v3 regarding this.
>
> - start the fake-multinode cluster (this does podman run under the hood):
> CHASSIS_COUNT=4 GW_COUNT=4 ./ovn_cluster.sh start
>
> - run "make check-multinode": this only runs "podman exec" commands
> inside the containers started above
>
> - collect logs
>
> - stop cluster:
> ./ovn_cluster.sh stop
>
> So what I meant was, if you run the full test suite, when this specific
> test that called "add_ports()" ends we still have the veth pair
> configured on the system, at least until the full test suite ends.
>
> That may cause issues as tests that happen to run after this specific
> one will have to take into account that some veths might exist.
>
> In general, we (should) try to cleanup everything that was created by a
> test (an instance of AT_SETUP) when that test ends successfully or with
> a failure.
>
Agr
Re: [ovs-dev] [PATCH ovn v2] controller: Fix bfd up too early after unexpected reboot.
On 3/26/26 5:26 PM, Xavier Simonart wrote:
> Hi Dumitru
>
Hi Xavier,
> I also have one comment on one of your (no so) nits below.
> Thanks
> Xavier
>
[...]
@@ -3271,22 +3308,44 @@ dump_statistics() {
ch1_rep=$(grep -c "ICMP echo reply" ch1.tcpdump)
ch2_req=$(grep -c "ICMP echo request" ch2.tcpdump)
ch2_rep=$(grep -c "ICMP echo reply" ch2.tcpdump)
+ch3_req=$(grep -c "ICMP echo request" ch3.tcpdump)
+ch3_rep=$(grep -c "ICMP echo reply" ch3.tcpdump)
gw1_req=$(grep -c "ICMP echo request" gw1.tcpdump)
gw1_rep=$(grep -c "ICMP echo reply" gw1.tcpdump)
gw2_req=$(grep -c "ICMP echo request" gw2.tcpdump)
gw2_rep=$(grep -c "ICMP echo reply" gw2.tcpdump)
gw3_req=$(grep -c "ICMP echo request" gw3.tcpdump)
gw3_rep=$(grep -c "ICMP echo reply" gw3.tcpdump)
-echo "$n1 claims in gw1, $n2 in gw2 and $n3 on gw3"
-echo "ch2_request=$ch2_req gw1_request=$gw1_req
>> gw2_request=$gw2_req gw3_request=$gw3_req ch1_request=$ch1_req
>> ch1_reply=$ch1_rep gw1_reply=$gw1_rep gw2_reply=$gw2_rep gw3_reply=$gw3_rep
>> ch2_reply=$ch2_rep"
+echo "$n1 claims in gw1, $n2 in gw2 and $n3 on gw3" >&2
+echo "ch3_req=$ch3_req gw_req=($gw1_req + $gw2_req +$gw3_req)
>> ch1_req=$ch1_req ch1_rep=$ch1_rep gw_rep=($gw1_rep + $gw2_rep + $gw3_rep)
>> ch3_rep=$ch3_rep ch2=($ch2_req+$ch2_rep)" >&2
+echo "$((ch3_req - ch3_rep))"
}
-check_migration_between_gw1_and_gw2() {
-action=$1
-send_background_packets
+add_port() {
+bridge=$1
+interface=$2
+address=$3
+echo "Adding $bridge $interface $address"
+
+pid=$(podman inspect -f '{{.State.Pid}}' ovn-gw-1)
+ln -sf /proc/$pid/ns/net /var/run/netns/$pid
+port=$(OVS_RUNDIR= ovs-vsctl --data=bare --no-heading
>> --columns=name find interface \
+ external_ids:container_id=ovn-gw-1
>> external_ids:container_iface="$interface")
+port="${port:0:13}"
+ip link add "${port}_l" type veth peer name "${port}_c"
+ip link set "${port}_l" up
+ip link set "${port}_c" netns $pid
+ip netns exec $pid ip link set dev "${port}_c" name "$interface"
+ip netns exec $pid ip link set "$interface" up
+if [[ -n "$address" ]]; then
+ip netns exec $pid ip addr add "$address" dev "$interface"
+fi
>>
>> I might be wrong but I think nobody cleans up any of these ports created
>> in the ovn-gw-1 container. Do we need some on_exit() calls here?
>>
> I do not think so. We stop a container, and ports on host were deleted as a
> consequence.
> add_port just "repairs" the container, after restarting it, adding back the
> ports which
> ovn-fake-multinode initially added.
> We could debate whether add_port should itself be added in on_exit, right
> after
> podman stop the container. But it looked overkill to me (we do add_port
> right after
> podman stop/start, so on_exit would only be useful if podman stop/start
> failed (in which case cluster
> is anyhow in bad state) or if e.g. we ctrl-c just between podman stop and
> add_port
> WDYT?
>
Maybe I'm misunderstanding stuff but we don't stop/start containers
during one execution of the multinode.at testsuite. At least not in
GitHub CI. What we do is:
- start the fake-multinode cluster (this does podman run under the hood):
CHASSIS_COUNT=4 GW_COUNT=4 ./ovn_cluster.sh start
- run "make check-multinode": this only runs "podman exec" commands
inside the containers started above
- collect logs
- stop cluster:
./ovn_cluster.sh stop
So what I meant was, if you run the full test suite, when this specific
test that called "add_ports()" ends we still have the veth pair
configured on the system, at least until the full test suite ends.
That may cause issues as tests that happen to run after this specific
one will have to take into account that some veths might exist.
In general, we (should) try to cleanup everything that was created by a
test (an instance of AT_SETUP) when that test ends successfully or with
a failure.
Regards,
Dumitru
___
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH ovn v2] controller: Fix bfd up too early after unexpected reboot.
Hi Dumitru
I also have one comment on one of your (no so) nits below.
Thanks
Xavier
On Wed, Mar 25, 2026 at 4:31 PM Dumitru Ceara wrote:
> On 3/12/26 8:35 PM, Mark Michelson via dev wrote:
> > Hi Xavier,
> >
>
> Hi Xavier, Mark,
>
> > It took a bit for me to look through all of this, but I believe this
> > looks good to me. Thanks for adding the notes about the offline
> > discussions, as I had not recalled all the details about those
> > discussions between then and now.
> >
> > Acked-by: Mark Michelson
> >
>
> I think this change makes sense too. I do have some very small comments
> below but I might be able to address them myself and just squash that
> into this version of the patch before applying it to main.
>
> Please let me know what you think.
>
> > On Mon, Mar 9, 2026 at 3:36 PM Xavier Simonart
> wrote:
> >>
> >> If a server unexpectedly rebooted, OVS, when restarted, sets BFD
> >> UP on bfd-enabled geneve tunnels.
> >> However, if it takes time to restart OVN, an HA gw chassis
> >> would attract the traffic while being unable to handle it
> >> (as no flows), resulting in traffic loss.
> >>
> >> This is fixed by re-using ovs flow-restore-wait.
> >> If set, OVS waits (prevents upcalls, ignores bfd, ...) until reset.
> >> Once OVS receives the notification of flow-restore-wait being false,
> >> it restarts handling upcalls, bfd... and ignores any new change to
> >> flow-restore-wait.
> >>
> >> Hence, on chassis hosting ha gateways, OVN toggles flow-restore-wait:
> >> it set it to false, waits for ack from OVS and then sets it back to
> true.
> >> If server reboots, OVS will see flow-restore-wait being true.
> >>
> >> OVN also sets external_ids->ovn-managed-flow-restore-wait when setting
> >> flow-restore-wait. When set, it tells that OVN once set
> flow-restore-wait.
> >>
> >> "ovs-ctl restart" also uses flow-restore-wait: when called, it saves the
> >> flows, stops "ovs-vswitchd", sets "flow-restore-wait" to true, restarts
> >> "ovs-vswitchd", restores the flows and finally removes
> "flow-restore-wait".
> >> So OVS will wait either for "ovs-ctl restart" to remove
> "flow-restore-wait"
> >> or for OVN to set "flow-restore-wait" to false.
> >>
> >> Reported-at: https://issues.redhat.com/browse/FDP-3075
> >> Signed-off-by: Xavier Simonart
> >>
> >> ---
> >> -v2 : - Updated based on Mark's feedback (commit message, comments).
> >> - Avoid setting flow-restore-wait for computes.
> >> - Add external_ids->ovn-managed-flow-restore-wait.
> >> - Updated test: add test for compute update + nits (variable name
> changes)
> >> ---
> >> controller/bfd.c| 5 +-
> >> controller/bfd.h| 4 +-
> >> controller/ovn-controller.8.xml | 11 +
> >> controller/ovn-controller.c | 171 +-
> >> tests/multinode-macros.at | 22 ++
> >> tests/multinode.at | 546 ++--
> >> 6 files changed, 584 insertions(+), 175 deletions(-)
> >>
> >> diff --git a/controller/bfd.c b/controller/bfd.c
> >> index 3b0c3f6da..56bfa4936 100644
> >> --- a/controller/bfd.c
> >> +++ b/controller/bfd.c
> >> @@ -117,13 +117,14 @@ bfd_calculate_active_tunnels(const struct
> ovsrec_bridge *br_int,
> >> *
> >> * If 'our_chassis' is C5 then this function returns empty bfd set.
> >> */
> >> -void
> >> +bool
> >> bfd_calculate_chassis(
> >> const struct sbrec_chassis *our_chassis,
> >> const struct sbrec_ha_chassis_group_table *ha_chassis_grp_table,
> >> struct sset *bfd_chassis)
> >> {
> >> const struct sbrec_ha_chassis_group *ha_chassis_grp;
> >> +bool chassis_is_ha_gw = false;
> >> SBREC_HA_CHASSIS_GROUP_TABLE_FOR_EACH (ha_chassis_grp,
> >> ha_chassis_grp_table) {
> >> bool is_ha_chassis = false;
> >> @@ -143,6 +144,7 @@ bfd_calculate_chassis(
> >> sset_add(&grp_chassis, ha_ch->chassis->name);
> >> if (our_chassis == ha_ch->chassis) {
> >> is_ha_chassis = true;
> >> +chassis_is_ha_gw = true;
> >> bfd_setup_required = true;
> >> }
> >> }
> >> @@ -178,6 +180,7 @@ bfd_calculate_chassis(
> >> }
> >> sset_destroy(&grp_chassis);
> >> }
> >> +return chassis_is_ha_gw;
> >> }
> >>
> >> void
> >> diff --git a/controller/bfd.h b/controller/bfd.h
> >> index f8fece5a5..3e3384891 100644
> >> --- a/controller/bfd.h
> >> +++ b/controller/bfd.h
> >> @@ -16,6 +16,8 @@
> >> #ifndef OVN_BFD_H
> >> #define OVN_BFD_H 1
> >>
> >> +#include
> >> +
> >> struct hmap;
> >> struct ovsdb_idl;
> >> struct ovsdb_idl_index;
> >> @@ -36,7 +38,7 @@ void bfd_run(const struct ovsrec_interface_table *,
> >> const struct sbrec_sb_global_table *,
> >> const struct ovsrec_open_vswitch_table *);
> >>
> >> -void bfd_calculate_chassis(
> >> +bool bfd_calculate_chassis(
> >> const struct sbrec_chassis *,
> >> const struct sbr
Re: [ovs-dev] [PATCH ovn v2] controller: Fix bfd up too early after unexpected reboot.
Hi Dumitru
Thanks for looking into this.
On Wed, Mar 25, 2026 at 4:35 PM Dumitru Ceara wrote:
> On 3/25/26 4:31 PM, Dumitru Ceara wrote:
> > On 3/12/26 8:35 PM, Mark Michelson via dev wrote:
> >> Hi Xavier,
> >>
> >
> > Hi Xavier, Mark,
> >
> >> It took a bit for me to look through all of this, but I believe this
> >> looks good to me. Thanks for adding the notes about the offline
> >> discussions, as I had not recalled all the details about those
> >> discussions between then and now.
> >>
> >> Acked-by: Mark Michelson
> >>
> >
> > I think this change makes sense too. I do have some very small comments
> > below but I might be able to address them myself and just squash that
> > into this version of the patch before applying it to main.
> >
> > Please let me know what you think.
> >
>
> Hmm, this actually failed in CI:
>
> https://github.com/dceara/ovn/actions/runs/23547684046/job/68553636598
>
> Xavier, would you happen to have some time to look into it by any chance?
>
I think that it is a bug in the test added by the patch: it uses an
incorrect socket path (which depends on the way
the host OVS was installed/started).
I'll provide a v2., and while at it, will go through the 'nits'
>
Thanks,
> Dumitru
>
Thanks
Xavier
>
> >> On Mon, Mar 9, 2026 at 3:36 PM Xavier Simonart
> wrote:
> >>>
> >>> If a server unexpectedly rebooted, OVS, when restarted, sets BFD
> >>> UP on bfd-enabled geneve tunnels.
> >>> However, if it takes time to restart OVN, an HA gw chassis
> >>> would attract the traffic while being unable to handle it
> >>> (as no flows), resulting in traffic loss.
> >>>
> >>> This is fixed by re-using ovs flow-restore-wait.
> >>> If set, OVS waits (prevents upcalls, ignores bfd, ...) until reset.
> >>> Once OVS receives the notification of flow-restore-wait being false,
> >>> it restarts handling upcalls, bfd... and ignores any new change to
> >>> flow-restore-wait.
> >>>
> >>> Hence, on chassis hosting ha gateways, OVN toggles flow-restore-wait:
> >>> it set it to false, waits for ack from OVS and then sets it back to
> true.
> >>> If server reboots, OVS will see flow-restore-wait being true.
> >>>
> >>> OVN also sets external_ids->ovn-managed-flow-restore-wait when setting
> >>> flow-restore-wait. When set, it tells that OVN once set
> flow-restore-wait.
> >>>
> >>> "ovs-ctl restart" also uses flow-restore-wait: when called, it saves
> the
> >>> flows, stops "ovs-vswitchd", sets "flow-restore-wait" to true, restarts
> >>> "ovs-vswitchd", restores the flows and finally removes
> "flow-restore-wait".
> >>> So OVS will wait either for "ovs-ctl restart" to remove
> "flow-restore-wait"
> >>> or for OVN to set "flow-restore-wait" to false.
> >>>
> >>> Reported-at: https://issues.redhat.com/browse/FDP-3075
> >>> Signed-off-by: Xavier Simonart
> >>>
> >>> ---
> >>> -v2 : - Updated based on Mark's feedback (commit message, comments).
> >>> - Avoid setting flow-restore-wait for computes.
> >>> - Add external_ids->ovn-managed-flow-restore-wait.
> >>> - Updated test: add test for compute update + nits (variable
> name changes)
> >>> ---
> >>> controller/bfd.c| 5 +-
> >>> controller/bfd.h| 4 +-
> >>> controller/ovn-controller.8.xml | 11 +
> >>> controller/ovn-controller.c | 171 +-
> >>> tests/multinode-macros.at | 22 ++
> >>> tests/multinode.at | 546
> ++--
> >>> 6 files changed, 584 insertions(+), 175 deletions(-)
> >>>
> >>> diff --git a/controller/bfd.c b/controller/bfd.c
> >>> index 3b0c3f6da..56bfa4936 100644
> >>> --- a/controller/bfd.c
> >>> +++ b/controller/bfd.c
> >>> @@ -117,13 +117,14 @@ bfd_calculate_active_tunnels(const struct
> ovsrec_bridge *br_int,
> >>> *
> >>> * If 'our_chassis' is C5 then this function returns empty bfd set.
> >>> */
> >>> -void
> >>> +bool
> >>> bfd_calculate_chassis(
> >>> const struct sbrec_chassis *our_chassis,
> >>> const struct sbrec_ha_chassis_group_table *ha_chassis_grp_table,
> >>> struct sset *bfd_chassis)
> >>> {
> >>> const struct sbrec_ha_chassis_group *ha_chassis_grp;
> >>> +bool chassis_is_ha_gw = false;
> >>> SBREC_HA_CHASSIS_GROUP_TABLE_FOR_EACH (ha_chassis_grp,
> >>> ha_chassis_grp_table) {
> >>> bool is_ha_chassis = false;
> >>> @@ -143,6 +144,7 @@ bfd_calculate_chassis(
> >>> sset_add(&grp_chassis, ha_ch->chassis->name);
> >>> if (our_chassis == ha_ch->chassis) {
> >>> is_ha_chassis = true;
> >>> +chassis_is_ha_gw = true;
> >>> bfd_setup_required = true;
> >>> }
> >>> }
> >>> @@ -178,6 +180,7 @@ bfd_calculate_chassis(
> >>> }
> >>> sset_destroy(&grp_chassis);
> >>> }
> >>> +return chassis_is_ha_gw;
> >>> }
> >>>
> >>> void
> >>> diff --git a/controller/bfd.h b/controller/bfd.h
> >>> index f8fece5a5..3e3384891 100
Re: [ovs-dev] [PATCH ovn v2] controller: Fix bfd up too early after unexpected reboot.
On 3/25/26 4:31 PM, Dumitru Ceara wrote:
> On 3/12/26 8:35 PM, Mark Michelson via dev wrote:
>> Hi Xavier,
>>
>
> Hi Xavier, Mark,
>
>> It took a bit for me to look through all of this, but I believe this
>> looks good to me. Thanks for adding the notes about the offline
>> discussions, as I had not recalled all the details about those
>> discussions between then and now.
>>
>> Acked-by: Mark Michelson
>>
>
> I think this change makes sense too. I do have some very small comments
> below but I might be able to address them myself and just squash that
> into this version of the patch before applying it to main.
>
> Please let me know what you think.
>
Hmm, this actually failed in CI:
https://github.com/dceara/ovn/actions/runs/23547684046/job/68553636598
Xavier, would you happen to have some time to look into it by any chance?
Thanks,
Dumitru
>> On Mon, Mar 9, 2026 at 3:36 PM Xavier Simonart wrote:
>>>
>>> If a server unexpectedly rebooted, OVS, when restarted, sets BFD
>>> UP on bfd-enabled geneve tunnels.
>>> However, if it takes time to restart OVN, an HA gw chassis
>>> would attract the traffic while being unable to handle it
>>> (as no flows), resulting in traffic loss.
>>>
>>> This is fixed by re-using ovs flow-restore-wait.
>>> If set, OVS waits (prevents upcalls, ignores bfd, ...) until reset.
>>> Once OVS receives the notification of flow-restore-wait being false,
>>> it restarts handling upcalls, bfd... and ignores any new change to
>>> flow-restore-wait.
>>>
>>> Hence, on chassis hosting ha gateways, OVN toggles flow-restore-wait:
>>> it set it to false, waits for ack from OVS and then sets it back to true.
>>> If server reboots, OVS will see flow-restore-wait being true.
>>>
>>> OVN also sets external_ids->ovn-managed-flow-restore-wait when setting
>>> flow-restore-wait. When set, it tells that OVN once set flow-restore-wait.
>>>
>>> "ovs-ctl restart" also uses flow-restore-wait: when called, it saves the
>>> flows, stops "ovs-vswitchd", sets "flow-restore-wait" to true, restarts
>>> "ovs-vswitchd", restores the flows and finally removes "flow-restore-wait".
>>> So OVS will wait either for "ovs-ctl restart" to remove "flow-restore-wait"
>>> or for OVN to set "flow-restore-wait" to false.
>>>
>>> Reported-at: https://issues.redhat.com/browse/FDP-3075
>>> Signed-off-by: Xavier Simonart
>>>
>>> ---
>>> -v2 : - Updated based on Mark's feedback (commit message, comments).
>>> - Avoid setting flow-restore-wait for computes.
>>> - Add external_ids->ovn-managed-flow-restore-wait.
>>> - Updated test: add test for compute update + nits (variable name
>>> changes)
>>> ---
>>> controller/bfd.c| 5 +-
>>> controller/bfd.h| 4 +-
>>> controller/ovn-controller.8.xml | 11 +
>>> controller/ovn-controller.c | 171 +-
>>> tests/multinode-macros.at | 22 ++
>>> tests/multinode.at | 546 ++--
>>> 6 files changed, 584 insertions(+), 175 deletions(-)
>>>
>>> diff --git a/controller/bfd.c b/controller/bfd.c
>>> index 3b0c3f6da..56bfa4936 100644
>>> --- a/controller/bfd.c
>>> +++ b/controller/bfd.c
>>> @@ -117,13 +117,14 @@ bfd_calculate_active_tunnels(const struct
>>> ovsrec_bridge *br_int,
>>> *
>>> * If 'our_chassis' is C5 then this function returns empty bfd set.
>>> */
>>> -void
>>> +bool
>>> bfd_calculate_chassis(
>>> const struct sbrec_chassis *our_chassis,
>>> const struct sbrec_ha_chassis_group_table *ha_chassis_grp_table,
>>> struct sset *bfd_chassis)
>>> {
>>> const struct sbrec_ha_chassis_group *ha_chassis_grp;
>>> +bool chassis_is_ha_gw = false;
>>> SBREC_HA_CHASSIS_GROUP_TABLE_FOR_EACH (ha_chassis_grp,
>>> ha_chassis_grp_table) {
>>> bool is_ha_chassis = false;
>>> @@ -143,6 +144,7 @@ bfd_calculate_chassis(
>>> sset_add(&grp_chassis, ha_ch->chassis->name);
>>> if (our_chassis == ha_ch->chassis) {
>>> is_ha_chassis = true;
>>> +chassis_is_ha_gw = true;
>>> bfd_setup_required = true;
>>> }
>>> }
>>> @@ -178,6 +180,7 @@ bfd_calculate_chassis(
>>> }
>>> sset_destroy(&grp_chassis);
>>> }
>>> +return chassis_is_ha_gw;
>>> }
>>>
>>> void
>>> diff --git a/controller/bfd.h b/controller/bfd.h
>>> index f8fece5a5..3e3384891 100644
>>> --- a/controller/bfd.h
>>> +++ b/controller/bfd.h
>>> @@ -16,6 +16,8 @@
>>> #ifndef OVN_BFD_H
>>> #define OVN_BFD_H 1
>>>
>>> +#include
>>> +
>>> struct hmap;
>>> struct ovsdb_idl;
>>> struct ovsdb_idl_index;
>>> @@ -36,7 +38,7 @@ void bfd_run(const struct ovsrec_interface_table *,
>>> const struct sbrec_sb_global_table *,
>>> const struct ovsrec_open_vswitch_table *);
>>>
>>> -void bfd_calculate_chassis(
>>> +bool bfd_calculate_chassis(
>>> const struct sbrec_chassis *,
>>> const struct sbrec_ha_chas
Re: [ovs-dev] [PATCH ovn v2] controller: Fix bfd up too early after unexpected reboot.
On 3/12/26 8:35 PM, Mark Michelson via dev wrote:
> Hi Xavier,
>
Hi Xavier, Mark,
> It took a bit for me to look through all of this, but I believe this
> looks good to me. Thanks for adding the notes about the offline
> discussions, as I had not recalled all the details about those
> discussions between then and now.
>
> Acked-by: Mark Michelson
>
I think this change makes sense too. I do have some very small comments
below but I might be able to address them myself and just squash that
into this version of the patch before applying it to main.
Please let me know what you think.
> On Mon, Mar 9, 2026 at 3:36 PM Xavier Simonart wrote:
>>
>> If a server unexpectedly rebooted, OVS, when restarted, sets BFD
>> UP on bfd-enabled geneve tunnels.
>> However, if it takes time to restart OVN, an HA gw chassis
>> would attract the traffic while being unable to handle it
>> (as no flows), resulting in traffic loss.
>>
>> This is fixed by re-using ovs flow-restore-wait.
>> If set, OVS waits (prevents upcalls, ignores bfd, ...) until reset.
>> Once OVS receives the notification of flow-restore-wait being false,
>> it restarts handling upcalls, bfd... and ignores any new change to
>> flow-restore-wait.
>>
>> Hence, on chassis hosting ha gateways, OVN toggles flow-restore-wait:
>> it set it to false, waits for ack from OVS and then sets it back to true.
>> If server reboots, OVS will see flow-restore-wait being true.
>>
>> OVN also sets external_ids->ovn-managed-flow-restore-wait when setting
>> flow-restore-wait. When set, it tells that OVN once set flow-restore-wait.
>>
>> "ovs-ctl restart" also uses flow-restore-wait: when called, it saves the
>> flows, stops "ovs-vswitchd", sets "flow-restore-wait" to true, restarts
>> "ovs-vswitchd", restores the flows and finally removes "flow-restore-wait".
>> So OVS will wait either for "ovs-ctl restart" to remove "flow-restore-wait"
>> or for OVN to set "flow-restore-wait" to false.
>>
>> Reported-at: https://issues.redhat.com/browse/FDP-3075
>> Signed-off-by: Xavier Simonart
>>
>> ---
>> -v2 : - Updated based on Mark's feedback (commit message, comments).
>> - Avoid setting flow-restore-wait for computes.
>> - Add external_ids->ovn-managed-flow-restore-wait.
>> - Updated test: add test for compute update + nits (variable name
>> changes)
>> ---
>> controller/bfd.c| 5 +-
>> controller/bfd.h| 4 +-
>> controller/ovn-controller.8.xml | 11 +
>> controller/ovn-controller.c | 171 +-
>> tests/multinode-macros.at | 22 ++
>> tests/multinode.at | 546 ++--
>> 6 files changed, 584 insertions(+), 175 deletions(-)
>>
>> diff --git a/controller/bfd.c b/controller/bfd.c
>> index 3b0c3f6da..56bfa4936 100644
>> --- a/controller/bfd.c
>> +++ b/controller/bfd.c
>> @@ -117,13 +117,14 @@ bfd_calculate_active_tunnels(const struct
>> ovsrec_bridge *br_int,
>> *
>> * If 'our_chassis' is C5 then this function returns empty bfd set.
>> */
>> -void
>> +bool
>> bfd_calculate_chassis(
>> const struct sbrec_chassis *our_chassis,
>> const struct sbrec_ha_chassis_group_table *ha_chassis_grp_table,
>> struct sset *bfd_chassis)
>> {
>> const struct sbrec_ha_chassis_group *ha_chassis_grp;
>> +bool chassis_is_ha_gw = false;
>> SBREC_HA_CHASSIS_GROUP_TABLE_FOR_EACH (ha_chassis_grp,
>> ha_chassis_grp_table) {
>> bool is_ha_chassis = false;
>> @@ -143,6 +144,7 @@ bfd_calculate_chassis(
>> sset_add(&grp_chassis, ha_ch->chassis->name);
>> if (our_chassis == ha_ch->chassis) {
>> is_ha_chassis = true;
>> +chassis_is_ha_gw = true;
>> bfd_setup_required = true;
>> }
>> }
>> @@ -178,6 +180,7 @@ bfd_calculate_chassis(
>> }
>> sset_destroy(&grp_chassis);
>> }
>> +return chassis_is_ha_gw;
>> }
>>
>> void
>> diff --git a/controller/bfd.h b/controller/bfd.h
>> index f8fece5a5..3e3384891 100644
>> --- a/controller/bfd.h
>> +++ b/controller/bfd.h
>> @@ -16,6 +16,8 @@
>> #ifndef OVN_BFD_H
>> #define OVN_BFD_H 1
>>
>> +#include
>> +
>> struct hmap;
>> struct ovsdb_idl;
>> struct ovsdb_idl_index;
>> @@ -36,7 +38,7 @@ void bfd_run(const struct ovsrec_interface_table *,
>> const struct sbrec_sb_global_table *,
>> const struct ovsrec_open_vswitch_table *);
>>
>> -void bfd_calculate_chassis(
>> +bool bfd_calculate_chassis(
>> const struct sbrec_chassis *,
>> const struct sbrec_ha_chassis_group_table *,
>> struct sset *);
>> diff --git a/controller/ovn-controller.8.xml
>> b/controller/ovn-controller.8.xml
>> index 57e7cf5dd..33281a4d6 100644
>> --- a/controller/ovn-controller.8.xml
>> +++ b/controller/ovn-controller.8.xml
>> @@ -531,6 +531,17 @@
>> 65535.
>>
>>
>> +
>> +external_ids:ovn-managed-flow-restore-wait
Re: [ovs-dev] [PATCH ovn v2] controller: Fix bfd up too early after unexpected reboot.
Hi Xavier,
It took a bit for me to look through all of this, but I believe this
looks good to me. Thanks for adding the notes about the offline
discussions, as I had not recalled all the details about those
discussions between then and now.
Acked-by: Mark Michelson
On Mon, Mar 9, 2026 at 3:36 PM Xavier Simonart wrote:
>
> If a server unexpectedly rebooted, OVS, when restarted, sets BFD
> UP on bfd-enabled geneve tunnels.
> However, if it takes time to restart OVN, an HA gw chassis
> would attract the traffic while being unable to handle it
> (as no flows), resulting in traffic loss.
>
> This is fixed by re-using ovs flow-restore-wait.
> If set, OVS waits (prevents upcalls, ignores bfd, ...) until reset.
> Once OVS receives the notification of flow-restore-wait being false,
> it restarts handling upcalls, bfd... and ignores any new change to
> flow-restore-wait.
>
> Hence, on chassis hosting ha gateways, OVN toggles flow-restore-wait:
> it set it to false, waits for ack from OVS and then sets it back to true.
> If server reboots, OVS will see flow-restore-wait being true.
>
> OVN also sets external_ids->ovn-managed-flow-restore-wait when setting
> flow-restore-wait. When set, it tells that OVN once set flow-restore-wait.
>
> "ovs-ctl restart" also uses flow-restore-wait: when called, it saves the
> flows, stops "ovs-vswitchd", sets "flow-restore-wait" to true, restarts
> "ovs-vswitchd", restores the flows and finally removes "flow-restore-wait".
> So OVS will wait either for "ovs-ctl restart" to remove "flow-restore-wait"
> or for OVN to set "flow-restore-wait" to false.
>
> Reported-at: https://issues.redhat.com/browse/FDP-3075
> Signed-off-by: Xavier Simonart
>
> ---
> -v2 : - Updated based on Mark's feedback (commit message, comments).
> - Avoid setting flow-restore-wait for computes.
> - Add external_ids->ovn-managed-flow-restore-wait.
> - Updated test: add test for compute update + nits (variable name
> changes)
> ---
> controller/bfd.c| 5 +-
> controller/bfd.h| 4 +-
> controller/ovn-controller.8.xml | 11 +
> controller/ovn-controller.c | 171 +-
> tests/multinode-macros.at | 22 ++
> tests/multinode.at | 546 ++--
> 6 files changed, 584 insertions(+), 175 deletions(-)
>
> diff --git a/controller/bfd.c b/controller/bfd.c
> index 3b0c3f6da..56bfa4936 100644
> --- a/controller/bfd.c
> +++ b/controller/bfd.c
> @@ -117,13 +117,14 @@ bfd_calculate_active_tunnels(const struct ovsrec_bridge
> *br_int,
> *
> * If 'our_chassis' is C5 then this function returns empty bfd set.
> */
> -void
> +bool
> bfd_calculate_chassis(
> const struct sbrec_chassis *our_chassis,
> const struct sbrec_ha_chassis_group_table *ha_chassis_grp_table,
> struct sset *bfd_chassis)
> {
> const struct sbrec_ha_chassis_group *ha_chassis_grp;
> +bool chassis_is_ha_gw = false;
> SBREC_HA_CHASSIS_GROUP_TABLE_FOR_EACH (ha_chassis_grp,
> ha_chassis_grp_table) {
> bool is_ha_chassis = false;
> @@ -143,6 +144,7 @@ bfd_calculate_chassis(
> sset_add(&grp_chassis, ha_ch->chassis->name);
> if (our_chassis == ha_ch->chassis) {
> is_ha_chassis = true;
> +chassis_is_ha_gw = true;
> bfd_setup_required = true;
> }
> }
> @@ -178,6 +180,7 @@ bfd_calculate_chassis(
> }
> sset_destroy(&grp_chassis);
> }
> +return chassis_is_ha_gw;
> }
>
> void
> diff --git a/controller/bfd.h b/controller/bfd.h
> index f8fece5a5..3e3384891 100644
> --- a/controller/bfd.h
> +++ b/controller/bfd.h
> @@ -16,6 +16,8 @@
> #ifndef OVN_BFD_H
> #define OVN_BFD_H 1
>
> +#include
> +
> struct hmap;
> struct ovsdb_idl;
> struct ovsdb_idl_index;
> @@ -36,7 +38,7 @@ void bfd_run(const struct ovsrec_interface_table *,
> const struct sbrec_sb_global_table *,
> const struct ovsrec_open_vswitch_table *);
>
> -void bfd_calculate_chassis(
> +bool bfd_calculate_chassis(
> const struct sbrec_chassis *,
> const struct sbrec_ha_chassis_group_table *,
> struct sset *);
> diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml
> index 57e7cf5dd..33281a4d6 100644
> --- a/controller/ovn-controller.8.xml
> +++ b/controller/ovn-controller.8.xml
> @@ -531,6 +531,17 @@
> 65535.
>
>
> +
> +external_ids:ovn-managed-flow-restore-wait in the
> +Open_vSwitch table
> +
> +
> +When set to true, this key indicates that ovn-controller
> +has set the other_config:flow-restore-wait option.
> +The key is set when ovn-controller enables
> +flow-restore-wait and removed when it clears it.
> +
> +
>
> external_ids:ct-zone-* in the Bridge table
>
> diff --git a/controller/ovn-controller.c b/controller/o
