On 11/7/24 12:28, Lorenzo Bianconi wrote:
> [...]
>>>> + };
>>>> +
>>>> + ovs_be32 lo = get_unaligned_be32((void *)&ea.be16[1]);
>>>> + ovs_u128 nexthop = {
>>>> + .u64.hi = ntohs(ea.be16[0]),
>>>> + .u64.lo = (uint64_t) ntohl(lo) << 32,
>>>> + };
>>>> +
>>>> + struct ofp_ct_match match = {
>>>> + .labels = nexthop,
>>>> + .labels_mask = mask,
>>>> + };
>>>> + struct ofpbuf *msg = ofp_ct_match_encode(&match, NULL,
>>>> + rconn_get_version(swconn));
>>>> + ovs_list_push_back(msgs, &msg->list_node);
>>
>> One more comment: this doesn't work properly with the case when we have:
>>
>> - two OVN routers, R1 and R2
>> - each of them with a static ecmp route (ecmp-symmetric-reply) with the
>> same next-hop
>> - the route is removed on R2.
>>
>> We'll flush all matching conntrack entries, breaking sessions on R1.
>
> considering ecmp_nexthop_monitor_run(), if the same next hop mac address is
> shared between two different SBREC_ECMP_NEXTHOP rows, mac_count for the given
> nexthop mac address will be greater than 1 so the related ct entry will not be
> flushed removing one of them. Am I missing something? I guess we need a test
> for it.
>
There's no guarantee that the next-hop IP used by R1 and R2 are actually
the same host. Let me expand the topology:
host1 (1.1.1.1) -- vlan100 -- localnet-switch1 - R1
host2 (1.1.1.1) -- vlan200 -- localnet-switch2 - R2
All these configured on the same hypervisor but vlans 100 and 200 are
independent broadcast domains with no routing between them. host1 and
host2 are two independent machines.
If on R1 we add:
route1: 42.42.42.0/24 via 1.1.1.1 [ecmp-symmetric-reply]
And on R2 we add:
route2: 43.43.43.0/24 via 1.1.1.1 [ecmp-symmetric-reply]
Route1 forwards traffic through host1 and route2 forwards traffic
through host2. If host1 goes down the CMS will presumably remove
route1. At that point we _should_ clear all conntrack entries
corresponding to route1 but we _should not_ clear conntrack entries
corresponding to route2.
Does that explain it better?
Thanks,
Dumitru
> Regards,
> Lorenzo
>
>>
>>>> +}
>>>> +
>>>> +static void
>>>> +ecmp_nexthop_monitor_run(const struct sbrec_ecmp_nexthop_table *enh_table,
>>>> + struct ovs_list *msgs)
>>>> +{
>>>> + struct sset ecmp_sb_only = SSET_INITIALIZER(&ecmp_sb_only);
>>>> + struct simap mac_count = SIMAP_INITIALIZER(&mac_count);
>>>> +
>>>> + struct smap_node *node;
>>>> + SMAP_FOR_EACH_SAFE (node, &ecmp_nexthop) {
>>>> + simap_increase(&mac_count, node->value, 1);
>>>> + }
>>>> +
>>>> + const struct sbrec_ecmp_nexthop *sbrec_ecmp_nexthop;
>>>> + SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sbrec_ecmp_nexthop, enh_table) {
>>>> + smap_replace(&ecmp_nexthop, sbrec_ecmp_nexthop->nexthop,
>>>> + sbrec_ecmp_nexthop->mac);
>>>> + sset_add(&ecmp_sb_only, sbrec_ecmp_nexthop->nexthop);
>>>> + }
>>>> +
>>>> + SMAP_FOR_EACH_SAFE (node, &ecmp_nexthop) {
>>>> + /* Do not flush CT entries if the share the same mac address. */
>>>> + if (!sset_contains(&ecmp_sb_only, node->key)) {
>>>> + if (simap_get(&mac_count, node->value) == 1) {
>>>> + ecmp_nexthop_monitor_flush_ct_entry(node->value, msgs);
>>>> + }
>>>> + smap_remove_node(&ecmp_nexthop, node);
>>>> + }
>>>> + }
>>>> +
>>>> + sset_destroy(&ecmp_sb_only);
>>>> + simap_destroy(&mac_count);
>>>> +}
>>>> +
>>>> static void
>>>> installed_flow_add(struct ovn_flow *d,
>>>> struct ofputil_bundle_ctrl_msg *bc,
>>>> @@ -2664,6 +2734,7 @@ ofctrl_put(struct ovn_desired_flow_table
>>>> *lflow_table,
>>>> struct shash *pending_ct_zones,
>>>> struct hmap *pending_lb_tuples,
>>>> struct ovsdb_idl_index *sbrec_meter_by_name,
>>>> + const struct sbrec_ecmp_nexthop_table *enh_table,
>>>> uint64_t req_cfg,
>>>> bool lflows_changed,
>>>> bool pflows_changed)
>>>> @@ -2704,6 +2775,8 @@ ofctrl_put(struct ovn_desired_flow_table
>>>> *lflow_table,
>>>> /* OpenFlow messages to send to the switch to bring it up-to-date. */
>>>> struct ovs_list msgs = OVS_LIST_INITIALIZER(&msgs);
>>>>
>>>> + ecmp_nexthop_monitor_run(enh_table, &msgs);
>>>> +
>>>> /* Iterate through ct zones that need to be flushed. */
>>>> struct shash_node *iter;
>>>> SHASH_FOR_EACH(iter, pending_ct_zones) {
>>>> diff --git a/controller/ofctrl.h b/controller/ofctrl.h
>>>> index 129e3b6ad..33953a8a4 100644
>>>> --- a/controller/ofctrl.h
>>>> +++ b/controller/ofctrl.h
>>>> @@ -31,6 +31,7 @@ struct ofpbuf;
>>>> struct ovsrec_bridge;
>>>> struct ovsrec_open_vswitch_table;
>>>> struct sbrec_meter_table;
>>>> +struct sbrec_ecmp_nexthop_table;
>>>> struct shash;
>>>>
>>>> struct ovn_desired_flow_table {
>>>> @@ -59,6 +60,7 @@ void ofctrl_put(struct ovn_desired_flow_table
>>>> *lflow_table,
>>>> struct shash *pending_ct_zones,
>>>> struct hmap *pending_lb_tuples,
>>>> struct ovsdb_idl_index *sbrec_meter_by_name,
>>>> + const struct sbrec_ecmp_nexthop_table *enh_table,
>>>> uint64_t nb_cfg,
>>>> bool lflow_changed,
>>>> bool pflow_changed);
>>>> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
>>>> index 4dc9bc8cb..7203cda42 100644
>>>> --- a/controller/ovn-controller.c
>>>> +++ b/controller/ovn-controller.c
>>>> @@ -5731,6 +5731,8 @@ main(int argc, char *argv[])
>>>> &ct_zones_data->ctx.pending,
>>>> &lb_data->removed_tuples,
>>>> sbrec_meter_by_name,
>>>> + sbrec_ecmp_nexthop_table_get(
>>>> + ovnsb_idl_loop.idl),
>>>> ofctrl_seqno_get_req_cfg(),
>>>> engine_node_changed(&en_lflow_output),
>>>> engine_node_changed(&en_pflow_output));
>>>> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
>>>> index e065b1079..01008b899 100644
>>>> --- a/tests/system-ovn.at
>>>> +++ b/tests/system-ovn.at
>>>> @@ -13884,3 +13884,381 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/.*error
>>>> receiving.*/d
>>>> /.*terminating with signal 15.*/d"])
>>>> AT_CLEANUP
>>>> ])
>>>> +
>>>> +OVN_FOR_EACH_NORTHD([
>>>> +AT_SETUP([ECMP Flush CT entries - IPv4])
>>>> +AT_KEYWORDS([ecmp])
>>>> +ovn_start
>>>> +OVS_TRAFFIC_VSWITCHD_START()
>>>> +
>>>> +ADD_BR([br-int])
>>>> +ADD_BR([br-ext])
>>>> +ADD_BR([br-ecmp])
>>>> +
>>>> +ovs-ofctl add-flow br-ext action=normal
>>>> +ovs-ofctl add-flow br-ecmp action=normal
>>>> +# Set external-ids in br-int needed for ovn-controller
>>>> +ovs-vsctl \
>>>> + -- set Open_vSwitch . external-ids:system-id=hv1 \
>>>> + -- set Open_vSwitch .
>>>> external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
>>>> + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
>>>> + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
>>>> + -- set bridge br-int fail-mode=secure
>>>> other-config:disable-in-band=true
>>>> +
>>>> +# Start ovn-controller
>>>> +start_daemon ovn-controller
>>>> +ovs-vsctl set Open_vSwitch . external-ids:arp-max-timeout-sec=1
>>>> +
>>>> +check ovn-nbctl lr-add R1
>>>> +check ovn-nbctl set logical_router R1 options:chassis=hv1
>>>> +
>>>> +check ovn-nbctl ls-add sw0
>>>> +check ovn-nbctl ls-add public
>>>> +
>>>> +check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 192.168.1.1/24
>>>> +check ovn-nbctl lrp-add R1 rp-public 00:00:02:01:02:03 172.16.1.1/24
>>>> +
>>>> +ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \
>>>> + type=router options:router-port=rp-sw0 \
>>>> + -- lsp-set-addresses sw0-rp router
>>>
>>> check
>>>
>>>> +
>>>> +ovn-nbctl lsp-add public public-rp -- set Logical_Switch_Port public-rp \
>>>> + type=router options:router-port=rp-public \
>>>> + -- lsp-set-addresses public-rp router
>>>
>>> check
>>>
>>>> +
>>>> +ADD_NAMESPACES(alice)
>>>> +ADD_VETH(alice, alice, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \
>>>> + "192.168.1.1")
>>>> +check ovn-nbctl lsp-add sw0 alice \
>>>> + -- lsp-set-addresses alice "f0:00:00:01:02:03 192.168.1.2"
>>>> +
>>>> +AT_CHECK([ovs-vsctl set Open_vSwitch .
>>>> external-ids:ovn-bridge-mappings=phynet:br-ext])
>>>
>>> s/AT_CHECK/check
>>>
>>>> +check ovn-nbctl lsp-add public public1 \
>>>> + -- lsp-set-addresses public1 unknown \
>>>> + -- lsp-set-type public1 localnet \
>>>> + -- lsp-set-options public1 network_name=phynet
>>>> +
>>>> +ADD_NAMESPACES(ecmp-path0)
>>>> +ADD_VETH(ecmp-p01, ecmp-path0, br-ext, "172.16.1.2/24",
>>>> "f0:00:00:01:02:04", "172.16.1.1")
>>>> +ADD_VETH(ecmp-p02, ecmp-path0, br-ecmp, "172.16.2.2/24",
>>>> "f0:00:00:01:03:04")
>>>> +
>>>> +ADD_NAMESPACES(ecmp-path1)
>>>> +ADD_VETH(ecmp-p11, ecmp-path1, br-ext, "172.16.1.3/24",
>>>> "f0:00:00:01:02:05", "172.16.1.1")
>>>> +ADD_VETH(ecmp-p12, ecmp-path1, br-ecmp, "172.16.2.3/24",
>>>> "f0:00:00:01:03:05")
>>>> +
>>>> +ADD_NAMESPACES(bob)
>>>> +ADD_VETH(bob, bob, br-ecmp, "172.16.2.10/24", "f0:00:00:01:02:06",
>>>> "172.16.2.2")
>>>> +
>>>> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24
>>>> 172.16.1.2
>>>> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24
>>>> 172.16.1.3
>>>> +
>>>> +ovn-nbctl --wait=hv sync
>>>
>>> We probably need:
>>>
>>> wait_for_ports_up
>>> check ovn-nbctl --wait=hv sync
>>>
>>> instead.
>>>
>>>> +NETNS_DAEMONIZE([alice], [nc -l -k 80], [alice.pid])
>>>> +
>>>> +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 |
>>>> FORMAT_PING], \
>>>> +[0], [dnl
>>>> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
>>>> +])
>>>> +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
>>>> +
>>>> +wait_row_count ECMP_Nexthop 2
>>>> +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2'
>>>> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
>>>> +
>>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
>>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>>>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>>>> +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
>>>> +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
>>>> +])
>>>> +
>>>> +# Change bob default IP address
>>>> +NS_CHECK_EXEC([bob], [ip route del 0.0.0.0/0 via 172.16.2.2])
>>>> +NS_CHECK_EXEC([bob], [ip route add 0.0.0.0/0 via 172.16.2.3])
>>>> +
>>>> +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 |
>>>> FORMAT_PING], \
>>>> +[0], [dnl
>>>> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
>>>> +])
>>>> +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
>>>> +
>>>> +wait_row_count ECMP_Nexthop 2
>>>> +check_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2'
>>>> +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
>>>> +
>>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
>>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>>>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>>>> +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
>>>> +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
>>>> +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
>>>> +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
>>>> +])
>>>> +
>>>> +# Remove first ECMP route
>>>> +check ovn-nbctl lr-route-del R1 172.16.2.0/24 172.16.1.2
>>>> +wait_row_count ECMP_Nexthop 1
>>>> +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
>>>> +
>>>
>>> We have a race condition here. We should add "check ovn-nbctl --wait=hv
>>> sync" to ensure ovn-controller actually processed the next hop change.
>>>
>>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
>>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>>>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>>>> +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
>>>> +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
>>>> +])
>>>> +
>>>> +# Add the route back and verify we do not flush if we have multiple
>>>> next-hops with the same mac address
>>>> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24
>>>> 172.16.1.2
>>>> +wait_row_count ECMP_Nexthop 2
>>>> +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2'
>>>> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
>>>> +
>>>> +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address
>>>> f0:00:00:01:02:05])
>>>> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.2'
>>>> +
>>>> +# Change bob default IP address
>>>> +NS_CHECK_EXEC([bob], [ip route del 0.0.0.0/0 via 172.16.2.3])
>>>> +NS_CHECK_EXEC([bob], [ip route add 0.0.0.0/0 via 172.16.2.2])
>>>> +
>>>> +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 |
>>>> FORMAT_PING], \
>>>> +[0], [dnl
>>>> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
>>>> +])
>>>> +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
>>>> +
>>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
>>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>>>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>>>> +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
>>>> +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
>>>> +])
>>>> +
>>>> +# Remove first ECMP route
>>>> +check ovn-nbctl lr-route-del R1 172.16.2.0/24 172.16.1.2
>>>> +wait_row_count ECMP_Nexthop 1
>>>
>>> We have a race condition here. We should add "check ovn-nbctl --wait=hv
>>> sync" to ensure ovn-controller actually processed the next hop change.
>>>
>>>> +
>>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
>>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>>>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>>>> +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
>>>> +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
>>>> +])
>>>> +
>>>> +# Remove second ECMP route
>>>> +check ovn-nbctl lr-route-del R1
>>>> +wait_row_count ECMP_Nexthop 0
>>>
>>> We have a race condition here. We should add "check ovn-nbctl --wait=hv
>>> sync" to ensure ovn-controller actually flushed conntrack.
>>>
>>>> +
>>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
>>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>>>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>>>> +])
>>>> +
>>>> +OVS_APP_EXIT_AND_WAIT([ovn-controller])
>>>> +
>>>> +as ovn-sb
>>>> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
>>>> +
>>>> +as ovn-nb
>>>> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
>>>> +
>>>> +as northd
>>>> +OVS_APP_EXIT_AND_WAIT([ovn-northd])
>>>> +
>>>> +as
>>>> +OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
>>>> +/.*terminating with signal 15.*/d"])
>>>> +AT_CLEANUP
>>>> +])
>>>> +
>>>> +OVN_FOR_EACH_NORTHD([
>>>> +AT_SETUP([ECMP Flush CT entries - IPv6])
>>>> +AT_KEYWORDS([ecmp])
>>>> +ovn_start
>>>> +OVS_TRAFFIC_VSWITCHD_START()
>>>> +
>>>> +ADD_BR([br-int])
>>>> +ADD_BR([br-ext])
>>>> +ADD_BR([br-ecmp])
>>>> +
>>>> +ovs-ofctl add-flow br-ext action=normal
>>>> +ovs-ofctl add-flow br-ecmp action=normal
>>>> +# Set external-ids in br-int needed for ovn-controller
>>>> +ovs-vsctl \
>>>> + -- set Open_vSwitch . external-ids:system-id=hv1 \
>>>> + -- set Open_vSwitch .
>>>> external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
>>>> + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
>>>> + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
>>>> + -- set bridge br-int fail-mode=secure
>>>> other-config:disable-in-band=true
>>>> +
>>>> +# Start ovn-controller
>>>> +start_daemon ovn-controller
>>>> +ovs-vsctl set Open_vSwitch . external-ids:arp-max-timeout-sec=1
>>>> +
>>>> +check ovn-nbctl lr-add R1
>>>> +check ovn-nbctl set logical_router R1 options:chassis=hv1
>>>> +
>>>> +check ovn-nbctl ls-add sw0
>>>> +check ovn-nbctl ls-add public
>>>> +
>>>> +check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 fd11::1/64
>>>> +check ovn-nbctl lrp-add R1 rp-public 00:00:02:01:02:03 fd12::1/64
>>>> +
>>>> +ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \
>>>> + type=router options:router-port=rp-sw0 \
>>>> + -- lsp-set-addresses sw0-rp router
>>>
>>> check
>>>
>>>> +
>>>> +ovn-nbctl lsp-add public public-rp -- set Logical_Switch_Port public-rp \
>>>> + type=router options:router-port=rp-public \
>>>> + -- lsp-set-addresses public-rp router
>>>
>>> check
>>>
>>>> +
>>>> +ADD_NAMESPACES(alice)
>>>> +ADD_VETH(alice, alice, br-int, "fd11::2/64", "f0:00:00:01:02:03",
>>>> "fd11::1", "nodad")
>>>> +check ovn-nbctl lsp-add sw0 alice -- lsp-set-addresses alice
>>>> "f0:00:00:01:02:03 fd11::2"
>>>> +
>>>> +AT_CHECK([ovs-vsctl set Open_vSwitch .
>>>> external-ids:ovn-bridge-mappings=phynet:br-ext])
>>>> +check ovn-nbctl lsp-add public public1 \
>>>> + -- lsp-set-addresses public1 unknown \
>>>> + -- lsp-set-type public1 localnet \
>>>> + -- lsp-set-options public1 network_name=phynet
>>>> +
>>>> +ADD_NAMESPACES(ecmp-path0)
>>>> +ADD_VETH(ecmp-p01, ecmp-path0, br-ext, "fd12::2/64", "f0:00:00:01:02:04",
>>>> "fd12::1", "nodad")
>>>> +ADD_VETH(ecmp-p02, ecmp-path0, br-ecmp, "fd13::2/64", "f0:00:00:01:03:04")
>>>> +OVS_WAIT_UNTIL([NS_EXEC([ecmp-path0], [ip a show dev ecmp-p02 | grep
>>>> "fe80::" | grep -v tentative])])
>>>> +
>>>> +ADD_NAMESPACES(ecmp-path1)
>>>> +ADD_VETH(ecmp-p11, ecmp-path1, br-ext, "fd12::3/64", "f0:00:00:01:02:05",
>>>> "fd12::1", "nodad")
>>>> +ADD_VETH(ecmp-p12, ecmp-path1, br-ecmp, "fd13::3/64", "f0:00:00:01:03:05")
>>>> +OVS_WAIT_UNTIL([NS_EXEC([ecmp-path1], [ip a show dev ecmp-p12 | grep
>>>> "fe80::" | grep -v tentative])])
>>>> +
>>>> +ADD_NAMESPACES(bob)
>>>> +ADD_VETH(bob, bob, br-ecmp, "fd13::a/64", "f0:00:00:01:02:06", "fd13::2",
>>>> "nodad")
>>>> +
>>>> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2
>>>> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::3
>>>> +
>>>> +NS_CHECK_EXEC([ecmp-path0], [sysctl -w
>>>> net.ipv6.conf.all.forwarding=1],[0], [dnl
>>>> +net.ipv6.conf.all.forwarding = 1
>>>> +])
>>>> +NS_CHECK_EXEC([ecmp-path1], [sysctl -w
>>>> net.ipv6.conf.all.forwarding=1],[0], [dnl
>>>> +net.ipv6.conf.all.forwarding = 1
>>>> +])
>>>> +
>>>> +ovn-nbctl --wait=hv sync
>>>> +NETNS_DAEMONIZE([alice], [nc -6 -l -k 80], [alice.pid])
>>>> +
>>>> +NS_CHECK_EXEC([bob], [ping6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \
>>>> +[0], [dnl
>>>> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
>>>> +])
>>>> +
>>>> +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
>>>> +
>>>> +wait_row_count ECMP_Nexthop 2
>>>> +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"'
>>>> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
>>>> +
>>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
>>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>>>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>>>> +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
>>>> +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
>>>> +])
>>>> +
>>>> +# Change bob default IP address
>>>> +NS_CHECK_EXEC([bob], [ip -6 route del ::/0 via fd13::2])
>>>> +NS_CHECK_EXEC([bob], [ip -6 route add ::/0 via fd13::3])
>>>> +
>>>> +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING],
>>>> \
>>>> +[0], [dnl
>>>> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
>>>> +])
>>>> +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
>>>> +
>>>> +wait_row_count ECMP_Nexthop 2
>>>> +check_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"'
>>>> +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
>>>> +
>>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
>>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>>>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>>>> +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
>>>> +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
>>>> +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
>>>> +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
>>>> +])
>>>> +
>>>> +# Remove first ECMP route
>>>> +check ovn-nbctl lr-route-del R1 fd13::/64 fd12::2
>>>> +wait_row_count ECMP_Nexthop 1
>>>> +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
>>>> +
>>>
>>> Same comment as for v4: race condition.
>>>
>>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
>>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>>>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>>>> +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
>>>> +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
>>>> +])
>>>> +
>>>> + Add the route back and verify we do not flush if we have multiple
>>>> next-hops with the same mac address
>>>> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2
>>>> +wait_row_count ECMP_Nexthop 2
>>>> +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"'
>>>> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
>>>> +#
>>>> +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address
>>>> f0:00:00:01:02:05])
>>>> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::2"'
>>>> +
>>>> +# Change bob default IP address
>>>> +NS_CHECK_EXEC([bob], [ip -6 route del ::/0 via fd13::3])
>>>> +NS_CHECK_EXEC([bob], [ip -6 route add ::/0 via fd13::2])
>>>> +
>>>> +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING],
>>>> \
>>>> +[0], [dnl
>>>> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
>>>> +])
>>>> +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
>>>> +
>>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
>>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>>>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>>>> +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
>>>> +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
>>>> +])
>>>> +
>>>> +# Remove first ECMP route
>>>> +check ovn-nbctl lr-route-del R1 fd13::/64 fd12::2
>>>> +wait_row_count ECMP_Nexthop 1
>>>
>>> Same comment as for v4: race condition.
>>>
>>>> +
>>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
>>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>>>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>>>> +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
>>>> +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
>>>> +])
>>>> +
>>>> +# Remove second ECMP route
>>>> +check ovn-nbctl lr-route-del R1
>>>> +wait_row_count ECMP_Nexthop 0
>>>> +
>>>
>>> Same comment as for v4: race condition.
>>>
>>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
>>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>>>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>>>> +])
>>>> +
>>>> +OVS_APP_EXIT_AND_WAIT([ovn-controller])
>>>> +
>>>> +as ovn-sb
>>>> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
>>>> +
>>>> +as ovn-nb
>>>> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
>>>> +
>>>> +as northd
>>>> +OVS_APP_EXIT_AND_WAIT([ovn-northd])
>>>> +
>>>> +as
>>>> +OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
>>>> +/.*terminating with signal 15.*/d"])
>>>> +AT_CLEANUP
>>>> +])
>>>
>>> We don't have any test for the case when two OVN routers have routes
>>> configured via the same next-hop IP. We should check what happens when
>>> we remove one of the routes.
>>>
>>> Regards,
>>> Dumitru
>>>
>>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev