On 11/6/24 12:38, Dumitru Ceara wrote:
> Hi Lorenzo,
>
> On 9/19/24 19:08, Lorenzo Bianconi wrote:
>> Introduce ecmp_nexthop_monitor in ovn-controller in order to track and
>> flush ecmp-symmetric reply ct entires when requested by the CMS (e.g
>> removing the related static ecmp routes). CT entries are flushed using
>> the ethernet mac address stored in ct_label.
>>
>> Signed-off-by: Lorenzo Bianconi <[email protected]>
>> ---
>> controller/ofctrl.c | 73 +++++++
>> controller/ofctrl.h | 2 +
>> controller/ovn-controller.c | 2 +
>> tests/system-ovn.at | 378 ++++++++++++++++++++++++++++++++++++
>
> This is a user-visible change, it needs a detailed NEWS entry.
>
>> 4 files changed, 455 insertions(+)
>>
>> diff --git a/controller/ofctrl.c b/controller/ofctrl.c
>> index f9387d375..5987e9be3 100644
>> --- a/controller/ofctrl.c
>> +++ b/controller/ofctrl.c
>> @@ -45,6 +45,7 @@
>> #include "ovn/actions.h"
>> #include "lib/extend-table.h"
>> #include "lib/lb.h"
>> +#include "lib/ovn-util.h"
>> #include "openvswitch/poll-loop.h"
>> #include "physical.h"
>> #include "openvswitch/rconn.h"
>> @@ -392,6 +393,11 @@ static struct shash meter_bands;
>> static void ofctrl_meter_bands_destroy(void);
>> static void ofctrl_meter_bands_clear(void);
>>
>> +static struct smap ecmp_nexthop;
>
> Let's add a comment? It's not immediately clear that
> ecmp_nexthop_monitor_run() keeps adding removing to this map across
> iterations?
>
> Also, should we instead add a new module? E.g., ecmp-next-hop-monitor.c.?
>
>> +static void ecmp_nexthop_monitor_run(
>> + const struct sbrec_ecmp_nexthop_table *enh_table,
>> + struct ovs_list *msgs);
>> +
>> /* MFF_* field ID for our Geneve option. In S_TLV_TABLE_MOD_SENT, this is
>> * the option we requested (we don't know whether we obtained it yet). In
>> * S_CLEAR_FLOWS or S_UPDATE_FLOWS, this is really the option we have. */
>> @@ -425,6 +431,7 @@ ofctrl_init(struct ovn_extend_table *group_table,
>> tx_counter = rconn_packet_counter_create();
>> hmap_init(&installed_lflows);
>> hmap_init(&installed_pflows);
>> + smap_init(&ecmp_nexthop);
>> ovs_list_init(&flow_updates);
>> ovn_init_symtab(&symtab);
>> groups = group_table;
>> @@ -877,6 +884,7 @@ ofctrl_destroy(void)
>> expr_symtab_destroy(&symtab);
>> shash_destroy(&symtab);
>> ofctrl_meter_bands_destroy();
>> + smap_destroy(&ecmp_nexthop);
>> }
>>
>> uint64_t
>> @@ -2306,6 +2314,68 @@ add_meter(struct ovn_extend_table_info *m_desired,
>> ofctrl_meter_bands_alloc(sb_meter, m_desired, msgs);
>> }
>>
>> +static void
>> +ecmp_nexthop_monitor_flush_ct_entry(const char *mac, struct ovs_list *msgs)
>> +{
>> + struct eth_addr ea;
>> + if (!ovs_scan(mac, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) {
>> + return;
>> + }
>> +
>> + ovs_u128 mask = {
>> + /* ct_label.ecmp_reply_eth BITS[32-79] */
>> + .u64.hi = 0x000000000000ffff,
>> + .u64.lo = 0xffffffff00000000,
>
> No need to hardcode these, we actually define:
>
> OVN_CT_ECMP_ETH_1ST_BIT
> OVN_CT_ECMP_ETH_END_BIT
>
> in logical-fields.h
>
>> + };
>> +
>> + ovs_be32 lo = get_unaligned_be32((void *)&ea.be16[1]);
>> + ovs_u128 nexthop = {
>> + .u64.hi = ntohs(ea.be16[0]),
>> + .u64.lo = (uint64_t) ntohl(lo) << 32,
>> + };
>> +
>> + struct ofp_ct_match match = {
>> + .labels = nexthop,
>> + .labels_mask = mask,
>> + };
>> + struct ofpbuf *msg = ofp_ct_match_encode(&match, NULL,
>> + rconn_get_version(swconn));
>> + ovs_list_push_back(msgs, &msg->list_node);
One more comment: this doesn't work properly with the case when we have:
- two OVN routers, R1 and R2
- each of them with a static ecmp route (ecmp-symmetric-reply) with the
same next-hop
- the route is removed on R2.
We'll flush all matching conntrack entries, breaking sessions on R1.
>> +}
>> +
>> +static void
>> +ecmp_nexthop_monitor_run(const struct sbrec_ecmp_nexthop_table *enh_table,
>> + struct ovs_list *msgs)
>> +{
>> + struct sset ecmp_sb_only = SSET_INITIALIZER(&ecmp_sb_only);
>> + struct simap mac_count = SIMAP_INITIALIZER(&mac_count);
>> +
>> + struct smap_node *node;
>> + SMAP_FOR_EACH_SAFE (node, &ecmp_nexthop) {
>> + simap_increase(&mac_count, node->value, 1);
>> + }
>> +
>> + const struct sbrec_ecmp_nexthop *sbrec_ecmp_nexthop;
>> + SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sbrec_ecmp_nexthop, enh_table) {
>> + smap_replace(&ecmp_nexthop, sbrec_ecmp_nexthop->nexthop,
>> + sbrec_ecmp_nexthop->mac);
>> + sset_add(&ecmp_sb_only, sbrec_ecmp_nexthop->nexthop);
>> + }
>> +
>> + SMAP_FOR_EACH_SAFE (node, &ecmp_nexthop) {
>> + /* Do not flush CT entries if the share the same mac address. */
>> + if (!sset_contains(&ecmp_sb_only, node->key)) {
>> + if (simap_get(&mac_count, node->value) == 1) {
>> + ecmp_nexthop_monitor_flush_ct_entry(node->value, msgs);
>> + }
>> + smap_remove_node(&ecmp_nexthop, node);
>> + }
>> + }
>> +
>> + sset_destroy(&ecmp_sb_only);
>> + simap_destroy(&mac_count);
>> +}
>> +
>> static void
>> installed_flow_add(struct ovn_flow *d,
>> struct ofputil_bundle_ctrl_msg *bc,
>> @@ -2664,6 +2734,7 @@ ofctrl_put(struct ovn_desired_flow_table *lflow_table,
>> struct shash *pending_ct_zones,
>> struct hmap *pending_lb_tuples,
>> struct ovsdb_idl_index *sbrec_meter_by_name,
>> + const struct sbrec_ecmp_nexthop_table *enh_table,
>> uint64_t req_cfg,
>> bool lflows_changed,
>> bool pflows_changed)
>> @@ -2704,6 +2775,8 @@ ofctrl_put(struct ovn_desired_flow_table *lflow_table,
>> /* OpenFlow messages to send to the switch to bring it up-to-date. */
>> struct ovs_list msgs = OVS_LIST_INITIALIZER(&msgs);
>>
>> + ecmp_nexthop_monitor_run(enh_table, &msgs);
>> +
>> /* Iterate through ct zones that need to be flushed. */
>> struct shash_node *iter;
>> SHASH_FOR_EACH(iter, pending_ct_zones) {
>> diff --git a/controller/ofctrl.h b/controller/ofctrl.h
>> index 129e3b6ad..33953a8a4 100644
>> --- a/controller/ofctrl.h
>> +++ b/controller/ofctrl.h
>> @@ -31,6 +31,7 @@ struct ofpbuf;
>> struct ovsrec_bridge;
>> struct ovsrec_open_vswitch_table;
>> struct sbrec_meter_table;
>> +struct sbrec_ecmp_nexthop_table;
>> struct shash;
>>
>> struct ovn_desired_flow_table {
>> @@ -59,6 +60,7 @@ void ofctrl_put(struct ovn_desired_flow_table *lflow_table,
>> struct shash *pending_ct_zones,
>> struct hmap *pending_lb_tuples,
>> struct ovsdb_idl_index *sbrec_meter_by_name,
>> + const struct sbrec_ecmp_nexthop_table *enh_table,
>> uint64_t nb_cfg,
>> bool lflow_changed,
>> bool pflow_changed);
>> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
>> index 4dc9bc8cb..7203cda42 100644
>> --- a/controller/ovn-controller.c
>> +++ b/controller/ovn-controller.c
>> @@ -5731,6 +5731,8 @@ main(int argc, char *argv[])
>> &ct_zones_data->ctx.pending,
>> &lb_data->removed_tuples,
>> sbrec_meter_by_name,
>> + sbrec_ecmp_nexthop_table_get(
>> + ovnsb_idl_loop.idl),
>> ofctrl_seqno_get_req_cfg(),
>> engine_node_changed(&en_lflow_output),
>> engine_node_changed(&en_pflow_output));
>> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
>> index e065b1079..01008b899 100644
>> --- a/tests/system-ovn.at
>> +++ b/tests/system-ovn.at
>> @@ -13884,3 +13884,381 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
>> /.*terminating with signal 15.*/d"])
>> AT_CLEANUP
>> ])
>> +
>> +OVN_FOR_EACH_NORTHD([
>> +AT_SETUP([ECMP Flush CT entries - IPv4])
>> +AT_KEYWORDS([ecmp])
>> +ovn_start
>> +OVS_TRAFFIC_VSWITCHD_START()
>> +
>> +ADD_BR([br-int])
>> +ADD_BR([br-ext])
>> +ADD_BR([br-ecmp])
>> +
>> +ovs-ofctl add-flow br-ext action=normal
>> +ovs-ofctl add-flow br-ecmp action=normal
>> +# Set external-ids in br-int needed for ovn-controller
>> +ovs-vsctl \
>> + -- set Open_vSwitch . external-ids:system-id=hv1 \
>> + -- set Open_vSwitch .
>> external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
>> + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
>> + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
>> + -- set bridge br-int fail-mode=secure
>> other-config:disable-in-band=true
>> +
>> +# Start ovn-controller
>> +start_daemon ovn-controller
>> +ovs-vsctl set Open_vSwitch . external-ids:arp-max-timeout-sec=1
>> +
>> +check ovn-nbctl lr-add R1
>> +check ovn-nbctl set logical_router R1 options:chassis=hv1
>> +
>> +check ovn-nbctl ls-add sw0
>> +check ovn-nbctl ls-add public
>> +
>> +check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 192.168.1.1/24
>> +check ovn-nbctl lrp-add R1 rp-public 00:00:02:01:02:03 172.16.1.1/24
>> +
>> +ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \
>> + type=router options:router-port=rp-sw0 \
>> + -- lsp-set-addresses sw0-rp router
>
> check
>
>> +
>> +ovn-nbctl lsp-add public public-rp -- set Logical_Switch_Port public-rp \
>> + type=router options:router-port=rp-public \
>> + -- lsp-set-addresses public-rp router
>
> check
>
>> +
>> +ADD_NAMESPACES(alice)
>> +ADD_VETH(alice, alice, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \
>> + "192.168.1.1")
>> +check ovn-nbctl lsp-add sw0 alice \
>> + -- lsp-set-addresses alice "f0:00:00:01:02:03 192.168.1.2"
>> +
>> +AT_CHECK([ovs-vsctl set Open_vSwitch .
>> external-ids:ovn-bridge-mappings=phynet:br-ext])
>
> s/AT_CHECK/check
>
>> +check ovn-nbctl lsp-add public public1 \
>> + -- lsp-set-addresses public1 unknown \
>> + -- lsp-set-type public1 localnet \
>> + -- lsp-set-options public1 network_name=phynet
>> +
>> +ADD_NAMESPACES(ecmp-path0)
>> +ADD_VETH(ecmp-p01, ecmp-path0, br-ext, "172.16.1.2/24",
>> "f0:00:00:01:02:04", "172.16.1.1")
>> +ADD_VETH(ecmp-p02, ecmp-path0, br-ecmp, "172.16.2.2/24",
>> "f0:00:00:01:03:04")
>> +
>> +ADD_NAMESPACES(ecmp-path1)
>> +ADD_VETH(ecmp-p11, ecmp-path1, br-ext, "172.16.1.3/24",
>> "f0:00:00:01:02:05", "172.16.1.1")
>> +ADD_VETH(ecmp-p12, ecmp-path1, br-ecmp, "172.16.2.3/24",
>> "f0:00:00:01:03:05")
>> +
>> +ADD_NAMESPACES(bob)
>> +ADD_VETH(bob, bob, br-ecmp, "172.16.2.10/24", "f0:00:00:01:02:06",
>> "172.16.2.2")
>> +
>> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24
>> 172.16.1.2
>> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24
>> 172.16.1.3
>> +
>> +ovn-nbctl --wait=hv sync
>
> We probably need:
>
> wait_for_ports_up
> check ovn-nbctl --wait=hv sync
>
> instead.
>
>> +NETNS_DAEMONIZE([alice], [nc -l -k 80], [alice.pid])
>> +
>> +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \
>> +[0], [dnl
>> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
>> +])
>> +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
>> +
>> +wait_row_count ECMP_Nexthop 2
>> +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2'
>> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
>> +
>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>> +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
>> +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
>> +])
>> +
>> +# Change bob default IP address
>> +NS_CHECK_EXEC([bob], [ip route del 0.0.0.0/0 via 172.16.2.2])
>> +NS_CHECK_EXEC([bob], [ip route add 0.0.0.0/0 via 172.16.2.3])
>> +
>> +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \
>> +[0], [dnl
>> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
>> +])
>> +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
>> +
>> +wait_row_count ECMP_Nexthop 2
>> +check_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2'
>> +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
>> +
>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>> +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
>> +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
>> +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
>> +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
>> +])
>> +
>> +# Remove first ECMP route
>> +check ovn-nbctl lr-route-del R1 172.16.2.0/24 172.16.1.2
>> +wait_row_count ECMP_Nexthop 1
>> +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
>> +
>
> We have a race condition here. We should add "check ovn-nbctl --wait=hv
> sync" to ensure ovn-controller actually processed the next hop change.
>
>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>> +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
>> +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
>> +])
>> +
>> +# Add the route back and verify we do not flush if we have multiple
>> next-hops with the same mac address
>> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24
>> 172.16.1.2
>> +wait_row_count ECMP_Nexthop 2
>> +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2'
>> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
>> +
>> +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address
>> f0:00:00:01:02:05])
>> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.2'
>> +
>> +# Change bob default IP address
>> +NS_CHECK_EXEC([bob], [ip route del 0.0.0.0/0 via 172.16.2.3])
>> +NS_CHECK_EXEC([bob], [ip route add 0.0.0.0/0 via 172.16.2.2])
>> +
>> +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \
>> +[0], [dnl
>> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
>> +])
>> +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
>> +
>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>> +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
>> +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
>> +])
>> +
>> +# Remove first ECMP route
>> +check ovn-nbctl lr-route-del R1 172.16.2.0/24 172.16.1.2
>> +wait_row_count ECMP_Nexthop 1
>
> We have a race condition here. We should add "check ovn-nbctl --wait=hv
> sync" to ensure ovn-controller actually processed the next hop change.
>
>> +
>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>> +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
>> +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
>> +])
>> +
>> +# Remove second ECMP route
>> +check ovn-nbctl lr-route-del R1
>> +wait_row_count ECMP_Nexthop 0
>
> We have a race condition here. We should add "check ovn-nbctl --wait=hv
> sync" to ensure ovn-controller actually flushed conntrack.
>
>> +
>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>> +])
>> +
>> +OVS_APP_EXIT_AND_WAIT([ovn-controller])
>> +
>> +as ovn-sb
>> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
>> +
>> +as ovn-nb
>> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
>> +
>> +as northd
>> +OVS_APP_EXIT_AND_WAIT([ovn-northd])
>> +
>> +as
>> +OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
>> +/.*terminating with signal 15.*/d"])
>> +AT_CLEANUP
>> +])
>> +
>> +OVN_FOR_EACH_NORTHD([
>> +AT_SETUP([ECMP Flush CT entries - IPv6])
>> +AT_KEYWORDS([ecmp])
>> +ovn_start
>> +OVS_TRAFFIC_VSWITCHD_START()
>> +
>> +ADD_BR([br-int])
>> +ADD_BR([br-ext])
>> +ADD_BR([br-ecmp])
>> +
>> +ovs-ofctl add-flow br-ext action=normal
>> +ovs-ofctl add-flow br-ecmp action=normal
>> +# Set external-ids in br-int needed for ovn-controller
>> +ovs-vsctl \
>> + -- set Open_vSwitch . external-ids:system-id=hv1 \
>> + -- set Open_vSwitch .
>> external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
>> + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
>> + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
>> + -- set bridge br-int fail-mode=secure
>> other-config:disable-in-band=true
>> +
>> +# Start ovn-controller
>> +start_daemon ovn-controller
>> +ovs-vsctl set Open_vSwitch . external-ids:arp-max-timeout-sec=1
>> +
>> +check ovn-nbctl lr-add R1
>> +check ovn-nbctl set logical_router R1 options:chassis=hv1
>> +
>> +check ovn-nbctl ls-add sw0
>> +check ovn-nbctl ls-add public
>> +
>> +check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 fd11::1/64
>> +check ovn-nbctl lrp-add R1 rp-public 00:00:02:01:02:03 fd12::1/64
>> +
>> +ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \
>> + type=router options:router-port=rp-sw0 \
>> + -- lsp-set-addresses sw0-rp router
>
> check
>
>> +
>> +ovn-nbctl lsp-add public public-rp -- set Logical_Switch_Port public-rp \
>> + type=router options:router-port=rp-public \
>> + -- lsp-set-addresses public-rp router
>
> check
>
>> +
>> +ADD_NAMESPACES(alice)
>> +ADD_VETH(alice, alice, br-int, "fd11::2/64", "f0:00:00:01:02:03",
>> "fd11::1", "nodad")
>> +check ovn-nbctl lsp-add sw0 alice -- lsp-set-addresses alice
>> "f0:00:00:01:02:03 fd11::2"
>> +
>> +AT_CHECK([ovs-vsctl set Open_vSwitch .
>> external-ids:ovn-bridge-mappings=phynet:br-ext])
>> +check ovn-nbctl lsp-add public public1 \
>> + -- lsp-set-addresses public1 unknown \
>> + -- lsp-set-type public1 localnet \
>> + -- lsp-set-options public1 network_name=phynet
>> +
>> +ADD_NAMESPACES(ecmp-path0)
>> +ADD_VETH(ecmp-p01, ecmp-path0, br-ext, "fd12::2/64", "f0:00:00:01:02:04",
>> "fd12::1", "nodad")
>> +ADD_VETH(ecmp-p02, ecmp-path0, br-ecmp, "fd13::2/64", "f0:00:00:01:03:04")
>> +OVS_WAIT_UNTIL([NS_EXEC([ecmp-path0], [ip a show dev ecmp-p02 | grep
>> "fe80::" | grep -v tentative])])
>> +
>> +ADD_NAMESPACES(ecmp-path1)
>> +ADD_VETH(ecmp-p11, ecmp-path1, br-ext, "fd12::3/64", "f0:00:00:01:02:05",
>> "fd12::1", "nodad")
>> +ADD_VETH(ecmp-p12, ecmp-path1, br-ecmp, "fd13::3/64", "f0:00:00:01:03:05")
>> +OVS_WAIT_UNTIL([NS_EXEC([ecmp-path1], [ip a show dev ecmp-p12 | grep
>> "fe80::" | grep -v tentative])])
>> +
>> +ADD_NAMESPACES(bob)
>> +ADD_VETH(bob, bob, br-ecmp, "fd13::a/64", "f0:00:00:01:02:06", "fd13::2",
>> "nodad")
>> +
>> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2
>> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::3
>> +
>> +NS_CHECK_EXEC([ecmp-path0], [sysctl -w net.ipv6.conf.all.forwarding=1],[0],
>> [dnl
>> +net.ipv6.conf.all.forwarding = 1
>> +])
>> +NS_CHECK_EXEC([ecmp-path1], [sysctl -w net.ipv6.conf.all.forwarding=1],[0],
>> [dnl
>> +net.ipv6.conf.all.forwarding = 1
>> +])
>> +
>> +ovn-nbctl --wait=hv sync
>> +NETNS_DAEMONIZE([alice], [nc -6 -l -k 80], [alice.pid])
>> +
>> +NS_CHECK_EXEC([bob], [ping6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \
>> +[0], [dnl
>> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
>> +])
>> +
>> +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
>> +
>> +wait_row_count ECMP_Nexthop 2
>> +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"'
>> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
>> +
>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>> +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
>> +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
>> +])
>> +
>> +# Change bob default IP address
>> +NS_CHECK_EXEC([bob], [ip -6 route del ::/0 via fd13::2])
>> +NS_CHECK_EXEC([bob], [ip -6 route add ::/0 via fd13::3])
>> +
>> +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \
>> +[0], [dnl
>> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
>> +])
>> +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
>> +
>> +wait_row_count ECMP_Nexthop 2
>> +check_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"'
>> +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
>> +
>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>> +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
>> +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
>> +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
>> +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
>> +])
>> +
>> +# Remove first ECMP route
>> +check ovn-nbctl lr-route-del R1 fd13::/64 fd12::2
>> +wait_row_count ECMP_Nexthop 1
>> +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
>> +
>
> Same comment as for v4: race condition.
>
>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>> +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
>> +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
>> +])
>> +
>> + Add the route back and verify we do not flush if we have multiple
>> next-hops with the same mac address
>> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2
>> +wait_row_count ECMP_Nexthop 2
>> +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"'
>> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
>> +#
>> +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address
>> f0:00:00:01:02:05])
>> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::2"'
>> +
>> +# Change bob default IP address
>> +NS_CHECK_EXEC([bob], [ip -6 route del ::/0 via fd13::3])
>> +NS_CHECK_EXEC([bob], [ip -6 route add ::/0 via fd13::2])
>> +
>> +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \
>> +[0], [dnl
>> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
>> +])
>> +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
>> +
>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>> +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
>> +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
>> +])
>> +
>> +# Remove first ECMP route
>> +check ovn-nbctl lr-route-del R1 fd13::/64 fd12::2
>> +wait_row_count ECMP_Nexthop 1
>
> Same comment as for v4: race condition.
>
>> +
>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>> +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
>> +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
>> +])
>> +
>> +# Remove second ECMP route
>> +check ovn-nbctl lr-route-del R1
>> +wait_row_count ECMP_Nexthop 0
>> +
>
> Same comment as for v4: race condition.
>
>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
>> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
>> +])
>> +
>> +OVS_APP_EXIT_AND_WAIT([ovn-controller])
>> +
>> +as ovn-sb
>> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
>> +
>> +as ovn-nb
>> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
>> +
>> +as northd
>> +OVS_APP_EXIT_AND_WAIT([ovn-northd])
>> +
>> +as
>> +OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
>> +/.*terminating with signal 15.*/d"])
>> +AT_CLEANUP
>> +])
>
> We don't have any test for the case when two OVN routers have routes
> configured via the same next-hop IP. We should check what happens when
> we remove one of the routes.
>
> Regards,
> Dumitru
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev