On Tue, Jul 23, 2024 at 10:17 AM Lorenzo Bianconi <[email protected]> wrote:
> Introduce the nexthop identifier in the ct_label.label field for > ecmp-symmetric replies connections. This field will be used by > ovn-controller to track ct entries and to flush them if requested by the > CMS (e.g. removing the related static routes). > > Signed-off-by: Lorenzo Bianconi <[email protected]> > --- > northd/en-lflow.c | 3 +++ > northd/inc-proc-northd.c | 1 + > northd/northd.c | 34 ++++++++++++++++------- > northd/northd.h | 1 + > tests/ovn.at | 4 +-- > tests/system-ovn.at | 58 +++++++++++++++++++++++----------------- > 6 files changed, 65 insertions(+), 36 deletions(-) > > diff --git a/northd/en-lflow.c b/northd/en-lflow.c > index 3dba5034b..b4df49076 100644 > --- a/northd/en-lflow.c > +++ b/northd/en-lflow.c > @@ -54,6 +54,8 @@ lflow_get_input_data(struct engine_node *node, > engine_get_input_data("lr_stateful", node); > struct ed_type_ls_stateful *ls_stateful_data = > engine_get_input_data("ls_stateful", node); > + struct ecmp_nexthop_data *nexthop_data = > + engine_get_input_data("ecmp_nexthop", node); > > lflow_input->sbrec_logical_flow_table = > EN_OVSDB_GET(engine_get_input("SB_logical_flow", node)); > @@ -83,6 +85,7 @@ lflow_get_input_data(struct engine_node *node, > lflow_input->parsed_routes = &static_routes_data->parsed_routes; > lflow_input->route_tables = &static_routes_data->route_tables; > lflow_input->route_policies = &route_policies_data->route_policies; > + lflow_input->nexthops_table = &nexthop_data->nexthops; > > struct ed_type_global_config *global_config = > engine_get_input_data("global_config", node); > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c > index 7416b1c43..06e5bd76b 100644 > --- a/northd/inc-proc-northd.c > +++ b/northd/inc-proc-northd.c > @@ -277,6 +277,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, > engine_add_input(&en_lflow, &en_bfd_sync, NULL); > engine_add_input(&en_lflow, &en_route_policies, NULL); > engine_add_input(&en_lflow, &en_static_routes, NULL); > + 
engine_add_input(&en_lflow, &en_ecmp_nexthop, NULL); > engine_add_input(&en_lflow, &en_global_config, > node_global_config_handler); > engine_add_input(&en_lflow, &en_northd, lflow_northd_handler); > diff --git a/northd/northd.c b/northd/northd.c > index b5b332536..43fc7943f 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -10857,7 +10857,8 @@ add_ecmp_symmetric_reply_flows(struct lflow_table > *lflows, > struct ovn_port *out_port, > const struct parsed_route *route, > struct ds *route_match, > - struct lflow_ref *lflow_ref) > + struct lflow_ref *lflow_ref, > + struct simap *nexthops_table) > { > const struct nbrec_logical_router_static_route *st_route = > route->route; > struct ds match = DS_EMPTY_INITIALIZER; > @@ -10895,9 +10896,15 @@ add_ecmp_symmetric_reply_flows(struct lflow_table > *lflows, > ds_put_cstr(&match, " && !ct.rpl && (ct.new || ct.est)"); > ds_put_format(&actions, > "ct_commit { ct_label.ecmp_reply_eth = eth.src; " > - " %s = %" PRId64 ";}; " > - "next;", > + " %s = %" PRId64 ";", > ct_ecmp_reply_port_match, out_port->sb->tunnel_key); > + > + struct simap_node *n = simap_find(nexthops_table, st_route->nexthop); > + if (n) { > + ds_put_format(&actions, " ct_label.label = %d;", n->data); > + } > + ds_put_cstr(&actions, " }; next;"); > + > ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 100, > ds_cstr(&match), ds_cstr(&actions), > &st_route->header_, > @@ -10955,7 +10962,8 @@ static void > build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, > bool ct_masked_mark, const struct hmap *lr_ports, > struct ecmp_groups_node *eg, > - struct lflow_ref *lflow_ref) > + struct lflow_ref *lflow_ref, > + struct simap *nexthops_table) > > { > bool is_ipv4 = IN6_IS_ADDR_V4MAPPED(&eg->prefix); > @@ -11013,7 +11021,7 @@ build_ecmp_route_flow(struct lflow_table *lflows, > struct ovn_datapath *od, > add_ecmp_symmetric_reply_flows(lflows, od, ct_masked_mark, > lrp_addr_s, out_port, > route_, &route_match, > - lflow_ref); > 
+ lflow_ref, nexthops_table); > } > ds_clear(&match); > ds_put_format(&match, REG_ECMP_GROUP_ID" == %"PRIu16" && " > @@ -12907,7 +12915,8 @@ build_static_route_flows_for_lrouter( > struct lflow_table *lflows, const struct hmap *lr_ports, > struct hmap *parsed_routes, > struct simap *route_tables, > - struct lflow_ref *lflow_ref) > + struct lflow_ref *lflow_ref, > + struct simap *nexthops_table) > { > ovs_assert(od->nbr); > ovn_lflow_add_default_drop(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, > @@ -12950,7 +12959,7 @@ build_static_route_flows_for_lrouter( > /* add a flow in IP_ROUTING, and one flow for each member in > * IP_ROUTING_ECMP. */ > build_ecmp_route_flow(lflows, od, features->ct_no_masked_label, > - lr_ports, group, lflow_ref); > + lr_ports, group, lflow_ref, nexthops_table); > } > const struct unique_routes_node *ur; > HMAP_FOR_EACH (ur, hmap_node, &unique_routes) { > @@ -16154,6 +16163,7 @@ struct lswitch_flow_build_info { > struct hmap *parsed_routes; > struct hmap *route_policies; > struct simap *route_tables; > + struct simap *nexthops_table; > }; > > /* Helper function to combine all lflow generation which is iterated by > @@ -16202,7 +16212,7 @@ build_lswitch_and_lrouter_iterate_by_lr(struct > ovn_datapath *od, > lsi->lflows, lsi->lr_ports, > lsi->parsed_routes, > lsi->route_tables, > - NULL); > + NULL, lsi->nexthops_table); > build_mcast_lookup_flows_for_lrouter(od, lsi->lflows, &lsi->match, > &lsi->actions, NULL); > build_ingress_policy_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports, > @@ -16525,7 +16535,8 @@ build_lswitch_and_lrouter_flows( > const char *svc_monitor_mac, > struct hmap *parsed_routes, > struct hmap *route_policies, > - struct simap *route_tables) > + struct simap *route_tables, > + struct simap *nexthops_table) > { > > char *svc_check_match = xasprintf("eth.dst == %s", svc_monitor_mac); > @@ -16562,6 +16573,7 @@ build_lswitch_and_lrouter_flows( > lsiv[index].parsed_routes = parsed_routes; > lsiv[index].route_tables = route_tables; 
> lsiv[index].route_policies = route_policies; > + lsiv[index].nexthops_table = nexthops_table; > ds_init(&lsiv[index].match); > ds_init(&lsiv[index].actions); > > @@ -16607,6 +16619,7 @@ build_lswitch_and_lrouter_flows( > .route_policies = route_policies, > .match = DS_EMPTY_INITIALIZER, > .actions = DS_EMPTY_INITIALIZER, > + .nexthops_table = nexthops_table, > }; > > /* Combined build - all lflow generation from lswitch and lrouter > @@ -16769,7 +16782,8 @@ void build_lflows(struct ovsdb_idl_txn *ovnsb_txn, > input_data->svc_monitor_mac, > input_data->parsed_routes, > input_data->route_policies, > - input_data->route_tables); > + input_data->route_tables, > + input_data->nexthops_table); > > if (parallelization_state == STATE_INIT_HASH_SIZES) { > parallelization_state = STATE_USE_PARALLELIZATION; > diff --git a/northd/northd.h b/northd/northd.h > index 205793e56..c3b9c0464 100644 > --- a/northd/northd.h > +++ b/northd/northd.h > @@ -218,6 +218,7 @@ struct lflow_input { > struct hmap *parsed_routes; > struct hmap *route_policies; > struct simap *route_tables; > + struct simap *nexthops_table; > }; > > extern int parallelization_state; > diff --git a/tests/ovn.at b/tests/ovn.at > index 482544ac2..c93789105 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -28595,7 +28595,7 @@ AT_CHECK([ > for hv in 1 2; do > grep table=$ecmp_stateful hv${hv}flows | \ > grep "priority=100" | \ > - grep -c > "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_MARK\\[[16..31\\]]))" > + grep -c > "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_MARK\\[[16..31\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[96..127\\]]))" > > grep table=$arp_resolve hv${hv}flows | \ > grep "priority=200" | \ > @@ -28724,7 +28724,7 @@ AT_CHECK([ > for hv in 1 2; do > grep table=$ecmp_stateful hv${hv}flows | \ > grep "priority=100" | \ > - grep 
-c > "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_MARK\\[[16..31\\]]))" > + grep -c > "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_MARK\\[[16..31\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[96..127\\]]))" > > grep table=$arp_resolve hv${hv}flows | \ > grep "priority=200" | \ > diff --git a/tests/system-ovn.at b/tests/system-ovn.at > index c24ede7c5..2c4c3f5fb 100644 > --- a/tests/system-ovn.at > +++ b/tests/system-ovn.at > @@ -6172,19 +6172,21 @@ NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 > 10.0.0.2 | FORMAT_PING], \ > # and just ensure that the known ethernet address is present. > AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.0.1) | \ > sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > -sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl > > -icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x401020400000000 > > -tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x401020400000000,protoinfo=(state=<cleared>) > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | > +sed -e 's/labels=0x[[0-9]]/labels=0x?/'], [0], [dnl > > +icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x?000000000401020400000000 > > +tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x?000000000401020400000000,protoinfo=(state=<cleared>) > ]) > > # Ensure datapaths show conntrack states as expected > # Like with conntrack entries, we 
shouldn't try to predict > # port binding tunnel keys. So omit them from expected labels. > -ovs-appctl dpctl/dump-flows | grep > 'ct_state(+new-est-rpl+trk).*ct(.*label=0x401020400000000/.*)' > -AT_CHECK([ovs-appctl dpctl/dump-flows | grep > 'ct_state(+new-est-rpl+trk).*ct(.*label=0x401020400000000/.*)' -c], [0], > [dnl > +AT_CHECK([ovs-appctl dpctl/dump-flows | sed -e > 's/label=0x[[0-9]]/label=0x?/' | \ > +grep > 'ct_state(+new-est-rpl+trk).*ct(.*label=0x?000000000401020400000000/.*)' > -c], [0], [dnl > 2 > ]) > -AT_CHECK([ovs-appctl dpctl/dump-flows | grep > 'ct_state(-new+est+rpl+trk).*ct_label(0x401020400000000)' -c], [0], [dnl > +AT_CHECK([[ovs-appctl dpctl/dump-flows | sed -e > 's/ct_label(0x[0-9]/ct_label(0x?/' | \ > +grep 'ct_state(-new+est+rpl+trk).*ct_label(0x?000000000401020400000000)' > -c]], [0], [dnl > 2 > ]) > > @@ -6203,18 +6205,21 @@ NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 > 10.0.0.2 | FORMAT_PING], \ > [0], [dnl > 3 packets transmitted, 3 received, 0% packet loss, time 0ms > ]) > -AT_CHECK([ovs-appctl dpctl/dump-flows | grep > 'ct_state(+new-est-rpl+trk).*ct(.*label=0x1001020400000000/.*)' -c], [0], > [dnl > +AT_CHECK([ovs-appctl dpctl/dump-flows | sed -e > 's/label=0x[[0-9]]/label=0x?/' | \ > +grep > 'ct_state(+new-est-rpl+trk).*ct(.*label=0x?000000001001020400000000/.*)' > -c], [0], [dnl > 2 > ]) > -AT_CHECK([ovs-appctl dpctl/dump-flows | grep > 'ct_state(-new+est+rpl+trk).*ct_label(0x1001020400000000)' -c], [0], [dnl > +AT_CHECK([[ovs-appctl dpctl/dump-flows | sed -e > 's/ct_label(0x[0-9]/ct_label(0x?/' | \ > +grep 'ct_state(-new+est+rpl+trk).*ct_label(0x?000000001001020400000000)' > -c]], [0], [dnl > 2 > ]) > > -AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 0x1001020400000000 | > FORMAT_CT(172.16.0.1) | \ > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 1001020400000000 | > FORMAT_CT(172.16.0.1) | \ > sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > -sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > > 
-icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000 > > -tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000,protoinfo=(state=<cleared>) > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | > +sed -e 's/labels=0x[[0-9]]/labels=0x?/' | sort], [0], [dnl > > +icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x?000000001001020400000000 > > +tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x?000000001001020400000000,protoinfo=(state=<cleared>) > ]) > # Check entries in table 76 and 77 expires w/o traffic > OVS_WAIT_UNTIL([ > @@ -6373,11 +6378,12 @@ NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 > fd01::2 | FORMAT_PING], \ > # Ensure datapaths show conntrack states as expected > # Like with conntrack entries, we shouldn't try to predict > # port binding tunnel keys. So omit them from expected labels. 
> -AT_CHECK([ovs-appctl dpctl/dump-flows | grep > 'ct_state(+new-est-rpl+trk).*ct(.*label=0x401020400000000/.*)' -c], [0], > [dnl > +AT_CHECK([ovs-appctl dpctl/dump-flows | sed -e > 's/label=0x[[0-9]]/label=0x?/' | \ > +grep > 'ct_state(+new-est-rpl+trk).*ct(.*label=0x?000000000401020400000000/.*)' > -c], [0], [dnl > 2 > ]) > - > -AT_CHECK([ovs-appctl dpctl/dump-flows | grep > 'ct_state(-new+est+rpl+trk).*ct_label(0x401020400000000)' -c], [0], [dnl > +AT_CHECK([[ovs-appctl dpctl/dump-flows | sed -e > 's/ct_label(0x[0-9]/ct_label(0x?/' | \ > +grep 'ct_state(-new+est+rpl+trk).*ct_label(0x?000000000401020400000000)' > -c]], [0], [dnl > 2 > ]) > > @@ -6386,9 +6392,10 @@ AT_CHECK([ovs-appctl dpctl/dump-flows | grep > 'ct_state(-new+est+rpl+trk).*ct_lab > # and just ensure that the known ethernet address is present. > AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd01::2) | \ > sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > -sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > > -icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x401020400000000 > > -tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x401020400000000,protoinfo=(state=<cleared>) > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | > +sed -e 's/labels=0x[[0-9]]/labels=0x?/' | sort], [0], [dnl > > +icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x?000000000401020400000000 > > +tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x?000000000401020400000000,protoinfo=(state=<cleared>) > ]) > > # Flush conntrack entries for easier output 
parsing of next test. > @@ -6405,18 +6412,21 @@ NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 > fd01::2 | FORMAT_PING], \ > 3 packets transmitted, 3 received, 0% packet loss, time 0ms > ]) > > -AT_CHECK([ovs-appctl dpctl/dump-flows | grep > 'ct_state(+new-est-rpl+trk).*ct(.*label=0x1001020400000000/.*)' -c], [0], > [dnl > +AT_CHECK([ovs-appctl dpctl/dump-flows | sed -e > 's/label=0x[[0-9]]/label=0x?/' | \ > +grep > 'ct_state(+new-est-rpl+trk).*ct(.*label=0x?000000001001020400000000/.*)' > -c], [0], [dnl > 2 > ]) > -AT_CHECK([ovs-appctl dpctl/dump-flows | grep > 'ct_state(-new+est+rpl+trk).*ct_label(0x1001020400000000)' -c], [0], [dnl > +AT_CHECK([[ovs-appctl dpctl/dump-flows | sed -e > 's/ct_label(0x[0-9]/ct_label(0x?/' | \ > +grep 'ct_state(-new+est+rpl+trk).*ct_label(0x?000000001001020400000000)' > -c]], [0], [dnl > 2 > ]) > > -AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 0x1001020400000000 | > FORMAT_CT(fd01::2) | \ > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 1001020400000000 | > FORMAT_CT(fd01::2) | \ > sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > -sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl > > -icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000 > > -tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000,protoinfo=(state=<cleared>) > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | > +sed -e 's/labels=0x[[0-9]]/labels=0x?/'], [0], [dnl > > +icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x?000000001001020400000000 > > 
+tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x?000000001001020400000000,protoinfo=(state=<cleared>) > ]) > > # Check entries in table 76 and 77 expires w/o traffic > -- > 2.45.2 > > Looks good to me, thanks. Acked-by: Ales Musil <[email protected]> -- Ales Musil Senior Software Engineer - OVN Core Red Hat EMEA <https://www.redhat.com> [email protected] <https://red.ht/sig> _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
