Introduce the nexthop identifier in the ct_label.label field for ecmp-symmetric-reply connections. This field will be used by ovn-controller to track ct entries and to flush them when requested by the CMS (e.g. when the related static routes are removed).
Signed-off-by: Lorenzo Bianconi <[email protected]> --- northd/en-lflow.c | 3 +++ northd/inc-proc-northd.c | 1 + northd/northd.c | 41 +++++++++++++++++++++------- northd/northd.h | 1 + tests/ovn.at | 4 +-- tests/system-ovn.at | 58 +++++++++++++++++++++++----------------- 6 files changed, 72 insertions(+), 36 deletions(-) diff --git a/northd/en-lflow.c b/northd/en-lflow.c index 3dba5034b..b4df49076 100644 --- a/northd/en-lflow.c +++ b/northd/en-lflow.c @@ -54,6 +54,8 @@ lflow_get_input_data(struct engine_node *node, engine_get_input_data("lr_stateful", node); struct ed_type_ls_stateful *ls_stateful_data = engine_get_input_data("ls_stateful", node); + struct ecmp_nexthop_data *nexthop_data = + engine_get_input_data("ecmp_nexthop", node); lflow_input->sbrec_logical_flow_table = EN_OVSDB_GET(engine_get_input("SB_logical_flow", node)); @@ -83,6 +85,7 @@ lflow_get_input_data(struct engine_node *node, lflow_input->parsed_routes = &static_routes_data->parsed_routes; lflow_input->route_tables = &static_routes_data->route_tables; lflow_input->route_policies = &route_policies_data->route_policies; + lflow_input->nexthops_table = &nexthop_data->nexthops; struct ed_type_global_config *global_config = engine_get_input_data("global_config", node); diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c index c4e5b9bf6..3d4bfa175 100644 --- a/northd/inc-proc-northd.c +++ b/northd/inc-proc-northd.c @@ -281,6 +281,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, engine_add_input(&en_lflow, &en_route_policies, NULL); engine_add_input(&en_lflow, &en_static_routes, NULL); engine_add_input(&en_lflow, &en_bfd, NULL); + engine_add_input(&en_lflow, &en_ecmp_nexthop, NULL); engine_add_input(&en_lflow, &en_northd, lflow_northd_handler); engine_add_input(&en_lflow, &en_port_group, lflow_port_group_handler); engine_add_input(&en_lflow, &en_lr_stateful, lflow_lr_stateful_handler); diff --git a/northd/northd.c b/northd/northd.c index efe1e3f46..0e7ff0df1 100644 --- 
a/northd/northd.c +++ b/northd/northd.c @@ -10903,7 +10903,8 @@ add_ecmp_symmetric_reply_flows(struct lflow_table *lflows, struct ovn_port *out_port, const struct parsed_route *route, struct ds *route_match, - struct lflow_ref *lflow_ref) + struct lflow_ref *lflow_ref, + struct hmap *nexthops_table) { const struct nbrec_logical_router_static_route *st_route = route->route; struct ds match = DS_EMPTY_INITIALIZER; @@ -10939,15 +10940,28 @@ add_ecmp_symmetric_reply_flows(struct lflow_table *lflows, * ds_put_cstr() call. The previous contents are needed. */ ds_put_cstr(&match, " && !ct.rpl && (ct.new || ct.est)"); + struct ds nexthop_label = DS_EMPTY_INITIALIZER; + + struct ecmp_nexthop_entry *e; + HMAP_FOR_EACH_WITH_HASH (e, hmap_node, hash_string(st_route->nexthop, 0), + nexthops_table) { + if (!strcmp(st_route->nexthop, e->nexthop)) { + ds_put_format(&nexthop_label, "ct_label.label = %d;", e->id); + break; + } + } + ds_put_format(&actions, "ct_commit { ct_label.ecmp_reply_eth = eth.src; " - " %s = %" PRId64 ";}; " + " %s = %" PRId64 "; %s }; " "next;", - ct_ecmp_reply_port_match, out_port->sb->tunnel_key); + ct_ecmp_reply_port_match, out_port->sb->tunnel_key, + ds_cstr(&nexthop_label)); ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 100, ds_cstr(&match), ds_cstr(&actions), &st_route->header_, lflow_ref); + ds_destroy(&nexthop_label); /* Bypass ECMP selection if we already have ct_label information * for where to route the packet. 
@@ -11001,7 +11015,8 @@ static void build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, bool ct_masked_mark, const struct hmap *lr_ports, struct ecmp_groups_node *eg, - struct lflow_ref *lflow_ref) + struct lflow_ref *lflow_ref, + struct hmap *nexthops_table) { bool is_ipv4 = IN6_IS_ADDR_V4MAPPED(&eg->prefix); @@ -11059,7 +11074,7 @@ build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, add_ecmp_symmetric_reply_flows(lflows, od, ct_masked_mark, lrp_addr_s, out_port, route_, &route_match, - lflow_ref); + lflow_ref, nexthops_table); } ds_clear(&match); ds_put_format(&match, REG_ECMP_GROUP_ID" == %"PRIu16" && " @@ -12937,7 +12952,8 @@ build_static_route_flows_for_lrouter( struct lflow_table *lflows, const struct hmap *lr_ports, struct hmap *parsed_routes, struct simap *route_tables, - struct lflow_ref *lflow_ref) + struct lflow_ref *lflow_ref, + struct hmap *nexthops_table) { ovs_assert(od->nbr); ovn_lflow_add_default_drop(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, @@ -12980,7 +12996,7 @@ build_static_route_flows_for_lrouter( /* add a flow in IP_ROUTING, and one flow for each member in * IP_ROUTING_ECMP. 
*/ build_ecmp_route_flow(lflows, od, features->ct_no_masked_label, - lr_ports, group, lflow_ref); + lr_ports, group, lflow_ref, nexthops_table); } const struct unique_routes_node *ur; HMAP_FOR_EACH (ur, hmap_node, &unique_routes) { @@ -16204,6 +16220,7 @@ struct lswitch_flow_build_info { struct hmap *parsed_routes; struct hmap *route_policies; struct simap *route_tables; + struct hmap *nexthops_table; }; /* Helper function to combine all lflow generation which is iterated by @@ -16252,7 +16269,7 @@ build_lswitch_and_lrouter_iterate_by_lr(struct ovn_datapath *od, lsi->lflows, lsi->lr_ports, lsi->parsed_routes, lsi->route_tables, - NULL); + NULL, lsi->nexthops_table); build_mcast_lookup_flows_for_lrouter(od, lsi->lflows, &lsi->match, &lsi->actions, NULL); build_ingress_policy_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports, @@ -16575,7 +16592,8 @@ build_lswitch_and_lrouter_flows( const char *svc_monitor_mac, struct hmap *parsed_routes, struct hmap *route_policies, - struct simap *route_tables) + struct simap *route_tables, + struct hmap *nexthops_table) { char *svc_check_match = xasprintf("eth.dst == %s", svc_monitor_mac); @@ -16612,6 +16630,7 @@ build_lswitch_and_lrouter_flows( lsiv[index].parsed_routes = parsed_routes; lsiv[index].route_tables = route_tables; lsiv[index].route_policies = route_policies; + lsiv[index].nexthops_table = nexthops_table; ds_init(&lsiv[index].match); ds_init(&lsiv[index].actions); @@ -16657,6 +16676,7 @@ build_lswitch_and_lrouter_flows( .route_policies = route_policies, .match = DS_EMPTY_INITIALIZER, .actions = DS_EMPTY_INITIALIZER, + .nexthops_table = nexthops_table, }; /* Combined build - all lflow generation from lswitch and lrouter @@ -16819,7 +16839,8 @@ void build_lflows(struct ovsdb_idl_txn *ovnsb_txn, input_data->svc_monitor_mac, input_data->parsed_routes, input_data->route_policies, - input_data->route_tables); + input_data->route_tables, + input_data->nexthops_table); if (parallelization_state == STATE_INIT_HASH_SIZES) { 
parallelization_state = STATE_USE_PARALLELIZATION; diff --git a/northd/northd.h b/northd/northd.h index 1e82a1a48..da79730d6 100644 --- a/northd/northd.h +++ b/northd/northd.h @@ -216,6 +216,7 @@ struct lflow_input { struct hmap *parsed_routes; struct hmap *route_policies; struct simap *route_tables; + struct hmap *nexthops_table; }; extern int parallelization_state; diff --git a/tests/ovn.at b/tests/ovn.at index 5e984cf0a..0d7ca2b19 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -28571,7 +28571,7 @@ AT_CHECK([ for hv in 1 2; do grep table=$ecmp_stateful hv${hv}flows | \ grep "priority=100" | \ - grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_MARK\\[[16..31\\]]))" + grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_MARK\\[[16..31\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[96..127\\]]))" grep table=$arp_resolve hv${hv}flows | \ grep "priority=200" | \ @@ -28700,7 +28700,7 @@ AT_CHECK([ for hv in 1 2; do grep table=$ecmp_stateful hv${hv}flows | \ grep "priority=100" | \ - grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_MARK\\[[16..31\\]]))" + grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_MARK\\[[16..31\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[96..127\\]]))" grep table=$arp_resolve hv${hv}flows | \ grep "priority=200" | \ diff --git a/tests/system-ovn.at b/tests/system-ovn.at index f49330a1e..c12998946 100644 --- a/tests/system-ovn.at +++ b/tests/system-ovn.at @@ -6172,19 +6172,21 @@ NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.2 | FORMAT_PING], \ # and just ensure that the known ethernet address is present. 
AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.0.1) | \ sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | -sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl -icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x401020400000000 -tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x401020400000000,protoinfo=(state=<cleared>) +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | +sed -e 's/labels=0x[[0-9]]/labels=0x?/'], [0], [dnl +icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x?000000000401020400000000 +tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x?000000000401020400000000,protoinfo=(state=<cleared>) ]) # Ensure datapaths show conntrack states as expected # Like with conntrack entries, we shouldn't try to predict # port binding tunnel keys. So omit them from expected labels. 
-ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x401020400000000/.*)' -AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x401020400000000/.*)' -c], [0], [dnl +AT_CHECK([ovs-appctl dpctl/dump-flows | sed -e 's/label=0x[[0-9]]/label=0x?/' | \ +grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x?000000000401020400000000/.*)' -c], [0], [dnl 2 ]) -AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x401020400000000)' -c], [0], [dnl +AT_CHECK([[ovs-appctl dpctl/dump-flows | sed -e 's/ct_label(0x[0-9]/ct_label(0x?/' | \ +grep 'ct_state(-new+est+rpl+trk).*ct_label(0x?000000000401020400000000)' -c]], [0], [dnl 2 ]) @@ -6203,18 +6205,21 @@ NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.2 | FORMAT_PING], \ [0], [dnl 3 packets transmitted, 3 received, 0% packet loss, time 0ms ]) -AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x1001020400000000/.*)' -c], [0], [dnl +AT_CHECK([ovs-appctl dpctl/dump-flows | sed -e 's/label=0x[[0-9]]/label=0x?/' | \ +grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x?000000001001020400000000/.*)' -c], [0], [dnl 2 ]) -AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x1001020400000000)' -c], [0], [dnl +AT_CHECK([[ovs-appctl dpctl/dump-flows | sed -e 's/ct_label(0x[0-9]/ct_label(0x?/' | \ +grep 'ct_state(-new+est+rpl+trk).*ct_label(0x?000000001001020400000000)' -c]], [0], [dnl 2 ]) -AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 0x1001020400000000 | FORMAT_CT(172.16.0.1) | \ +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 1001020400000000 | FORMAT_CT(172.16.0.1) | \ sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | -sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl -icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000 
-tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000,protoinfo=(state=<cleared>) +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | +sed -e 's/labels=0x[[0-9]]/labels=0x?/' | sort], [0], [dnl +icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x?000000001001020400000000 +tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x?000000001001020400000000,protoinfo=(state=<cleared>) ]) # Check entries in table 76 and 77 expires w/o traffic OVS_WAIT_UNTIL([ @@ -6373,11 +6378,12 @@ NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 fd01::2 | FORMAT_PING], \ # Ensure datapaths show conntrack states as expected # Like with conntrack entries, we shouldn't try to predict # port binding tunnel keys. So omit them from expected labels. -AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x401020400000000/.*)' -c], [0], [dnl +AT_CHECK([ovs-appctl dpctl/dump-flows | sed -e 's/label=0x[[0-9]]/label=0x?/' | \ +grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x?000000000401020400000000/.*)' -c], [0], [dnl 2 ]) - -AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x401020400000000)' -c], [0], [dnl +AT_CHECK([[ovs-appctl dpctl/dump-flows | sed -e 's/ct_label(0x[0-9]/ct_label(0x?/' | \ +grep 'ct_state(-new+est+rpl+trk).*ct_label(0x?000000000401020400000000)' -c]], [0], [dnl 2 ]) @@ -6386,9 +6392,10 @@ AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_lab # and just ensure that the known ethernet address is present. 
AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd01::2) | \ sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | -sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl -icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x401020400000000 -tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x401020400000000,protoinfo=(state=<cleared>) +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | +sed -e 's/labels=0x[[0-9]]/labels=0x?/' | sort], [0], [dnl +icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x?000000000401020400000000 +tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x?000000000401020400000000,protoinfo=(state=<cleared>) ]) # Flush conntrack entries for easier output parsing of next test. 
@@ -6405,18 +6412,21 @@ NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 fd01::2 | FORMAT_PING], \ 3 packets transmitted, 3 received, 0% packet loss, time 0ms ]) -AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x1001020400000000/.*)' -c], [0], [dnl +AT_CHECK([ovs-appctl dpctl/dump-flows | sed -e 's/label=0x[[0-9]]/label=0x?/' | \ +grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x?000000001001020400000000/.*)' -c], [0], [dnl 2 ]) -AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x1001020400000000)' -c], [0], [dnl +AT_CHECK([[ovs-appctl dpctl/dump-flows | sed -e 's/ct_label(0x[0-9]/ct_label(0x?/' | \ +grep 'ct_state(-new+est+rpl+trk).*ct_label(0x?000000001001020400000000)' -c]], [0], [dnl 2 ]) -AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 0x1001020400000000 | FORMAT_CT(fd01::2) | \ +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 1001020400000000 | FORMAT_CT(fd01::2) | \ sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | -sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl -icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000 -tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000,protoinfo=(state=<cleared>) +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | +sed -e 's/labels=0x[[0-9]]/labels=0x?/'], [0], [dnl +icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x?000000001001020400000000 +tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x?000000001001020400000000,protoinfo=(state=<cleared>) ]) # Check entries in table 
76 and 77 expires w/o traffic -- 2.45.1 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
