From: Karthik Chandrashekar <[email protected]> This patch adds the ability to specify a custom set of packet headers for hash computation for ECMP routes, similar to the support that was added for LB in commit 5af304e7478adcf5ac50ed41e96a55bebebff3e8.
ECMP routes by default use dp_hash as the selection_method for OVS flows. When ecmp_selection_fields is specified, the selection_method will be hash, with the specified list of fields used for computing the hash. For simplicity, the list of fields used in the select action is the union of all the fields specified in each Logical_Router_Static_Route that is part of a given ECMP route. Signed-off-by: Karthik Chandrashekar <[email protected]> --- include/ovn/actions.h | 1 + lib/actions.c | 55 +++++++++++++++- northd/northd.c | 60 ++++++++++++++++-- ovn-nb.xml | 17 +++++ tests/ovn.at | 144 ++++++++++++++++++++++++++++++++++++++++-- utilities/ovn-nbctl.c | 17 +++-- 6 files changed, 273 insertions(+), 21 deletions(-) diff --git a/include/ovn/actions.h b/include/ovn/actions.h index 88cf4de79..a9af1e38e 100644 --- a/include/ovn/actions.h +++ b/include/ovn/actions.h @@ -338,6 +338,7 @@ struct ovnact_select { struct ovnact_select_dst *dsts; size_t n_dsts; uint8_t ltable; /* Logical table ID of next table. */ + char *hash_fields; struct expr_field res_field; }; diff --git a/lib/actions.c b/lib/actions.c index e8cc0994d..11d4998f2 100644 --- a/lib/actions.c +++ b/lib/actions.c @@ -1534,11 +1534,19 @@ parse_select_action(struct action_context *ctx, struct expr_field *res_field) struct ovnact_select_dst *dsts = NULL; size_t allocated_dsts = 0; size_t n_dsts = 0; + bool requires_hash_fields = false; + char *hash_fields = NULL; lexer_get(ctx->lexer); /* Skip "select". */ lexer_get(ctx->lexer); /* Skip '('. 
*/ - while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) { + if (lexer_match_id(ctx->lexer, "values")) { + lexer_force_match(ctx->lexer, LEX_T_EQUALS); + requires_hash_fields = true; + } + + while (!lexer_match(ctx->lexer, LEX_T_SEMICOLON) && + !lexer_match(ctx->lexer, LEX_T_RPAREN)) { struct ovnact_select_dst dst; if (!action_parse_uint16(ctx, &dst.id, "id")) { free(dsts); @@ -1574,11 +1582,39 @@ parse_select_action(struct action_context *ctx, struct expr_field *res_field) return; } + if (requires_hash_fields) { + if (!lexer_match_id(ctx->lexer, "hash_fields")) { + lexer_syntax_error(ctx->lexer, "expecting hash_fields"); + free(dsts); + return; + } + if (!lexer_match(ctx->lexer, LEX_T_EQUALS) || + ctx->lexer->token.type != LEX_T_STRING || + lexer_lookahead(ctx->lexer) != LEX_T_RPAREN) { + lexer_syntax_error(ctx->lexer, "invalid hash_fields"); + free(dsts); + return; + } + + hash_fields = xstrdup(ctx->lexer->token.s); + lexer_get(ctx->lexer); + if (!lexer_match(ctx->lexer, LEX_T_SEMICOLON)) { + lexer_get(ctx->lexer); + } + } else { + if (lexer_match_id(ctx->lexer, "hash_fields")) { + lexer_syntax_error(ctx->lexer, "hash_fields unexpected"); + free(dsts); + return; + } + } + struct ovnact_select *select = ovnact_put_SELECT(ctx->ovnacts); select->ltable = ctx->pp->cur_ltable + 1; select->dsts = dsts; select->n_dsts = n_dsts; select->res_field = *res_field; + select->hash_fields = hash_fields; } static void @@ -1588,6 +1624,9 @@ format_SELECT(const struct ovnact_select *select, struct ds *s) ds_put_cstr(s, " = "); ds_put_cstr(s, "select"); ds_put_char(s, '('); + if (select->hash_fields) { + ds_put_format(s, "values="); + } for (size_t i = 0; i < select->n_dsts; i++) { if (i) { ds_put_cstr(s, ", "); @@ -1598,6 +1637,10 @@ format_SELECT(const struct ovnact_select *select, struct ds *s) ds_put_format(s, "=%"PRIu16, dst->weight); } ds_put_char(s, ')'); + if (select->hash_fields) { + ds_chomp(s, ')'); + ds_put_format(s, "; hash_fields=\"%s\")", select->hash_fields); + } 
ds_put_char(s, ';'); } @@ -1612,9 +1655,14 @@ encode_SELECT(const struct ovnact_select *select, struct ofpact_group *og; struct ds ds = DS_EMPTY_INITIALIZER; - ds_put_format(&ds, "type=select,selection_method=dp_hash"); + ds_put_format(&ds, "type=select,selection_method=%s", + select->hash_fields ? "hash": "dp_hash"); + if (select->hash_fields) { + ds_put_format(&ds, ",fields(%s)", select->hash_fields); + } - if (ovs_feature_is_supported(OVS_DP_HASH_L4_SYM_SUPPORT)) { + if (ovs_feature_is_supported(OVS_DP_HASH_L4_SYM_SUPPORT) && + !select->hash_fields) { /* Select dp-hash l4_symmetric by setting the upper 32bits of * selection_method_param to value 1 (1 << 32): */ ds_put_cstr(&ds, ",selection_method_param=0x100000000"); @@ -1647,6 +1695,7 @@ static void ovnact_select_free(struct ovnact_select *select) { free(select->dsts); + free(select->hash_fields); } static void diff --git a/northd/northd.c b/northd/northd.c index 5b50ea191..70bd384c3 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -10242,6 +10242,7 @@ struct parsed_route { uint32_t route_table_id; uint32_t hash; const struct nbrec_logical_router_static_route *route; + const char *ecmp_selection_fields; bool ecmp_symmetric_reply; bool is_discard_route; }; @@ -10352,6 +10353,12 @@ parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports, pr->ecmp_symmetric_reply = smap_get_bool(&route->options, "ecmp_symmetric_reply", false); pr->is_discard_route = is_discard_route; + pr->ecmp_selection_fields = NULL; + const char *ecmp_selection_fields = smap_get(&route->options, + "ecmp_selection_fields"); + if (ecmp_selection_fields) { + pr->ecmp_selection_fields = ecmp_selection_fields; + } ovs_list_insert(routes, &pr->list_node); return pr; } @@ -10381,6 +10388,7 @@ struct ecmp_groups_node { const char *origin; uint32_t route_table_id; uint16_t route_count; + char *selection_fields; struct ovs_list route_list; /* Contains ecmp_route_list_node */ }; @@ -10397,6 +10405,34 @@ ecmp_groups_add_route(struct 
ecmp_groups_node *group, struct ecmp_route_list_node *er = xmalloc(sizeof *er); er->route = route; er->id = ++group->route_count; + + if (route->ecmp_selection_fields) { + if (group->selection_fields) { + struct sset current_field_set; + struct sset field_set; + + sset_from_delimited_string(&current_field_set, + group->selection_fields, ","); + sset_from_delimited_string(&field_set, + route->ecmp_selection_fields, ","); + + const char *field; + SSET_FOR_EACH(field, &field_set) { + sset_add(&current_field_set, field); + } + + group->selection_fields = xasprintf("%s", + sset_join(&current_field_set, + ",", "")); + + sset_destroy(&field_set); + sset_destroy(&current_field_set); + } else { + group->selection_fields = xasprintf("%s", + route->ecmp_selection_fields); + } + } + ovs_list_insert(&group->route_list, &er->list_node); } @@ -10419,6 +10455,7 @@ ecmp_groups_add(struct hmap *ecmp_groups, eg->is_src_route = route->is_src_route; eg->origin = smap_get_def(&route->route->options, "origin", ""); eg->route_table_id = route->route_table_id; + eg->selection_fields = NULL; ovs_list_init(&eg->route_list); ecmp_groups_add_route(eg, route); @@ -10734,19 +10771,28 @@ build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, eg->is_src_route, is_ipv4, &route_match, &priority, ofs); free(prefix_s); - struct ds actions = DS_EMPTY_INITIALIZER; - ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 - "; %s = select(", REG_ECMP_GROUP_ID, eg->id, - REG_ECMP_MEMBER_ID); - + struct ds values = DS_EMPTY_INITIALIZER; bool is_first = true; LIST_FOR_EACH (er, list_node, &eg->route_list) { if (is_first) { is_first = false; } else { - ds_put_cstr(&actions, ", "); + ds_put_cstr(&values, ", "); } - ds_put_format(&actions, "%"PRIu16, er->id); + ds_put_format(&values, "%"PRIu16, er->id); + } + + struct ds actions = DS_EMPTY_INITIALIZER; + if (eg->selection_fields) { + ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 + "; %s = select(values=%s", REG_ECMP_GROUP_ID, 
eg->id, + REG_ECMP_MEMBER_ID, ds_cstr(&values)); + + ds_put_format(&actions, "; hash_fields=\"%s\"", eg->selection_fields); + } else { + ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 + "; %s = select(%s", REG_ECMP_GROUP_ID, eg->id, + REG_ECMP_MEMBER_ID, ds_cstr(&values)); } ds_put_cstr(&actions, ");"); diff --git a/ovn-nb.xml b/ovn-nb.xml index 0f9a1005a..e362609ab 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml @@ -3662,6 +3662,23 @@ or <ref column="options" key="chassis" table="Logical_Router" /> set). </column> + <column name="options" key="ecmp_selection_fields"> + <p> + ECMP routes use OpenFlow groups of type <code>select</code> to + pick a nexthop among the list of available nexthops. + OVS supports two selection methods: <code>dp_hash</code> and + <code>hash</code> for hash computation and selecting + the buckets of a group. Please see the OVS documentation + (man ovs-ofctl) for more details on the selection methods. + </p> + + <p> + OVN by default uses <code>dp_hash</code>. In order to use the + <code>hash</code> selection method, specify a comma-separated + list of selection fields. + </p> + </column> + <column name="options" key="origin"> In case ovn-interconnection has been learned this route, it will have its origin set: either "connected" or "static". 
This key is supposed diff --git a/tests/ovn.at b/tests/ovn.at index 2ced7c0b2..d43668af8 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -2086,6 +2086,16 @@ reg0 = select(1, 2); encodes as group:20 uses group: id(20), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[[96..127]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[[96..127]],resubmit(,oflow_in_table)) +reg9[[16..31]] = select(values=1=50, 2=100, 3; hash_fields="ip_src,ip_dst" ); + formats as reg9[[16..31]] = select(values=1=50, 2=100, 3=100; hash_fields="ip_src,ip_dst"); + encodes as group:21 + uses group: id(21), name(type=select,selection_method=hash,fields(ip_src,ip_dst),bucket=bucket_id=0,weight:50,actions=load:1->xreg4[[16..31]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[[16..31]],resubmit(,oflow_in_table),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[[16..31]],resubmit(,oflow_in_table)) + +reg0 = select(values=1, 2; hash_fields="ip_dst,ip_src"); + formats as reg0 = select(values=1=100, 2=100; hash_fields="ip_dst,ip_src"); + encodes as group:22 + uses group: id(22), name(type=select,selection_method=hash,fields(ip_dst,ip_src),bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[[96..127]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[[96..127]],resubmit(,oflow_in_table)) + reg0 = select(1=, 2); Syntax error at `,' expecting weight. reg0 = select(1=0, 2); @@ -2094,6 +2104,14 @@ reg0 = select(1=123456, 2); Syntax error at `123456' expecting weight. reg0 = select(123); Syntax error at `;' expecting at least 2 group members. +reg0 = select(values=1, 2); + Syntax error at `;' expecting hash_fields. +reg0 = select(values=1, 2; hash_fields); + Syntax error at `)' invalid hash_fields. +reg0 = select(values=1, 2; hash_fields=); + Syntax error at `)' invalid hash_fields. +reg0 = select(1, 2; hash_fields="ip_src"); + Syntax error at `=' hash_fields unexpected. 
ip.proto = select(1, 2, 3); Field ip.proto is not modifiable. reg0[[0..14]] = select(1, 2, 3); @@ -2101,12 +2119,12 @@ reg0[[0..14]] = select(1, 2, 3); fwd_group(liveness=true, childports="eth0", "lsp1"); formats as fwd_group(liveness="true", childports="eth0", "lsp1"); - encodes as group:21 - uses group: id(21), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=watch_port:17,load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) + encodes as group:23 + uses group: id(23), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=watch_port:17,load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) fwd_group(childports="eth0", "lsp1"); - encodes as group:22 - uses group: id(22), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) + encodes as group:24 + uses group: id(24), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) fwd_group(childports=eth0); Syntax error at `eth0' expecting logical switch port. @@ -2115,8 +2133,8 @@ fwd_group(); Syntax error at `)' expecting `;'. 
fwd_group(childports="eth0", "lsp1"); - encodes as group:22 - uses group: id(22), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) + encodes as group:24 + uses group: id(24), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) fwd_group(liveness=xyzzy, childports="eth0", "lsp1"); Syntax error at `xyzzy' expecting true or false. @@ -26459,6 +26477,120 @@ OVN_CLEANUP([hv1]) AT_CLEANUP ]) +OVN_FOR_EACH_NORTHD([ +AT_SETUP([ECMP static routes - custom hash]) +ovn_start + +# Logical network: +# ls1 (192.168.1.0/24) - lr1 - ls2 (192.168.2.0/24) +# ls1 has lsp11 (192.168.1.11) and ls2 has lsp21 (192.168.2.21) and lsp22 +# (192.168.2.22) +# +# Static routes on lr1: +# 10.0.0.0/24 nexthop 192.168.2.21 +# 10.0.0.0/24 nexthop 192.168.2.22 +# +# ECMP hash on eth_src,eth_dst,ip_src,ip_dst +# +# Test: +# lsp11 sends packets to 10.0.0.100 with different source ports +# +# Expected result: +# All packets should go out of a single port, either lsp21 or lsp22 + +ovn-nbctl lr-add lr1 + +ovn-nbctl ls-add ls1 +ovn-nbctl ls-add ls2 + +for i in 1 2; do + ovn-nbctl lrp-add lr1 lrp-lr1-ls${i} 00:00:00:01:0${i}:01 192.168.${i}.1/24 + ovn-nbctl lsp-add ls${i} lsp-ls${i}-lr1 -- lsp-set-type lsp-ls${i}-lr1 router \ + -- lsp-set-options lsp-ls${i}-lr1 router-port=lrp-lr1-ls${i} \ + -- lsp-set-addresses lsp-ls${i}-lr1 router +done + +#install static routes +ovn-nbctl --ecmp-selection-fields="ip_src,eth_src" lr-route-add lr1 10.0.0.0/24 192.168.2.21 +ovn-nbctl --ecmp --ecmp-selection-fields="ip_src,ip_dst,eth_dst" lr-route-add lr1 10.0.0.0/24 192.168.2.22 + +# Create logical ports +ovn-nbctl lsp-add ls1 lsp11 -- \ + lsp-set-addresses lsp11 "f0:00:00:00:01:11 192.168.1.11" +ovn-nbctl lsp-add ls2 lsp21 -- \ + lsp-set-addresses lsp21 "f0:00:00:00:02:21 
192.168.2.21" +ovn-nbctl lsp-add ls2 lsp22 -- \ + lsp-set-addresses lsp22 "f0:00:00:00:02:22 192.168.2.22" + +net_add n1 +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 +ovs-vsctl -- add-port br-int hv1-vif1 -- \ + set interface hv1-vif1 external-ids:iface-id=lsp11 \ + options:tx_pcap=hv1/vif1-tx.pcap \ + options:rxq_pcap=hv1/vif1-rx.pcap \ + ofport-request=1 + +ovs-vsctl -- add-port br-int hv1-vif2 -- \ + set interface hv1-vif2 external-ids:iface-id=lsp21 \ + options:tx_pcap=hv1/vif2-tx.pcap \ + options:rxq_pcap=hv1/vif2-rx.pcap \ + ofport-request=2 + +ovs-vsctl -- add-port br-int hv1-vif3 -- \ + set interface hv1-vif3 external-ids:iface-id=lsp22 \ + options:tx_pcap=hv1/vif3-tx.pcap \ + options:rxq_pcap=hv1/vif3-rx.pcap \ + ofport-request=3 + +# wait for earlier changes to take effect +check ovn-nbctl --wait=hv sync +wait_for_ports_up + +ovn-sbctl dump-flows > sbflows +AT_CAPTURE_FILE([sbflows]) + +as hv1 ovs-ofctl dump-groups br-int > ofgroups +AT_CAPTURE_FILE([ofgroups]) +OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-groups br-int > ofgroups + grep "selection_method=hash,fields" ofgroups | \ + grep "eth_src" | grep "eth_dst" | grep "ip_src" | grep "ip_dst" | wc -l], [0], [1 +]) + +for i in $(seq 5001 5010); do + packet="inport==\"lsp11\" && eth.src==f0:00:00:00:01:11 && eth.dst==00:00:00:01:01:01 && + ip4 && ip.ttl==64 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && + tcp && tcp.src==$i && tcp.dst==80" + OVS_WAIT_UNTIL([as hv1 ovs-appctl -t ovn-controller inject-pkt "$packet"]) + + for j in 1 2; do + # Assume all packets go to lsp2${j}. 
+ exp_packet="eth.src==00:00:00:01:02:01 && eth.dst==f0:00:00:00:02:2${j} && + ip4 && ip.ttl==63 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && + tcp && tcp.src==$i && tcp.dst==80" + echo $exp_packet | ovstest test-ovn expr-to-packets >> expected_lsp2${j} + done +done + +# All packets should go out of a single port given the hashing is based on eth_src,eth_dst,ip_src,ip_dst which is fixed +OVS_WAIT_UNTIL([ + rcv_n1=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > lsp21.packets && cat lsp21.packets | wc -l` + rcv_n2=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif3-tx.pcap > lsp22.packets && cat lsp22.packets | wc -l` + echo $rcv_n1 $rcv_n2 + test $(($rcv_n1 + $rcv_n2)) -ge 10]) + +if test $rcv_n1 = 0; then + test $rcv_n2 -ge 10 +else + test $rcv_n1 -ge 10 +fi + +OVN_CLEANUP([hv1]) + +AT_CLEANUP +]) OVN_FOR_EACH_NORTHD([ AT_SETUP([route tables -- <main> route table routes]) diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c index 679d3f2d9..70e8fb239 100644 --- a/utilities/ovn-nbctl.c +++ b/utilities/ovn-nbctl.c @@ -4720,11 +4720,18 @@ nbctl_lr_route_add(struct ctl_context *ctx) nbrec_logical_router_static_route_set_route_table(route, route_table); } - if (ecmp_symmetric_reply) { - const struct smap options = SMAP_CONST1(&options, - "ecmp_symmetric_reply", - "true"); + const char *ecmp_selection_fields = shash_find_data(&ctx->options, + "--ecmp-selection-fields"); + if (ecmp_symmetric_reply || ecmp_selection_fields) { + struct smap options = SMAP_INITIALIZER(&options); + if (ecmp_symmetric_reply) { + smap_add(&options, "ecmp_symmetric_reply", "true"); + } + if (ecmp_selection_fields) { + smap_add(&options, "ecmp_selection_fields", ecmp_selection_fields); + } nbrec_logical_router_static_route_set_options(route, &options); + smap_destroy(&options); } nbrec_logical_router_update_static_routes_addvalue(lr, route); @@ -8057,7 +8064,7 @@ static const struct ctl_command_syntax nbctl_commands[] = { { "lr-route-add", 3, 4, "ROUTER PREFIX 
NEXTHOP [PORT]", nbctl_pre_lr_route_add, nbctl_lr_route_add, NULL, "--may-exist,--ecmp,--ecmp-symmetric-reply,--policy=," - "--route-table=,--bfd?", RW }, + "--route-table=,--bfd?,--ecmp-selection-fields=", RW }, { "lr-route-del", 1, 4, "ROUTER [PREFIX [NEXTHOP [PORT]]]", nbctl_pre_lr_route_del, nbctl_lr_route_del, NULL, "--if-exists,--policy=,--route-table=", RW }, -- 2.22.3 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
