On Tue, Aug 12, 2025 at 3:06 PM Xavier Simonart <xsimo...@redhat.com> wrote:
> Hi Ales, Dumitru
>
> Thanks for the patch
> I have a comment below
>
> Thanks
> Xavier
>

Hi Xavier, thank you for the review.

> On Wed, Aug 6, 2025 at 8:29 PM Ales Musil via dev <ovs-dev@openvswitch.org>
> wrote:
>
>> On Wed, Aug 6, 2025 at 12:53 PM Dumitru Ceara <dce...@redhat.com> wrote:
>>
>> > On 7/25/25 8:03 AM, Ales Musil wrote:
>> > > Add physical flows based on the EVPN bindings and multicast groups
>> > > that ensure the proper flow of traffic that is going through the
>> > > EVPN. In order for this to work, the CMS needs to ensure that there
>> > > is a proper VXLAN tunnel port in br-int and also that
>> > > "external_ids:ovn-evpn-local-ip" is set properly in the local OvS
>> > > database.
>> > >
>> > > All traffic will be flooded through the UNKNOWN group if there is
>> > > an unknown port at this stage. That's due to missing support for FDB
>> > > learning.
>> > >
>> > > Signed-off-by: Ales Musil <amu...@redhat.com>
>> > > ---
>> >
>> > Hi Ales,
>> >
>> > I only have a few minor comments, please see below.
>> >
>>
>> Hi Dumitru,
>>
>> thank you for the review.
>>
>> >
>> > >  NEWS                            |   4 +
>> > >  TODO.rst                        |   4 +
>> > >  controller/chassis.c            |  19 +++
>> > >  controller/lflow.h              |   7 +-
>> > >  controller/ovn-controller.8.xml |   8 +
>> > >  controller/ovn-controller.c     |  30 +++-
>> > >  controller/physical.c           | 283 ++++++++++++++++++++++++++++----
>> > >  controller/physical.h           |   8 +
>> > >  tests/ovn-macros.at             |   5 +-
>> > >  tests/ovn.at                    |   2 +-
>> > >  10 files changed, 327 insertions(+), 43 deletions(-)
>> > >
>> > > diff --git a/NEWS b/NEWS
>> > > index 0cce1790d..1b6b5ab1d 100644
>> > > --- a/NEWS
>> > > +++ b/NEWS
>> > > @@ -41,6 +41,10 @@ Post v25.03.0
>> > >    - Added support for running tests from the 'check-kernel' system test target
>> > >      under retis by setting OVS_TEST_WITH_RETIS=yes. See the 'Testing' section
>> > >      of the documentation for more details.
>> > > +  - Dynamic Routing:
>> > > +    * Add the option "external_ids:ovn-evpn-local-ip" into OvS datapath.
>> > > +      This option allows CMS to set IP address that will be used as source
>> > > +      for the EVPN traffic egressing through the tunnel.
>> > >
>> > >  OVN v25.03.0 - 07 Mar 2025
>> > >  --------------------------
>> > > diff --git a/TODO.rst b/TODO.rst
>> > > index 85208bfe3..9a9df78f2 100644
>> > > --- a/TODO.rst
>> > > +++ b/TODO.rst
>> > > @@ -153,3 +153,7 @@ OVN To-do List
>> > >      monitoring conditions to update before we actually try to learn routes.
>> > >      Otherwise we could try to add duplicated Learned_Routes and the ovnsb
>> > >      commit would fail.
>> > > +
>> > > +  * Allow ovn-evpn-local-ip to accept list of
>> > > +    $VNI1:$LOCAL_IP1,$VNI2:$LOCAL_IP2 combinations which will be properly
>> > > +    reflected in physical flows for given LS with VNI.
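
Just to make that TODO item a bit more concrete, the follow-up parsing could
look something like the sketch below (untested, illustration only; the helper
name and the smap-based storage are made up and not part of this patch):

    /* Hypothetical follow-up: parse "$VNI1:$LOCAL_IP1,$VNI2:$LOCAL_IP2"
     * into a VNI (as string) -> local IP map.  The first ':' is the
     * separator, so IPv6 addresses after it are fine. */
    static bool
    evpn_parse_local_ip_list(const char *spec, struct smap *vni_to_ip)
    {
        char *copy = xstrdup(spec);
        char *save_ptr = NULL;
        bool ok = true;

        for (char *entry = strtok_r(copy, ",", &save_ptr); entry && ok;
             entry = strtok_r(NULL, ",", &save_ptr)) {
            char *ip_str = strchr(entry, ':');
            struct in6_addr ip;
            unsigned int vni;

            if (!ip_str) {
                ok = false;
                break;
            }
            *ip_str++ = '\0';

            /* The VNI is 24 bits wide; the IP may be IPv4 or IPv6. */
            ok = str_to_uint(entry, 10, &vni) && vni < (1u << 24)
                 && ip46_parse(ip_str, &ip);
            if (ok) {
                smap_add(vni_to_ip, entry, ip_str);
            }
        }

        free(copy);
        return ok;
    }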
>> > > diff --git a/controller/chassis.c b/controller/chassis.c
>> > > index 7616069b7..d67cd0898 100644
>> > > --- a/controller/chassis.c
>> > > +++ b/controller/chassis.c
>> > > @@ -58,6 +58,7 @@ struct ovs_chassis_cfg {
>> > >      const char *trim_limit_lflow_cache;
>> > >      const char *trim_wmark_perc_lflow_cache;
>> > >      const char *trim_timeout_ms;
>> > > +    const char *evpn_local_ip;
>> > >
>> > >      /* Set of encap types parsed from the 'ovn-encap-type' external-id. */
>> > >      struct sset encap_type_set;
>> > > @@ -198,6 +199,13 @@ get_encap_csum(const struct smap *ext_ids, const char *chassis_id)
>> > >                                           "ovn-encap-csum", "true");
>> > >  }
>> > >
>> > > +static const char *
>> > > +get_evpn_local_ip(const struct smap *ext_ids, const char *chassis_id)
>> > > +{
>> > > +    return get_chassis_external_id_value(ext_ids, chassis_id,
>> > > +                                         "ovn-evpn-local-ip", "true");
>> > > +}
>> > > +
>> > >  static const char *
>> > >  get_datapath_type(const struct ovsrec_bridge *br_int)
>> > >  {
>> > > @@ -332,6 +340,8 @@ chassis_parse_ovs_config(const struct ovsrec_open_vswitch_table *ovs_table,
>> > >          get_trim_wmark_perc_lflow_cache(&cfg->external_ids, chassis_id);
>> > >      ovs_cfg->trim_timeout_ms =
>> > >          get_trim_timeout(&cfg->external_ids, chassis_id);
>> > > +    ovs_cfg->evpn_local_ip =
>> > > +        get_evpn_local_ip(&cfg->external_ids, chassis_id);
>> > >
>> > >      chassis_parse_ovs_encap_type(encap_type, &ovs_cfg->encap_type_set);
>> > >
>> > > @@ -384,6 +394,7 @@ chassis_build_other_config(const struct ovs_chassis_cfg *ovs_cfg,
>> > >      smap_replace(config, "ovn-trim-timeout-ms", ovs_cfg->trim_timeout_ms);
>> > >      smap_replace(config, "iface-types", ds_cstr_ro(&ovs_cfg->iface_types));
>> > >      smap_replace(config, "ovn-chassis-mac-mappings", ovs_cfg->chassis_macs);
>> > > +    smap_replace(config, "evpn-local-ip", ovs_cfg->evpn_local_ip);
>> > >      smap_replace(config, "is-interconn",
>> > >                   ovs_cfg->is_interconn ? "true" : "false");
>> > >      smap_replace(config, OVN_FEATURE_PORT_UP_NOTIF, "true");
>> > > @@ -504,6 +515,13 @@ chassis_other_config_changed(const struct ovs_chassis_cfg *ovs_cfg,
>> > >          return true;
>> > >      }
>> > >
>> > > +    const char *chassis_evpn_local_ip =
>> > > +        get_evpn_local_ip(&chassis_rec->other_config, chassis_rec->name);
>> > > +    if (strcmp(ovs_cfg->evpn_local_ip, chassis_evpn_local_ip)) {
>> > > +        return true;
>> > > +    }
>> > > +
>> > > +
>> > >      if (!smap_get_bool(&chassis_rec->other_config, OVN_FEATURE_PORT_UP_NOTIF,
>> > >                         false)) {
>> > >          return true;
>> > > @@ -722,6 +740,7 @@ update_supported_sset(struct sset *supported)
>> > >      sset_add(supported, "iface-types");
>> > >      sset_add(supported, "ovn-chassis-mac-mappings");
>> > >      sset_add(supported, "is-interconn");
>> > > +    sset_add(supported, "evpn-local-ip");
>> > >
>> > >      /* Internal options. */
>> > >      sset_add(supported, "is-vtep");
>> > > diff --git a/controller/lflow.h b/controller/lflow.h
>> > > index fa9e795e0..c2d277078 100644
>> > > --- a/controller/lflow.h
>> > > +++ b/controller/lflow.h
>> > > @@ -70,14 +70,15 @@ struct uuid;
>> > >  #define OFTABLE_OUTPUT_LARGE_PKT_DETECT 41
>> > >  #define OFTABLE_OUTPUT_LARGE_PKT_PROCESS 42
>> > >  #define OFTABLE_REMOTE_OUTPUT 43
>> > > -#define OFTABLE_LOCAL_OUTPUT 44
>> > > -#define OFTABLE_CHECK_LOOPBACK 45
>> > > +#define OFTABLE_REMOTE_VTEP_OUTPUT 44
>> > > +#define OFTABLE_LOCAL_OUTPUT 45
>> > > +#define OFTABLE_CHECK_LOOPBACK 46
>> > >
>> > >  /* Start of the OUTPUT section of the pipeline. */
>> > >  #define OFTABLE_OUTPUT_INIT OFTABLE_OUTPUT_LARGE_PKT_DETECT
>> > >
>> > >  /* Start of LOG_PIPELINE_LEN tables. */
>> > > -#define OFTABLE_LOG_EGRESS_PIPELINE 46
>> > > +#define OFTABLE_LOG_EGRESS_PIPELINE 47
>> > >  #define OFTABLE_SAVE_INPORT 64
>> > >  #define OFTABLE_LOG_TO_PHY 65
>> > >  #define OFTABLE_MAC_BINDING 66
>> > > diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml
>> > > index 3f2cf5bae..8a6d0b767 100644
>> > > --- a/controller/ovn-controller.8.xml
>> > > +++ b/controller/ovn-controller.8.xml
>> > > @@ -392,6 +392,14 @@
>> > >          exit. In order to keep backward compatibility the
>> > >          <code>--restart</code> exit flag has priority over this flag.
>> > >        </dd>
>> > > +
>> > > +      <dt><code>external_ids:ovn-evpn-local-ip</code></dt>
>> > > +      <dd>
>> > > +        IP address used as a source address for the EVPN traffic leaving this
>> > > +        ovn-controller. There is currently support only for single IP address
>> > > +        being specified. NOTE: this feature is experimental and may be subject
>> > > +        to removal/change in the future.
>> > > +      </dd>
>> >
>> > This is experimental so maybe it's fine but I'm afraid I'd expect at
>> > least dual stack support (so max one IP per family). I know you have a
>> > v2 almost ready so maybe just take this as a reminder of something to
>> > add in v3?
>> >
>>
>> Ack, I'll try to add it in v3.
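
The rough direction I have in mind for the dual-stack support is the sketch
below (untested, names are placeholders, v3 may end up looking different):
accept a comma-separated list and keep at most one address per family:

    /* Allow "ovn-evpn-local-ip=<IP>[,<IP>]" in any order, at most one
     * address per family. */
    static bool
    evpn_parse_local_ips(const char *spec, struct in6_addr *v4,
                         struct in6_addr *v6, bool *has_v4, bool *has_v6)
    {
        char *copy = xstrdup(spec);
        char *save_ptr = NULL;
        bool ok = true;

        *has_v4 = *has_v6 = false;
        for (char *token = strtok_r(copy, ",", &save_ptr); token && ok;
             token = strtok_r(NULL, ",", &save_ptr)) {
            struct in6_addr ip;

            if (!ip46_parse(token, &ip)) {
                ok = false;
            } else if (IN6_IS_ADDR_V4MAPPED(&ip)) {
                /* Reject a second address of the same family. */
                ok = !*has_v4;
                *v4 = ip;
                *has_v4 = true;
            } else {
                ok = !*has_v6;
                *v6 = ip;
                *has_v6 = true;
            }
        }

        free(copy);
        return ok && (*has_v4 || *has_v6);
    }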
>>
>> >
>> > >  </dl>
>> > >
>> > >  <p>
>> > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
>> > > index 996dcc021..2f2959edb 100644
>> > > --- a/controller/ovn-controller.c
>> > > +++ b/controller/ovn-controller.c
>> > > @@ -4630,6 +4630,9 @@ static void init_physical_ctx(struct engine_node *node,
>> > >      struct ed_type_northd_options *n_opts =
>> > >          engine_get_input_data("northd_options", node);
>> > >
>> > > +    struct ed_type_evpn_vtep_binding *eb_data =
>> > > +        engine_get_input_data("evpn_vtep_binding", node);
>> > > +
>> > >      parse_encap_ips(ovs_table, &p_ctx->n_encap_ips, &p_ctx->encap_ips);
>> > >      p_ctx->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
>> > >      p_ctx->sbrec_port_binding_by_datapath = sbrec_port_binding_by_datapath;
>> > > @@ -4647,6 +4650,8 @@ static void init_physical_ctx(struct engine_node *node,
>> > >      p_ctx->patch_ofports = &non_vif_data->patch_ofports;
>> > >      p_ctx->chassis_tunnels = &non_vif_data->chassis_tunnels;
>> > >      p_ctx->always_tunnel = n_opts->always_tunnel;
>> > > +    p_ctx->evpn_bindings = &eb_data->bindings;
>> > > +    p_ctx->evpn_multicast_groups = &eb_data->multicast_groups;
>> > >
>> > >      struct controller_engine_ctx *ctrl_ctx = engine_get_context()->client_ctx;
>> > >      p_ctx->if_mgr = ctrl_ctx->if_mgr;
>> > > @@ -4936,6 +4941,28 @@ pflow_output_debug_handler(struct engine_node *node, void *data)
>> > >      }
>> > >      return EN_HANDLED_UPDATED;
>> > >  }
>> > > +static enum engine_input_handler_result
>> > > +pflow_output_evpn_binding_handler(struct engine_node *node, void *data)
>> > > +{
>> > > +    struct ed_type_pflow_output *pfo = data;
>> > > +    struct ed_type_runtime_data *rt_data =
>> > > +        engine_get_input_data("runtime_data", node);
>> > > +    struct ed_type_non_vif_data *non_vif_data =
>> > > +        engine_get_input_data("non_vif_data", node);
>> > > +    struct ed_type_evpn_vtep_binding *eb_data =
>> > > +        engine_get_input_data("evpn_vtep_binding", node);
>> > > +
>> > > +    struct physical_ctx ctx;
>> > > +    init_physical_ctx(node, rt_data, non_vif_data, &ctx);
>> > > +
>> > > +    physical_handle_evpn_binding_changes(&ctx, &pfo->flow_table,
>> > > +                                         &eb_data->updated_bindings,
>> > > +                                         &eb_data->updated_multicast_groups,
>> > > +                                         &eb_data->removed_bindings,
>> > > +                                         &eb_data->removed_multicast_groups);
>> > > +    destroy_physical_ctx(&ctx);
>> > > +    return EN_HANDLED_UPDATED;
>> > > +}
>> > >
>> > >  static void *
>> > >  en_controller_output_init(struct engine_node *node OVS_UNUSED,
>> > > @@ -6661,9 +6688,8 @@ main(int argc, char *argv[])
>> > >      engine_add_input(&en_evpn_vtep_binding, &en_sb_datapath_binding,
>> > >                       evpn_vtep_binding_datapath_binding_handler);
>> > >
>> > > -    /* TODO Add real handler to process all bindings. */
>> > >      engine_add_input(&en_pflow_output, &en_evpn_vtep_binding,
>> > > -                     engine_noop_handler);
>> > > +                     pflow_output_evpn_binding_handler);
>> > >
>> > >      engine_add_input(&en_controller_output, &en_dns_cache,
>> > >                       NULL);
>> > > diff --git a/controller/physical.c b/controller/physical.c
>> > > index 15209318b..0a4b31040 100644
>> > > --- a/controller/physical.c
>> > > +++ b/controller/physical.c
>> > > @@ -19,6 +19,7 @@
>> > >  #include "byte-order.h"
>> > >  #include "ct-zone.h"
>> > >  #include "encaps.h"
>> > > +#include "evpn-binding.h"
>> > >  #include "flow.h"
>> > >  #include "ha-chassis.h"
>> > >  #include "lflow.h"
>> > > @@ -95,19 +96,25 @@ physical_register_ovs_idl(struct ovsdb_idl *ovs_idl)
>> > >      ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_ofport);
>> > >      ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_external_ids);
>> > >  }
>> > > -
>> > >  static void
>> > > -put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
>> > > -         struct ofpbuf *ofpacts)
>> > > +put_load_be(const void *value, size_t len, enum mf_field_id dst,
>> > > +            size_t ofs, size_t n_bits, struct ofpbuf *ofpacts)
>> > >  {
>> > >      struct ofpact_set_field *sf = ofpact_put_set_field(ofpacts,
>> > >                                                         mf_from_id(dst), NULL,
>> > >                                                         NULL);
>> > > -    ovs_be64 n_value = htonll(value);
>> > > -    bitwise_copy(&n_value, 8, 0, sf->value, sf->field->n_bytes, ofs, n_bits);
>> > > +    bitwise_copy(value, len, 0, sf->value, sf->field->n_bytes, ofs, n_bits);
>> > >      bitwise_one(ofpact_set_field_mask(sf), sf->field->n_bytes, ofs, n_bits);
>> > >  }
>> > >
>> > > +static void
>> > > +put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
>> > > +         struct ofpbuf *ofpacts)
>> > > +{
>> > > +    ovs_be64 n_value = htonll(value);
>> > > +    put_load_be(&n_value, 8, dst, ofs, n_bits, ofpacts);
>> > > +}
>> > > +
>> > >  static void
>> > >  put_move(enum mf_field_id src, int src_ofs,
>> > >           enum mf_field_id dst, int dst_ofs,
>> > > @@ -487,7 +494,7 @@ put_remote_port_redirect_overlay(const struct sbrec_port_binding *binding,
>> > >                                    port_key, is_vtep_port, ofpacts_clone);
>> > >          ofpact_put_OUTPUT(ofpacts_clone)->port = tun->ofport;
>> > >      }
>> > > -    put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts_clone);
>> > > +    put_resubmit(OFTABLE_REMOTE_VTEP_OUTPUT, ofpacts_clone);
>> > >      ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100,
>> > >                      binding->header_.uuid.parts[0], match,
>> > >                      ofpacts_clone, &binding->header_.uuid);
>> > > @@ -985,12 +992,12 @@ put_local_common_flows(uint32_t dp_key,
>> > >
>> > >      uint32_t port_key = pb->tunnel_key;
>> > >
>> > > -    /* Table 43, priority 100.
>> > > +    /* Table 45, priority 100.
>> > >       * =======================
>> > >       *
>> > >       * Implements output to local hypervisor. Each flow matches a
>> > >       * logical output port on the local hypervisor, and resubmits to
>> > > -     * table 44.
>> > > +     * table 46.
>> > >       */
>> > >
>> > >      ofpbuf_clear(ofpacts_p);
>> > > @@ -1000,13 +1007,13 @@ put_local_common_flows(uint32_t dp_key,
>> > >
>> > >      put_zones_ofpacts(zone_ids, ofpacts_p);
>> > >
>> > > -    /* Resubmit to table 44. */
>> > > +    /* Resubmit to table 46. */
>> > >      put_resubmit(OFTABLE_CHECK_LOOPBACK, ofpacts_p);
>> > >      ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100,
>> > >                      pb->header_.uuid.parts[0], &match, ofpacts_p,
>> > >                      &pb->header_.uuid);
>> > >
>> > > -    /* Table 44, Priority 100.
>> > > +    /* Table 46, Priority 100.
>> > >       * =======================
>> > >       *
>> > >       * Drop packets whose logical inport and outport are the same
>> > > @@ -1784,12 +1791,12 @@ consider_port_binding(const struct physical_ctx *ctx,
>> > >               ha_chassis_group_is_active(binding->ha_chassis_group,
>> > >                                          ctx->active_tunnels, ctx->chassis))) {
>> > >
>> > > -        /* Table 43, priority 100.
>> > > +        /* Table 45, priority 100.
>> > >           * =======================
>> > >           *
>> > >           * Implements output to local hypervisor. Each flow matches a
>> > >           * logical output port on the local hypervisor, and resubmits to
>> > > -         * table 41. For ports of type "chassisredirect", the logical
>> > > +         * table 46. For ports of type "chassisredirect", the logical
>> > >           * output port is changed from the "chassisredirect" port to the
>> > >           * underlying distributed port. */
>> > >
>> > > @@ -1830,7 +1837,7 @@ consider_port_binding(const struct physical_ctx *ctx,
>> > >               * go out from the same tunnel inport. */
>> > >              put_load(ofp_to_u16(OFPP_NONE), MFF_IN_PORT, 0, 16, ofpacts_p);
>> > >
>> > > -            /* Resubmit to table 41. */
>> > > +            /* Resubmit to table 46. */
>> > >              put_resubmit(OFTABLE_CHECK_LOOPBACK, ofpacts_p);
>> > >          }
>> > >
>> > > @@ -2061,7 +2068,7 @@ consider_port_binding(const struct physical_ctx *ctx,
>> > >                                    ofport, flow_table);
>> > >          }
>> > >
>> > > -        /* Table 41, priority 160.
>> > > +        /* Table 46, priority 160.
>> > >           * =======================
>> > >           *
>> > >           * Do not forward local traffic from a localport to a localnet port.
>> > > @@ -2132,13 +2139,13 @@ consider_port_binding(const struct physical_ctx *ctx,
>> > >              }
>> > >          }
>> > >
>> > > -        /* Table 42, priority 150.
>> > > +        /* Table 43, priority 150.
>> > >           * =======================
>> > >           *
>> > >           * Handles packets received from ports of type "localport". These
>> > >           * ports are present on every hypervisor. Traffic that originates at
>> > >           * one should never go over a tunnel to a remote hypervisor,
>> > > -         * so resubmit them to table 40 for local delivery. */
>> > > +         * so resubmit them to table 45 for local delivery. */
>> > >          if (type == LP_LOCALPORT) {
>> > >              ofpbuf_clear(ofpacts_p);
>> > >              put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts_p);
>> > > @@ -2152,7 +2159,7 @@ consider_port_binding(const struct physical_ctx *ctx,
>> > >          }
>> > >      } else if (access_type == PORT_LOCALNET && !ctx->always_tunnel) {
>> > >          /* Remote port connected by localnet port */
>> > > -        /* Table 43, priority 100.
>> > > +        /* Table 45, priority 100.
>> > >           * =======================
>> > >           *
>> > >           * Implements switching to localnet port. Each flow matches a
>> > > @@ -2172,7 +2179,7 @@ consider_port_binding(const struct physical_ctx *ctx,
>> > >
>> > >          put_load(localnet_port->tunnel_key, MFF_LOG_OUTPORT, 0, 32, ofpacts_p);
>> > >
>> > > -        /* Resubmit to table 43. */
>> > > +        /* Resubmit to table 45. */
>> > >          put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts_p);
>> > >          ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100,
>> > >                          binding->header_.uuid.parts[0],
>> > > @@ -2528,15 +2535,13 @@ consider_mc_group(const struct physical_ctx *ctx,
>> > >
>> > >      remote_ports = remote_ctx->ofpacts.size > 0;
>> > >      if (remote_ports) {
>> > > -        if (local_lports) {
>> > > -            put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ctx->ofpacts);
>> > > -        }
>> > > +        put_resubmit(OFTABLE_REMOTE_VTEP_OUTPUT, &remote_ctx->ofpacts);
>> > >          mc_ofctrl_add_flow(mc, remote_ctx, false, flow_table);
>> > >      }
>> > >
>> > >      if (ramp_ports && has_vtep) {
>> > >          put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ramp_ctx->ofpacts);
>> > > -        put_resubmit(OFTABLE_LOCAL_OUTPUT, &ramp_ctx->ofpacts);
>> > > +        put_resubmit(OFTABLE_REMOTE_VTEP_OUTPUT, &ramp_ctx->ofpacts);
>> > >          mc_ofctrl_add_flow(mc, ramp_ctx, false, flow_table);
>> > >      }
>> > >
>> > > @@ -2653,6 +2658,153 @@ physical_eval_remote_chassis_flows(const struct physical_ctx *ctx,
>> > >      ofpbuf_uninit(&ingress_ofpacts);
>> > >  }
>> > >
>> > > +static void
>> > > +physical_consider_evpn_binding(const struct evpn_binding *binding,
>> > > +                               const struct in6_addr *local_ip,
>> > > +                               struct ofpbuf *ofpacts, struct match *match,
>> > > +                               struct ovn_desired_flow_table *flow_table,
>> > > +                               bool ipv4)
>> > > +{
>> > > +    /* Ingress flows. */
>> > > +    ofpbuf_clear(ofpacts);
>> > > +    match_init_catchall(match);
>> > > +
>> > > +    match_set_in_port(match, binding->tunnel_ofport);
>> > > +    match_set_tun_id(match, htonll(binding->vni));
>> > > +    if (ipv4) {
>> > > +        match_set_tun_src(match,
>> > > +                          in6_addr_get_mapped_ipv4(&binding->remote_ip));
>> > > +        match_set_tun_dst(match, in6_addr_get_mapped_ipv4(local_ip));
>> > > +    } else {
>> > > +        match_set_tun_ipv6_src(match, &binding->remote_ip);
>> > > +        match_set_tun_ipv6_dst(match, local_ip);
>> > > +    }
>> > > +
>> > > +    put_load(binding->dp_key, MFF_LOG_DATAPATH, 0, 32, ofpacts);
>> > > +    put_load(binding->tunnel_key, MFF_LOG_INPORT, 0, 32, ofpacts);
>> > > +    put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, ofpacts);
>> > > +
>> > > +    ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 1050,
>> > > +                    binding->flow_uuid.parts[0],
>> > > +                    match, ofpacts, &binding->flow_uuid);
>> > > +
>> > > +    /* Egress flows. */
>> > > +    ofpbuf_clear(ofpacts);
>> > > +    match_init_catchall(match);
>> > > +
>> > > +    match_outport_dp_and_port_keys(match, binding->dp_key,
>> > > +                                   binding->tunnel_key);
>> > > +
>> > > +    if (ipv4) {
>> > > +        ovs_be32 ip4 = in6_addr_get_mapped_ipv4(local_ip);
>> > > +        put_load_be(&ip4, sizeof ip4, MFF_TUN_SRC, 0, 32, ofpacts);
>> > > +        ip4 = in6_addr_get_mapped_ipv4(&binding->remote_ip);
>> > > +        put_load_be(&ip4, sizeof ip4, MFF_TUN_DST, 0, 32, ofpacts);
>> > > +    } else {
>> > > +        put_load_be(&local_ip, sizeof local_ip, MFF_TUN_IPV6_SRC,
>> > > +                    0, 128, ofpacts);
>> > > +        put_load_be(&binding->remote_ip, sizeof binding->remote_ip,
>> > > +                    MFF_TUN_IPV6_DST, 0, 128, ofpacts);
>> > > +    }
>> > > +    put_load(binding->vni, MFF_TUN_ID, 0, 24, ofpacts);
>> > > +
>> > > +    size_t ofpacts_size = ofpacts->size;
>> > > +    ofpact_put_OUTPUT(ofpacts)->port = binding->tunnel_ofport;
>> > > +
>> > > +    ofctrl_add_flow(flow_table, OFTABLE_REMOTE_VTEP_OUTPUT, 50,
>> > > +                    binding->flow_uuid.parts[0],
>> > > +                    match, ofpacts, &binding->flow_uuid);
>> > > +
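
As an aside for reviewers, the ingress and egress flows added above would
come out roughly like this in ovs-ofctl terms (hand-written illustration with
made-up values -- VNI 100, dp_key 0x5, port key 0x11, tunnel ofport 42, local
VTEP 192.0.2.1, remote VTEP 192.0.2.2 -- not actual dump output):

    table=0, priority=1050,tun_id=0x64,tun_src=192.0.2.2,tun_dst=192.0.2.1,in_port=42
        actions=load:0x5->OXM_OF_METADATA[0..31],set_field:0x11->reg14,resubmit(,8)
    table=44, priority=50,reg15=0x11,metadata=0x5
        actions=set_field:192.0.2.1->tun_src,set_field:192.0.2.2->tun_dst,load:0x64->NXM_NX_TUN_ID[0..23],output:42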
>> > > +    /* Add flow that will match on LOOPBACK flag, in that case set
>> > > +     * in_port to none otherwise the hairpin traffic would be rejected
>> > > +     * by ovs. */
>> > > +    match_set_reg_masked(match, MFF_LOG_FLAGS - MFF_REG0,
>> > > +                         MLF_ALLOW_LOOPBACK, MLF_ALLOW_LOOPBACK);
>> > > +
>> > > +    ofpbuf_truncate(ofpacts, ofpacts_size);
>> > > +    put_load(ofp_to_u16(OFPP_NONE), MFF_IN_PORT, 0, 16, ofpacts);
>> > > +    ofpact_put_OUTPUT(ofpacts)->port = binding->tunnel_ofport;
>> > > +
>> > > +    ofctrl_add_flow(flow_table, OFTABLE_REMOTE_VTEP_OUTPUT, 55,
>> > > +                    binding->flow_uuid.parts[0],
>> > > +                    match, ofpacts, &binding->flow_uuid);
>> >
>> > I know you have v2 almost ready so maybe let's just add a TODO.rst item
>> > to document these new flows in the ovn-architecture doc? Or just a note
>> > to ourselves to do this in v3? :)
>> >
>>
>> I'm not sure if ovn-architecture is the right place for this. Should we
>> try to explain that in a separate document that will document the overall
>> BGP support?
>>
>> >
>> > > +}
>> > > +
>> > > +static void
>> > > +physical_consider_evpn_multicast(const struct evpn_multicast_group *mc_group,
>> > > +                                 const struct in6_addr *local_ip,
>> > > +                                 struct ofpbuf *ofpacts, struct match *match,
>> > > +                                 struct ovn_desired_flow_table *flow_table,
>> > > +                                 bool ipv4)
>> > > +{
>> > > +    const struct evpn_binding *binding = NULL;
>> > > +
>> > > +    ofpbuf_clear(ofpacts);
>> > > +    uint32_t multicast_tunnel_keys[] = {OVN_MCAST_FLOOD_TUNNEL_KEY,
>> > > +                                        OVN_MCAST_UNKNOWN_TUNNEL_KEY,
>> > > +                                        OVN_MCAST_FLOOD_L2_TUNNEL_KEY};
>> > > +    if (ipv4) {
>> > > +        ovs_be32 ip4 = in6_addr_get_mapped_ipv4(local_ip);
>> > > +        put_load_be(&ip4, sizeof ip4, MFF_TUN_SRC, 0, 32, ofpacts);
>> > > +    } else {
>> > > +        put_load_be(&local_ip, sizeof local_ip, MFF_TUN_IPV6_SRC,
>> > > +                    0, 128, ofpacts);
>> > > +    }
>> > > +    put_load(mc_group->vni, MFF_TUN_ID, 0, 24, ofpacts);
>> > > +
>> > > +    const struct hmapx_node *node;
>> > > +    HMAPX_FOR_EACH (node, &mc_group->bindings) {
>> > > +        binding = node->data;
>> > > +        if (ipv4) {
>> > > +            ovs_be32 ip4 = in6_addr_get_mapped_ipv4(&binding->remote_ip);
>> > > +            put_load_be(&ip4, sizeof ip4, MFF_TUN_DST, 0, 32, ofpacts);
>> > > +        } else {
>> > > +            put_load_be(&binding->remote_ip, sizeof binding->remote_ip,
>> > > +                        MFF_TUN_IPV6_DST, 0, 128, ofpacts);
>> > > +        }
>> > > +        ofpact_put_OUTPUT(ofpacts)->port = binding->tunnel_ofport;
>> > > +    }
>> > > +    put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts);
>> > > +
>> > > +    for (size_t i = 0; i < ARRAY_SIZE(multicast_tunnel_keys); i++) {
>> > > +        match_init_catchall(match);
>> > > +        match_outport_dp_and_port_keys(match, binding->dp_key,
>>
> Do we have the same dp_key for all bindings, as this uses only the last
> "binding" from the previous loop? Is this expected?
>

Yeah, we should have the same dp_key for all bindings in the group. I have
added an assert in v3 that the mc_group actually has bindings.
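
For reference, the v3 guard is essentially just this at the top of the
function (modulo exact wording):

    /* An evpn_multicast_group is created only when it has at least one
     * binding and all bindings share the group's datapath, so "binding"
     * is guaranteed to be non-NULL after the flooding loop. */
    ovs_assert(!hmapx_is_empty(&mc_group->bindings));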
>> > > +                                       multicast_tunnel_keys[i]);
>> > > +
>> > > +        ofctrl_add_flow(flow_table, OFTABLE_REMOTE_VTEP_OUTPUT, 50,
>> > > +                        mc_group->flow_uuid.parts[0],
>> > > +                        match, ofpacts, &mc_group->flow_uuid);
>> > > +    }
>> >
>> > I have the same doc comment for these I guess.
>> >
>> > > +}
>> > > +
>> > > +static void
>> > > +physical_eval_evpn_flows(const struct physical_ctx *ctx,
>> > > +                         struct ofpbuf *ofpacts,
>> > > +                         struct ovn_desired_flow_table *flow_table)
>> > > +{
>> > > +    const char *local_ip_str = smap_get_def(&ctx->chassis->other_config,
>> > > +                                            "evpn-local-ip", "");
>> > > +    struct in6_addr local_ip;
>> > > +    if (!ip46_parse(local_ip_str, &local_ip)) {
>> > > +        return;
>> > > +    }
>> > > +
>> > > +    struct match match = MATCH_CATCHALL_INITIALIZER;
>> > > +    bool ipv4 = IN6_IS_ADDR_V4MAPPED(&local_ip);
>> > > +
>> > > +    const struct evpn_binding *binding;
>> > > +    HMAP_FOR_EACH (binding, hmap_node, ctx->evpn_bindings) {
>> > > +        physical_consider_evpn_binding(binding, &local_ip, ofpacts,
>> > > +                                       &match, flow_table, ipv4);
>> > > +    }
>> > > +
>> > > +    const struct evpn_multicast_group *mc_group;
>> > > +    HMAP_FOR_EACH (mc_group, hmap_node, ctx->evpn_multicast_groups) {
>> > > +        physical_consider_evpn_multicast(mc_group, &local_ip, ofpacts,
>> > > +                                         &match, flow_table, ipv4);
>> > > +    }
>> > > +}
>> > > +
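
In case anyone wants to try this end to end before test coverage lands, the
CMS-side setup physical_eval_evpn_flows() expects is along these lines (port
name and addresses are made up; the tunnel has to be flow-based since the
flows set tun_src/tun_dst/tun_id themselves, and ovn-controller copies
external_ids:ovn-evpn-local-ip into the Chassis record as evpn-local-ip):

    # Flow-based VXLAN port in br-int (the name is arbitrary).
    ovs-vsctl add-port br-int vxlan-evpn -- \
        set Interface vxlan-evpn type=vxlan \
            options:key=flow options:remote_ip=flow options:local_ip=flow
    # Local VTEP IP used as the tunnel source.
    ovs-vsctl set Open_vSwitch . external_ids:ovn-evpn-local-ip=192.0.2.1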
>> > >  static void
>> > >  physical_eval_port_binding(struct physical_ctx *p_ctx,
>> > >                             const struct sbrec_port_binding *pb,
>> > > @@ -2760,6 +2912,55 @@ physical_handle_mc_group_changes(struct physical_ctx *p_ctx,
>> > >      }
>> > >  }
>> > >
>> > > +void
>> > > +physical_handle_evpn_binding_changes(
>> > > +    struct physical_ctx *ctx, struct ovn_desired_flow_table *flow_table,
>> > > +    const struct hmapx *updated_bindings,
>> > > +    const struct hmapx *updated_multicast_groups,
>> > > +    const struct uuidset *removed_bindings,
>> > > +    const struct uuidset *removed_multicast_groups)
>> > > +{
>> > > +    const char *local_ip_str = smap_get_def(&ctx->chassis->other_config,
>> > > +                                            "evpn-local-ip", "");
>> > > +    struct in6_addr local_ip;
>> > > +    if (!ip46_parse(local_ip_str, &local_ip)) {
>> > > +        return;
>> > > +    }
>> > > +
>> > > +    struct ofpbuf ofpacts;
>> > > +    ofpbuf_init(&ofpacts, 0);
>> > > +    struct match match = MATCH_CATCHALL_INITIALIZER;
>> > > +    bool ipv4 = IN6_IS_ADDR_V4MAPPED(&local_ip);
>> > > +
>> > > +    const struct hmapx_node *node;
>> > > +    HMAPX_FOR_EACH (node, updated_bindings) {
>> > > +        const struct evpn_binding *binding = node->data;
>> > > +
>> > > +        ofctrl_remove_flows(flow_table, &binding->flow_uuid);
>> > > +        physical_consider_evpn_binding(binding, &local_ip, &ofpacts,
>> > > +                                       &match, flow_table, ipv4);
>> > > +    }
>> > > +
>> > > +    HMAPX_FOR_EACH (node, updated_multicast_groups) {
>> > > +        const struct evpn_multicast_group *mc_group = node->data;
>> > > +
>> > > +        ofctrl_remove_flows(flow_table, &mc_group->flow_uuid);
>> > > +        physical_consider_evpn_multicast(mc_group, &local_ip, &ofpacts,
>> > > +                                         &match, flow_table, ipv4);
>> > > +    }
>> > > +
>> > > +    ofpbuf_uninit(&ofpacts);
>> > > +
>> > > +    const struct uuidset_node *uuidset_node;
>> > > +    UUIDSET_FOR_EACH (uuidset_node, removed_bindings) {
>> > > +        ofctrl_remove_flows(flow_table, &uuidset_node->uuid);
>> > > +    }
>> > > +
>> > > +    UUIDSET_FOR_EACH (uuidset_node, removed_multicast_groups) {
>> > > +        ofctrl_remove_flows(flow_table, &uuidset_node->uuid);
>> > > +    }
>> > > +}
>> > > +
>> > >  void
>> > >  physical_run(struct physical_ctx *p_ctx,
>> > >               struct ovn_desired_flow_table *flow_table)
>> > > @@ -2809,7 +3010,7 @@ physical_run(struct physical_ctx *p_ctx,
>> > >       * have metadata about the ingress and egress logical ports.
>> > >       * VXLAN encapsulations have metadata about the egress logical port only.
>> > >       * We set MFF_LOG_DATAPATH, MFF_LOG_INPORT, and MFF_LOG_OUTPORT from the
>> > > -     * tunnel key data where possible, then resubmit to table 40 to handle
>> > > +     * tunnel key data where possible, then resubmit to table 45 to handle
>> > >       * packets to the local hypervisor. */
>> > >      struct chassis_tunnel *tun;
>> > >      HMAP_FOR_EACH (tun, hmap_node, p_ctx->chassis_tunnels) {
>> > > @@ -2922,7 +3123,7 @@ physical_run(struct physical_ctx *p_ctx,
>> > >       */
>> > >      add_default_drop_flow(p_ctx, OFTABLE_PHY_TO_LOG, flow_table);
>> > >
>> > > -    /* Table 40-43, priority 0.
>> > > +    /* Table 41-42, priority 0.
>> > >       * ========================
>> > >       *
>> > >       * Default resubmit actions for OFTABLE_OUTPUT_LARGE_PKT_* tables.
>> > > @@ -2948,7 +3149,7 @@ physical_run(struct physical_ctx *p_ctx,
>> > >      ofctrl_add_flow(flow_table, OFTABLE_OUTPUT_LARGE_PKT_PROCESS, 0, 0, &match,
>> > >                      &ofpacts, hc_uuid);
>> > >
>> > > -    /* Table 42, priority 150.
>> > > +    /* Table 43, priority 150.
>> > >       * =======================
>> > >       *
>> > >       * Handles packets received from a VXLAN tunnel which get resubmitted to
>> > > @@ -2961,13 +3162,13 @@ physical_run(struct physical_ctx *p_ctx,
>> > >      match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, MLF_RCV_FROM_RAMP,
>> > >                           MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK);
>> > >
>> > > -    /* Resubmit to table 40. */
>> > > +    /* Resubmit to table 45. */
>> > >      ofpbuf_clear(&ofpacts);
>> > >      put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
>> > >      ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 150, 0,
>> > >                      &match, &ofpacts, hc_uuid);
>> > >
>> > > -    /* Table 42, priority 150.
>> > > +    /* Table 43, priority 150.
>> > >       * =======================
>> > >       *
>> > >       * Packets that should not be sent to other hypervisors.
>> > > @@ -2975,35 +3176,46 @@ physical_run(struct physical_ctx *p_ctx,
>> > >      match_init_catchall(&match);
>> > >      match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0,
>> > >                           MLF_LOCAL_ONLY, MLF_LOCAL_ONLY);
>> > > -    /* Resubmit to table 40. */
>> > > +    /* Resubmit to table 45. */
>> > >      ofpbuf_clear(&ofpacts);
>> > >      put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
>> > >      ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 150, 0,
>> > >                      &match, &ofpacts, hc_uuid);
>> > >
>> > > -    /* Table 42, Priority 0.
>> > > +    /* Table 43, Priority 0.
>> > >       * =======================
>> > >       *
>> > > -     * Resubmit packets that are not directed at tunnels or part of a
>> > > -     * multicast group to the local output table. */
>> > > +     * Resubmit packets that are not directed at OVN tunnels or part of a
>> > > +     * multicast group to the VTEP output table. */
>> > >      match_init_catchall(&match);
>> > >      ofpbuf_clear(&ofpacts);
>> > > -    put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
>> > > +    put_resubmit(OFTABLE_REMOTE_VTEP_OUTPUT, &ofpacts);
>> > >      ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 0, 0, &match,
>> > >                      &ofpacts, hc_uuid);
>> > >
>> > > -    /* Table 40, priority 0.
>> > > +    /* Table 44, Priority 0.
>> > > +     * =======================
>> > > +     *
>> > > +     * Resubmit packets that are not directed to remote VTEP to the local
>> > > +     * output table. */
>> > > +    match_init_catchall(&match);
>> > > +    ofpbuf_clear(&ofpacts);
>> > > +    put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
>> > > +    ofctrl_add_flow(flow_table, OFTABLE_REMOTE_VTEP_OUTPUT, 0, 0, &match,
>> > > +                    &ofpacts, hc_uuid);
>> > > +
>> > > +    /* Table 45, priority 0.
>> > >       * ======================
>> > >       *
>> > >       * Drop packets that do not match previous flows.
>> > >       */
>> > >      add_default_drop_flow(p_ctx, OFTABLE_LOCAL_OUTPUT, flow_table);
>> > >
>> > > -    /* Table 41, Priority 0.
>> > > +    /* Table 46, Priority 0.
>> > >       * =======================
>> > >       *
>> > >       * Resubmit packets that don't output to the ingress port (already checked
>> > > -     * in table 40) to the logical egress pipeline, clearing the logical
>> > > +     * in table 44) to the logical egress pipeline, clearing the logical
>> > >       * registers (for consistent behavior with packets that get tunneled). */
>> > >      match_init_catchall(&match);
>> > >      ofpbuf_clear(&ofpacts);
>> > > @@ -3131,6 +3343,7 @@ physical_run(struct physical_ctx *p_ctx,
>> > >                      &match, &ofpacts, hc_uuid);
>> > >
>> > >      physical_eval_remote_chassis_flows(p_ctx, &ofpacts, flow_table);
>> > > +    physical_eval_evpn_flows(p_ctx, &ofpacts, flow_table);
>> > >
>> > >      ofpbuf_uninit(&ofpacts);
>> > >  }
>> > > diff --git a/controller/physical.h b/controller/physical.h
>> > > index 0420251c6..e0443924b 100644
>> > > --- a/controller/physical.h
>> > > +++ b/controller/physical.h
>> > > @@ -69,6 +69,8 @@ struct physical_ctx {
>> > >      const char **encap_ips;
>> > >      struct physical_debug debug;
>> > >      bool always_tunnel;
>> > > +    const struct hmap *evpn_bindings;
>> > > +    const struct hmap *evpn_multicast_groups;
>> > >
>> > >      /* Set of port binding names that have been already reprocessed during
>> > >       * the I-P run. */
>> > > @@ -87,4 +89,10 @@ bool physical_handle_flows_for_lport(const struct sbrec_port_binding *,
>> > >  void physical_multichassis_reprocess(const struct sbrec_port_binding *,
>> > >                                       struct physical_ctx *,
>> > >                                       struct ovn_desired_flow_table *);
>> > > +void physical_handle_evpn_binding_changes(
>> > > +    struct physical_ctx *, struct ovn_desired_flow_table *,
>> > > +    const struct hmapx *updated_bindings,
>> > > +    const struct hmapx *updated_multicast_groups,
>> > > +    const struct uuidset *removed_bindings,
>> > > +    const struct uuidset *removed_multicast_groups);
>> > >  #endif /* controller/physical.h */
>> > > diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at
>> > > index 334aa7fc6..d6529e015 100644
>> > > --- a/tests/ovn-macros.at
>> > > +++ b/tests/ovn-macros.at
>> > > @@ -1476,8 +1476,9 @@ m4_define([OFTABLE_LOG_INGRESS_PIPELINE], [8])
>> > >  m4_define([OFTABLE_OUTPUT_LARGE_PKT_DETECT], [41])
>> > >  m4_define([OFTABLE_OUTPUT_LARGE_PKT_PROCESS], [42])
>> > >  m4_define([OFTABLE_REMOTE_OUTPUT], [43])
>> > > -m4_define([OFTABLE_LOCAL_OUTPUT], [44])
>> > > -m4_define([OFTABLE_LOG_EGRESS_PIPELINE], [46])
>> > > +m4_define([OFTABLE_REMOTE_VTEP_OUTPUT], [44])
>> > > +m4_define([OFTABLE_LOCAL_OUTPUT], [45])
>> > > +m4_define([OFTABLE_LOG_EGRESS_PIPELINE], [47])
>> > >  m4_define([OFTABLE_SAVE_INPORT], [64])
>> > >  m4_define([OFTABLE_LOG_TO_PHY], [65])
>> > >  m4_define([OFTABLE_MAC_BINDING], [66])
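
To summarize the renumbering for anyone rebasing on top of this, the output
stage of the physical pipeline now looks like:

    41  OFTABLE_OUTPUT_LARGE_PKT_DETECT   (unchanged)
    42  OFTABLE_OUTPUT_LARGE_PKT_PROCESS  (unchanged)
    43  OFTABLE_REMOTE_OUTPUT             (unchanged)
    44  OFTABLE_REMOTE_VTEP_OUTPUT        (new)
    45  OFTABLE_LOCAL_OUTPUT              (was 44)
    46  OFTABLE_CHECK_LOOPBACK            (was 45)
    47  OFTABLE_LOG_EGRESS_PIPELINE       (was 46)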
>> > > diff --git a/tests/ovn.at b/tests/ovn.at
>> > > index 0dabec8d9..2a834dc41 100644
>> > > --- a/tests/ovn.at
>> > > +++ b/tests/ovn.at
>> > > @@ -40794,7 +40794,7 @@ check_output_flows_tunnelled() {
>> > >      dp_key=$2
>> > >      dp_rport=$3
>> > >      AT_CHECK_UNQUOTED([as $hv ovs-ofctl dump-flows br-int table=OFTABLE_REMOTE_OUTPUT,metadata=0x${dp_key},reg15=0x${dp_rport} | ofctl_strip_all | grep -v NXST_FLOW], [0], [dnl
>> > > - table=OFTABLE_REMOTE_OUTPUT, priority=100,reg13=0/0xffff0000,reg15=0x${dp_rport},metadata=0x${dp_key} actions=load:0x${dp_key}->NXM_NX_TUN_ID[[0..23]],set_field:0x${dp_rport}->tun_metadata0,move:NXM_NX_REG14[[0..14]]->NXM_NX_TUN_METADATA0[[16..30]],output:1,resubmit(,OFTABLE_LOCAL_OUTPUT)
>> > > + table=OFTABLE_REMOTE_OUTPUT, priority=100,reg13=0/0xffff0000,reg15=0x${dp_rport},metadata=0x${dp_key} actions=load:0x${dp_key}->NXM_NX_TUN_ID[[0..23]],set_field:0x${dp_rport}->tun_metadata0,move:NXM_NX_REG14[[0..14]]->NXM_NX_TUN_METADATA0[[16..30]],output:1,resubmit(,OFTABLE_REMOTE_VTEP_OUTPUT)
>> > > ])
>> > >  }
>> > >
>> >
>> > Regards,
>> > Dumitru
>> >
>> >
>> Thanks,
>> Ales
>> _______________________________________________
>> dev mailing list
>> d...@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>

Thanks,
Ales

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev