On Tue, Aug 12, 2025 at 3:06 PM Xavier Simonart <xsimo...@redhat.com> wrote:

> Hi Ales, Dumitru
>
> Thanks for the patch
> I have a comment below
>
> Thanks
> Xavier
>

Hi Xavier,

thank you for the review.


>
> On Wed, Aug 6, 2025 at 8:29 PM Ales Musil via dev <ovs-dev@openvswitch.org>
> wrote:
>
>> On Wed, Aug 6, 2025 at 12:53 PM Dumitru Ceara <dce...@redhat.com> wrote:
>>
>> > On 7/25/25 8:03 AM, Ales Musil wrote:
>> > > Add physical flows based on the EVPN bindings and multicast groups
>> > > that ensure the proper flow of traffic that is going through the
>> > > EVPN. In order for this to wrk the CMS needs to ensure that there
>> > > is proper VXLAN tunnel port in br-int and also the
>> > > "external_ids:ovn-evpn-local-ip" is set properly in the local OvS
>> > > database.
>> > >
>> > > All traffic will be flooded through the UNKNOWN group if there is
>> > > unknown port at this stage. That's due to missing support for FDB
>> > > learning.
>> > >
>> > > Signed-off-by: Ales Musil <amu...@redhat.com>
>> > > ---
>> >
>> > Hi Ales,
>> >
>> > I only have a few minor comments, please see below.
>> >
>>
>> Hi Dumitru,
>>
>> thank you for the review.
>>
>>
>> >
>> > >  NEWS                            |   4 +
>> > >  TODO.rst                        |   4 +
>> > >  controller/chassis.c            |  19 +++
>> > >  controller/lflow.h              |   7 +-
>> > >  controller/ovn-controller.8.xml |   8 +
>> > >  controller/ovn-controller.c     |  30 +++-
>> > >  controller/physical.c           | 283
>> ++++++++++++++++++++++++++++----
>> > >  controller/physical.h           |   8 +
>> > >  tests/ovn-macros.at             |   5 +-
>> > >  tests/ovn.at                    |   2 +-
>> > >  10 files changed, 327 insertions(+), 43 deletions(-)
>> > >
>> > > diff --git a/NEWS b/NEWS
>> > > index 0cce1790d..1b6b5ab1d 100644
>> > > --- a/NEWS
>> > > +++ b/NEWS
>> > > @@ -41,6 +41,10 @@ Post v25.03.0
>> > >     - Added support for running tests from the 'check-kernel' system
>> > test target
>> > >       under retis by setting OVS_TEST_WITH_RETIS=yes.  See the
>> 'Testing'
>> > section
>> > >       of the documentation for more details.
>> > > +   - Dynamic Routing:
>> > > +     * Add the option "external_ids:ovn-evpn-local-ip" into OvS
>> > datapath.
>> > > +       This option allows CMS to set IP address that will be used as
>> > source
>> > > +       for the EVPN traffic egressing through the tunnel.
>> > >
>> > >  OVN v25.03.0 - 07 Mar 2025
>> > >  --------------------------
>> > > diff --git a/TODO.rst b/TODO.rst
>> > > index 85208bfe3..9a9df78f2 100644
>> > > --- a/TODO.rst
>> > > +++ b/TODO.rst
>> > > @@ -153,3 +153,7 @@ OVN To-do List
>> > >      monitoring conditions to update before we actually try to learn
>> > routes.
>> > >      Otherwise we could try to add duplicated Learned_Routes and the
>> > ovnsb
>> > >      commit would fail.
>> > > +
>> > > +  * Allow ovn-evpn-local-ip to accept list of
>> > > +    $VNI1:$LOCAL_IP1,$VNI2:$LOCAL_IP2 combinations which will be
>> > properly
>> > > +    reflected in physical flows for given LS with VNI.
>> > > diff --git a/controller/chassis.c b/controller/chassis.c
>> > > index 7616069b7..d67cd0898 100644
>> > > --- a/controller/chassis.c
>> > > +++ b/controller/chassis.c
>> > > @@ -58,6 +58,7 @@ struct ovs_chassis_cfg {
>> > >      const char *trim_limit_lflow_cache;
>> > >      const char *trim_wmark_perc_lflow_cache;
>> > >      const char *trim_timeout_ms;
>> > > +    const char *evpn_local_ip;
>> > >
>> > >      /* Set of encap types parsed from the 'ovn-encap-type'
>> external-id.
>> > */
>> > >      struct sset encap_type_set;
>> > > @@ -198,6 +199,13 @@ get_encap_csum(const struct smap *ext_ids, const
>> > char *chassis_id)
>> > >                                           "ovn-encap-csum", "true");
>> > >  }
>> > >
>> > > +static const char *
>> > > +get_evpn_local_ip(const struct smap *ext_ids, const char *chassis_id)
>> > > +{
>> > > +    return get_chassis_external_id_value(ext_ids, chassis_id,
>> > > +                                         "ovn-evpn-local-ip",
>> "true");
>> > > +}
>> > > +
>> > >  static const char *
>> > >  get_datapath_type(const struct ovsrec_bridge *br_int)
>> > >  {
>> > > @@ -332,6 +340,8 @@ chassis_parse_ovs_config(const struct
>> > ovsrec_open_vswitch_table *ovs_table,
>> > >          get_trim_wmark_perc_lflow_cache(&cfg->external_ids,
>> chassis_id);
>> > >      ovs_cfg->trim_timeout_ms =
>> > >          get_trim_timeout(&cfg->external_ids, chassis_id);
>> > > +    ovs_cfg->evpn_local_ip =
>> > > +        get_evpn_local_ip(&cfg->external_ids, chassis_id);
>> > >
>> > >      chassis_parse_ovs_encap_type(encap_type,
>> &ovs_cfg->encap_type_set);
>> > >
>> > > @@ -384,6 +394,7 @@ chassis_build_other_config(const struct
>> > ovs_chassis_cfg *ovs_cfg,
>> > >      smap_replace(config, "ovn-trim-timeout-ms",
>> > ovs_cfg->trim_timeout_ms);
>> > >      smap_replace(config, "iface-types",
>> > ds_cstr_ro(&ovs_cfg->iface_types));
>> > >      smap_replace(config, "ovn-chassis-mac-mappings",
>> > ovs_cfg->chassis_macs);
>> > > +    smap_replace(config, "evpn-local-ip", ovs_cfg->evpn_local_ip);
>> > >      smap_replace(config, "is-interconn",
>> > >                   ovs_cfg->is_interconn ? "true" : "false");
>> > >      smap_replace(config, OVN_FEATURE_PORT_UP_NOTIF, "true");
>> > > @@ -504,6 +515,13 @@ chassis_other_config_changed(const struct
>> > ovs_chassis_cfg *ovs_cfg,
>> > >          return true;
>> > >      }
>> > >
>> > > +    const char *chassis_evpn_local_ip =
>> > > +        get_evpn_local_ip(&chassis_rec->other_config,
>> > chassis_rec->name);
>> > > +    if (strcmp(ovs_cfg->evpn_local_ip, chassis_evpn_local_ip)) {
>> > > +        return true;
>> > > +    }
>> > > +
>> > > +
>> > >      if (!smap_get_bool(&chassis_rec->other_config,
>> > OVN_FEATURE_PORT_UP_NOTIF,
>> > >                         false)) {
>> > >          return true;
>> > > @@ -722,6 +740,7 @@ update_supported_sset(struct sset *supported)
>> > >      sset_add(supported, "iface-types");
>> > >      sset_add(supported, "ovn-chassis-mac-mappings");
>> > >      sset_add(supported, "is-interconn");
>> > > +    sset_add(supported, "evpn-local-ip");
>> > >
>> > >      /* Internal options. */
>> > >      sset_add(supported, "is-vtep");
>> > > diff --git a/controller/lflow.h b/controller/lflow.h
>> > > index fa9e795e0..c2d277078 100644
>> > > --- a/controller/lflow.h
>> > > +++ b/controller/lflow.h
>> > > @@ -70,14 +70,15 @@ struct uuid;
>> > >  #define OFTABLE_OUTPUT_LARGE_PKT_DETECT  41
>> > >  #define OFTABLE_OUTPUT_LARGE_PKT_PROCESS 42
>> > >  #define OFTABLE_REMOTE_OUTPUT            43
>> > > -#define OFTABLE_LOCAL_OUTPUT             44
>> > > -#define OFTABLE_CHECK_LOOPBACK           45
>> > > +#define OFTABLE_REMOTE_VTEP_OUTPUT       44
>> > > +#define OFTABLE_LOCAL_OUTPUT             45
>> > > +#define OFTABLE_CHECK_LOOPBACK           46
>> > >
>> > >  /* Start of the OUTPUT section of the pipeline. */
>> > >  #define OFTABLE_OUTPUT_INIT OFTABLE_OUTPUT_LARGE_PKT_DETECT
>> > >
>> > >  /* Start of LOG_PIPELINE_LEN tables. */
>> > > -#define OFTABLE_LOG_EGRESS_PIPELINE      46
>> > > +#define OFTABLE_LOG_EGRESS_PIPELINE      47
>> > >  #define OFTABLE_SAVE_INPORT              64
>> > >  #define OFTABLE_LOG_TO_PHY               65
>> > >  #define OFTABLE_MAC_BINDING              66
>> > > diff --git a/controller/ovn-controller.8.xml
>> > b/controller/ovn-controller.8.xml
>> > > index 3f2cf5bae..8a6d0b767 100644
>> > > --- a/controller/ovn-controller.8.xml
>> > > +++ b/controller/ovn-controller.8.xml
>> > > @@ -392,6 +392,14 @@
>> > >          exit. In order to keep backward compatibility the
>> > >          <code>--restart</code> exit flag has priority over this flag.
>> > >        </dd>
>> > > +
>> > > +      <dt><code>external_ids:ovn-evpn-local-ip</code></dt>
>> > > +      <dd>
>> > > +        IP address used as a source address for the EVPN traffic
>> > leaving this
>> > > +        ovn-controller. There is currently support only for single IP
>> > address
>> > > +        being specified. NOTE: this feature is experimental and may
>> be
>> > subject
>> > > +        to removal/change in the future.
>> > > +      </dd>
>> >
>> > This is experimental so maybe it's fine but I'm afraid I'd expect at
>> > least dual stack support (so max one IP per family).  I know you have a
>> > v2 almost ready so maybe just take this as a reminder of something to
>> > add in v3?
>> >
>>
>> Ack I'll try to add it in v3.
>>
>>
>> >
>> > >      </dl>
>> > >
>> > >      <p>
>> > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
>> > > index 996dcc021..2f2959edb 100644
>> > > --- a/controller/ovn-controller.c
>> > > +++ b/controller/ovn-controller.c
>> > > @@ -4630,6 +4630,9 @@ static void init_physical_ctx(struct engine_node
>> > *node,
>> > >      struct ed_type_northd_options *n_opts =
>> > >          engine_get_input_data("northd_options", node);
>> > >
>> > > +    struct ed_type_evpn_vtep_binding *eb_data =
>> > > +        engine_get_input_data("evpn_vtep_binding", node);
>> > > +
>> > >      parse_encap_ips(ovs_table, &p_ctx->n_encap_ips,
>> &p_ctx->encap_ips);
>> > >      p_ctx->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
>> > >      p_ctx->sbrec_port_binding_by_datapath =
>> > sbrec_port_binding_by_datapath;
>> > > @@ -4647,6 +4650,8 @@ static void init_physical_ctx(struct engine_node
>> > *node,
>> > >      p_ctx->patch_ofports = &non_vif_data->patch_ofports;
>> > >      p_ctx->chassis_tunnels = &non_vif_data->chassis_tunnels;
>> > >      p_ctx->always_tunnel = n_opts->always_tunnel;
>> > > +    p_ctx->evpn_bindings = &eb_data->bindings;
>> > > +    p_ctx->evpn_multicast_groups = &eb_data->multicast_groups;
>> > >
>> > >      struct controller_engine_ctx *ctrl_ctx =
>> > engine_get_context()->client_ctx;
>> > >      p_ctx->if_mgr = ctrl_ctx->if_mgr;
>> > > @@ -4936,6 +4941,28 @@ pflow_output_debug_handler(struct engine_node
>> > *node, void *data)
>> > >      }
>> > >      return EN_HANDLED_UPDATED;
>> > >  }
>> > > +static enum engine_input_handler_result
>> > > +pflow_output_evpn_binding_handler(struct engine_node *node, void
>> *data)
>> > > +{
>> > > +    struct ed_type_pflow_output *pfo = data;
>> > > +    struct ed_type_runtime_data *rt_data =
>> > > +        engine_get_input_data("runtime_data", node);
>> > > +    struct ed_type_non_vif_data *non_vif_data =
>> > > +        engine_get_input_data("non_vif_data", node);
>> > > +    struct ed_type_evpn_vtep_binding *eb_data =
>> > > +        engine_get_input_data("evpn_vtep_binding", node);
>> > > +
>> > > +    struct physical_ctx ctx;
>> > > +    init_physical_ctx(node, rt_data, non_vif_data, &ctx);
>> > > +
>> > > +    physical_handle_evpn_binding_changes(&ctx, &pfo->flow_table,
>> > > +                                         &eb_data->updated_bindings,
>> > > +
>> >  &eb_data->updated_multicast_groups,
>> > > +                                         &eb_data->removed_bindings,
>> > > +
>> >  &eb_data->removed_multicast_groups);
>> > > +    destroy_physical_ctx(&ctx);
>> > > +    return EN_HANDLED_UPDATED;
>> > > +}
>> > >
>> > >  static void *
>> > >  en_controller_output_init(struct engine_node *node OVS_UNUSED,
>> > > @@ -6661,9 +6688,8 @@ main(int argc, char *argv[])
>> > >      engine_add_input(&en_evpn_vtep_binding, &en_sb_datapath_binding,
>> > >                       evpn_vtep_binding_datapath_binding_handler);
>> > >
>> > > -    /* TODO Add real handler to process all bindings. */
>> > >      engine_add_input(&en_pflow_output, &en_evpn_vtep_binding,
>> > > -                     engine_noop_handler);
>> > > +                     pflow_output_evpn_binding_handler);
>> > >
>> > >      engine_add_input(&en_controller_output, &en_dns_cache,
>> > >                       NULL);
>> > > diff --git a/controller/physical.c b/controller/physical.c
>> > > index 15209318b..0a4b31040 100644
>> > > --- a/controller/physical.c
>> > > +++ b/controller/physical.c
>> > > @@ -19,6 +19,7 @@
>> > >  #include "byte-order.h"
>> > >  #include "ct-zone.h"
>> > >  #include "encaps.h"
>> > > +#include "evpn-binding.h"
>> > >  #include "flow.h"
>> > >  #include "ha-chassis.h"
>> > >  #include "lflow.h"
>> > > @@ -95,19 +96,25 @@ physical_register_ovs_idl(struct ovsdb_idl
>> *ovs_idl)
>> > >      ovsdb_idl_track_add_column(ovs_idl,
>> &ovsrec_interface_col_ofport);
>> > >      ovsdb_idl_track_add_column(ovs_idl,
>> > &ovsrec_interface_col_external_ids);
>> > >  }
>> > > -
>> > >  static void
>> > > -put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
>> > > -         struct ofpbuf *ofpacts)
>> > > +put_load_be(const void *value, size_t len, enum mf_field_id dst,
>> > > +            size_t ofs, size_t n_bits, struct ofpbuf *ofpacts)
>> > >  {
>> > >      struct ofpact_set_field *sf = ofpact_put_set_field(ofpacts,
>> > >
>>  mf_from_id(dst),
>> > NULL,
>> > >                                                         NULL);
>> > > -    ovs_be64 n_value = htonll(value);
>> > > -    bitwise_copy(&n_value, 8, 0, sf->value, sf->field->n_bytes, ofs,
>> > n_bits);
>> > > +    bitwise_copy(value, len, 0, sf->value, sf->field->n_bytes, ofs,
>> > n_bits);
>> > >      bitwise_one(ofpact_set_field_mask(sf), sf->field->n_bytes, ofs,
>> > n_bits);
>> > >  }
>> > >
>> > > +static void
>> > > +put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
>> > > +         struct ofpbuf *ofpacts)
>> > > +{
>> > > +    ovs_be64 n_value = htonll(value);
>> > > +    put_load_be(&n_value, 8, dst, ofs, n_bits, ofpacts);
>> > > +}
>> > > +
>> > >  static void
>> > >  put_move(enum mf_field_id src, int src_ofs,
>> > >           enum mf_field_id dst, int dst_ofs,
>> > > @@ -487,7 +494,7 @@ put_remote_port_redirect_overlay(const struct
>> > sbrec_port_binding *binding,
>> > >                                    port_key, is_vtep_port,
>> > ofpacts_clone);
>> > >                  ofpact_put_OUTPUT(ofpacts_clone)->port = tun->ofport;
>> > >              }
>> > > -            put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts_clone);
>> > > +            put_resubmit(OFTABLE_REMOTE_VTEP_OUTPUT, ofpacts_clone);
>> > >              ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100,
>> > >                              binding->header_.uuid.parts[0], match,
>> > >                              ofpacts_clone, &binding->header_.uuid);
>> > > @@ -985,12 +992,12 @@ put_local_common_flows(uint32_t dp_key,
>> > >
>> > >      uint32_t port_key = pb->tunnel_key;
>> > >
>> > > -    /* Table 43, priority 100.
>> > > +    /* Table 45, priority 100.
>> > >       * =======================
>> > >       *
>> > >       * Implements output to local hypervisor.  Each flow matches a
>> > >       * logical output port on the local hypervisor, and resubmits to
>> > > -     * table 44.
>> > > +     * table 46.
>> > >       */
>> > >
>> > >      ofpbuf_clear(ofpacts_p);
>> > > @@ -1000,13 +1007,13 @@ put_local_common_flows(uint32_t dp_key,
>> > >
>> > >      put_zones_ofpacts(zone_ids, ofpacts_p);
>> > >
>> > > -    /* Resubmit to table 44. */
>> > > +    /* Resubmit to table 46. */
>> > >      put_resubmit(OFTABLE_CHECK_LOOPBACK, ofpacts_p);
>> > >      ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100,
>> > >                      pb->header_.uuid.parts[0], &match, ofpacts_p,
>> > >                      &pb->header_.uuid);
>> > >
>> > > -    /* Table 44, Priority 100.
>> > > +    /* Table 46, Priority 100.
>> > >       * =======================
>> > >       *
>> > >       * Drop packets whose logical inport and outport are the same
>> > > @@ -1784,12 +1791,12 @@ consider_port_binding(const struct
>> physical_ctx
>> > *ctx,
>> > >              ha_chassis_group_is_active(binding->ha_chassis_group,
>> > >                                         ctx->active_tunnels,
>> > ctx->chassis))) {
>> > >
>> > > -        /* Table 43, priority 100.
>> > > +        /* Table 45, priority 100.
>> > >           * =======================
>> > >           *
>> > >           * Implements output to local hypervisor.  Each flow matches
>> a
>> > >           * logical output port on the local hypervisor, and
>> resubmits to
>> > > -         * table 41.  For ports of type "chassisredirect", the
>> logical
>> > > +         * table 46.  For ports of type "chassisredirect", the
>> logical
>> > >           * output port is changed from the "chassisredirect" port to
>> the
>> > >           * underlying distributed port. */
>> > >
>> > > @@ -1830,7 +1837,7 @@ consider_port_binding(const struct physical_ctx
>> > *ctx,
>> > >               * go out from the same tunnel inport. */
>> > >              put_load(ofp_to_u16(OFPP_NONE), MFF_IN_PORT, 0, 16,
>> > ofpacts_p);
>> > >
>> > > -            /* Resubmit to table 41. */
>> > > +            /* Resubmit to table 46. */
>> > >              put_resubmit(OFTABLE_CHECK_LOOPBACK, ofpacts_p);
>> > >          }
>> > >
>> > > @@ -2061,7 +2068,7 @@ consider_port_binding(const struct physical_ctx
>> > *ctx,
>> > >                                                ofport, flow_table);
>> > >          }
>> > >
>> > > -        /* Table 41, priority 160.
>> > > +        /* Table 46, priority 160.
>> > >           * =======================
>> > >           *
>> > >           * Do not forward local traffic from a localport to a
>> localnet
>> > port.
>> > > @@ -2132,13 +2139,13 @@ consider_port_binding(const struct
>> physical_ctx
>> > *ctx,
>> > >              }
>> > >          }
>> > >
>> > > -        /* Table 42, priority 150.
>> > > +        /* Table 43, priority 150.
>> > >           * =======================
>> > >           *
>> > >           * Handles packets received from ports of type "localport".
>> > These
>> > >           * ports are present on every hypervisor.  Traffic that
>> > originates at
>> > >           * one should never go over a tunnel to a remote hypervisor,
>> > > -         * so resubmit them to table 40 for local delivery. */
>> > > +         * so resubmit them to table 45 for local delivery. */
>> > >          if (type == LP_LOCALPORT) {
>> > >              ofpbuf_clear(ofpacts_p);
>> > >              put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts_p);
>> > > @@ -2152,7 +2159,7 @@ consider_port_binding(const struct physical_ctx
>> > *ctx,
>> > >          }
>> > >      } else if (access_type == PORT_LOCALNET && !ctx->always_tunnel) {
>> > >          /* Remote port connected by localnet port */
>> > > -        /* Table 43, priority 100.
>> > > +        /* Table 45, priority 100.
>> > >           * =======================
>> > >           *
>> > >           * Implements switching to localnet port. Each flow matches a
>> > > @@ -2172,7 +2179,7 @@ consider_port_binding(const struct physical_ctx
>> > *ctx,
>> > >
>> > >          put_load(localnet_port->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
>> > ofpacts_p);
>> > >
>> > > -        /* Resubmit to table 43. */
>> > > +        /* Resubmit to table 45. */
>> > >          put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts_p);
>> > >          ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100,
>> > >                          binding->header_.uuid.parts[0],
>> > > @@ -2528,15 +2535,13 @@ consider_mc_group(const struct physical_ctx
>> *ctx,
>> > >
>> > >      remote_ports = remote_ctx->ofpacts.size > 0;
>> > >      if (remote_ports) {
>> > > -        if (local_lports) {
>> > > -            put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ctx->ofpacts);
>> > > -        }
>> > > +        put_resubmit(OFTABLE_REMOTE_VTEP_OUTPUT,
>> &remote_ctx->ofpacts);
>> > >          mc_ofctrl_add_flow(mc, remote_ctx, false, flow_table);
>> > >      }
>> > >
>> > >      if (ramp_ports && has_vtep) {
>> > >          put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
>> > &ramp_ctx->ofpacts);
>> > > -        put_resubmit(OFTABLE_LOCAL_OUTPUT, &ramp_ctx->ofpacts);
>> > > +        put_resubmit(OFTABLE_REMOTE_VTEP_OUTPUT, &ramp_ctx->ofpacts);
>> > >          mc_ofctrl_add_flow(mc, ramp_ctx, false, flow_table);
>> > >      }
>> > >
>> > > @@ -2653,6 +2658,153 @@ physical_eval_remote_chassis_flows(const
>> struct
>> > physical_ctx *ctx,
>> > >      ofpbuf_uninit(&ingress_ofpacts);
>> > >  }
>> > >
>> > > +static void
>> > > +physical_consider_evpn_binding(const struct evpn_binding *binding,
>> > > +                               const struct in6_addr *local_ip,
>> > > +                               struct ofpbuf *ofpacts, struct match
>> > *match,
>> > > +                               struct ovn_desired_flow_table
>> > *flow_table,
>> > > +                               bool ipv4)
>> > > +{
>> > > +    /* Ingress flows. */
>> > > +    ofpbuf_clear(ofpacts);
>> > > +    match_init_catchall(match);
>> > > +
>> > > +    match_set_in_port(match, binding->tunnel_ofport);
>> > > +    match_set_tun_id(match, htonll(binding->vni));
>> > > +    if (ipv4) {
>> > > +        match_set_tun_src(match,
>> > > +
>> > in6_addr_get_mapped_ipv4(&binding->remote_ip));
>> > > +        match_set_tun_dst(match, in6_addr_get_mapped_ipv4(local_ip));
>> > > +    } else {
>> > > +        match_set_tun_ipv6_src(match, &binding->remote_ip);
>> > > +        match_set_tun_ipv6_dst(match, local_ip);
>> > > +    }
>> > > +
>> > > +    put_load(binding->dp_key, MFF_LOG_DATAPATH, 0, 32, ofpacts);
>> > > +    put_load(binding->tunnel_key, MFF_LOG_INPORT, 0, 32, ofpacts);
>> > > +    put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, ofpacts);
>> > > +
>> > > +    ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 1050,
>> > > +                    binding->flow_uuid.parts[0],
>> > > +                    match, ofpacts, &binding->flow_uuid);
>> > > +
>> > > +    /* Egress flows. */
>> > > +    ofpbuf_clear(ofpacts);
>> > > +    match_init_catchall(match);
>> > > +
>> > > +    match_outport_dp_and_port_keys(match, binding->dp_key,
>> > > +                                   binding->tunnel_key);
>> > > +
>> > > +    if (ipv4) {
>> > > +        ovs_be32 ip4 = in6_addr_get_mapped_ipv4(local_ip);
>> > > +        put_load_be(&ip4, sizeof ip4, MFF_TUN_SRC, 0, 32, ofpacts);
>> > > +        ip4 = in6_addr_get_mapped_ipv4(&binding->remote_ip);
>> > > +        put_load_be(&ip4, sizeof ip4, MFF_TUN_DST, 0, 32, ofpacts);
>> > > +    } else {
>> > > +        put_load_be(&local_ip, sizeof local_ip, MFF_TUN_IPV6_SRC,
>> > > +                    0, 128, ofpacts);
>> > > +        put_load_be(&binding->remote_ip, sizeof binding->remote_ip,
>> > > +                    MFF_TUN_IPV6_DST, 0, 128, ofpacts);
>> > > +    }
>> > > +    put_load(binding->vni, MFF_TUN_ID, 0, 24, ofpacts);
>> > > +
>> > > +    size_t ofpacts_size = ofpacts->size;
>> > > +    ofpact_put_OUTPUT(ofpacts)->port = binding->tunnel_ofport;
>> > > +
>> > > +    ofctrl_add_flow(flow_table, OFTABLE_REMOTE_VTEP_OUTPUT, 50,
>> > > +                    binding->flow_uuid.parts[0],
>> > > +                    match, ofpacts, &binding->flow_uuid);
>> > > +
>> > > +    /* Add flow that will match on LOOPBACK flag, in that case set
>> > > +     * in_port to none otherwise the hairpin traffic would be
>> rejected
>> > > +     * by ovs. */
>> > > +    match_set_reg_masked(match, MFF_LOG_FLAGS - MFF_REG0,
>> > > +                         MLF_ALLOW_LOOPBACK, MLF_ALLOW_LOOPBACK);
>> > > +
>> > > +    ofpbuf_truncate(ofpacts, ofpacts_size);
>> > > +    put_load(ofp_to_u16(OFPP_NONE), MFF_IN_PORT, 0, 16, ofpacts);
>> > > +    ofpact_put_OUTPUT(ofpacts)->port = binding->tunnel_ofport;
>> > > +
>> > > +    ofctrl_add_flow(flow_table, OFTABLE_REMOTE_VTEP_OUTPUT, 55,
>> > > +                    binding->flow_uuid.parts[0],
>> > > +                    match, ofpacts, &binding->flow_uuid);
>> >
>> > I know you have v2 almost ready so maybe let's just add a TODO.rst item
>> > to document these new flows in the ovn-architecture doc?  Or just a note
>> > to ourselves to do this in v3? :)
>> >
>>
>>
>> I'm not sure if ovn-architecture is the right place for this. Should we
>> try
>> to
>> explain that in a separate document that will document the overall BGP?
>>
>>
>> >
>> > > +}
>> > > +
>> > > +static void
>> > > +physical_consider_evpn_multicast(const struct evpn_multicast_group
>> > *mc_group,
>> > > +                                 const struct in6_addr *local_ip,
>> > > +                                 struct ofpbuf *ofpacts, struct match
>> > *match,
>> > > +                                 struct ovn_desired_flow_table
>> > *flow_table,
>> > > +                                 bool ipv4)
>> > > +{
>> > > +    const struct evpn_binding *binding = NULL;
>> > > +
>> > > +    ofpbuf_clear(ofpacts);
>> > > +    uint32_t multicast_tunnel_keys[] = {OVN_MCAST_FLOOD_TUNNEL_KEY,
>> > > +                                        OVN_MCAST_UNKNOWN_TUNNEL_KEY,
>> > > +
>> OVN_MCAST_FLOOD_L2_TUNNEL_KEY};
>> > > +    if (ipv4) {
>> > > +        ovs_be32 ip4 = in6_addr_get_mapped_ipv4(local_ip);
>> > > +        put_load_be(&ip4, sizeof ip4, MFF_TUN_SRC, 0, 32, ofpacts);
>> > > +    } else {
>> > > +        put_load_be(&local_ip, sizeof local_ip, MFF_TUN_IPV6_SRC,
>> > > +                    0, 128, ofpacts);
>> > > +    }
>> > > +    put_load(mc_group->vni, MFF_TUN_ID, 0, 24, ofpacts);
>> > > +
>> > > +    const struct hmapx_node *node;
>> > > +    HMAPX_FOR_EACH (node, &mc_group->bindings) {
>> > > +        binding = node->data;
>> > > +        if (ipv4) {
>> > > +            ovs_be32 ip4 =
>> > in6_addr_get_mapped_ipv4(&binding->remote_ip);
>> > > +            put_load_be(&ip4, sizeof ip4, MFF_TUN_DST, 0, 32,
>> ofpacts);
>> > > +        } else {
>> > > +            put_load_be(&binding->remote_ip, sizeof
>> binding->remote_ip,
>> > > +                        MFF_TUN_IPV6_DST, 0, 128, ofpacts);
>> > > +        }
>> > > +        ofpact_put_OUTPUT(ofpacts)->port = binding->tunnel_ofport;
>> > > +    }
>> > > +    put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts);
>> > > +
>> > > +    for (size_t i = 0; i < ARRAY_SIZE(multicast_tunnel_keys); i++) {
>> > > +        match_init_catchall(match);
>> > > +        match_outport_dp_and_port_keys(match, binding->dp_key,
>>
> Do we have the same dp_key for all bindings as this uses only the last
> "binding" from the previous loop ? Is this expected ?
>

Yeah we should have the same dp_key for all bindings in the group.
I have added assert in v3 that mc_group actually has any bindings.



> > > +                                       multicast_tunnel_keys[i]);
>>
>
>
>> > > +
>> > > +        ofctrl_add_flow(flow_table, OFTABLE_REMOTE_VTEP_OUTPUT, 50,
>> > > +                        mc_group->flow_uuid.parts[0],
>> > > +                        match, ofpacts, &mc_group->flow_uuid);
>> > > +    }
>> >
>> > I have the same doc comment for these I guess.
>> >
>> > > +}
>> > > +
>> > > +static void
>> > > +physical_eval_evpn_flows(const struct physical_ctx *ctx,
>> > > +                         struct ofpbuf *ofpacts,
>> > > +                         struct ovn_desired_flow_table *flow_table)
>> > > +{
>> > > +    const char *local_ip_str =
>> smap_get_def(&ctx->chassis->other_config,
>> > > +                                            "evpn-local-ip", "");
>> > > +    struct in6_addr local_ip;
>> > > +    if (!ip46_parse(local_ip_str, &local_ip)) {
>> > > +        return;
>> > > +    }
>> > > +
>> > > +    struct match match = MATCH_CATCHALL_INITIALIZER;
>> > > +    bool ipv4 = IN6_IS_ADDR_V4MAPPED(&local_ip);
>> > > +
>> > > +    const struct evpn_binding *binding;
>> > > +    HMAP_FOR_EACH (binding, hmap_node, ctx->evpn_bindings) {
>> > > +        physical_consider_evpn_binding(binding, &local_ip, ofpacts,
>> > > +                                       &match, flow_table, ipv4);
>> > > +    }
>> > > +
>> > > +    const struct evpn_multicast_group *mc_group;
>> > > +    HMAP_FOR_EACH (mc_group, hmap_node, ctx->evpn_multicast_groups) {
>> > > +        physical_consider_evpn_multicast(mc_group, &local_ip,
>> ofpacts,
>> > > +                                         &match, flow_table, ipv4);
>> > > +    }
>> > > +}
>> > > +
>> > >  static void
>> > >  physical_eval_port_binding(struct physical_ctx *p_ctx,
>> > >                             const struct sbrec_port_binding *pb,
>> > > @@ -2760,6 +2912,55 @@ physical_handle_mc_group_changes(struct
>> > physical_ctx *p_ctx,
>> > >      }
>> > >  }
>> > >
>> > > +void
>> > > +physical_handle_evpn_binding_changes(
>> > > +    struct physical_ctx *ctx, struct ovn_desired_flow_table
>> *flow_table,
>> > > +    const struct hmapx *updated_bindings,
>> > > +    const struct hmapx *updated_multicast_groups,
>> > > +    const struct uuidset *removed_bindings,
>> > > +    const struct uuidset *removed_multicast_groups)
>> > > +{
>> > > +    const char *local_ip_str =
>> smap_get_def(&ctx->chassis->other_config,
>> > > +                                            "evpn-local-ip", "");
>> > > +    struct in6_addr local_ip;
>> > > +    if (!ip46_parse(local_ip_str, &local_ip)) {
>> > > +        return;
>> > > +    }
>> > > +
>> > > +    struct ofpbuf ofpacts;
>> > > +    ofpbuf_init(&ofpacts, 0);
>> > > +    struct match match = MATCH_CATCHALL_INITIALIZER;
>> > > +    bool ipv4 = IN6_IS_ADDR_V4MAPPED(&local_ip);
>> > > +
>> > > +    const struct hmapx_node *node;
>> > > +    HMAPX_FOR_EACH (node, updated_bindings) {
>> > > +        const struct evpn_binding *binding = node->data;
>> > > +
>> > > +        ofctrl_remove_flows(flow_table, &binding->flow_uuid);
>> > > +        physical_consider_evpn_binding(binding, &local_ip, &ofpacts,
>> > > +                                       &match, flow_table, ipv4);
>> > > +    }
>> > > +
>> > > +    HMAPX_FOR_EACH (node, updated_multicast_groups) {
>> > > +        const struct evpn_multicast_group *mc_group = node->data;
>> > > +
>> > > +        ofctrl_remove_flows(flow_table, &mc_group->flow_uuid);
>> > > +        physical_consider_evpn_multicast(mc_group, &local_ip,
>> &ofpacts,
>> > > +                                         &match, flow_table, ipv4);
>> > > +    }
>> > > +
>> > > +    ofpbuf_uninit(&ofpacts);
>> > > +
>> > > +    const struct uuidset_node *uuidset_node;
>> > > +    UUIDSET_FOR_EACH (uuidset_node, removed_bindings) {
>> > > +        ofctrl_remove_flows(flow_table, &uuidset_node->uuid);
>> > > +    }
>> > > +
>> > > +    UUIDSET_FOR_EACH (uuidset_node, removed_multicast_groups) {
>> > > +        ofctrl_remove_flows(flow_table, &uuidset_node->uuid);
>> > > +    }
>> > > +}
>> > > +
>> > >  void
>> > >  physical_run(struct physical_ctx *p_ctx,
>> > >               struct ovn_desired_flow_table *flow_table)
>> > > @@ -2809,7 +3010,7 @@ physical_run(struct physical_ctx *p_ctx,
>> > >       * have metadata about the ingress and egress logical ports.
>> > >       * VXLAN encapsulations have metadata about the egress logical
>> port
>> > only.
>> > >       * We set MFF_LOG_DATAPATH, MFF_LOG_INPORT, and MFF_LOG_OUTPORT
>> > from the
>> > > -     * tunnel key data where possible, then resubmit to table 40 to
>> > handle
>> > > +     * tunnel key data where possible, then resubmit to table 45 to
>> > handle
>> > >       * packets to the local hypervisor. */
>> > >      struct chassis_tunnel *tun;
>> > >      HMAP_FOR_EACH (tun, hmap_node, p_ctx->chassis_tunnels) {
>> > > @@ -2922,7 +3123,7 @@ physical_run(struct physical_ctx *p_ctx,
>> > >       */
>> > >      add_default_drop_flow(p_ctx, OFTABLE_PHY_TO_LOG, flow_table);
>> > >
>> > > -    /* Table 40-43, priority 0.
>> > > +    /* Table 41-42, priority 0.
>> > >       * ========================
>> > >       *
>> > >       * Default resubmit actions for OFTABLE_OUTPUT_LARGE_PKT_*
>> tables.
>> > > @@ -2948,7 +3149,7 @@ physical_run(struct physical_ctx *p_ctx,
>> > >      ofctrl_add_flow(flow_table, OFTABLE_OUTPUT_LARGE_PKT_PROCESS, 0,
>> 0,
>> > &match,
>> > >                      &ofpacts, hc_uuid);
>> > >
>> > > -    /* Table 42, priority 150.
>> > > +    /* Table 43, priority 150.
>> > >       * =======================
>> > >       *
>> > >       * Handles packets received from a VXLAN tunnel which get
>> > resubmitted to
>> > > @@ -2961,13 +3162,13 @@ physical_run(struct physical_ctx *p_ctx,
>> > >      match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0,
>> > MLF_RCV_FROM_RAMP,
>> > >                           MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK);
>> > >
>> > > -    /* Resubmit to table 40. */
>> > > +    /* Resubmit to table 45. */
>> > >      ofpbuf_clear(&ofpacts);
>> > >      put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
>> > >      ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 150, 0,
>> > >                      &match, &ofpacts, hc_uuid);
>> > >
>> > > -    /* Table 42, priority 150.
>> > > +    /* Table 43, priority 150.
>> > >       * =======================
>> > >       *
>> > >       * Packets that should not be sent to other hypervisors.
>> > > @@ -2975,35 +3176,46 @@ physical_run(struct physical_ctx *p_ctx,
>> > >      match_init_catchall(&match);
>> > >      match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0,
>> > >                           MLF_LOCAL_ONLY, MLF_LOCAL_ONLY);
>> > > -    /* Resubmit to table 40. */
>> > > +    /* Resubmit to table 45. */
>> > >      ofpbuf_clear(&ofpacts);
>> > >      put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
>> > >      ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 150, 0,
>> > >                      &match, &ofpacts, hc_uuid);
>> > >
>> > > -    /* Table 42, Priority 0.
>> > > +    /* Table 43, Priority 0.
>> > >       * =======================
>> > >       *
>> > > -     * Resubmit packets that are not directed at tunnels or part of a
>> > > -     * multicast group to the local output table. */
>> > > +     * Resubmit packets that are not directed at OVN tunnels or part
>> of
>> > a
>> > > +     * multicast group to the VTEP output table. */
>> > >      match_init_catchall(&match);
>> > >      ofpbuf_clear(&ofpacts);
>> > > -    put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
>> > > +    put_resubmit(OFTABLE_REMOTE_VTEP_OUTPUT, &ofpacts);
>> > >      ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 0, 0, &match,
>> > >                      &ofpacts, hc_uuid);
>> > >
>> > > -    /* Table 40, priority 0.
>> > > +    /* Table 44, Priority 0.
>> > > +     * =======================
>> > > +     *
>> > > +     * Resubmit packets that are not directed to remote VTEP to the
>> > local
>> > > +     * output table. */
>> > > +    match_init_catchall(&match);
>> > > +    ofpbuf_clear(&ofpacts);
>> > > +    put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
>> > > +    ofctrl_add_flow(flow_table, OFTABLE_REMOTE_VTEP_OUTPUT, 0, 0,
>> > &match,
>> > > +                    &ofpacts, hc_uuid);
>> > > +
>> > > +    /* Table 45, priority 0.
>> > >       * ======================
>> > >       *
>> > >       * Drop packets that do not match previous flows.
>> > >       */
>> > >      add_default_drop_flow(p_ctx, OFTABLE_LOCAL_OUTPUT, flow_table);
>> > >
>> > > -    /* Table 41, Priority 0.
>> > > +    /* Table 46, Priority 0.
>> > >       * =======================
>> > >       *
>> > >       * Resubmit packets that don't output to the ingress port
>> (already
>> > checked
>> > > -     * in table 40) to the logical egress pipeline, clearing the
>> logical
>> > > +     * in table 44) to the logical egress pipeline, clearing the
>> logical
>> > >       * registers (for consistent behavior with packets that get
>> > tunneled). */
>> > >      match_init_catchall(&match);
>> > >      ofpbuf_clear(&ofpacts);
>> > > @@ -3131,6 +3343,7 @@ physical_run(struct physical_ctx *p_ctx,
>> > >                      &match, &ofpacts, hc_uuid);
>> > >
>> > >      physical_eval_remote_chassis_flows(p_ctx, &ofpacts, flow_table);
>> > > +    physical_eval_evpn_flows(p_ctx, &ofpacts, flow_table);
>> > >
>> > >      ofpbuf_uninit(&ofpacts);
>> > >  }
>> > > diff --git a/controller/physical.h b/controller/physical.h
>> > > index 0420251c6..e0443924b 100644
>> > > --- a/controller/physical.h
>> > > +++ b/controller/physical.h
>> > > @@ -69,6 +69,8 @@ struct physical_ctx {
>> > >      const char **encap_ips;
>> > >      struct physical_debug debug;
>> > >      bool always_tunnel;
>> > > +    const struct hmap *evpn_bindings;
>> > > +    const struct hmap *evpn_multicast_groups;
>> > >
>> > >      /* Set of port binding names that have been already reprocessed
>> > during
>> > >       * the I-P run. */
>> > > @@ -87,4 +89,10 @@ bool physical_handle_flows_for_lport(const struct
>> > sbrec_port_binding *,
>> > >  void physical_multichassis_reprocess(const struct sbrec_port_binding
>> *,
>> > >                                       struct physical_ctx *,
>> > >                                       struct ovn_desired_flow_table
>> *);
>> > > +void physical_handle_evpn_binding_changes(
>> > > +    struct physical_ctx *, struct ovn_desired_flow_table *,
>> > > +    const struct hmapx *updated_bindings,
>> > > +    const struct hmapx *updated_multicast_groups,
>> > > +    const struct uuidset *removed_bindings,
>> > > +    const struct uuidset *removed_multicast_groups);
>> > >  #endif /* controller/physical.h */
>> > > diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at
>> > > index 334aa7fc6..d6529e015 100644
>> > > --- a/tests/ovn-macros.at
>> > > +++ b/tests/ovn-macros.at
>> > > @@ -1476,8 +1476,9 @@ m4_define([OFTABLE_LOG_INGRESS_PIPELINE], [8])
>> > >  m4_define([OFTABLE_OUTPUT_LARGE_PKT_DETECT], [41])
>> > >  m4_define([OFTABLE_OUTPUT_LARGE_PKT_PROCESS], [42])
>> > >  m4_define([OFTABLE_REMOTE_OUTPUT], [43])
>> > > -m4_define([OFTABLE_LOCAL_OUTPUT], [44])
>> > > -m4_define([OFTABLE_LOG_EGRESS_PIPELINE], [46])
>> > > +m4_define([OFTABLE_REMOTE_VTEP_OUTPUT], [44])
>> > > +m4_define([OFTABLE_LOCAL_OUTPUT], [45])
>> > > +m4_define([OFTABLE_LOG_EGRESS_PIPELINE], [47])
>> > >  m4_define([OFTABLE_SAVE_INPORT], [64])
>> > >  m4_define([OFTABLE_LOG_TO_PHY], [65])
>> > >  m4_define([OFTABLE_MAC_BINDING], [66])
>> > > diff --git a/tests/ovn.at b/tests/ovn.at
>> > > index 0dabec8d9..2a834dc41 100644
>> > > --- a/tests/ovn.at
>> > > +++ b/tests/ovn.at
>> > > @@ -40794,7 +40794,7 @@ check_output_flows_tunnelled() {
>> > >    dp_key=$2
>> > >    dp_rport=$3
>> > >    AT_CHECK_UNQUOTED([as $hv ovs-ofctl dump-flows br-int
>> > table=OFTABLE_REMOTE_OUTPUT,metadata=0x${dp_key},reg15=0x${dp_rport} |
>> > ofctl_strip_all | grep -v NXST_FLOW], [0], [dnl
>> > > - table=OFTABLE_REMOTE_OUTPUT,
>> > priority=100,reg13=0/0xffff0000,reg15=0x${dp_rport},metadata=0x${dp_key}
>> >
>> actions=load:0x${dp_key}->NXM_NX_TUN_ID[[0..23]],set_field:0x${dp_rport}->tun_metadata0,move:NXM_NX_REG14[[0..14]]->NXM_NX_TUN_METADATA0[[16..30]],output:1,resubmit(,OFTABLE_LOCAL_OUTPUT)
>> > > + table=OFTABLE_REMOTE_OUTPUT,
>> > priority=100,reg13=0/0xffff0000,reg15=0x${dp_rport},metadata=0x${dp_key}
>> >
>> actions=load:0x${dp_key}->NXM_NX_TUN_ID[[0..23]],set_field:0x${dp_rport}->tun_metadata0,move:NXM_NX_REG14[[0..14]]->NXM_NX_TUN_METADATA0[[16..30]],output:1,resubmit(,OFTABLE_REMOTE_VTEP_OUTPUT)
>> > >  ])
>> > >  }
>> > >
>> >
>> > Regards,
>> > Dumitru
>> >
>> >
>> Thanks,
>> Ales
>> _______________________________________________
>> dev mailing list
>> d...@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
Thanks,
Ales
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to