On 5/19/23 20:18, Vladislav Odintsov wrote:
> This patch is intended to make next two changes:
> 
> 1. Support create/update of MAC_Binding for GARP/ND from HW VTEP.
> 
> Prior to this patch MAC_Binding records were created only when LRP issued
> ARP request/Neighbor solicitation.  If IP to MAC in network attached via
> vtep lport was changed the old MAC_Binding prevented normal
> communication.  Now router port (chassisredirect) in logical switch with
> vtep lport catches GARP or Neighbor Solicitation and updates MAC_Binding.
> 
> New flow with max priority was added in ls_in_arp_nd_resp and additional
> OF rule in table 37 (OFTABLE_REMOTE_OUTPUT) to process multicast packets
> received from RAMP_SWITCH.
> 
> 2. Answer to ARP/ND on requests coming from HW VTEP in lrouter pipeline.
> 
> This is needed to reduce duplicated arp-responses, which were generated
> by OVN arp-responder flows in node, where chassisredirect port located
> with responses coming directly from VM interfaces.
> 
> As a result of this change, now vifs located on same chassis, where
> chassisredirect ports are located receive ARP/ND mcast packets, which
> previously were suppressed by arp-responder on the node.
> 
> Tests and documentation were updated to conform to new behaviour.
> 
> Signed-off-by: Vladislav Odintsov <[email protected]>
> ---

Hi Vladislav,

Thanks for the patch.

>  controller/binding.c    | 48 ++++++++++++++++++++
>  controller/local_data.h |  3 ++
>  controller/physical.c   | 46 +++++++++++++++++--
>  northd/northd.c         | 10 +++++
>  northd/ovn-northd.8.xml |  6 +++
>  tests/ovn.at            | 99 ++++++++++++++++++++++++++++++++++++++++-

Could you please add a NEWS entry for this behavior change?

>  6 files changed, 207 insertions(+), 5 deletions(-)
> 
> diff --git a/controller/binding.c b/controller/binding.c
> index c7a2b3f10..5932ad785 100644
> --- a/controller/binding.c
> +++ b/controller/binding.c
> @@ -509,6 +509,30 @@ update_ld_multichassis_ports(const struct 
> sbrec_port_binding *binding_rec,
>      }
>  }
>  
> +static void
> +update_ld_vtep_port(const struct sbrec_port_binding *binding_rec,
> +                    struct hmap *local_datapaths)
> +{
> +    struct local_datapath *ld
> +        = get_local_datapath(local_datapaths,
> +                             binding_rec->datapath->tunnel_key);
> +    if (!ld) {
> +        return;
> +    }
> +
> +    if (ld->vtep_port && strcmp(ld->vtep_port->logical_port,
> +                                binding_rec->logical_port)) {
> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> +        VLOG_WARN_RL(&rl, "vtep port '%s' already set for datapath "
> +                     "'%"PRId64"', skipping the new port '%s'.",
> +                     ld->vtep_port->logical_port,
> +                     binding_rec->datapath->tunnel_key,
> +                     binding_rec->logical_port);
> +        return;
> +    }
> +    ld->vtep_port = binding_rec;
> +}
> +
>  static void
>  update_ld_localnet_port(const struct sbrec_port_binding *binding_rec,
>                          struct shash *bridge_mappings,
> @@ -1987,6 +2011,7 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct 
> binding_ctx_out *b_ctx_out)
>      struct ovs_list external_lports = OVS_LIST_INITIALIZER(&external_lports);
>      struct ovs_list multichassis_ports = OVS_LIST_INITIALIZER(
>                                                          &multichassis_ports);
> +    struct ovs_list vtep_lports = OVS_LIST_INITIALIZER(&vtep_lports);
>  
>      struct lport {
>          struct ovs_list list_node;
> @@ -2010,8 +2035,13 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct 
> binding_ctx_out *b_ctx_out)
>  
>          switch (lport_type) {
>          case LP_PATCH:
> +            update_related_lport(pb, b_ctx_out);
> +            break;
>          case LP_VTEP:
>              update_related_lport(pb, b_ctx_out);
> +            struct lport *vtep_lport = xmalloc(sizeof *vtep_lport);
> +            vtep_lport->pb = pb;
> +            ovs_list_push_back(&vtep_lports, &vtep_lport->list_node);
>              break;
>  
>          case LP_LOCALPORT:
> @@ -2111,6 +2141,16 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct 
> binding_ctx_out *b_ctx_out)
>          free(multichassis_lport);
>      }
>  
> +    /* Run through vtep lport list to see if there are vtep ports
> +     * on local datapaths discovered from above loop, and update the
> +     * corresponding local datapath accordingly. */
> +    struct lport *vtep_lport;
> +    ovs_list_size(&vtep_lports);

I guess this is a leftover.

> +    LIST_FOR_EACH_POP (vtep_lport, list_node, &vtep_lports) {
> +        update_ld_vtep_port(vtep_lport->pb, b_ctx_out->local_datapaths);
> +        free(vtep_lport);
> +    }
> +
>      shash_destroy(&bridge_mappings);
>      /* Remove stale QoS entries. */
>      ovs_qos_entries_gc(b_ctx_in->ovs_idl_txn, b_ctx_in->ovsrec_port_by_qos,
> @@ -2175,6 +2215,11 @@ remove_pb_from_local_datapath(const struct 
> sbrec_port_binding *pb,
>          }
>      } else if (!strcmp(pb->type, "external")) {
>          remove_local_datapath_external_port(ld, pb->logical_port);
> +    } else if (!strcmp(pb->type, "vtep")) {
> +        if (ld->vtep_port && !strcmp(ld->vtep_port->logical_port,
> +                                     pb->logical_port)) {
> +            ld->vtep_port = NULL;
> +        }
>      }
>      remove_local_datapath_multichassis_port(ld, pb->logical_port);
>  }
> @@ -2836,6 +2881,7 @@ handle_updated_port(struct binding_ctx_in *b_ctx_in,
>  
>      case LP_VTEP:
>          update_related_lport(pb, b_ctx_out);
> +        update_ld_vtep_port(pb, b_ctx_out->local_datapaths);
>          /* VTEP lports are claimed/released by ovn-controller-vteps.
>           * We are not sure what changed. */
>          b_ctx_out->non_vif_ports_changed = true;
> @@ -3091,6 +3137,8 @@ delete_done:
>                  } else if (pb->n_additional_chassis) {
>                      update_ld_multichassis_ports(pb,
>                                                   b_ctx_out->local_datapaths);
> +                } else if (lport_type == LP_VTEP) {
> +                    update_ld_vtep_port(pb, b_ctx_out->local_datapaths);
>                  }

Nit: I think I'd move this "else if (lport_type == LP_VTEP) {" just
under the LP_EXTERNAL case.

>              }
>              sbrec_port_binding_index_destroy_row(target);
> diff --git a/controller/local_data.h b/controller/local_data.h
> index 748f009aa..19d13a558 100644
> --- a/controller/local_data.h
> +++ b/controller/local_data.h
> @@ -50,6 +50,9 @@ struct local_datapath {
>      /* The localnet port in this datapath, if any (at most one is allowed). 
> */
>      const struct sbrec_port_binding *localnet_port;
>  
> +    /* The vtep port in this datapath, if any (at most one is allowed). */
> +    const struct sbrec_port_binding *vtep_port;
> +
>      struct {
>          const struct sbrec_port_binding *local;
>          const struct sbrec_port_binding *remote;
> diff --git a/controller/physical.c b/controller/physical.c
> index 2925dcd1d..b75e120c7 100644
> --- a/controller/physical.c
> +++ b/controller/physical.c
> @@ -194,6 +194,15 @@ get_localnet_port(const struct hmap *local_datapaths, 
> int64_t tunnel_key)
>  }
>  
>  
> +static const struct sbrec_port_binding *
> +get_vtep_port(const struct hmap *local_datapaths, int64_t tunnel_key)
> +{
> +    const struct local_datapath *ld = get_local_datapath(local_datapaths,
> +                                                         tunnel_key);
> +    return ld ? ld->vtep_port : NULL;
> +}
> +
> +
>  static struct zone_ids
>  get_zone_ids(const struct sbrec_port_binding *binding,
>               const struct simap *ct_zones)
> @@ -1732,7 +1741,7 @@ consider_mc_group(struct ovsdb_idl_index 
> *sbrec_port_binding_by_name,
>      struct sset remote_chassis = SSET_INITIALIZER(&remote_chassis);
>      struct sset vtep_chassis = SSET_INITIALIZER(&vtep_chassis);
>  
> -    struct match match;
> +    struct match match, match_ramp;
>      match_outport_dp_and_port_keys(&match, dp_key, mc->tunnel_key);
>  
>      /* Go through all of the ports in the multicast group:
> @@ -1755,10 +1764,10 @@ consider_mc_group(struct ovsdb_idl_index 
> *sbrec_port_binding_by_name,
>       *      set the output port to be the router patch port for which
>       *      the redirect port was added.
>       */
> -    struct ofpbuf ofpacts;
> +    struct ofpbuf ofpacts, remote_ofpacts, remote_ofpacts_ramp;
>      ofpbuf_init(&ofpacts, 0);
> -    struct ofpbuf remote_ofpacts;
>      ofpbuf_init(&remote_ofpacts, 0);
> +    ofpbuf_init(&remote_ofpacts_ramp, 0);
>      for (size_t i = 0; i < mc->n_ports; i++) {
>          struct sbrec_port_binding *port = mc->ports[i];
>  
> @@ -1783,6 +1792,7 @@ consider_mc_group(struct ovsdb_idl_index 
> *sbrec_port_binding_by_name,
>                  local_output_pb(port->tunnel_key, &ofpacts);
>              } else {
>                  local_output_pb(port->tunnel_key, &remote_ofpacts);
> +                local_output_pb(port->tunnel_key, &remote_ofpacts_ramp);
>              }
>          } if (!strcmp(port->type, "remote")) {
>              if (port->chassis) {
> @@ -1866,6 +1876,35 @@ consider_mc_group(struct ovsdb_idl_index 
> *sbrec_port_binding_by_name,
>               * multicast group as the logical output port. */
>              put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
>                       &remote_ofpacts);
> +
> +            if (get_vtep_port(local_datapaths, mc->datapath->tunnel_key)) {

I'd declare 'match_ramp' here as it's only used on this branch of the if
statement.

> +                match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, 0,
> +                                     MLF_RCV_FROM_RAMP);
> +

Are we going to match less traffic now?  We add this "flags &
MLF_RCV_FROM_RAMP" condition to the already existing priority-100
OFTABLE_REMOTE_OUTPUT flow.  Does this break multicast traffic not
received from RAMP (vtep) ports?

> +                put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
> +                         &remote_ofpacts_ramp);
> +
> +                /* MCAST traffic which was originally received from 
> RAMP_SWITCH
> +                 * is not allowed to be re-sent to remote_chassis.
> +                 * Process "patch" port binding for routing in
> +                 * OFTABLE_REMOTE_OUTPUT and resubmit packets to
> +                 * OFTABLE_LOCAL_OUTPUT for local delivery. */
> +
> +                match_outport_dp_and_port_keys(&match_ramp, dp_key,
> +                                               mc->tunnel_key);
> +
> +                /* Match packets coming from RAMP_SWITCH and allowed for
> +                * loopback processing (including routing). */
> +                match_set_reg_masked(&match_ramp, MFF_LOG_FLAGS - MFF_REG0,
> +                                     MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK,
> +                                     MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK);
> +
> +                put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ofpacts_ramp);
> +
> +                ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 120,
> +                                mc->header_.uuid.parts[0], &match_ramp,
> +                                &remote_ofpacts_ramp, &mc->header_.uuid);
> +            }
>          }
>  
>          fanout_to_chassis(mff_ovn_geneve, &remote_chassis, chassis_tunnels,
> @@ -1886,6 +1925,7 @@ consider_mc_group(struct ovsdb_idl_index 
> *sbrec_port_binding_by_name,
>      }
>      ofpbuf_uninit(&ofpacts);
>      ofpbuf_uninit(&remote_ofpacts);
> +    ofpbuf_uninit(&remote_ofpacts_ramp);
>      sset_destroy(&remote_chassis);
>      sset_destroy(&vtep_chassis);
>  }
> diff --git a/northd/northd.c b/northd/northd.c
> index e713973e7..6dcbf0e1a 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -7849,6 +7849,16 @@ build_vtep_hairpin(struct ovn_datapath *od, struct 
> hmap *lflows)
>                            ds_cstr(&match), "next;");
>          }
>      }
> +
> +    /* ARP/Neighbor Solicitation requests must skip ls_in_arp_rsp table for
> +     * packets arrived from HW VTEP (ramp) switch.
> +     * Neighbor resolution for router ports is done in logical router ingress
> +     * pipeline.  ARP resolution for vif lports is done directly by vif 
> ports.
> +     */
> +    ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 65535,
> +                  REGBIT_FROM_RAMP" == 1 && (arp || nd_ns)",
> +                  "flags.loopback = 1; next;");
> +
>      ds_destroy(&match);
>  }
>  
> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> index 8164be300..1a9e50b6f 100644
> --- a/northd/ovn-northd.8.xml
> +++ b/northd/ovn-northd.8.xml
> @@ -1378,6 +1378,12 @@
>       </p>
>  
>      <ul>
> +      <li>
> +        If packet was received from HW VTEP (ramp switch), and this packet is
> +        ARP or Neighbor Solicitation, such packet is passed to next table 
> with
> +        max proirity.  ARP/ND requests from HW VTEP must be handled in 
> logical
> +        router ingress pipeline.
> +      </li>
>        <li>
>          If the logical switch has no router ports with options:arp_proxy
>          configured add a priority-100 flows to skip the ARP responder if 
> inport
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 53349530b..cb643c39d 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -4267,7 +4267,7 @@ ovn_start
>  ovn-nbctl ls-add lsw0
>  
>  ovn-nbctl lsp-add lsw0 lp1
> -ovn-nbctl lsp-set-addresses lp1 f0:00:00:00:00:01
> +ovn-nbctl lsp-set-addresses lp1 'f0:00:00:00:00:01 192.168.1.24'
>  
>  ovn-nbctl lsp-add lsw0 lp2
>  ovn-nbctl lsp-set-addresses lp2 f0:00:00:00:00:02
> @@ -4288,7 +4288,6 @@ ovn-nbctl set Logical_Switch_Port lpr \
>  ovn-nbctl lrp-set-gateway-chassis lrp1 hv1
>  
>  ovn-nbctl lsp-add lsw0 lpr2
> -ovn-nbctl lr-add lr2
>  ovn-nbctl lrp-add lr lrp2 f0:00:00:00:00:f2 192.168.1.254/24
>  ovn-nbctl set Logical_Switch_Port lpr2 \
>      type=router \
> @@ -4421,6 +4420,7 @@ spa=`ip_to_hex 192 168 1 2`
>  tpa=`ip_to_hex 192 168 1 1`
>  request=ffffffffffff${sha}08060001080006040001${sha}${spa}ffffffffffff${tpa}
>  as hv3 ovs-appctl netdev-dummy/receive vif3 $request
> +echo $request >> 1.expected
>  echo $request >> 2.expected
>  
>  lrpmac=f000000000f1
> @@ -4431,12 +4431,107 @@ 
> response=${sha}${lrpmac}08060001080006040002${lrpmac}${tpa}${sha}${spa}
>  # we expect arp reply packet on hv3
>  echo $response >> 3.expected
>  
> +# Ensure there is no MAC_Binding, send GARP from vtep side,
> +# check that GARP was received by vif1, vif2 and MAC_Binding was created.
> +wait_row_count MAC_Binding 0 ip="192.168.1.200" mac='"f0:00:00:00:00:12"'
> +
> +sha=0200000000ff
> +tha=ffffffffffff
> +spa=`ip_to_hex 192 168 1 200`
> +tpa=$spa
> +garp=ffffffffffff${sha}08060001080006040002${sha}${spa}${tha}${tpa}
> +as hv3 ovs-appctl netdev-dummy/receive vif3 $garp
> +echo $garp >> 1.expected
> +echo $garp >> 2.expected
> +
> +wait_row_count MAC_Binding 2 ip="192.168.1.200" mac='"02:00:00:00:00:ff"'
> +
> +# Send same GARP with changed SHA and ensure MAC_Binding was updated.
> +sha=0200000000ee
> +garp=ffffffffffff${sha}08060001080006040002${sha}${spa}${tha}${tpa}
> +as hv3 ovs-appctl netdev-dummy/receive vif3 $garp
> +echo $garp >> 1.expected
> +echo $garp >> 2.expected
> +
> +wait_row_count MAC_Binding 2 ip="192.168.1.200" mac='"02:00:00:00:00:ee"'
> +
> +# Remove local ports on hv1 and check that ARP logic is stil working. -->
> +check as hv1 ovs-vsctl remove interface vif1 external_ids iface-id
> +hv_uuid=$(fetch_column Chassis _uuid name=hv1)
> +wait_row_count Port_Binding 0 chassis="$hv_uuid" type="''"
> +
> +# Cleanup MAC_Bindings after previous checks.
> +check ovn-sbctl --all destroy MAC_Binding
> +
> +# ARP request from VTEP to LRP should be responded by ARP responder.
> +sha=f00000000003
> +spa=`ip_to_hex 192 168 1 2`
> +tpa=`ip_to_hex 192 168 1 1`
> +request=ffffffffffff${sha}08060001080006040001${sha}${spa}ffffffffffff${tpa}
> +as hv3 ovs-appctl netdev-dummy/receive vif3 $request
> +echo $request >> 2.expected
> +
> +lrpmac=f000000000f1
> +response=${sha}${lrpmac}08060001080006040002${lrpmac}${tpa}${sha}${spa}
> +# since lrp1 has gateway chassis set on hv1, hv1 will suppress arp request 
> and
> +# answer with arp reply by OVS directly to vtep lport. all other lports,
> +# except lport from which this request was initiated, will receive arp 
> request.
> +# we expect arp reply packet on hv3
> +echo $response >> 3.expected
> +
> +# Ensure there is no MAC_Binding, send GARP from vtep side,
> +# check that GARP was received by vif1, vif2 and MAC_Binding was created.
> +wait_row_count MAC_Binding 0 ip="192.168.1.200" mac='"f0:00:00:00:00:12"'
> +
> +sha=0200000000ff
> +tha=ffffffffffff
> +spa=`ip_to_hex 192 168 1 200`
> +tpa=$spa
> +garp=ffffffffffff${sha}08060001080006040002${sha}${spa}${tha}${tpa}
> +as hv3 ovs-appctl netdev-dummy/receive vif3 $garp
> +echo $garp >> 2.expected
> +
> +wait_row_count MAC_Binding 2 ip="192.168.1.200" mac='"02:00:00:00:00:ff"'
> +
> +# Send same GARP with changed SHA and ensure MAC_Binding was updated.
> +sha=0200000000ee
> +garp=ffffffffffff${sha}08060001080006040002${sha}${spa}${tha}${tpa}
> +as hv3 ovs-appctl netdev-dummy/receive vif3 $garp
> +echo $garp >> 2.expected
> +
> +wait_row_count MAC_Binding 2 ip="192.168.1.200" mac='"02:00:00:00:00:ee"'
> +
> +# Restore.
> +check as hv1 ovs-vsctl set interface vif1 external_ids:iface-id=lp1
> +wait_row_count Port_Binding 1 chassis="$hv_uuid" type='""'
> +# <--
> +
> +# Check ARP response to request for IP on VIF port sent from vtep.
> +# ARP request from VTEP to LSP's IP must be answered only by LSP vif1 (lp1).
> +sha=f00000000003
> +spa=`ip_to_hex 192 168 1 2`
> +tpa=`ip_to_hex 192 168 1 24`
> +request=ffffffffffff${sha}08060001080006040001${sha}${spa}ffffffffffff${tpa}
> +as hv3 ovs-appctl netdev-dummy/receive vif3 $request
> +echo $request >> 1.expected
> +echo $request >> 2.expected
> +
> +# Simulate response from vif.
> +sha=f00000000001
> +tha=f00000000003
> +spa=`ip_to_hex 192 168 1 200`
> +tpa=`ip_to_hex 192 168 1 2`
> +response=${tha}${sha}08060001080006040002${sha}${spa}${tha}${tpa}
> +as hv1 ovs-appctl netdev-dummy/receive vif1 $response
> +echo $response >> 3.expected
> +
>  # First ensure basic flow contents are as we expect.
>  AT_CHECK([ovn-sbctl lflow-list lsw0 | grep 'reg0[\[14\]]' | sort | sed 
> 's/table=../table=??/g'], [0], [dnl
>    table=??(ls_in_check_port_sec), priority=70   , match=(inport == 
> "lp-vtep"), action=(reg0[[14]] = 1; next(pipeline=ingress, table=??);)
>    table=??(ls_in_hairpin      ), priority=1000 , match=(reg0[[14]] == 1), 
> action=(next(pipeline=ingress, table=??);)
>    table=??(ls_in_hairpin      ), priority=2000 , match=(reg0[[14]] == 1 && 
> is_chassis_resident("cr-lrp1")), action=(next;)
>    table=??(ls_in_hairpin      ), priority=2000 , match=(reg0[[14]] == 1 && 
> is_chassis_resident("cr-lrp2")), action=(next;)
> +  table=??(ls_in_arp_rsp      ), priority=65535, match=(reg0[[14]] == 1 && 
> (arp || nd_ns)), action=(flags.loopback = 1; next;)
>  ])
>  
>  # dump information with counters

Regards,
Dumitru

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to