On 5/19/23 20:18, Vladislav Odintsov wrote:
> This patch is intended to make next two changes:
>
> 1. Support create/update of MAC_Binding for GARP/ND from HW VTEP.
>
> Prior to this patch MAC_Binding records were created only when LRP issued
> ARP request/Neighbor solicitation. If IP to MAC in network attached via
> vtep lport was changed the old MAC_Binding prevented normal
> communication. Now router port (chassisredirect) in logical switch with
> vtep lport catches GARP or Neighbor Solicitation and updates MAC_Binding.
>
> New flow with max priority was added in ls_in_arp_nd_resp and additional
> OF rule in table 37 (OFTABLE_REMOTE_OUTPUT) to process multicast packets
> received from RAMP_SWITCH.
>
> 2. Answer to ARP/ND on requests coming from HW VTEP in lrouter pipeline.
>
> This is needed to reduce duplicated arp-responses, which were generated
> by OVN arp-responder flows in node, where chassisredirect port located
> with responses coming directly from VM interfaces.
>
> As a result of this change, now vifs located on same chassis, where
> chassisredirect ports are located receive ARP/ND mcast packets, which
> previously were suppressed by arp-responder on the node.
>
> Tests and documentation were updated to conform to new behaviour.
>
> Signed-off-by: Vladislav Odintsov <[email protected]>
> ---
Hi Vladislav,
Thanks for the patch.
> controller/binding.c | 48 ++++++++++++++++++++
> controller/local_data.h | 3 ++
> controller/physical.c | 46 +++++++++++++++++--
> northd/northd.c | 10 +++++
> northd/ovn-northd.8.xml | 6 +++
> tests/ovn.at | 99 ++++++++++++++++++++++++++++++++++++++++-
Could you please add a NEWS entry for this behavior change?
> 6 files changed, 207 insertions(+), 5 deletions(-)
>
> diff --git a/controller/binding.c b/controller/binding.c
> index c7a2b3f10..5932ad785 100644
> --- a/controller/binding.c
> +++ b/controller/binding.c
> @@ -509,6 +509,30 @@ update_ld_multichassis_ports(const struct
> sbrec_port_binding *binding_rec,
> }
> }
>
> +static void
> +update_ld_vtep_port(const struct sbrec_port_binding *binding_rec,
> + struct hmap *local_datapaths)
> +{
> + struct local_datapath *ld
> + = get_local_datapath(local_datapaths,
> + binding_rec->datapath->tunnel_key);
> + if (!ld) {
> + return;
> + }
> +
> + if (ld->vtep_port && strcmp(ld->vtep_port->logical_port,
> + binding_rec->logical_port)) {
> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> + VLOG_WARN_RL(&rl, "vtep port '%s' already set for datapath "
> + "'%"PRId64"', skipping the new port '%s'.",
> + ld->vtep_port->logical_port,
> + binding_rec->datapath->tunnel_key,
> + binding_rec->logical_port);
> + return;
> + }
> + ld->vtep_port = binding_rec;
> +}
> +
> static void
> update_ld_localnet_port(const struct sbrec_port_binding *binding_rec,
> struct shash *bridge_mappings,
> @@ -1987,6 +2011,7 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct
> binding_ctx_out *b_ctx_out)
> struct ovs_list external_lports = OVS_LIST_INITIALIZER(&external_lports);
> struct ovs_list multichassis_ports = OVS_LIST_INITIALIZER(
> &multichassis_ports);
> + struct ovs_list vtep_lports = OVS_LIST_INITIALIZER(&vtep_lports);
>
> struct lport {
> struct ovs_list list_node;
> @@ -2010,8 +2035,13 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct
> binding_ctx_out *b_ctx_out)
>
> switch (lport_type) {
> case LP_PATCH:
> + update_related_lport(pb, b_ctx_out);
> + break;
> case LP_VTEP:
> update_related_lport(pb, b_ctx_out);
> + struct lport *vtep_lport = xmalloc(sizeof *vtep_lport);
> + vtep_lport->pb = pb;
> + ovs_list_push_back(&vtep_lports, &vtep_lport->list_node);
> break;
>
> case LP_LOCALPORT:
> @@ -2111,6 +2141,16 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct
> binding_ctx_out *b_ctx_out)
> free(multichassis_lport);
> }
>
> + /* Run through vtep lport list to see if there are vtep ports
> + * on local datapaths discovered from above loop, and update the
> + * corresponding local datapath accordingly. */
> + struct lport *vtep_lport;
> + ovs_list_size(&vtep_lports);
I guess this is a leftover.
> + LIST_FOR_EACH_POP (vtep_lport, list_node, &vtep_lports) {
> + update_ld_vtep_port(vtep_lport->pb, b_ctx_out->local_datapaths);
> + free(vtep_lport);
> + }
> +
> shash_destroy(&bridge_mappings);
> /* Remove stale QoS entries. */
> ovs_qos_entries_gc(b_ctx_in->ovs_idl_txn, b_ctx_in->ovsrec_port_by_qos,
> @@ -2175,6 +2215,11 @@ remove_pb_from_local_datapath(const struct
> sbrec_port_binding *pb,
> }
> } else if (!strcmp(pb->type, "external")) {
> remove_local_datapath_external_port(ld, pb->logical_port);
> + } else if (!strcmp(pb->type, "vtep")) {
> + if (ld->vtep_port && !strcmp(ld->vtep_port->logical_port,
> + pb->logical_port)) {
> + ld->vtep_port = NULL;
> + }
> }
> remove_local_datapath_multichassis_port(ld, pb->logical_port);
> }
> @@ -2836,6 +2881,7 @@ handle_updated_port(struct binding_ctx_in *b_ctx_in,
>
> case LP_VTEP:
> update_related_lport(pb, b_ctx_out);
> + update_ld_vtep_port(pb, b_ctx_out->local_datapaths);
> /* VTEP lports are claimed/released by ovn-controller-vteps.
> * We are not sure what changed. */
> b_ctx_out->non_vif_ports_changed = true;
> @@ -3091,6 +3137,8 @@ delete_done:
> } else if (pb->n_additional_chassis) {
> update_ld_multichassis_ports(pb,
> b_ctx_out->local_datapaths);
> + } else if (lport_type == LP_VTEP) {
> + update_ld_vtep_port(pb, b_ctx_out->local_datapaths);
> }
Nit: I think I'd move this "else if (lport_type == LP_VTEP) {" just
under the LP_EXTERNAL case.
> }
> sbrec_port_binding_index_destroy_row(target);
> diff --git a/controller/local_data.h b/controller/local_data.h
> index 748f009aa..19d13a558 100644
> --- a/controller/local_data.h
> +++ b/controller/local_data.h
> @@ -50,6 +50,9 @@ struct local_datapath {
> /* The localnet port in this datapath, if any (at most one is allowed).
> */
> const struct sbrec_port_binding *localnet_port;
>
> + /* The vtep port in this datapath, if any (at most one is allowed). */
> + const struct sbrec_port_binding *vtep_port;
> +
> struct {
> const struct sbrec_port_binding *local;
> const struct sbrec_port_binding *remote;
> diff --git a/controller/physical.c b/controller/physical.c
> index 2925dcd1d..b75e120c7 100644
> --- a/controller/physical.c
> +++ b/controller/physical.c
> @@ -194,6 +194,15 @@ get_localnet_port(const struct hmap *local_datapaths,
> int64_t tunnel_key)
> }
>
>
> +static const struct sbrec_port_binding *
> +get_vtep_port(const struct hmap *local_datapaths, int64_t tunnel_key)
> +{
> + const struct local_datapath *ld = get_local_datapath(local_datapaths,
> + tunnel_key);
> + return ld ? ld->vtep_port : NULL;
> +}
> +
> +
> static struct zone_ids
> get_zone_ids(const struct sbrec_port_binding *binding,
> const struct simap *ct_zones)
> @@ -1732,7 +1741,7 @@ consider_mc_group(struct ovsdb_idl_index
> *sbrec_port_binding_by_name,
> struct sset remote_chassis = SSET_INITIALIZER(&remote_chassis);
> struct sset vtep_chassis = SSET_INITIALIZER(&vtep_chassis);
>
> - struct match match;
> + struct match match, match_ramp;
> match_outport_dp_and_port_keys(&match, dp_key, mc->tunnel_key);
>
> /* Go through all of the ports in the multicast group:
> @@ -1755,10 +1764,10 @@ consider_mc_group(struct ovsdb_idl_index
> *sbrec_port_binding_by_name,
> * set the output port to be the router patch port for which
> * the redirect port was added.
> */
> - struct ofpbuf ofpacts;
> + struct ofpbuf ofpacts, remote_ofpacts, remote_ofpacts_ramp;
> ofpbuf_init(&ofpacts, 0);
> - struct ofpbuf remote_ofpacts;
> ofpbuf_init(&remote_ofpacts, 0);
> + ofpbuf_init(&remote_ofpacts_ramp, 0);
> for (size_t i = 0; i < mc->n_ports; i++) {
> struct sbrec_port_binding *port = mc->ports[i];
>
> @@ -1783,6 +1792,7 @@ consider_mc_group(struct ovsdb_idl_index
> *sbrec_port_binding_by_name,
> local_output_pb(port->tunnel_key, &ofpacts);
> } else {
> local_output_pb(port->tunnel_key, &remote_ofpacts);
> + local_output_pb(port->tunnel_key, &remote_ofpacts_ramp);
> }
> } if (!strcmp(port->type, "remote")) {
> if (port->chassis) {
> @@ -1866,6 +1876,35 @@ consider_mc_group(struct ovsdb_idl_index
> *sbrec_port_binding_by_name,
> * multicast group as the logical output port. */
> put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
> &remote_ofpacts);
> +
> + if (get_vtep_port(local_datapaths, mc->datapath->tunnel_key)) {
I'd declare 'match_ramp' here as it's only used on this branch of the if
statement.
> + match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, 0,
> + MLF_RCV_FROM_RAMP);
> +
Are we going to match less traffic now? We add this "flags &
MLF_RCV_FROM_RAMP" condition to the already existing priority-100
OFTABLE_REMOTE_OUTPUT flow. Does this break multicast traffic not
received from RAMP (vtep) ports?
> + put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
> + &remote_ofpacts_ramp);
> +
> + /* MCAST traffic which was originally received from
> RAMP_SWITCH
> + * is not allowed to be re-sent to remote_chassis.
> + * Process "patch" port binding for routing in
> + * OFTABLE_REMOTE_OUTPUT and resubmit packets to
> + * OFTABLE_LOCAL_OUTPUT for local delivery. */
> +
> + match_outport_dp_and_port_keys(&match_ramp, dp_key,
> + mc->tunnel_key);
> +
> + /* Match packets coming from RAMP_SWITCH and allowed for
> + * loopback processing (including routing). */
> + match_set_reg_masked(&match_ramp, MFF_LOG_FLAGS - MFF_REG0,
> + MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK,
> + MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK);
> +
> + put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ofpacts_ramp);
> +
> + ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 120,
> + mc->header_.uuid.parts[0], &match_ramp,
> + &remote_ofpacts_ramp, &mc->header_.uuid);
> + }
> }
>
> fanout_to_chassis(mff_ovn_geneve, &remote_chassis, chassis_tunnels,
> @@ -1886,6 +1925,7 @@ consider_mc_group(struct ovsdb_idl_index
> *sbrec_port_binding_by_name,
> }
> ofpbuf_uninit(&ofpacts);
> ofpbuf_uninit(&remote_ofpacts);
> + ofpbuf_uninit(&remote_ofpacts_ramp);
> sset_destroy(&remote_chassis);
> sset_destroy(&vtep_chassis);
> }
> diff --git a/northd/northd.c b/northd/northd.c
> index e713973e7..6dcbf0e1a 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -7849,6 +7849,16 @@ build_vtep_hairpin(struct ovn_datapath *od, struct
> hmap *lflows)
> ds_cstr(&match), "next;");
> }
> }
> +
> + /* ARP/Neighbor Solicitation requests must skip ls_in_arp_rsp table for
> + * packets arrived from HW VTEP (ramp) switch.
> + * Neighbor resolution for router ports is done in logical router ingress
> + * pipeline. ARP resolution for vif lports is done directly by vif
> ports.
> + */
> + ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 65535,
> + REGBIT_FROM_RAMP" == 1 && (arp || nd_ns)",
> + "flags.loopback = 1; next;");
> +
> ds_destroy(&match);
> }
>
> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> index 8164be300..1a9e50b6f 100644
> --- a/northd/ovn-northd.8.xml
> +++ b/northd/ovn-northd.8.xml
> @@ -1378,6 +1378,12 @@
> </p>
>
> <ul>
> + <li>
> + If packet was received from HW VTEP (ramp switch), and this packet is
> + ARP or Neighbor Solicitation, such packet is passed to next table
> with
> + max proirity. ARP/ND requests from HW VTEP must be handled in
> logical
> + router ingress pipeline.
> + </li>
> <li>
> If the logical switch has no router ports with options:arp_proxy
> configured add a priority-100 flows to skip the ARP responder if
> inport
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 53349530b..cb643c39d 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -4267,7 +4267,7 @@ ovn_start
> ovn-nbctl ls-add lsw0
>
> ovn-nbctl lsp-add lsw0 lp1
> -ovn-nbctl lsp-set-addresses lp1 f0:00:00:00:00:01
> +ovn-nbctl lsp-set-addresses lp1 'f0:00:00:00:00:01 192.168.1.24'
>
> ovn-nbctl lsp-add lsw0 lp2
> ovn-nbctl lsp-set-addresses lp2 f0:00:00:00:00:02
> @@ -4288,7 +4288,6 @@ ovn-nbctl set Logical_Switch_Port lpr \
> ovn-nbctl lrp-set-gateway-chassis lrp1 hv1
>
> ovn-nbctl lsp-add lsw0 lpr2
> -ovn-nbctl lr-add lr2
> ovn-nbctl lrp-add lr lrp2 f0:00:00:00:00:f2 192.168.1.254/24
> ovn-nbctl set Logical_Switch_Port lpr2 \
> type=router \
> @@ -4421,6 +4420,7 @@ spa=`ip_to_hex 192 168 1 2`
> tpa=`ip_to_hex 192 168 1 1`
> request=ffffffffffff${sha}08060001080006040001${sha}${spa}ffffffffffff${tpa}
> as hv3 ovs-appctl netdev-dummy/receive vif3 $request
> +echo $request >> 1.expected
> echo $request >> 2.expected
>
> lrpmac=f000000000f1
> @@ -4431,12 +4431,107 @@
> response=${sha}${lrpmac}08060001080006040002${lrpmac}${tpa}${sha}${spa}
> # we expect arp reply packet on hv3
> echo $response >> 3.expected
>
> +# Ensure there is no MAC_Binding, send GARP from vtep side,
> +# check that GARP was received by vif1, vif2 and MAC_Binding was created.
> +wait_row_count MAC_Binding 0 ip="192.168.1.200" mac='"f0:00:00:00:00:12"'
> +
> +sha=0200000000ff
> +tha=ffffffffffff
> +spa=`ip_to_hex 192 168 1 200`
> +tpa=$spa
> +garp=ffffffffffff${sha}08060001080006040002${sha}${spa}${tha}${tpa}
> +as hv3 ovs-appctl netdev-dummy/receive vif3 $garp
> +echo $garp >> 1.expected
> +echo $garp >> 2.expected
> +
> +wait_row_count MAC_Binding 2 ip="192.168.1.200" mac='"02:00:00:00:00:ff"'
> +
> +# Send same GARP with changed SHA and ensure MAC_Binding was updated.
> +sha=0200000000ee
> +garp=ffffffffffff${sha}08060001080006040002${sha}${spa}${tha}${tpa}
> +as hv3 ovs-appctl netdev-dummy/receive vif3 $garp
> +echo $garp >> 1.expected
> +echo $garp >> 2.expected
> +
> +wait_row_count MAC_Binding 2 ip="192.168.1.200" mac='"02:00:00:00:00:ee"'
> +
> +# Remove local ports on hv1 and check that ARP logic is stil working. -->
> +check as hv1 ovs-vsctl remove interface vif1 external_ids iface-id
> +hv_uuid=$(fetch_column Chassis _uuid name=hv1)
> +wait_row_count Port_Binding 0 chassis="$hv_uuid" type="''"
> +
> +# Cleanup MAC_Bindings after previous checks.
> +check ovn-sbctl --all destroy MAC_Binding
> +
> +# ARP request from VTEP to LRP should be responded by ARP responder.
> +sha=f00000000003
> +spa=`ip_to_hex 192 168 1 2`
> +tpa=`ip_to_hex 192 168 1 1`
> +request=ffffffffffff${sha}08060001080006040001${sha}${spa}ffffffffffff${tpa}
> +as hv3 ovs-appctl netdev-dummy/receive vif3 $request
> +echo $request >> 2.expected
> +
> +lrpmac=f000000000f1
> +response=${sha}${lrpmac}08060001080006040002${lrpmac}${tpa}${sha}${spa}
> +# since lrp1 has gateway chassis set on hv1, hv1 will suppress arp request
> and
> +# answer with arp reply by OVS directly to vtep lport. all other lports,
> +# except lport from which this request was initiated, will receive arp
> request.
> +# we expect arp reply packet on hv3
> +echo $response >> 3.expected
> +
> +# Ensure there is no MAC_Binding, send GARP from vtep side,
> +# check that GARP was received by vif1, vif2 and MAC_Binding was created.
> +wait_row_count MAC_Binding 0 ip="192.168.1.200" mac='"f0:00:00:00:00:12"'
> +
> +sha=0200000000ff
> +tha=ffffffffffff
> +spa=`ip_to_hex 192 168 1 200`
> +tpa=$spa
> +garp=ffffffffffff${sha}08060001080006040002${sha}${spa}${tha}${tpa}
> +as hv3 ovs-appctl netdev-dummy/receive vif3 $garp
> +echo $garp >> 2.expected
> +
> +wait_row_count MAC_Binding 2 ip="192.168.1.200" mac='"02:00:00:00:00:ff"'
> +
> +# Send same GARP with changed SHA and ensure MAC_Binding was updated.
> +sha=0200000000ee
> +garp=ffffffffffff${sha}08060001080006040002${sha}${spa}${tha}${tpa}
> +as hv3 ovs-appctl netdev-dummy/receive vif3 $garp
> +echo $garp >> 2.expected
> +
> +wait_row_count MAC_Binding 2 ip="192.168.1.200" mac='"02:00:00:00:00:ee"'
> +
> +# Restore.
> +check as hv1 ovs-vsctl set interface vif1 external_ids:iface-id=lp1
> +wait_row_count Port_Binding 1 chassis="$hv_uuid" type='""'
> +# <--
> +
> +# Check ARP response to request for IP on VIF port sent from vtep.
> +# ARP request from VTEP to LSP's IP must be answered only by LSP vif1 (lp1).
> +sha=f00000000003
> +spa=`ip_to_hex 192 168 1 2`
> +tpa=`ip_to_hex 192 168 1 24`
> +request=ffffffffffff${sha}08060001080006040001${sha}${spa}ffffffffffff${tpa}
> +as hv3 ovs-appctl netdev-dummy/receive vif3 $request
> +echo $request >> 1.expected
> +echo $request >> 2.expected
> +
> +# Simulate response from vif.
> +sha=f00000000001
> +tha=f00000000003
> +spa=`ip_to_hex 192 168 1 200`
> +tpa=`ip_to_hex 192 168 1 2`
> +response=${tha}${sha}08060001080006040002${sha}${spa}${tha}${tpa}
> +as hv1 ovs-appctl netdev-dummy/receive vif1 $response
> +echo $response >> 3.expected
> +
> # First ensure basic flow contents are as we expect.
> AT_CHECK([ovn-sbctl lflow-list lsw0 | grep 'reg0[\[14\]]' | sort | sed
> 's/table=../table=??/g'], [0], [dnl
> table=??(ls_in_check_port_sec), priority=70 , match=(inport ==
> "lp-vtep"), action=(reg0[[14]] = 1; next(pipeline=ingress, table=??);)
> table=??(ls_in_hairpin ), priority=1000 , match=(reg0[[14]] == 1),
> action=(next(pipeline=ingress, table=??);)
> table=??(ls_in_hairpin ), priority=2000 , match=(reg0[[14]] == 1 &&
> is_chassis_resident("cr-lrp1")), action=(next;)
> table=??(ls_in_hairpin ), priority=2000 , match=(reg0[[14]] == 1 &&
> is_chassis_resident("cr-lrp2")), action=(next;)
> + table=??(ls_in_arp_rsp ), priority=65535, match=(reg0[[14]] == 1 &&
> (arp || nd_ns)), action=(flags.loopback = 1; next;)
> ])
>
> # dump information with counters
Regards,
Dumitru
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev