[ovs-dev] [PATCH v6 ovn 2/3] northd: enable check_pkt_larger for gw router

2021-07-24 Thread Lorenzo Bianconi
As it is already done for distributed gw router scenario, introduce
check_pkt_larger logical flows for gw router use case.

Co-authored-by: Numan Siddique 
Signed-off-by: Numan Siddique 
Signed-off-by: Lorenzo Bianconi 
---
 northd/ovn-northd.8.xml |  24 
 northd/ovn-northd.c |  30 +++---
 northd/ovn_northd.dl|  82 +++
 tests/ovn-northd.at | 121 
 tests/ovn.at|  47 
 5 files changed, 283 insertions(+), 21 deletions(-)

diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index 99a19f853..5f2b43cd1 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -3673,13 +3673,13 @@ outport = P
 Ingress Table 15: Check packet length
 
 
-  For distributed logical routers with distributed gateway port configured
-  with options:gateway_mtu to a valid integer value, this
-  table adds a priority-50 logical flow with the match
-  ip4  outport == GW_PORT where
-  GW_PORT is the distributed gateway router port and applies the
-  action check_pkt_larger and advances the packet to the
-  next table.
+  For distributed logical routers or gateway routers with gateway
+  port configured with options:gateway_mtu to a valid
+  integer value, this table adds a priority-50 logical flow with
+  the match ip4  outport == GW_PORT
+  where GW_PORT is the gateway router port and applies
+  the action check_pkt_larger and advances the packet
+  to the next table.
 
 
 
@@ -3703,14 +3703,14 @@ REGBIT_PKT_LARGER = check_pkt_larger(L); 
next;
 Ingress Table 16: Handle larger packets
 
 
-  For distributed logical routers with distributed gateway port configured
-  with options:gateway_mtu to a valid integer value, this
-  table adds the following priority-50 logical flow for each
+  For distributed logical routers or gateway routers with gateway port
+  configured with options:gateway_mtu to a valid integer
+  value, this table adds the following priority-50 logical flow for each
   logical router port with the match inport == LRP
outport == GW_PORT 
   REGBIT_PKT_LARGER, where LRP is the logical
-  router port and GW_PORT is the distributed gateway router
-  port and applies the following action for ipv4 and ipv6 respectively:
+  router port and GW_PORT is the gateway router port and applies
+  the following action for ipv4 and ipv6 respectively:
 
 
 
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 18f6f90ce..20f8d7b73 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -11038,17 +11038,29 @@ build_check_pkt_len_flows_for_lrouter(
 struct ds *match, struct ds *actions,
 struct shash *meter_groups)
 {
-if (od->nbr) {
+if (!od->nbr) {
+return;
+}
 
-/* Packets are allowed by default. */
-ovn_lflow_add(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 0, "1",
-  "next;");
-ovn_lflow_add(lflows, od, S_ROUTER_IN_LARGER_PKTS, 0, "1",
-  "next;");
+/* Packets are allowed by default. */
+ovn_lflow_add(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 0, "1",
+  "next;");
+ovn_lflow_add(lflows, od, S_ROUTER_IN_LARGER_PKTS, 0, "1",
+  "next;");
 
-if (od->l3dgw_port && od->l3redirect_port) {
-build_check_pkt_len_flows_for_lrp(od->l3dgw_port, lflows,
-  ports, meter_groups,
+if (od->l3dgw_port && od->l3redirect_port) {
+/* gw router port */
+build_check_pkt_len_flows_for_lrp(od->l3dgw_port, lflows,
+  ports, meter_groups, match, actions);
+} else if (smap_get(>nbr->options, "chassis")) {
+for (size_t i = 0; i < od->nbr->n_ports; i++) {
+/* gw router */
+struct ovn_port *rp = ovn_port_find(ports,
+od->nbr->ports[i]->name);
+if (!rp) {
+continue;
+}
+build_check_pkt_len_flows_for_lrp(rp, lflows, ports, meter_groups,
   match, actions);
 }
 }
diff --git a/northd/ovn_northd.dl b/northd/ovn_northd.dl
index 7bfaae992..4c7bf386a 100644
--- a/northd/ovn_northd.dl
+++ b/northd/ovn_northd.dl
@@ -7504,6 +7504,7 @@ for ((._uuid = lr_uuid))
 
 /* Local router ingress table CHK_PKT_LEN: Check packet length.
  *
+ * For distributed routers with gateway ports.
  * Any IPv4 packet with outport set to the distributed gateway
  * router port, check the packet length and store the result in the
  * 'REGBIT_PKT_LARGER' register bit.
@@ -7514,6 +7515,18 @@ for ((._uuid = lr_uuid))
  * router port and the 'REGBIT_PKT_LARGER' register bit is set,
  * generate ICMPv4 packet with type 3 (Destination Unreachable) and
  * code 4 (Fragmentation 

[ovs-dev] [PATCH v6 ovn 3/3] northd: add check_pkt_larger lflows for ingress traffic

2021-07-24 Thread Lorenzo Bianconi
Introduce check_pkt_larger action for ingress traffic
entering the cluster from a distributed gw router port
or from a gw router. This patch enables pMTU discovery
for ingress traffic.

Signed-off-by: Lorenzo Bianconi 
---
 northd/ovn-northd.8.xml |  60 +++--
 northd/ovn-northd.c | 184 +++-
 northd/ovn_northd.dl| 157 +++---
 tests/ovn-northd.at |  40 -
 tests/ovn.at| 135 -
 5 files changed, 458 insertions(+), 118 deletions(-)

diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index 5f2b43cd1..290de7540 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -1940,6 +1940,15 @@ output;
   eth.dst == E is only programmed on
   the gateway port instance on the gateway chassis.
 
+
+
+  For a distributed logical router or for gateway router where
+  the port is configured with options:gateway_mtu
+  the action of the above flow is modified adding
+  check_pkt_larger in order to mark the packet
+  setting REGBIT_PKT_LARGER if the size is greater
+  than the MTU
+
   
 
   
@@ -2164,6 +2173,46 @@ next;
 
 
 
+  
+
+  For distributed logical routers or gateway routers with gateway port
+  configured with options:gateway_mtu to a valid integer
+  value, a priority-150 flow with the match inport ==
+  LRP  REGBIT_PKT_LARGER 
+  REGBIT_EGRESS_LOOPBACK == 0, where LRP is the
+  logical router port and applies the following action for ipv4
+  and ipv6 respectively:
+
+
+
+icmp4 {
+icmp4.type = 3; /* Destination Unreachable. */
+icmp4.code = 4;  /* Frag Needed and DF was Set. */
+icmp4.frag_mtu = M;
+eth.dst = E;
+ip4.dst = ip4.src;
+ip4.src = I;
+ip.ttl = 255;
+REGBIT_EGRESS_LOOPBACK = 1;
+REGBIT_PKT_LARGER 0;
+next(pipeline=ingress, table=0);
+};
+
+icmp6 {
+icmp6.type = 2;
+icmp6.code = 0;
+icmp6.frag_mtu = M;
+eth.dst = E;
+ip6.dst = ip6.src;
+ip6.src = I;
+ip.ttl = 255;
+REGBIT_EGRESS_LOOPBACK = 1;
+REGBIT_PKT_LARGER 0;
+next(pipeline=ingress, table=0);
+};
+
+  
+
   
 
   For each NAT entry of a distributed logical router  (with
@@ -3705,12 +3754,11 @@ REGBIT_PKT_LARGER = check_pkt_larger(L); 
next;
 
   For distributed logical routers or gateway routers with gateway port
   configured with options:gateway_mtu to a valid integer
-  value, this table adds the following priority-50 logical flow for each
+  value, this table adds the following priority-150 logical flow for each
   logical router port with the match inport == LRP
-   outport == GW_PORT 
-  REGBIT_PKT_LARGER, where LRP is the logical
-  router port and GW_PORT is the gateway router port and applies
-  the following action for ipv4 and ipv6 respectively:
+   REGBIT_PKT_LARGER  !REGBIT_EGRESS_LOOPBACK,
+  where LRP is the logical router port and applies the following
+  action for ipv4 and ipv6 respectively:
 
 
 
@@ -3723,6 +3771,7 @@ icmp4 {
 ip4.src = I;
 ip.ttl = 255;
 REGBIT_EGRESS_LOOPBACK = 1;
+REGBIT_PKT_LARGER = 0;
 next(pipeline=ingress, table=0);
 };
 
@@ -3735,6 +3784,7 @@ icmp6 {
 ip6.src = I;
 ip.ttl = 255;
 REGBIT_EGRESS_LOOPBACK = 1;
+REGBIT_PKT_LARGER = 0;
 next(pipeline=ingress, table=0);
 };
 
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 20f8d7b73..9d6f52c2e 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -9952,6 +9952,10 @@ build_adm_ctrl_flows_for_lrouter(
 }
 }
 
+static void
+build_check_pkt_len_action_string(struct ovn_port *op, int *pmtu,
+  struct ds *actions);
+
 /* Logical router ingress Table 0: L2 Admission Control
  * This table drops packets that the router shouldn???t see at all based
  * on their Ethernet headers.
@@ -9979,6 +9983,8 @@ build_adm_ctrl_flows_for_lrouter_port(
  * the pipeline.
  */
 ds_clear(actions);
+
+build_check_pkt_len_action_string(op, NULL, actions);
 ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
   op->lrp_networks.ea_s);
 
@@ -10914,33 +10920,121 @@ build_arp_resolve_flows_for_lrouter_port(
 
 }
 
+static void
+build_icmperr_pkt_big_flows(struct ovn_port *op, int mtu, struct hmap *lflows,
+struct shash *meter_groups, struct ds *match,
+struct ds *actions, enum ovn_stage stage)
+{
+if (op->lrp_networks.ipv4_addrs) {
+ds_clear(match);
+ds_put_format(match,
+  "inport == %s && ip4 && "REGBIT_PKT_LARGER
+  " && "REGBIT_EGRESS_LOOPBACK" == 0", op->json_key);
+
+ds_clear(actions);
+/* Set 

[ovs-dev] [PATCH v6 ovn 1/3] northd: introduce build_check_pkt_len_flows_for_lrp routine

2021-07-24 Thread Lorenzo Bianconi
Introduce build_check_pkt_len_flows_for_lrp routine to configure
check_pkt_larger logical flow for a given logical port. This is a
preliminary patch to enable check_pkt_larger support for gw router
use case.

Acked-by: Mark Michelson 
Signed-off-by: Lorenzo Bianconi 
---
 northd/ovn-northd.c | 203 +++-
 1 file changed, 107 insertions(+), 96 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index ebe12cace..18f6f90ce 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -10914,6 +10914,110 @@ build_arp_resolve_flows_for_lrouter_port(
 
 }
 
+static void
+build_check_pkt_len_flows_for_lrp(struct ovn_port *op,
+  struct hmap *lflows, struct hmap *ports,
+  struct shash *meter_groups, struct ds *match,
+  struct ds *actions)
+{
+int gw_mtu = 0;
+
+if (op->nbrp) {
+ gw_mtu = smap_get_int(>nbrp->options, "gateway_mtu", 0);
+}
+/* Add the flows only if gateway_mtu is configured. */
+if (gw_mtu <= 0) {
+return;
+}
+
+ds_clear(match);
+ds_put_format(match, "outport == %s", op->json_key);
+
+ds_clear(actions);
+ds_put_format(actions,
+  REGBIT_PKT_LARGER" = check_pkt_larger(%d);"
+  " next;", gw_mtu + VLAN_ETH_HEADER_LEN);
+ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_CHK_PKT_LEN, 50,
+ds_cstr(match), ds_cstr(actions),
+>nbrp->header_);
+
+for (size_t i = 0; i < op->od->nbr->n_ports; i++) {
+struct ovn_port *rp = ovn_port_find(ports,
+op->od->nbr->ports[i]->name);
+if (!rp || rp == op) {
+continue;
+}
+
+if (rp->lrp_networks.ipv4_addrs) {
+ds_clear(match);
+ds_put_format(match, "inport == %s && outport == %s"
+  " && ip4 && "REGBIT_PKT_LARGER,
+  rp->json_key, op->json_key);
+
+ds_clear(actions);
+/* Set icmp4.frag_mtu to gw_mtu */
+ds_put_format(actions,
+"icmp4_error {"
+REGBIT_EGRESS_LOOPBACK" = 1; "
+"eth.dst = %s; "
+"ip4.dst = ip4.src; "
+"ip4.src = %s; "
+"ip.ttl = 255; "
+"icmp4.type = 3; /* Destination Unreachable. */ "
+"icmp4.code = 4; /* Frag Needed and DF was Set. */ "
+"icmp4.frag_mtu = %d; "
+"next(pipeline=ingress, table=%d); };",
+rp->lrp_networks.ea_s,
+rp->lrp_networks.ipv4_addrs[0].addr_s,
+gw_mtu,
+ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
+ovn_lflow_add_with_hint__(lflows, op->od,
+  S_ROUTER_IN_LARGER_PKTS, 50,
+  ds_cstr(match), ds_cstr(actions),
+  NULL,
+  copp_meter_get(
+COPP_ICMP4_ERR,
+rp->od->nbr->copp,
+meter_groups),
+  >nbrp->header_);
+}
+
+if (rp->lrp_networks.ipv6_addrs) {
+ds_clear(match);
+ds_put_format(match, "inport == %s && outport == %s"
+  " && ip6 && "REGBIT_PKT_LARGER,
+  rp->json_key, op->json_key);
+
+ds_clear(actions);
+/* Set icmp6.frag_mtu to gw_mtu */
+ds_put_format(actions,
+"icmp6_error {"
+REGBIT_EGRESS_LOOPBACK" = 1; "
+"eth.dst = %s; "
+"ip6.dst = ip6.src; "
+"ip6.src = %s; "
+"ip.ttl = 255; "
+"icmp6.type = 2; /* Packet Too Big. */ "
+"icmp6.code = 0; "
+"icmp6.frag_mtu = %d; "
+"next(pipeline=ingress, table=%d); };",
+rp->lrp_networks.ea_s,
+rp->lrp_networks.ipv6_addrs[0].addr_s,
+gw_mtu,
+ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
+ovn_lflow_add_with_hint__(lflows, op->od,
+  S_ROUTER_IN_LARGER_PKTS, 50,
+  ds_cstr(match), ds_cstr(actions),
+  NULL,
+  copp_meter_get(
+COPP_ICMP6_ERR,
+rp->od->nbr->copp,
+meter_groups),
+  >nbrp->header_);
+}
+}
+}
+
 /* Local router ingress table CHK_PKT_LEN: Check packet length.
  *
  * Any IPv4 

[ovs-dev] [PATCH v6 ovn 0/3] Introduce check_pkt_larger for ingress traffic

2021-07-24 Thread Lorenzo Bianconi
In the current codebase, check_pkt_larger is applied just for traffic
leaving the ovn cluster. This series introduces the same capability
for traffic entering the network from a gateway router or distributed
gateway router port in order to send an ICMP error packet if the frame
size is greater than the configured MTU.

Changes since v5:
- rebase on top of ovn-master

Changes since v4:
- rebase on top of ovn-master

Changes since v3:
- add missing documentation
- add DDlog implementation

Changes since v2:
- squash gw router and distributed gw router tests
- fix typos

Changes since v1:
- drop router pipeline rearrangement
- refer to check_pkt_larger instead of check_pkt_len

Lorenzo Bianconi (3):
  northd: introduce build_check_pkt_len_flows_for_lrp routine
  northd: enable check_pkt_larger for gw router
  northd: add check_pkt_larger lflows for ingress traffic

 northd/ovn-northd.8.xml |  78 +---
 northd/ovn-northd.c | 259 
 northd/ovn_northd.dl| 227 +--
 tests/ovn-northd.at | 121 +++
 tests/ovn.at| 180 +++-
 5 files changed, 739 insertions(+), 126 deletions(-)

-- 
2.31.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3] ofproto-dpif-xlate: fix zone set from non-frozen-metadata fields

2021-07-24 Thread 贺鹏
Hi,
friendly ping for this patch.

Ilya Maximets  于2021年5月22日周六 上午3:50写道:
>
> On 5/21/21 5:00 AM, 贺鹏 wrote:
> > Hi, Ilya
> >
> >
> >
> > Ilya Maximets  于2021年5月19日周三 下午8:50写道:
> >>
> >> On 2/27/21 10:34 AM, Peng He wrote:
> >>> CT zone could be set from a field that is not included in frozen
> >>> metadata. Consider the example rules which are typically seen in
> >>> OpenStack security group rules:
> >>>
> >>> priority=100,in_port=1,tcp,ct_state=-trk,action=ct(zone=5,table=0)
> >>> priority=100,in_port=1,tcp,ct_state=+trk,action=ct(commit,zone=NXM_NX_CT_ZONE[]),2
> >>>
> >>> The zone is set from the first rule's ct action. These two rules will
> >>> generate two megaflows: the first one uses zone=5 to query the CT module,
> >>> the second one sets the zone-id from the first megaflow and commit to CT.
> >>>
> >>> The current implementation will generate a megaflow that does not use
> >>> ct_zone=5 as a match, but directly commit into the ct using zone=5, as 
> >>> zone is
> >>> set by an Imm not a field.
> >>>
> >>> Consider a situation that one changes the zone id (for example to 15)
> >>> in the first rule, however, still keep the second rule unchanged. During
> >>> this change, there is traffic hitting the two generated megaflows, the
> >>> revaldiator would revalidate all megaflows, however, the revalidator will
> >>> not change the second megaflow, because zone=5 is recorded in the
> >>> megaflow, so the xlate will still translate the commit action into zone=5,
> >>> and the new traffic will still commit to CT as zone=5, not zone=15,
> >>> resulting in taffic drops and other issues.
> >>>
> >>> Just like OVS set-field convention, if a field X is set by Y
> >>> (Y is a variable not an Imm), we should also mask Y as a match
> >>> in the generated megaflow. An exception is that if the zone-id is
> >>> set by the field that is included in the frozen state (i.e. regs) and this
> >>> upcall is a resume of a thawed xlate, the un-wildcarding can be skipped,
> >>> as the recirc_id is a hash of the values in these fields, and it will 
> >>> change
> >>> following the changes of these fields. When the recirc_id changes,
> >>> all megaflows with the old recirc id will be invalid later.
> >>>
> >>> Fixes: 07659514c3 ("Add support for connection tracking.")
> >>> Reported-by: Sai Su 
> >>> Signed-off-by: Peng He 
> >>> ---
> >>>  include/openvswitch/meta-flow.h |  1 +
> >>>  lib/meta-flow.c | 13 ++
> >>>  ofproto/ofproto-dpif-xlate.c| 12 +
> >>>  tests/system-traffic.at | 45 +
> >>>  4 files changed, 71 insertions(+)
> >>>
> >>> diff --git a/include/openvswitch/meta-flow.h 
> >>> b/include/openvswitch/meta-flow.h
> >>> index 95e52e358..045dce8f5 100644
> >>> --- a/include/openvswitch/meta-flow.h
> >>> +++ b/include/openvswitch/meta-flow.h
> >>> @@ -2305,6 +2305,7 @@ void mf_set_flow_value_masked(const struct mf_field 
> >>> *,
> >>>const union mf_value *mask,
> >>>struct flow *);
> >>>  bool mf_is_tun_metadata(const struct mf_field *);
> >>> +bool mf_is_frozen_metadata(const struct mf_field *);
> >>>  bool mf_is_pipeline_field(const struct mf_field *);
> >>>  bool mf_is_set(const struct mf_field *, const struct flow *);
> >>>  void mf_mask_field(const struct mf_field *, struct flow_wildcards *);
> >>> diff --git a/lib/meta-flow.c b/lib/meta-flow.c
> >>> index c808d205d..e03cd8d0c 100644
> >>> --- a/lib/meta-flow.c
> >>> +++ b/lib/meta-flow.c
> >>> @@ -1788,6 +1788,19 @@ mf_is_tun_metadata(const struct mf_field *mf)
> >>> mf->id < MFF_TUN_METADATA0 + TUN_METADATA_NUM_OPTS;
> >>>  }
> >>>
> >>> +bool
> >>> +mf_is_frozen_metadata(const struct mf_field *mf)
> >>> +{
> >>> +if (mf->id >= MFF_TUN_ID && mf->id <= MFF_IN_PORT_OXM) {
> >>> +return true;
> >>> +}
> >>> +
> >>> +if (mf->id >= MFF_REG0 && mf->id < MFF_ETH_SRC) {
> >>> +return true;
> >>> +}
> >>> +return false;
> >>> +}
> >>> +
> >>>  bool
> >>>  mf_is_pipeline_field(const struct mf_field *mf)
> >>>  {
> >>> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> >>> index 7108c8a30..14d00db1e 100644
> >>> --- a/ofproto/ofproto-dpif-xlate.c
> >>> +++ b/ofproto/ofproto-dpif-xlate.c
> >>> @@ -6195,6 +6195,18 @@ compose_conntrack_action(struct xlate_ctx *ctx, 
> >>> struct ofpact_conntrack *ofc,
> >>>
> >>>  if (ofc->zone_src.field) {
> >>>  zone = mf_get_subfield(>zone_src, >xin->flow);
> >>> +if (ctx->xin->frozen_state) {
> >>> +/* If the upcall is a resume of a recirculation, we only 
> >>> need to
> >>> + * unwildcard the fields that are not in the 
> >>> frozen_metadata, as
> >>> + * when the rules update, OVS will generate a new recirc_id,
> >>> + * which will invalidate the megaflow with old the recirc_id.
> >>> + */
> >>> +if