Introduce specific flows for E/W ICMPv{4,6} packets if tunnelled packets
do not fit path MTU. This patch enables PMTUD for East/West Geneve traffic.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2241711
Signed-off-by: Lorenzo Bianconi <[email protected]>
---
Changes since v2:
- the icmp error forwarding for n/s traffic
- add vxlan tests
- merge IPv6 test cases
Changes since v1:
- add fix for vxlan and stt tunnels
---
 NEWS                    |   1 +
 controller/physical.c   |  31 +++-
 northd/northd.c         |  72 +++++++++
 northd/ovn-northd.8.xml |  29 ++++
 tests/multinode.at      | 348 +++++++++++++++++++++++++++++++++++++++-
 tests/ovn-northd.at     |  21 +++
 6 files changed, 499 insertions(+), 3 deletions(-)

diff --git a/NEWS b/NEWS
index e10fb79dd..acb3b854f 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,7 @@ Post v23.09.0
     connection method and doesn't require additional probing.
     external_ids:ovn-openflow-probe-interval configuration option for
     ovn-controller no longer matters and is ignored.
+  - Enable PMTU discovery on geneve tunnels for E/W traffic.
 
 OVN v23.09.0 - 15 Sep 2023
 --------------------------
diff --git a/controller/physical.c b/controller/physical.c
index ba88e1d8b..78cde3e2a 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -2440,9 +2440,36 @@ physical_run(struct physical_ctx *p_ctx,
             OVS_NOT_REACHED();
         }
 
-        put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
-
+        struct ofpbuf *tunnel_ofpacts = ofpbuf_clone(&ofpacts);
+        put_resubmit(OFTABLE_LOCAL_OUTPUT, tunnel_ofpacts);
         ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, 0, &match,
+                        tunnel_ofpacts, hc_uuid);
+        ofpbuf_delete(tunnel_ofpacts);
+
+        /* Add specific flows for E/W ICMPv{4,6} packets if tunnelled packets
+         * do not fit path MTU.
+         */
+        put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts);
+
+        /* IPv4 */
+        match_init_catchall(&match);
+        match_set_in_port(&match, tun->ofport);
+        match_set_dl_type(&match, htons(ETH_TYPE_IP));
+        match_set_nw_proto(&match, IPPROTO_ICMP);
+        match_set_icmp_type(&match, 3);
+        match_set_icmp_code(&match, 4);
+
+        ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120, 0, &match,
+                        &ofpacts, hc_uuid);
+        /* IPv6 */
+        match_init_catchall(&match);
+        match_set_in_port(&match, tun->ofport);
+        match_set_dl_type(&match, htons(ETH_TYPE_IPV6));
+        match_set_nw_proto(&match, IPPROTO_ICMPV6);
+        match_set_icmp_type(&match, 2);
+        match_set_icmp_code(&match, 0);
+
+        ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120, 0, &match,
                         &ofpacts, hc_uuid);
     }
 
diff --git a/northd/northd.c b/northd/northd.c
index 617f292fe..a020f2097 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -12794,6 +12794,75 @@ build_lrouter_force_snat_flows(struct hmap *lflows, 
struct ovn_datapath *od,
     ds_destroy(&actions);
 }
 
+/* Adds router admission flows for ICMPv{4,6} 'packet too big' errors, i.e.
+ * traffic redirected by the kernel that enters the cluster from the geneve
+ * ports.  Gateway ports get an extra priority-120 redirect flow. */
+static void
+build_lrouter_icmp_packet_toobig_admin_flows(
+        struct ovn_port *op, struct hmap *lflows,
+        struct ds *match, struct ds *actions)
+{
+    ovs_assert(op->nbrp);
+
+    if (is_l3dgw_port(op)) {
+        ds_clear(match);
+        ds_put_format(match,
+                      "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
+                      " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
+                      "eth.dst == %s && !is_chassis_resident(%s)",
+                      op->nbrp->mac, op->cr_port->json_key);
+        ds_clear(actions);
+        ds_put_format(actions, "outport = inport; inport = %s; next;",
+                      op->json_key);
+        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 120,
+                      ds_cstr(match), ds_cstr(actions));
+    }
+
+    /* Default: other 'packet too big' errors move on to the next stage. */
+    ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 110,
+                  "(ip4 && icmp4.type == 3 && icmp4.code == 4) || "
+                  "(ip6 && icmp6.type == 2 && icmp6.code == 0)", "next; ");
+}
+
+static void
+build_lswitch_icmp_packet_toobig_admin_flows(
+        struct ovn_port *op, struct hmap *lflows,
+        struct ds *match, struct ds *actions)
+{
+    ovs_assert(op->nbsp);
+
+    if (lsp_is_router(op->nbsp)) {
+        return;
+    }
+
+    struct ovn_datapath *od = op->od;
+    for (int i = 0; i < od->n_router_ports; i++) {
+        struct ovn_port *peer = od->router_ports[i]->peer;
+        if (!peer) {
+            continue;
+        }
+        /* Output errors sent to the peer router MAC on its switch port. */
+        ds_clear(match);
+        char *rp_port =
+            is_l3dgw_port(peer) ? peer->cr_port->json_key : peer->json_key;
+        ds_put_format(match,
+                      "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
+                      " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
+                      "eth.dst == %s && !is_chassis_resident(%s)",
+                      peer->nbrp->mac, rp_port);
+        ds_clear(actions);
+        ds_put_format(actions, "outport = %s; inport = %s; output;",
+                      od->router_ports[i]->json_key, op->json_key);
+        ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 120,
+                      ds_cstr(match), ds_cstr(actions));
+    }
+
+    /* Default: other 'packet too big' errors move on to the next stage. */
+    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 110,
+                  "(ip4 && icmp4.type == 3 && icmp4.code == 4) || "
+                  "(ip6 && icmp6.type == 2 && icmp6.code == 0)", "next; ");
+}
+
 static void
 build_lrouter_force_snat_flows_op(struct ovn_port *op,
                                   struct hmap *lflows,
@@ -16161,6 +16230,7 @@ build_lswitch_and_lrouter_iterate_by_lsp(struct 
ovn_port *op,
     build_lswitch_dhcp_options_and_response(op, lflows, meter_groups);
     build_lswitch_external_port(op, lflows);
     build_lswitch_ip_unicast_lookup(op, lflows, actions, match);
+    build_lswitch_icmp_packet_toobig_admin_flows(op, lflows, match, actions);
 
     /* Build Logical Router Flows. */
     build_ip_routing_flows_for_router_type_lsp(op, lr_ports, lflows);
@@ -16197,6 +16267,8 @@ build_lswitch_and_lrouter_iterate_by_lrp(struct 
ovn_port *op,
                                 &lsi->match, &lsi->actions, lsi->meter_groups);
     build_lrouter_force_snat_flows_op(op, lsi->lflows, &lsi->match,
                                       &lsi->actions);
+    build_lrouter_icmp_packet_toobig_admin_flows(op, lsi->lflows, &lsi->match,
+                                                 &lsi->actions);
 }
 
 static void *
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index 97718821f..85576a845 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -372,6 +372,20 @@
 
     <h3>Ingress Table 1: Ingress Port Security - Apply</h3>
 
+    <p>
+      For each logical switch port <var>P</var> a priority-120 flow that
+      matches icmp{4,6} error 'packet too big' and <code>eth.dst ==
+      <var>D</var> &amp;&amp; !is_chassis_resident(<var>RP</var>)</code> where
+      <var>D</var> is the peer logical router port <var>RP</var> mac address,
+      stores <var>RP</var> peer port as outport, stores <var>P</var> as inport
+      and forwards the packet to the egress pipeline.
+    </p>
+
+    <p>
+      This table adds a priority-110 flow that matches icmp{4,6} error 'packet
+      too big' to forward the packet to the next stage in the pipeline.
+    </p>
+
     <p>
       This table drops the packets if the port security check failed
       in the previous stage i.e the register bit
@@ -2463,6 +2477,21 @@ output;
           (LBs, NAT).
         </p>
 
+        <p>
+          For each gateway port <var>GW</var> on a distributed logical router
+          a priority-120 flow that matches icmp{4,6} error 'packet too big' and
+          <code>eth.dst == <var>D</var> &amp;&amp; !is_chassis_resident(<var>
+          cr-GW</var>)</code> where <var>D</var> is the gateway port mac
+          address and <var>cr-GW</var> is the chassis resident port of
+          <var>GW</var>, swaps inport and outport and stores <var>GW</var>
+          as inport.
+        </p>
+
+        <p>
+          This table adds a priority-110 flow that matches icmp{4,6} error 'packet
+          too big' to forward the packet to the next stage in the pipeline.
+        </p>
+
         <p>
           For a distributed logical router or for gateway router where
           the port is configured with <code>options:gateway_mtu</code>
diff --git a/tests/multinode.at b/tests/multinode.at
index 2b199b4bc..772134b7d 100644
--- a/tests/multinode.at
+++ b/tests/multinode.at
@@ -42,7 +42,6 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 
0.3 -w 2 10.0.0.4 | F
 3 packets transmitted, 3 received, 0% packet loss, time 0ms
 ])
 
-
 # Create the second logical switch with one port
 check multinode_nbctl ls-add sw1
 check multinode_nbctl lsp-add sw1 sw1-port1
@@ -72,3 +71,350 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 
0.3 -w 2 20.0.0.3 | F
 ])
 
 AT_CLEANUP
+
+AT_SETUP([ovn multinode pmtu - distributed router])
+
+# Check that ovn-fake-multinode setup is up and running
+check_fake_multinode_setup
+
+# Delete the multinode NB and OVS resources before starting the test.
+cleanup_multinode_resources
+
+m_as ovn-chassis-1 ip link del sw0p1-p
+m_as ovn-chassis-2 ip link del sw0p2-p
+m_as ovn-chassis-2 ip link del sw1p1-p
+
+# Reset geneve tunnels
+for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
+do
+    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
+done
+
+OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
+OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
+OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
+
+# Test East-West switching
+check multinode_nbctl ls-add sw0
+check multinode_nbctl lsp-add sw0 sw0-port1
+check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 
1000::3"
+check multinode_nbctl lsp-add sw0 sw0-port2
+check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 
1000::4"
+
+m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 
10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
+m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 
10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
+
+m_wait_for_ports_up
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | 
FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Create the second logical switch with one port
+check multinode_nbctl ls-add sw1
+check multinode_nbctl lsp-add sw1 sw1-port1
+check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 
2000::3"
+
+# Create a logical router and attach both logical switches
+check multinode_nbctl lr-add lr0
+check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 
1000::a/64
+check multinode_nbctl lsp-add sw0 sw0-lr0
+check multinode_nbctl lsp-set-type sw0-lr0 router
+check multinode_nbctl lsp-set-addresses sw0-lr0 router
+check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
+
+check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 
2000::a/64
+check multinode_nbctl lsp-add sw1 sw1-lr0
+check multinode_nbctl lsp-set-type sw1-lr0 router
+check multinode_nbctl lsp-set-addresses sw1-lr0 router
+check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
+
+m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 
20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
+
+# create external connection for N/S traffic
+check multinode_nbctl ls-add public
+check multinode_nbctl lsp-add public ln-lublic
+check multinode_nbctl lsp-set-type ln-lublic localnet
+check multinode_nbctl lsp-set-addresses ln-lublic unknown
+check multinode_nbctl lsp-set-options ln-lublic network_name=public
+
+check multinode_nbctl lrp-add lr0 lr0-public 00:11:22:00:ff:01 172.20.0.100/24
+check multinode_nbctl lsp-add public public-lr0
+check multinode_nbctl lsp-set-type public-lr0 router
+check multinode_nbctl lsp-set-addresses public-lr0 router
+check multinode_nbctl lsp-set-options public-lr0 router-port=lr0-public
+check multinode_nbctl lrp-set-gateway-chassis lr0-public ovn-gw-1 10
+check multinode_nbctl lr-route-add lr0 0.0.0.0/0 172.20.0.1
+
+check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
+check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
+
+# create some ACLs
+check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
+check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
+
+m_as ovn-gw-1 ip netns add ovn-ext0
+m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
+m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
+m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext0 up
+m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.0.1/24 dev ext0
+
+m_as ovn-gw-1 ovs-vsctl add-port br-ex ext1 -- set interface ext1 type=internal
+m_as ovn-gw-1 ip link set ext1 netns ovn-ext0
+m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext1 up
+m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.1.1/24 dev ext1
+
+m_as ovn-gw-1 ip netns add ovn-ext2
+m_as ovn-gw-1 ovs-vsctl add-port br-ex ext2 -- set interface ext2 type=internal
+m_as ovn-gw-1 ip link set ext2 netns ovn-ext2
+m_as ovn-gw-1 ip netns exec ovn-ext2 ip link set ext2 up
+m_as ovn-gw-1 ip netns exec ovn-ext2 ip addr add 172.20.1.2/24 dev ext2
+m_as ovn-gw-1 ip netns exec ovn-ext2 ip route add default via 172.20.1.1 dev 
ext2
+
+m_as ovn-gw-1 ovs-vsctl set open . 
external-ids:ovn-bridge-mappings=public:br-ex
+m_as ovn-chassis-1 ovs-vsctl set open . 
external-ids:ovn-bridge-mappings=public:br-ex
+m_as ovn-chassis-2 ovs-vsctl set open . 
external-ids:ovn-bridge-mappings=public:br-ex
+
+m_wait_for_ports_up sw1-port1
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | 
FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Change pmtu for the geneve tunnel
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 
2>&1 |grep -q "message too long, mtu=1142"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 
dev sw0p1])
+
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1400 dev eth1
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping6 -c 5 -s 1450 -M do 2000::3 
2>&1 |grep -q "message too long, mtu: 1342"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 
| FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1000])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 10 -s 1300 -M do 172.20.1.2 
2>&1 |grep -q "mtu = 1000"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 
| FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Create vxlan tunnels
+for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
+do
+    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=vxlan
+done
+
+OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q vxlan_sys])
+OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q vxlan_sys])
+OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q vxlan_sys])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 
dev sw0p1])
+
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | 
FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 
2>&1 |grep -q "message too long, mtu=1150"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 
dev sw0p1])
+
+M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 
| FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 
172.20.1.2 2>&1 |grep -q "mtu = 1150"])
+
+AT_CLEANUP
+
+AT_SETUP([ovn multinode pmtu - gw_router_port])
+
+# Check that ovn-fake-multinode setup is up and running
+check_fake_multinode_setup
+
+# Delete the multinode NB and OVS resources before starting the test.
+cleanup_multinode_resources
+
+m_as ovn-chassis-1 ip link del sw0p1-p
+m_as ovn-chassis-2 ip link del sw0p2-p
+m_as ovn-chassis-2 ip link del sw1p1-p
+
+# Reset geneve tunnels
+for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
+do
+    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
+done
+
+OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
+OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
+OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
+
+# Test East-West switching
+check multinode_nbctl ls-add sw0
+check multinode_nbctl lsp-add sw0 sw0-port1
+check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 
1000::3"
+check multinode_nbctl lsp-add sw0 sw0-port2
+check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 
1000::4"
+
+m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 
10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
+m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 
10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
+
+m_wait_for_ports_up
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | 
FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Create the second logical switch with one port
+check multinode_nbctl ls-add sw1
+check multinode_nbctl lsp-add sw1 sw1-port1
+check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 
2000::3"
+
+# Create a logical router and attach both logical switches
+check multinode_nbctl lr-add lr0
+check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 
1000::a/64
+check multinode_nbctl lsp-add sw0 sw0-lr0
+check multinode_nbctl lsp-set-type sw0-lr0 router
+check multinode_nbctl lsp-set-addresses sw0-lr0 router
+check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
+
+check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 
2000::a/64
+check multinode_nbctl lsp-add sw1 sw1-lr0
+check multinode_nbctl lsp-set-type sw1-lr0 router
+check multinode_nbctl lsp-set-addresses sw1-lr0 router
+check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
+
+m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 
20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
+
+# create external connection for N/S traffic
+check multinode_nbctl ls-add public
+check multinode_nbctl lsp-add public ln-lublic
+check multinode_nbctl lsp-set-type ln-lublic localnet
+check multinode_nbctl lsp-set-addresses ln-lublic unknown
+check multinode_nbctl lsp-set-options ln-lublic network_name=public
+
+check multinode_nbctl lrp-add lr0 lr0-public 00:11:22:00:ff:01 172.20.0.100/24
+check multinode_nbctl lsp-add public public-lr0
+check multinode_nbctl lsp-set-type public-lr0 router
+check multinode_nbctl lsp-set-addresses public-lr0 router
+check multinode_nbctl lsp-set-options public-lr0 router-port=lr0-public
+check multinode_nbctl lrp-set-gateway-chassis lr0-public ovn-gw-1 10
+check multinode_nbctl lr-route-add lr0 0.0.0.0/0 172.20.0.1
+
+check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
+check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
+
+check multinode_nbctl lrp-set-gateway-chassis lr0-sw0 ovn-chassis-1 10
+check multinode_nbctl lrp-set-gateway-chassis lr0-sw1 ovn-chassis-2 10
+
+# create some ACLs
+check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
+check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
+
+m_as ovn-gw-1 ip netns add ovn-ext0
+m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
+m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
+m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext0 up
+m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.0.1/24 dev ext0
+
+m_as ovn-gw-1 ovs-vsctl add-port br-ex ext1 -- set interface ext1 type=internal
+m_as ovn-gw-1 ip link set ext1 netns ovn-ext0
+m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext1 up
+m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.1.1/24 dev ext1
+
+m_as ovn-gw-1 ip netns add ovn-ext2
+m_as ovn-gw-1 ovs-vsctl add-port br-ex ext2 -- set interface ext2 type=internal
+m_as ovn-gw-1 ip link set ext2 netns ovn-ext2
+m_as ovn-gw-1 ip netns exec ovn-ext2 ip link set ext2 up
+m_as ovn-gw-1 ip netns exec ovn-ext2 ip addr add 172.20.1.2/24 dev ext2
+m_as ovn-gw-1 ip netns exec ovn-ext2 ip route add default via 172.20.1.1 dev 
ext2
+
+m_as ovn-gw-1 ovs-vsctl set open . 
external-ids:ovn-bridge-mappings=public:br-ex
+m_as ovn-chassis-1 ovs-vsctl set open . 
external-ids:ovn-bridge-mappings=public:br-ex
+m_as ovn-chassis-2 ovs-vsctl set open . 
external-ids:ovn-bridge-mappings=public:br-ex
+
+m_wait_for_ports_up sw1-port1
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | 
FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 
dev sw0p1])
+
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 
2>&1 |grep -q "message too long, mtu=1142"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 
dev sw0p1])
+
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1400 dev eth1
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping6 -c 5 -s 1450 -M do 2000::3 
2>&1 |grep -q "message too long, mtu: 1342"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 
dev sw0p1])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 
| FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 
172.20.1.2 2>&1 |grep -q "mtu = 1100"])
+
+# Create vxlan tunnels
+for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
+do
+    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=vxlan
+done
+
+OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q vxlan_sys])
+OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q vxlan_sys])
+OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q vxlan_sys])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 
dev sw0p1])
+
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | 
FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 
2>&1 |grep -q "message too long, mtu=1150"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 
dev sw0p1])
+
+M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 
| FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 
172.20.1.2 2>&1 |grep -q "mtu = 1150"])
+
+AT_CLEANUP
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index a267daca2..223e53991 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -6492,6 +6492,9 @@ AT_CAPTURE_FILE([lrflows])
 
 # Check the flows in lr_in_admission stage
 AT_CHECK([grep lr_in_admission lrflows | grep cr-DR | sort], [0], [dnl
+  table=0 (lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && 
eth.dst == 02:ac:10:01:00:01 && !is_chassis_resident("cr-DR-S1")), 
action=(outport = inport; inport = "DR-S1"; next;)
+  table=0 (lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && 
eth.dst == 03:ac:10:01:00:01 && !is_chassis_resident("cr-DR-S2")), 
action=(outport = inport; inport = "DR-S2"; next;)
+  table=0 (lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && 
eth.dst == 04:ac:10:01:00:01 && !is_chassis_resident("cr-DR-S3")), 
action=(outport = inport; inport = "DR-S3"; next;)
   table=0 (lr_in_admission    ), priority=50   , match=(eth.dst == 
02:ac:10:01:00:01 && inport == "DR-S1" && is_chassis_resident("cr-DR-S1")), 
action=(xreg0[[0..47]] = 02:ac:10:01:00:01; next;)
   table=0 (lr_in_admission    ), priority=50   , match=(eth.dst == 
03:ac:10:01:00:01 && inport == "DR-S2" && is_chassis_resident("cr-DR-S2")), 
action=(xreg0[[0..47]] = 03:ac:10:01:00:01; next;)
   table=0 (lr_in_admission    ), priority=50   , match=(eth.dst == 
04:ac:10:01:00:01 && inport == "DR-S3" && is_chassis_resident("cr-DR-S3")), 
action=(xreg0[[0..47]] = 04:ac:10:01:00:01; next;)
@@ -6551,6 +6554,7 @@ AT_CAPTURE_FILE([lrflows])
 
 # Check the flows in lr_in_admission stage
 AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 
's/table=../table=??/' | sort], [0], [dnl
+  table=??(lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && 
eth.dst == 00:00:00:00:00:01 && !is_chassis_resident("cr-lrp1")), 
action=(outport = inport; inport = "lrp1"; next;)
   table=??(lr_in_admission    ), priority=50   , match=(eth.dst == 
00:00:00:00:00:01 && inport == "lrp1" && is_chassis_resident("cr-lrp1")), 
action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
   table=??(lr_in_admission    ), priority=50   , match=(eth.mcast && inport == 
"lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
 ])
@@ -6572,6 +6576,7 @@ AT_CAPTURE_FILE([lrflows])
 
 # Check the flows in lr_in_admission stage
 AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 
's/table=../table=??/' | sort], [0], [dnl
+  table=??(lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && 
eth.dst == 00:00:00:00:00:01 && !is_chassis_resident("cr-lrp1")), 
action=(outport = inport; inport = "lrp1"; next;)
   table=??(lr_in_admission    ), priority=50   , match=(eth.dst == 
00:00:00:00:00:01 && inport == "lrp1"), action=(xreg0[[0..47]] = 
00:00:00:00:00:01; next;)
   table=??(lr_in_admission    ), priority=50   , match=(eth.mcast && inport == 
"lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
 ])
@@ -6590,6 +6595,7 @@ AT_CAPTURE_FILE([lrflows])
 
 # Check the flows in lr_in_admission stage
 AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 
's/table=../table=??/' | sort], [0], [dnl
+  table=??(lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && 
eth.dst == 00:00:00:00:00:01 && !is_chassis_resident("cr-lrp1")), 
action=(outport = inport; inport = "lrp1"; next;)
   table=??(lr_in_admission    ), priority=50   , match=(eth.dst == 
00:00:00:00:00:01 && inport == "lrp1" && is_chassis_resident("cr-lrp1")), 
action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
   table=??(lr_in_admission    ), priority=50   , match=(eth.mcast && inport == 
"lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
 ])
@@ -8343,6 +8349,9 @@ AT_CHECK([cat sw0flows | grep -e port_sec -e 
ls_in_l2_lkup -e ls_in_l2_unknown |
 sort | sed 's/table=../table=??/' ], [0], [dnl
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), 
action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), 
action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), 
action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), 
action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), 
action=(next; )
   table=??(ls_in_check_port_sec), priority=50   , match=(1), 
action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
   table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), 
action=(drop;)
@@ -8369,6 +8378,9 @@ AT_CHECK([cat sw0flows | grep -e port_sec -e 
ls_in_l2_lkup -e ls_in_l2_unknown |
 sort | sed 's/table=../table=??/' ], [0], [dnl
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), 
action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), 
action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), 
action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), 
action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), 
action=(next; )
   table=??(ls_in_check_port_sec), priority=50   , match=(1), 
action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
   table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), 
action=(drop;)
@@ -8396,6 +8408,9 @@ sort | sed 's/table=../table=??/' ], [0], [dnl
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), 
action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(inport == "sw0p1"), 
action=(reg0[[15]] = 1; next;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), 
action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), 
action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), 
action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), 
action=(next; )
   table=??(ls_in_check_port_sec), priority=50   , match=(1), 
action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
   table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), 
action=(drop;)
@@ -8422,6 +8437,9 @@ sort | sed 's/table=../table=??/' ], [0], [dnl
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), 
action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(inport == "sw0p1"), 
action=(reg0[[15]] = 1; next;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), 
action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), 
action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), 
action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), 
action=(next; )
   table=??(ls_in_check_port_sec), priority=50   , match=(1), 
action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_check_port_sec), priority=70   , match=(inport == "sw0p2"), 
action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
@@ -8451,6 +8469,9 @@ AT_CHECK([cat sw0flows | grep -e port_sec -e 
ls_in_l2_lkup -e ls_in_l2_unknown |
 sort | sed 's/table=../table=??/' ], [0], [dnl
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), 
action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), 
action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), 
action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), 
action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 
3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), 
action=(next; )
   table=??(ls_in_check_port_sec), priority=50   , match=(1), 
action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_check_port_sec), priority=70   , match=(inport == 
"localnetport"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_check_port_sec), priority=70   , match=(inport == "sw0p1"), 
action=(reg0[[14]] = 1; next(pipeline=ingress, table=17);)
-- 
2.43.0


_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to