Add support in the userspace datapath for PATH MTU on tunnel interfaces.

This feature allows users to configure an MTU on tunnel ports. If set,
when the userspace datapath attempts to encapsulate a packet that
exceeds the tunnel's MTU, OVS will generate and send an ICMP
Fragmentation Needed or ICMPv6 Packet Too Big message back to the source host.

If an MTU is not set on the tunnel interface, there is no change in
behaviour.

Reported-at: https://issues.redhat.com/browse/FDP-256
Signed-off-by: Mike Pattrick <[email protected]>
---
v2: Correct counter in unit test
v3: Added more checks before sending icmp packet
---
 NEWS                          |   2 +
 lib/dpif-netdev.c             | 173 +++++++++++++++++++++++++++++++++-
 lib/netdev-vport-private.h    |   1 +
 lib/netdev-vport.c            |  32 ++++++-
 lib/packets.c                 |  96 +++++++++++++++++++
 lib/packets.h                 |  68 ++++++++++++-
 ofproto/ofproto-dpif.c        |  11 +--
 tests/tunnel-push-pop-ipv6.at |  69 ++++++++++++++
 tests/tunnel-push-pop.at      |  66 +++++++++++++
 9 files changed, 506 insertions(+), 12 deletions(-)

diff --git a/NEWS b/NEWS
index f9a74df1a..679c02694 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,8 @@ Post-v3.6.0
    - Userspace datapath:
      * Conntrack now supports the FTP commands EPSV and EPRT with IPv4
        connections, instead of limiting these commands to IPv6 only.
+     * MTU can now be set on tunnel interfaces with the mtu_request
+       parameter.
    - DPDK:
      * OVS validated with DPDK 24.11.3.
    - OVSDB-IDL:
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 224ce7086..ffaaf7b57 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -26,6 +26,7 @@
 #include <net/if.h>
 #include <sys/types.h>
 #include <netinet/in.h>
+#include <netinet/ip.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
@@ -65,12 +66,14 @@
 #include "odp-execute.h"
 #include "odp-util.h"
 #include "openvswitch/dynamic-string.h"
+#include "openvswitch/flow.h"
 #include "openvswitch/list.h"
 #include "openvswitch/match.h"
 #include "openvswitch/ofp-parse.h"
 #include "openvswitch/ofp-print.h"
 #include "openvswitch/ofpbuf.h"
 #include "openvswitch/shash.h"
+#include "openvswitch/types.h"
 #include "openvswitch/vlog.h"
 #include "ovs-numa.h"
 #include "ovs-rcu.h"
@@ -114,6 +117,7 @@ COVERAGE_DEFINE(datapath_drop_upcall_error);
 COVERAGE_DEFINE(datapath_drop_lock_error);
 COVERAGE_DEFINE(datapath_drop_userspace_action_error);
 COVERAGE_DEFINE(datapath_drop_tunnel_push_error);
+COVERAGE_DEFINE(datapath_drop_tunnel_mtu_drop);
 COVERAGE_DEFINE(datapath_drop_tunnel_pop_error);
 COVERAGE_DEFINE(datapath_drop_recirc_error);
 COVERAGE_DEFINE(datapath_drop_invalid_port);
@@ -9033,13 +9037,138 @@ pmd_send_port_cache_lookup(const struct 
dp_netdev_pmd_thread *pmd,
     return tx_port_lookup(&pmd->send_port_cache, port_no);
 }
 
+/* Return NULL on no ICMP reply needed. */
+static struct dp_packet *
+netdev_generate_frag_needed(struct dp_packet *packet, int send_size, int mtu)
+{
+    const struct eth_header *eth;
+    const void *l3;
+    size_t l3_len;
+    bool is_ipv6;
+
+    if (send_size < mtu) {
+        return NULL;
+    }
+
+    eth = dp_packet_eth(packet);
+    if (!eth) {
+        return NULL;
+    }
+
+    if (eth->eth_type == htons(ETH_TYPE_IP)) {
+        is_ipv6 = false;
+    } else if (eth->eth_type == htons(ETH_TYPE_IPV6)) {
+        is_ipv6 = true;
+    } else {
+        return NULL;
+    }
+
+    l3 = dp_packet_l3(packet);
+    l3_len = dp_packet_l3_size(packet);
+
+    if (is_ipv6) {
+        const struct ovs_16aligned_ip6_hdr *ip6;
+        struct in6_addr ip6_src;
+        struct in6_addr ip6_dst;
+        size_t max_payload;
+        const void *l4_buf;
+        uint8_t nw_proto;
+        uint8_t nw_frag;
+        size_t l4_len;
+
+        ip6 = (const struct ovs_16aligned_ip6_hdr *) l3;
+
+        if (mtu < 1280) {
+            return NULL;
+        }
+
+        if (ipv6_addr_is_multicast(&ip6->ip6_src) ||
+            ipv6_addr_is_any(&ip6->ip6_src) ||
+            ipv6_addr_is_loopback(&ip6->ip6_src)) {
+            return NULL;
+        }
+
+        nw_proto = ip6->ip6_nxt;
+        l4_buf = ip6 + 1;
+        l4_len = l3_len - sizeof *ip6;
+        if (!parse_ipv6_ext_hdrs(&l4_buf, &l4_len, &nw_proto, &nw_frag,
+                                 NULL, NULL)) {
+            return NULL;
+        }
+
+        if (nw_frag == FLOW_NW_FRAG_LATER) {
+            return NULL;
+        }
+
+        if (nw_proto == IPPROTO_ICMPV6) {
+            const struct icmp6_header *icmp = l4_buf;
+            if (icmp && packet_icmpv6_is_err(icmp->icmp6_type)) {
+                return NULL;
+            }
+        }
+
+        max_payload = 1280 - ETH_HEADER_LEN - IPV6_HEADER_LEN -
+                      ICMP6_DATA_HEADER_LEN;
+        l3_len = l3_len < max_payload ? l3_len : max_payload;
+
+        memcpy(&ip6_src, &ip6->ip6_dst, sizeof(ip6_dst));
+        memcpy(&ip6_dst, &ip6->ip6_src, sizeof(ip6_dst));
+        return compose_ipv6_ptb(eth->eth_dst, eth->eth_src,
+                                &ip6_dst, &ip6_src,
+                                htonl(mtu), l3, l3_len);
+    } else {
+        const struct ip_header *ip;
+        size_t icmp_payload_len;
+        size_t available;
+
+        ip = (const struct ip_header *) l3;
+
+        if (mtu < 576) {
+            return NULL;
+        }
+
+        if (!(ip->ip_frag_off & htons(IP_DF))) {
+            return NULL;
+        }
+
+        if (ip_is_multicast(get_16aligned_be32(&ip->ip_src)) ||
+            ip_is_broadcast(get_16aligned_be32(&ip->ip_src)) ||
+            ip_is_loopback(get_16aligned_be32(&ip->ip_src))) {
+            return NULL;
+        }
+
+        if (ip->ip_proto == IPPROTO_ICMP) {
+            const struct icmp_header *icmp = dp_packet_l4(packet);
+            if (icmp && packet_icmp_is_err(icmp->icmp_type)) {
+                return NULL;
+            }
+        }
+        icmp_payload_len = IP_IHL(ip->ip_ihl_ver) * 4 + ICMP_ERROR_DATA_L4_LEN;
+
+        available = l3_len;
+        if (icmp_payload_len > available) {
+            icmp_payload_len = available;
+        }
+
+        return compose_ipv4_fn(eth->eth_dst, eth->eth_src,
+                               get_16aligned_be32(&ip->ip_dst),
+                               get_16aligned_be32(&ip->ip_src),
+                               htons(mtu), l3, icmp_payload_len);
+    }
+}
+
 static int
-push_tnl_action(const struct dp_netdev_pmd_thread *pmd,
+push_tnl_action(struct dp_netdev_pmd_thread *pmd,
                 const struct nlattr *attr,
                 struct dp_packet_batch *batch)
 {
-    struct tx_port *tun_port;
+    size_t i, size = dp_packet_batch_size(batch);
     const struct ovs_action_push_tnl *data;
+    uint32_t *depth = recirc_depth_get();
+    struct dp_packet *packet;
+    struct tx_port *tun_port;
+    struct netdev *netdev;
+    int mtu;
     int err;
 
     data = nl_attr_get(attr);
@@ -9049,7 +9178,45 @@ push_tnl_action(const struct dp_netdev_pmd_thread *pmd,
         err = -EINVAL;
         goto error;
     }
-    err = netdev_push_header(tun_port->port->netdev, batch, data);
+
+    netdev = tun_port->port->netdev;
+    if (netdev->mtu_user_config &&
+        netdev_get_mtu(netdev, &mtu) == 0) {
+        struct dp_packet_batch icmp_batch;
+
+        dp_packet_batch_init(&icmp_batch);
+        DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) {
+            int len = dp_packet_get_send_len(packet) + data->header_len;
+
+            struct dp_packet *icmp;
+
+            icmp = netdev_generate_frag_needed(packet, len, mtu);
+            if (!icmp) {
+                dp_packet_batch_refill(batch, packet, i);
+                continue;
+            }
+
+            dp_packet_delete(packet);
+            COVERAGE_INC(datapath_drop_tunnel_mtu_drop);
+
+            pkt_metadata_init(&icmp->md, data->tnl_port);
+
+            dp_packet_batch_add(&icmp_batch, icmp);
+        }
+
+        if (*depth >= MAX_RECIRC_DEPTH) {
+            COVERAGE_ADD(datapath_drop_recirc_error,
+                         dp_packet_batch_size(&icmp_batch));
+            dp_packet_delete_batch(&icmp_batch, true);
+        }
+
+        if (dp_packet_batch_size(&icmp_batch) > 0) {
+            (*depth)++;
+            dp_netdev_recirculate(pmd, &icmp_batch);
+            (*depth)--;
+        }
+    }
+    err = netdev_push_header(netdev, batch, data);
     if (!err) {
         return 0;
     }
diff --git a/lib/netdev-vport-private.h b/lib/netdev-vport-private.h
index 586231057..82ccbd8cc 100644
--- a/lib/netdev-vport-private.h
+++ b/lib/netdev-vport-private.h
@@ -42,6 +42,7 @@ struct netdev_vport {
     /* Tunnels. */
     char egress_iface[IFNAMSIZ];
     bool carrier_status;
+    int mtu;
 
     /* Patch Ports. */
     char *peer;
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 033f9a6fd..9c36eb3c1 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -101,6 +101,33 @@ get_netdev_tunnel_config(const struct netdev *netdev)
     return vport_tunnel_config(netdev_vport_cast(netdev));
 }
 
+/* Stores the MTU recorded for tunnel 'netdev' in '*mtup'.  The value is read
+ * while holding the vport's mutex.  Always returns 0. */
+static int
+get_tunnel_mtu(const struct netdev *netdev, int *mtup)
+{
+    struct netdev_vport *vport = netdev_vport_cast(netdev);
+    int current;
+
+    ovs_mutex_lock(&vport->mutex);
+    current = vport->mtu;
+    ovs_mutex_unlock(&vport->mutex);
+
+    *mtup = current;
+    return 0;
+}
+
+/* Records 'mtu' as the MTU of tunnel 'netdev'.  The netdev's change sequence
+ * is bumped only when the stored value actually changes, so re-applying the
+ * same MTU does not signal a spurious change.  Always returns 0. */
+static int
+set_tunnel_mtu(struct netdev *netdev, int mtu)
+{
+    struct netdev_vport *dev = netdev_vport_cast(netdev);
+    bool changed;
+
+    ovs_mutex_lock(&dev->mutex);
+    changed = dev->mtu != mtu;
+    if (changed) {
+        dev->mtu = mtu;
+    }
+    ovs_mutex_unlock(&dev->mutex);
+
+    /* Notify outside of the critical section, and only on a real change. */
+    if (changed) {
+        netdev_change_seq_changed(netdev);
+    }
+
+    return 0;
+}
+
 bool
 netdev_vport_is_patch(const struct netdev *netdev)
 {
@@ -423,7 +450,8 @@ parse_tunnel_ip(const char *value, bool accept_mcast, bool 
*flow,
         if (lookup_ipv6(value, ipv6)) {
             return ENOENT;
         }
-        if (!accept_mcast && ipv6_addr_is_multicast(ipv6)) {
+        if (!accept_mcast &&
+            ipv6_addr_is_multicast((union ovs_16aligned_in6_addr *) ipv6)) {
             return EINVAL;
         }
         *protocol = ETH_TYPE_IPV6;
@@ -1265,6 +1293,8 @@ netdev_vport_get_ifindex(const struct netdev *netdev_)
     .get_config = get_tunnel_config,                \
     .set_config = set_tunnel_config,                \
     .get_tunnel_config = get_netdev_tunnel_config,  \
+    .get_mtu = get_tunnel_mtu,                      \
+    .set_mtu = set_tunnel_mtu,                      \
     .get_status = tunnel_get_status
 
 void
diff --git a/lib/packets.c b/lib/packets.c
index 0c1f72e48..0d67dbbec 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -1783,6 +1783,102 @@ compose_ipv6(struct dp_packet *packet, uint8_t proto,
     return data;
 }
 
+/* Composes an ICMPv4 Destination Unreachable message with code 4
+ * (Fragmentation Needed and DF set).
+ *
+ * The frame is sent from 'eth_src'/'ip_src' to 'eth_dst'/'ip_dst'.  'mtu' is
+ * the next-hop MTU to advertise, already in network byte order.  'body', if
+ * nonnull, points to 'body_len' bytes of the offending packet (its IP header
+ * and leading payload bytes) to quote after the ICMP header.
+ *
+ * Returns a newly allocated packet that the caller owns, with its L3 and L4
+ * offsets set, or NULL if the packet could not be allocated. */
+struct dp_packet *
+compose_ipv4_fn(const struct eth_addr eth_src,
+                const struct eth_addr eth_dst,
+                const ovs_be32 ip_src,
+                const ovs_be32 ip_dst,
+                ovs_be16 mtu, const void *body, size_t body_len)
+{
+    size_t icmp_len = ICMP_HEADER_LEN + body_len;
+    struct icmp_header *icmp;
+    struct ip_header *ip;
+    struct dp_packet *b;
+    size_t l3_ofs;
+
+    b = dp_packet_new(ETH_HEADER_LEN + IP_HEADER_LEN + icmp_len);
+    if (!b) {
+        return NULL;
+    }
+
+    ip = (struct ip_header *) eth_compose(b, eth_dst, eth_src, ETH_TYPE_IP,
+                                          IP_HEADER_LEN);
+
+    ip->ip_ihl_ver = IP_IHL_VER(5, IP_VERSION);
+    ip->ip_tos = 0xc0;
+    ip->ip_tot_len = htons(IP_HEADER_LEN + icmp_len);
+    ip->ip_id = 0;
+    ip->ip_frag_off = 0;
+    ip->ip_ttl = 64;
+    ip->ip_proto = IPPROTO_ICMP;
+    ip->ip_csum = 0;
+
+    put_16aligned_be32(&ip->ip_src, ip_src);
+    put_16aligned_be32(&ip->ip_dst, ip_dst);
+    ip->ip_csum = csum(ip, IP_HEADER_LEN);
+
+    /* Appending data may reallocate the packet buffer, which would leave
+     * 'ip' dangling.  Remember the header's offset instead of its address
+     * and do not dereference 'ip' past this point. */
+    l3_ofs = (const char *) ip - (const char *) dp_packet_data(b);
+
+    icmp = (struct icmp_header *) dp_packet_put_zeros(b, icmp_len);
+
+    icmp->icmp_type = ICMP4_DST_UNREACH;
+    icmp->icmp_code = 4;
+    icmp->icmp_csum = 0;
+
+    icmp->icmp_fields.frag.mtu = mtu;
+
+    if (body && body_len) {
+        memcpy(icmp + 1, body, body_len);
+    }
+
+    /* The ICMP checksum covers the ICMP header and the quoted data. */
+    icmp->icmp_csum = csum(icmp, icmp_len);
+
+    dp_packet_set_l3(b, (char *) dp_packet_data(b) + l3_ofs);
+    dp_packet_set_l4(b, icmp);
+
+    return b;
+}
+
+/* Composes an ICMPv6 Packet Too Big message.
+ *
+ * The frame is sent from 'eth_src'/'ipv6_src' to 'eth_dst'/'ipv6_dst' with a
+ * hop limit of 255.  'mtu' is the MTU to advertise, already in network byte
+ * order.  'body', if nonnull, points to 'body_len' bytes of the offending
+ * packet to quote after the ICMPv6 header.
+ *
+ * Returns a newly allocated packet that the caller owns, or NULL if the
+ * packet could not be allocated. */
+struct dp_packet *
+compose_ipv6_ptb(const struct eth_addr eth_src,
+                 const struct eth_addr eth_dst,
+                 const struct in6_addr *ipv6_src,
+                 const struct in6_addr *ipv6_dst,
+                 ovs_be32 mtu, const void *body, size_t body_len)
+{
+    size_t icmp6_len = ICMP6_DATA_HEADER_LEN + body_len;
+    struct icmp6_data_header *icmp6;
+    struct dp_packet *b;
+    uint32_t icmp_csum;
+
+    b = dp_packet_new(ETH_HEADER_LEN + IPV6_HEADER_LEN + icmp6_len);
+    if (!b) {
+        return NULL;
+    }
+
+    eth_compose(b, eth_dst, eth_src, ETH_TYPE_IPV6, IPV6_HEADER_LEN);
+
+    icmp6 = compose_ipv6(b, IPPROTO_ICMPV6, ipv6_src, ipv6_dst,
+                         0, 0, 255, icmp6_len);
+
+    icmp6->icmp6_base.icmp6_type = ICMP6_PACKET_TOO_BIG;
+    icmp6->icmp6_base.icmp6_code = 0;
+    icmp6->icmp6_base.icmp6_cksum = 0;
+    put_16aligned_be32(&icmp6->icmp6_data.be32[0], mtu);
+    if (body && body_len) {
+        memcpy(icmp6 + 1, body, body_len);
+    }
+
+    /* The ICMPv6 checksum covers the IPv6 pseudo-header followed by the
+     * whole ICMPv6 message, computed with the checksum field zeroed. */
+    icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
+    icmp6->icmp6_base.icmp6_cksum = csum_finish(
+        csum_continue(icmp_csum, icmp6, icmp6_len));
+
+    dp_packet_set_l4(b, icmp6);
+    return b;
+}
+
 /* Compose an IPv6 Neighbor Discovery Neighbor Solicitation message. */
 void
 compose_nd_ns(struct dp_packet *b, const struct eth_addr eth_src,
diff --git a/lib/packets.h b/lib/packets.h
index ed46778fe..c41e0f79b 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <netinet/icmp6.h>
 #ifndef PACKETS_H
 #define PACKETS_H 1
 
@@ -671,6 +672,16 @@ ip_is_multicast(ovs_be32 ip)
     return (ip & htonl(0xf0000000)) == htonl(0xe0000000);
 }
+/* Returns true if 'ip' is the IPv4 limited broadcast address
+ * 255.255.255.255.  Subnet-directed broadcast addresses are not detected. */
 static inline bool
+ip_is_broadcast(ovs_be32 ip)
+{
+    return ip == htonl(0xffffffff);
+}
+/* Returns true if 'ip' lies in the IPv4 loopback block 127.0.0.0/8. */
+static inline bool
+ip_is_loopback(ovs_be32 ip)
+{
+    return (ip & htonl(0xff000000)) == htonl(0x7f000000);
+}
+static inline bool
 ip_is_local_multicast(ovs_be32 ip)
 {
     return (ip & htonl(0xffffff00)) == htonl(0xe0000000);
@@ -789,6 +800,20 @@ BUILD_ASSERT_DECL(ICMP_HEADER_LEN == sizeof(struct 
icmp_header));
 /* ICMPV4 */
 #define ICMP_ERROR_DATA_L4_LEN 8
 
+static inline bool
+packet_icmp_is_err(uint8_t type) {
+    switch (type) {
+    case ICMP4_DST_UNREACH:
+    case ICMP4_SOURCEQUENCH:
+    case ICMP4_REDIRECT:
+    case ICMP4_TIME_EXCEEDED:
+    case ICMP4_PARAM_PROB:
+        return true;
+    default:
+        return false;
+    }
+}
+
 #define IGMP_HEADER_LEN 8
 struct igmp_header {
     uint8_t igmp_type;
@@ -1035,6 +1060,19 @@ struct icmp6_data_header {
 };
 BUILD_ASSERT_DECL(ICMP6_DATA_HEADER_LEN == sizeof(struct icmp6_data_header));
 
+static inline bool
+packet_icmpv6_is_err(uint8_t type) {
+    switch (type) {
+    case ICMP6_DST_UNREACH:
+    case ICMP6_PACKET_TOO_BIG:
+    case ICMP6_TIME_EXCEEDED:
+    case ICMP6_PARAM_PROB:
+        return true;
+    default:
+        return false;
+    }
+}
+
 uint32_t packet_csum_pseudoheader6(const struct ovs_16aligned_ip6_hdr *);
 ovs_be16 packet_csum_upperlayer6(const struct ovs_16aligned_ip6_hdr *,
                                  const void *, uint8_t, uint16_t);
@@ -1206,8 +1244,23 @@ static inline bool ipv6_addr_is_set(const struct 
in6_addr *addr) {
     return !ipv6_addr_equals(addr, &in6addr_any);
 }
 
-static inline bool ipv6_addr_is_multicast(const struct in6_addr *ip) {
-    return ip->s6_addr[0] == 0xff;
+/* Returns true if 'ip' is an IPv6 multicast address, that is, if its first
+ * byte is 0xff (ff00::/8).  The argument is the 16-bit-aligned address form,
+ * so the function can be applied directly to an address inside a packet's
+ * IPv6 header. */
+static inline bool
+ipv6_addr_is_multicast(const union ovs_16aligned_in6_addr *ip) {
+    return (ip->be16[0] & htons(0xff00)) == htons(0xff00);
+}
+
+/* Returns true if 'ip' is the IPv6 loopback address ::1, that is, if the
+ * first seven 16-bit words are zero and the last one is 1. */
+static inline bool
+ipv6_addr_is_loopback(const union ovs_16aligned_in6_addr *ip) {
+    return ip->be16[0] == 0 && ip->be16[1] == 0 &&
+           ip->be16[2] == 0 && ip->be16[3] == 0 &&
+           ip->be16[4] == 0 && ip->be16[5] == 0 &&
+           ip->be16[6] == 0 && ip->be16[7] == htons(1);
+}
+
+/* Returns true if 'ip' is the IPv6 unspecified address ::, that is, if all
+ * eight 16-bit words are zero. */
+static inline bool
+ipv6_addr_is_any(const union ovs_16aligned_in6_addr *ip) {
+    return ip->be16[0] == 0 && ip->be16[1] == 0 &&
+           ip->be16[2] == 0 && ip->be16[3] == 0 &&
+           ip->be16[4] == 0 && ip->be16[5] == 0 &&
+           ip->be16[6] == 0 && ip->be16[7] == 0;
 }
 
 static inline struct in6_addr
@@ -1667,6 +1720,17 @@ void compose_arp(struct dp_packet *, uint16_t arp_op,
                  const struct eth_addr arp_sha,
                  const struct eth_addr arp_tha, bool broadcast,
                  ovs_be32 arp_spa, ovs_be32 arp_tpa);
+/* Composes an ICMPv6 Packet Too Big message sent from 'eth_src'/'ipv6_src'
+ * to 'eth_dst'/'ipv6_dst'.  'mtu' is in network byte order; 'body' and
+ * 'body_len' give the bytes to quote after the ICMPv6 header.  Returns a
+ * newly allocated packet. */
+struct dp_packet *compose_ipv6_ptb(const struct eth_addr eth_src,
+                                   const struct eth_addr eth_dst,
+                                   const struct in6_addr *ipv6_src,
+                                   const struct in6_addr *ipv6_dst,
+                                   ovs_be32 mtu, const void *body,
+                                   size_t body_len);
+/* Composes an ICMPv4 Fragmentation Needed message sent from
+ * 'eth_src'/'ip_src' to 'eth_dst'/'ip_dst'.  'mtu' is in network byte order;
+ * 'body' and 'body_len' give the bytes to quote after the ICMP header.
+ * Returns a newly allocated packet. */
+struct dp_packet *compose_ipv4_fn(const struct eth_addr eth_src,
+                                   const struct eth_addr eth_dst,
+                                   ovs_be32 ip_src, ovs_be32 ip_dst,
+                                   ovs_be16 mtu, const void *body,
+                                   size_t body_len);
 void compose_nd_ns(struct dp_packet *, const struct eth_addr eth_src,
                    const struct in6_addr *ipv6_src,
                    const struct in6_addr *ipv6_dst);
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index d0412f0d6..795dd8563 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -2283,12 +2283,11 @@ port_construct(struct ofport *port_)
             dpif_port_destroy(&dpif_port);
             return EBUSY;
         }
-
-        ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
-        hmap_insert(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node,
-                    hash_odp_port(port->odp_port));
-        ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
     }
+    ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
+    hmap_insert(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node,
+                hash_odp_port(port->odp_port));
+    ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
     dpif_port_destroy(&dpif_port);
 
     if (ofproto->sflow) {
@@ -2350,7 +2349,7 @@ port_destruct(struct ofport *port_, bool del)
         port->peer = NULL;
     }
 
-    if (port->odp_port != ODPP_NONE && !port->is_tunnel) {
+    if (port->odp_port != ODPP_NONE) {
         ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
         hmap_remove(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node);
         ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
diff --git a/tests/tunnel-push-pop-ipv6.at b/tests/tunnel-push-pop-ipv6.at
index ca5cb4d19..505f634f7 100644
--- a/tests/tunnel-push-pop-ipv6.at
+++ b/tests/tunnel-push-pop-ipv6.at
@@ -940,3 +940,72 @@ AT_CHECK([grep -q "GENEVE_ACT" stdout])
 
 OVS_VSWITCHD_STOP
 AT_CLEANUP
+
+AT_SETUP([tunnel_push_pop_ipv6 - tunnel ICMPv6 fragmentation needed])
+OVS_VSWITCHD_START(
+    [add-port br0 p0 \
+     -- set Interface p0 type=dummy ofport_request=1 \
+                         other-config:hwaddr=aa:55:aa:55:00:00])
+AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg])
+AT_CHECK([ovs-vsctl add-br int-br -- set bridge int-br datapath_type=dummy])
+AT_CHECK([ovs-vsctl add-port int-br t1 \
+          -- set Interface t1 type=geneve \
+                              options:remote_ip=2001:cafe::92 \
+                              options:key=123 ofport_request=2 \
+                              options:df_default=true mtu_request=1400])
+
+dnl Setup an IP address.
+AT_CHECK([ovs-appctl netdev-dummy/ip6addr br0 2001:cafe::88/64], [0], [OK
+])
+dnl Checking that a local route for added IP was successfully installed.
+AT_CHECK([ovs-appctl ovs/route/show | grep Cached | sort], [0], [dnl
+Cached: 2001:cafe::/64 dev br0 SRC 2001:cafe::88 local
+])
+
+AT_CHECK([ovs-ofctl add-flow br0 action=normal])
+
+dnl This Neighbor Advertisement from p0 has two effects:
+dnl 1. The neighbor cache will learn that 2001:cafe::92 is at 
f8:bc:12:44:34:b6.
+dnl 2. The br0 mac learning will learn that f8:bc:12:44:34:b6 is on p0.
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 dnl
+ 'recirc_id(0),in_port(1),dnl
+  eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),dnl
+  
ipv6(src=2001:cafe::92,dst=2001:cafe::88,label=0,proto=58,tclass=0,hlimit=255,frag=no),dnl
+  icmpv6(type=136,code=0),dnl
+  nd(target=2001:cafe::92,sll=00:00:00:00:00:00,tll=f8:bc:12:44:34:b6)'
+])
+
+AT_CHECK([ovs-vsctl -- set Interface int-br options:pcap=int-br.pcap])
+
+dnl Verify MTU is set on tunnel interface
+AT_CHECK([ovs-vsctl get interface t1 mtu], [0], [dnl
+1400
+])
+
+AT_CHECK([ovs-ofctl add-flow int-br 
"priority=100,in_port=LOCAL,actions=output:2"])
+AT_CHECK([ovs-ofctl add-flow int-br "priority=1,actions=normal"])
+
+dnl Send a 1500 byte packet, exceeding the tunnel's MTU.
+zeros1458=$(printf '0%.0s' $(seq 2916))
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br ]dnl
+[aa55aa550000f8bc1244cafe86dd]dnl
+[6000000005ba11402001cafe0000000000000000000000922001cafe000000000000000000000088]dnl
+[c853003a05ba54d3]dnl
+[${zeros1458}])
+
+ovs-appctl time/warp 1000
+
+dnl Verify that tunnel drop occurred.
+AT_CHECK([ovs-appctl coverage/read-counter datapath_drop_tunnel_mtu_drop], 
[0], [1
+])
+
+dnl Check for ICMPv6 Packet Too Big, addressed back to the original sender.
+AT_CHECK([ovs-pcap int-br.pcap 2>/dev/null | grep -q ]dnl
+[f8bc1244cafeaa55aa55000086dd]dnl
+[6000000004ca3aff2001cafe0000000000000000000000882001cafe000000000000000000000092]dnl
+[0200ab3a00000578]dnl
+[6000000005ba11402001cafe0000000000000000000000922001cafe000000000000000000000088]dnl
+[c853003a05ba54d3], [0])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at
index a87ae3313..fe9264a33 100644
--- a/tests/tunnel-push-pop.at
+++ b/tests/tunnel-push-pop.at
@@ -1459,3 +1459,69 @@ AT_CHECK([tail -1 stdout], [0],
 
 OVS_VSWITCHD_STOP
 AT_CLEANUP
+
+AT_SETUP([tunnel_push_pop - tunnel ICMP fragmentation needed])
+OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy 
ofport_request=1 other-config:hwaddr=aa:55:aa:55:00:00])
+AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg])
+AT_CHECK([ovs-appctl vlog/set netdev_vport:dbg])
+AT_CHECK([ovs-appctl vlog/set native_tnl:dbg])
+AT_CHECK([ovs-vsctl add-br int-br -- set bridge int-br datapath_type=dummy], 
[0])
+AT_CHECK([ovs-vsctl add-port int-br t1 -- set Interface t1 type=geneve \
+                       options:remote_ip=1.1.2.92 options:key=123 
ofport_request=2 \
+                       options:df_default=true mtu_request=1400])
+
+dnl Setup dummy interface IP addresses.
+AT_CHECK([ovs-appctl netdev-dummy/ip4addr br0 1.1.2.88/24], [0], [OK
+])
+AT_CHECK([ovs-appctl netdev-dummy/ip4addr int-br 10.0.0.1/24], [0], [OK
+])
+
+dnl Checking that a local route for added IP was successfully installed.
+AT_CHECK([ovs-appctl ovs/route/show | grep Cached | sort], [0], [dnl
+Cached: 1.1.2.0/24 dev br0 SRC 1.1.2.88 local
+Cached: 10.0.0.0/24 dev int-br SRC 10.0.0.1 local
+])
+
+AT_CHECK([ovs-ofctl add-flow br0 action=normal])
+
+dnl This ARP reply from p0 has two effects:
+dnl 1. The ARP cache will learn that 1.1.2.92 is at f8:bc:12:44:34:b6.
+dnl 2. The br0 mac learning will learn that f8:bc:12:44:34:b6 is on p0.
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 ']dnl
+[recirc_id(0),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),]dnl
+[arp(sip=1.1.2.92,tip=1.1.2.88,op=2,sha=f8:bc:12:44:34:b6,tha=00:00:00:00:00:00)'])
+
+AT_CHECK([ovs-vsctl -- set Interface int-br options:pcap=int-br.pcap])
+
+dnl Verify MTU is set on tunnel interface
+AT_CHECK([ovs-vsctl get interface t1 mtu], [0], [dnl
+1400
+])
+
+AT_CHECK([ovs-ofctl add-flow int-br 
"priority=100,in_port=LOCAL,actions=output:2"])
+AT_CHECK([ovs-ofctl add-flow int-br "priority=1,actions=normal"])
+
+dnl Send a 1500 byte packet, exceeding the tunnel's MTU.
+zeros1458=$(printf '0%.0s' $(seq 2916))
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br ]dnl
+[50540000000a5054000000090800]dnl
+[450005b600014000400121440a0000020a000001]dnl
+[0800f7fd00010001]dnl
+[${zeros1458}])
+
+ovs-appctl time/warp 1000
+
+dnl Verify that tunnel drop occurred.
+AT_CHECK([ovs-appctl coverage/read-counter datapath_drop_tunnel_mtu_drop], 
[0], [1
+])
+
+dnl Check for ICMP Fragmentation Needed.
+AT_CHECK([ovs-pcap int-br.pcap 2>/dev/null | grep -q ]dnl
+[50540000000950540000000a0800]dnl
+[45c0003800000000400166030a0000010a000002]dnl
+[0304f78300000578]dnl
+[450005b600014000400121440a0000020a000001]dnl
+[0800f7fd00010001], [0])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
-- 
2.51.1

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to