Hi,

When sending icmp packets for IPsec path MTU discovery, the first
icmp packet may be wrong.  The mtu 32768 is taken from the loopback
interface as the tdb mtu is copied to the route too late.

00:09:31.329586 10.8.26.36.5201 > 10.8.31.33.40322: . 11585:13033(1448) ack 38 
win 227 <nop,nop,timestamp 1158559856 4057064947> (
DF)
00:09:31.329586 10.8.26.36.5201 > 10.8.31.33.40322: . 13033:14481(1448) ack 38 
win 227 <nop,nop,timestamp 1158559856 4057064947> (
DF)
00:09:31.329729 10.8.26.52 > 10.8.26.36: icmp: 10.8.31.33 unreachable - need to 
frag (mtu 32768)
00:09:31.329730 10.8.26.52 > 10.8.26.36: icmp: 10.8.31.33 unreachable - need to 
frag (mtu 1424)
00:09:31.329731 10.8.26.52 > 10.8.26.36: icmp: 10.8.31.33 unreachable - need to 
frag (mtu 1424)
00:09:31.329731 10.8.26.52 > 10.8.26.36: icmp: 10.8.31.33 unreachable - need to 
frag (mtu 1424)

The diff below fixes this.  When ipsp_process_packet() returns
EMSGSIZE, immediately update tdb and route mtu.

ok?

bluhm

Index: netinet/ip_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.374
diff -u -p -r1.374 ip_output.c
--- netinet/ip_output.c 27 Jul 2021 17:13:03 -0000      1.374
+++ netinet/ip_output.c 22 Nov 2021 21:56:20 -0000
@@ -86,13 +86,11 @@ static __inline u_int16_t __attribute__(
 void in_delayed_cksum(struct mbuf *);
 int in_ifcap_cksum(struct mbuf *, struct ifnet *, int);
 
-#ifdef IPSEC
-struct tdb *
-ip_output_ipsec_lookup(struct mbuf *m, int hlen, int *error, struct inpcb *inp,
-    int ipsecflowinfo);
-int
-ip_output_ipsec_send(struct tdb *, struct mbuf *, struct route *, int);
-#endif /* IPSEC */
+struct tdb * ip_output_ipsec_lookup(struct mbuf *m, int hlen, int *error,
+    struct inpcb *inp, int ipsecflowinfo);
+void ip_output_ipsec_pmtu_update(struct tdb *, struct route *, struct in_addr,
+    int, int);
+int ip_output_ipsec_send(struct tdb *, struct mbuf *, struct route *, int);
 
 /*
  * IP output.  The packet in mbuf chain m contains a skeletal IP
@@ -563,6 +561,36 @@ ip_output_ipsec_lookup(struct mbuf *m, i
        return tdb;
 }
 
+void
+ip_output_ipsec_pmtu_update(struct tdb *tdb, struct route *ro,
+    struct in_addr dst, int rtableid, int transportmode)
+{
+       struct rtentry *rt = NULL;
+       int rt_mtucloned = 0;
+
+       /* Find a host route to store the mtu in */
+       if (ro != NULL)
+               rt = ro->ro_rt;
+       /* but don't add a PMTU route for transport mode SAs */
+       if (transportmode)
+               rt = NULL;
+       else if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) {
+               rt = icmp_mtudisc_clone(dst, rtableid, 1);
+               rt_mtucloned = 1;
+       }
+       DPRINTF("spi %08x mtu %d rt %p cloned %d",
+           ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned);
+       if (rt != NULL) {
+               rt->rt_mtu = tdb->tdb_mtu;
+               if (ro != NULL && ro->ro_rt != NULL) {
+                       rtfree(ro->ro_rt);
+                       ro->ro_rt = rtalloc(&ro->ro_dst, RT_RESOLVE, rtableid);
+               }
+               if (rt_mtucloned)
+                       rtfree(rt);
+       }
+}
+
 int
 ip_output_ipsec_send(struct tdb *tdb, struct mbuf *m, struct route *ro, int 
fwd)
 {
@@ -570,7 +598,8 @@ ip_output_ipsec_send(struct tdb *tdb, st
        struct ifnet *encif;
 #endif
        struct ip *ip;
-       int error;
+       struct in_addr dst;
+       int error, rtableid;
 
 #if NPF > 0
        /*
@@ -595,39 +624,17 @@ ip_output_ipsec_send(struct tdb *tdb, st
 
        /* Check if we are allowed to fragment */
        ip = mtod(m, struct ip *);
+       dst = ip->ip_dst;
+       rtableid = m->m_pkthdr.ph_rtableid;
        if (ip_mtudisc && (ip->ip_off & htons(IP_DF)) && tdb->tdb_mtu &&
            ntohs(ip->ip_len) > tdb->tdb_mtu &&
            tdb->tdb_mtutimeout > gettime()) {
-               struct rtentry *rt = NULL;
-               int rt_mtucloned = 0;
-               int transportmode = 0;
+               int transportmode;
 
                transportmode = (tdb->tdb_dst.sa.sa_family == AF_INET) &&
-                   (tdb->tdb_dst.sin.sin_addr.s_addr == ip->ip_dst.s_addr);
-
-               /* Find a host route to store the mtu in */
-               if (ro != NULL)
-                       rt = ro->ro_rt;
-               /* but don't add a PMTU route for transport mode SAs */
-               if (transportmode)
-                       rt = NULL;
-               else if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) {
-                       rt = icmp_mtudisc_clone(ip->ip_dst,
-                           m->m_pkthdr.ph_rtableid, 1);
-                       rt_mtucloned = 1;
-               }
-               DPRINTF("spi %08x mtu %d rt %p cloned %d",
-                   ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned);
-               if (rt != NULL) {
-                       rt->rt_mtu = tdb->tdb_mtu;
-                       if (ro != NULL && ro->ro_rt != NULL) {
-                               rtfree(ro->ro_rt);
-                               ro->ro_rt = rtalloc(&ro->ro_dst, RT_RESOLVE,
-                                   m->m_pkthdr.ph_rtableid);
-                       }
-                       if (rt_mtucloned)
-                               rtfree(rt);
-               }
+                   (tdb->tdb_dst.sin.sin_addr.s_addr == dst.s_addr);
+               ip_output_ipsec_pmtu_update(tdb, ro, dst, rtableid,
+                   transportmode);
                ipsec_adjust_mtu(m, tdb->tdb_mtu);
                m_freem(m);
                return EMSGSIZE;
@@ -648,6 +655,8 @@ ip_output_ipsec_send(struct tdb *tdb, st
                ipsecstat_inc(ipsec_odrops);
                tdb->tdb_odrops++;
        }
+       if (error == EMSGSIZE)
+               ip_output_ipsec_pmtu_update(tdb, ro, dst, rtableid, 0);
        return error;
 }
 #endif /* IPSEC */
Index: netinet6/ip6_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v
retrieving revision 1.260
diff -u -p -r1.260 ip6_output.c
--- netinet6/ip6_output.c       27 Jul 2021 17:13:03 -0000      1.260
+++ netinet6/ip6_output.c       22 Nov 2021 21:56:20 -0000
@@ -144,6 +144,9 @@ static __inline u_int16_t __attribute__(
     u_int32_t, u_int32_t);
 void in6_delayed_cksum(struct mbuf *, u_int8_t);
 
+int ip6_output_ipsec_pmtu_update(struct tdb *, struct route_in6 *,
+    struct in6_addr *, int, int, int);
+
 /* Context for non-repeating IDs */
 struct idgen32_ctx ip6_id_ctx;
 
@@ -2772,6 +2775,51 @@ ip6_output_ipsec_lookup(struct mbuf *m, 
 }
 
 int
+ip6_output_ipsec_pmtu_update(struct tdb *tdb, struct route_in6 *ro,
+    struct in6_addr *dst, int ifidx, int rtableid, int transportmode)
+{
+       struct rtentry *rt = NULL;
+       int rt_mtucloned = 0;
+
+       /* Find a host route to store the mtu in */
+       if (ro != NULL)
+               rt = ro->ro_rt;
+       /* but don't add a PMTU route for transport mode SAs */
+       if (transportmode)
+               rt = NULL;
+       else if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) {
+               struct sockaddr_in6 sin6;
+               int error;
+
+               memset(&sin6, 0, sizeof(sin6));
+               sin6.sin6_family = AF_INET6;
+               sin6.sin6_len = sizeof(sin6);
+               sin6.sin6_addr = *dst;
+               sin6.sin6_scope_id = in6_addr2scopeid(ifidx, dst);
+               error = in6_embedscope(dst, &sin6, NULL);
+               if (error) {
+                       /* should be impossible */
+                       return error;
+               }
+               rt = icmp6_mtudisc_clone(&sin6, rtableid, 1);
+               rt_mtucloned = 1;
+       }
+       DPRINTF("spi %08x mtu %d rt %p cloned %d",
+           ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned);
+       if (rt != NULL) {
+               rt->rt_mtu = tdb->tdb_mtu;
+               if (ro != NULL && ro->ro_rt != NULL) {
+                       rtfree(ro->ro_rt);
+                       ro->ro_rt = rtalloc(sin6tosa(&ro->ro_dst), RT_RESOLVE,
+                           rtableid);
+               }
+               if (rt_mtucloned)
+                       rtfree(rt);
+       }
+       return 0;
+}
+
+int
 ip6_output_ipsec_send(struct tdb *tdb, struct mbuf *m, struct route_in6 *ro,
     int tunalready, int fwd)
 {
@@ -2779,7 +2827,8 @@ ip6_output_ipsec_send(struct tdb *tdb, s
        struct ifnet *encif;
 #endif
        struct ip6_hdr *ip6;
-       int error;
+       struct in6_addr dst;
+       int error, ifidx, rtableid;
 
 #if NPF > 0
        /*
@@ -2804,55 +2853,23 @@ ip6_output_ipsec_send(struct tdb *tdb, s
 
        /* Check if we are allowed to fragment */
        ip6 = mtod(m, struct ip6_hdr *);
+       dst = ip6->ip6_dst;
+       ifidx = m->m_pkthdr.ph_ifidx;
+       rtableid = m->m_pkthdr.ph_rtableid;
        if (ip_mtudisc && tdb->tdb_mtu &&
            sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) > tdb->tdb_mtu &&
            tdb->tdb_mtutimeout > gettime()) {
-               struct rtentry *rt = NULL;
-               int rt_mtucloned = 0;
-               int transportmode = 0;
+               int transportmode;
 
                transportmode = (tdb->tdb_dst.sa.sa_family == AF_INET6) &&
-                   (IN6_ARE_ADDR_EQUAL(&tdb->tdb_dst.sin6.sin6_addr,
-                   &ip6->ip6_dst));
-
-               /* Find a host route to store the mtu in */
-               if (ro != NULL)
-                       rt = ro->ro_rt;
-               /* but don't add a PMTU route for transport mode SAs */
-               if (transportmode)
-                       rt = NULL;
-               else if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) {
-                       struct sockaddr_in6 sin6;
-
-                       memset(&sin6, 0, sizeof(sin6));
-                       sin6.sin6_family = AF_INET6;
-                       sin6.sin6_len = sizeof(sin6);
-                       sin6.sin6_addr = ip6->ip6_dst;
-                       sin6.sin6_scope_id =
-                           in6_addr2scopeid(m->m_pkthdr.ph_ifidx,
-                           &ip6->ip6_dst);
-                       error = in6_embedscope(&ip6->ip6_dst, &sin6, NULL);
-                       if (error) {
-                               /* should be impossible */
-                               ipsecstat_inc(ipsec_odrops);
-                               m_freem(m);
-                               return error;
-                       }
-                       rt = icmp6_mtudisc_clone(&sin6,
-                           m->m_pkthdr.ph_rtableid, 1);
-                       rt_mtucloned = 1;
-               }
-               DPRINTF("spi %08x mtu %d rt %p cloned %d",
-                   ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned);
-               if (rt != NULL) {
-                       rt->rt_mtu = tdb->tdb_mtu;
-                       if (ro != NULL && ro->ro_rt != NULL) {
-                               rtfree(ro->ro_rt);
-                               ro->ro_rt = rtalloc(sin6tosa(&ro->ro_dst),
-                                   RT_RESOLVE, m->m_pkthdr.ph_rtableid);
-                       }
-                       if (rt_mtucloned)
-                               rtfree(rt);
+                   (IN6_ARE_ADDR_EQUAL(&tdb->tdb_dst.sin6.sin6_addr, &dst));
+               error = ip6_output_ipsec_pmtu_update(tdb, ro, &dst, ifidx,
+                   rtableid, transportmode);
+               if (error) {
+                       ipsecstat_inc(ipsec_odrops);
+                       tdb->tdb_odrops++;
+                       m_freem(m);
+                       return error;
                }
                ipsec_adjust_mtu(m, tdb->tdb_mtu);
                m_freem(m);
@@ -2874,6 +2891,8 @@ ip6_output_ipsec_send(struct tdb *tdb, s
                ipsecstat_inc(ipsec_odrops);
                tdb->tdb_odrops++;
        }
+       if (error == EMSGSIZE)
+               ip6_output_ipsec_pmtu_update(tdb, ro, &dst, ifidx, rtableid, 0);
        return error;
 }
 #endif /* IPSEC */

Reply via email to