Author: bz
Date: Wed Dec 18 11:48:50 2019
New Revision: 355880
URL: https://svnweb.freebsd.org/changeset/base/355880

Log:
  MFC r350532-350536,350559,350584-350585,350746-350747:
  
  Merge the first part of frag6.c changes from HEAD.  Apart from moving the
  sysctls into the local file these are mostly non-functional changes.
  
    frag6.c: sort includes
    frag6.c: move variables and sysctls into local file
    frag6.c: remove dead code
    frag6.c: rename malloc type
    frag6.c: make compile with gcc
    frag6.c: fix includes
    frag6.c: re-order functions within file
    frag6.c: rename ip6q[] to ip6qb[] and consistently use "bucket"
    frag6.c: initial comment and whitespace cleanup.
    frag6.c: cleanup variables and return statements.
  
    Sponsored by:               Netflix

Modified:
  stable/12/sys/netinet6/frag6.c
  stable/12/sys/netinet6/in6_proto.c
  stable/12/sys/netinet6/ip6_var.h
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/sys/netinet6/frag6.c
==============================================================================
--- stable/12/sys/netinet6/frag6.c      Wed Dec 18 09:30:32 2019        (r355879)
+++ stable/12/sys/netinet6/frag6.c      Wed Dec 18 11:48:50 2019        (r355880)
@@ -38,20 +38,17 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/domain.h>
+#include <sys/eventhandler.h>
 #include <sys/hash.h>
+#include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
-#include <sys/domain.h>
-#include <sys/eventhandler.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
-#include <sys/errno.h>
-#include <sys/time.h>
-#include <sys/kernel.h>
+#include <sys/sysctl.h>
 #include <sys/syslog.h>
 
-#include <machine/atomic.h>
-
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/netisr.h>
@@ -63,14 +60,14 @@ __FBSDID("$FreeBSD$");
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet/icmp6.h>
-#include <netinet/in_systm.h>  /* for ECN definitions */
-#include <netinet/ip.h>                /* for ECN definitions */
+#include <netinet/in_systm.h>  /* For ECN definitions. */
+#include <netinet/ip.h>                /* For ECN definitions. */
 
+#ifdef MAC
 #include <security/mac/mac_framework.h>
+#endif
 
-/*
- * Reassembly headers are stored in hash buckets.
- */
+/* Reassembly headers are stored in hash buckets. */
 #define        IP6REASS_NHASH_LOG2     10
 #define        IP6REASS_NHASH          (1 << IP6REASS_NHASH_LOG2)
 #define        IP6REASS_HMASK          (IP6REASS_NHASH - 1)
@@ -89,23 +86,36 @@ struct ip6qbucket {
        int             count;
 };
 
-VNET_DEFINE_STATIC(volatile u_int, frag6_nfragpackets);
-volatile u_int frag6_nfrags = 0;
-VNET_DEFINE_STATIC(struct ip6qbucket, ip6q[IP6REASS_NHASH]);
-VNET_DEFINE_STATIC(uint32_t, ip6q_hashseed);
+static MALLOC_DEFINE(M_FRAG6, "frag6", "IPv6 fragment reassembly header");
 
+/* System wide (global) maximum and count of packets in reassembly queues. */ 
+static int ip6_maxfrags;
+static volatile u_int frag6_nfrags = 0;
+
+/* Maximum and current packets in per-VNET reassembly queue. */
+VNET_DEFINE_STATIC(int,                        ip6_maxfragpackets);
+VNET_DEFINE_STATIC(volatile u_int,     frag6_nfragpackets);
+#define        V_ip6_maxfragpackets            VNET(ip6_maxfragpackets)
 #define        V_frag6_nfragpackets            VNET(frag6_nfragpackets)
-#define        V_ip6q                          VNET(ip6q)
-#define        V_ip6q_hashseed                 VNET(ip6q_hashseed)
 
-#define        IP6Q_LOCK(i)            mtx_lock(&V_ip6q[(i)].lock)
-#define        IP6Q_TRYLOCK(i)         mtx_trylock(&V_ip6q[(i)].lock)
-#define        IP6Q_LOCK_ASSERT(i)     mtx_assert(&V_ip6q[(i)].lock, MA_OWNED)
-#define        IP6Q_UNLOCK(i)          mtx_unlock(&V_ip6q[(i)].lock)
-#define        IP6Q_HEAD(i)            (&V_ip6q[(i)].ip6q)
+/* Maximum per-VNET reassembly queues per bucket and fragments per packet. */
+VNET_DEFINE_STATIC(int,                        ip6_maxfragbucketsize);
+VNET_DEFINE_STATIC(int,                        ip6_maxfragsperpacket);
+#define        V_ip6_maxfragbucketsize         VNET(ip6_maxfragbucketsize)
+#define        V_ip6_maxfragsperpacket         VNET(ip6_maxfragsperpacket)
 
-static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header");
+/* Per-VNET reassembly queue buckets. */
+VNET_DEFINE_STATIC(struct ip6qbucket,  ip6qb[IP6REASS_NHASH]);
+VNET_DEFINE_STATIC(uint32_t,           ip6qb_hashseed);
+#define        V_ip6qb                         VNET(ip6qb)
+#define        V_ip6qb_hashseed                VNET(ip6qb_hashseed)
 
+#define        IP6QB_LOCK(_b)          mtx_lock(&V_ip6qb[(_b)].lock)
+#define        IP6QB_TRYLOCK(_b)       mtx_trylock(&V_ip6qb[(_b)].lock)
+#define        IP6QB_LOCK_ASSERT(_b)   mtx_assert(&V_ip6qb[(_b)].lock, MA_OWNED)
+#define        IP6QB_UNLOCK(_b)        mtx_unlock(&V_ip6qb[(_b)].lock)
+#define        IP6QB_HEAD(_b)          (&V_ip6qb[(_b)].ip6q)
+
 /*
  * By default, limit the number of IP6 fragments across all reassembly
  * queues to  1/32 of the total number of mbuf clusters.
@@ -122,11 +132,14 @@ static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment r
 #define        IP6_MAXFRAGS            (nmbclusters / 32)
 #define        IP6_MAXFRAGPACKETS      (imin(IP6_MAXFRAGS, IP6REASS_NHASH * 50))
 
+
 /*
- * Initialise reassembly queue and fragment identifier.
+ * Sysctls and helper function.
  */
-void
-frag6_set_bucketsize()
+SYSCTL_DECL(_net_inet6_ip6);
+
+static void
+frag6_set_bucketsize(void)
 {
        int i;
 
@@ -134,68 +147,140 @@ frag6_set_bucketsize()
                V_ip6_maxfragbucketsize = imax(i / (IP6REASS_NHASH / 2), 1);
 }
 
-static void
-frag6_change(void *tag)
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags,
+       CTLFLAG_RW, &ip6_maxfrags, 0,
+       "Maximum allowed number of outstanding IPv6 packet fragments. "
+       "A value of 0 means no fragmented packets will be accepted, while a "
+       "a value of -1 means no limit");
+
+static int
+sysctl_ip6_maxfragpackets(SYSCTL_HANDLER_ARGS)
 {
-       VNET_ITERATOR_DECL(vnet_iter);
+       int error, val;
 
-       ip6_maxfrags = IP6_MAXFRAGS;
-       VNET_LIST_RLOCK_NOSLEEP();
-       VNET_FOREACH(vnet_iter) {
-               CURVNET_SET(vnet_iter);
-               V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
-               frag6_set_bucketsize();
-               CURVNET_RESTORE();
-       }
-       VNET_LIST_RUNLOCK_NOSLEEP();
+       val = V_ip6_maxfragpackets;
+       error = sysctl_handle_int(oidp, &val, 0, req);
+       if (error != 0 || !req->newptr)
+               return (error);
+       V_ip6_maxfragpackets = val;
+       frag6_set_bucketsize();
+       return (0);
 }
+SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
+       CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
+       sysctl_ip6_maxfragpackets, "I",
+       "Default maximum number of outstanding fragmented IPv6 packets. "
+       "A value of 0 means no fragmented packets will be accepted, while a "
+       "a value of -1 means no limit");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGSPERPACKET, maxfragsperpacket,
+       CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragsperpacket), 0,
+       "Maximum allowed number of fragments per packet");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGBUCKETSIZE, maxfragbucketsize,
+       CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragbucketsize), 0,
+       "Maximum number of reassembly queues per hash bucket");
 
-void
-frag6_init(void)
+
+/*
+ * Remove the IPv6 fragmentation header from the mbuf.
+ */
+int
+ip6_deletefraghdr(struct mbuf *m, int offset, int wait)
 {
-       struct ip6q *q6;
-       int i;
+       struct ip6_hdr *ip6;
+       struct mbuf *t;
 
-       V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
-       frag6_set_bucketsize();
-       for (i = 0; i < IP6REASS_NHASH; i++) {
-               q6 = IP6Q_HEAD(i);
-               q6->ip6q_next = q6->ip6q_prev = q6;
-               mtx_init(&V_ip6q[i].lock, "ip6qlock", NULL, MTX_DEF);
-               V_ip6q[i].count = 0;
+       /* Delete frag6 header. */
+       if (m->m_len >= offset + sizeof(struct ip6_frag)) {
+
+               /* This is the only possible case with !PULLDOWN_TEST. */
+               ip6  = mtod(m, struct ip6_hdr *);
+               bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag),
+                   offset);
+               m->m_data += sizeof(struct ip6_frag);
+               m->m_len -= sizeof(struct ip6_frag);
+       } else {
+
+               /* This comes with no copy if the boundary is on cluster. */
+               if ((t = m_split(m, offset, wait)) == NULL)
+                       return (ENOMEM);
+               m_adj(t, sizeof(struct ip6_frag));
+               m_cat(m, t);
        }
-       V_ip6q_hashseed = arc4random();
-       V_ip6_maxfragsperpacket = 64;
-       if (!IS_DEFAULT_VNET(curvnet))
-               return;
 
-       ip6_maxfrags = IP6_MAXFRAGS;
-       EVENTHANDLER_REGISTER(nmbclusters_change,
-           frag6_change, NULL, EVENTHANDLER_PRI_ANY);
+       m->m_flags |= M_FRAGMENTED;
+       return (0);
 }
 
 /*
- * In RFC2460, fragment and reassembly rule do not agree with each other,
- * in terms of next header field handling in fragment header.
+ * Free a fragment reassembly header and all associated datagrams.
+ */
+static void
+frag6_freef(struct ip6q *q6, uint32_t bucket)
+{
+       struct ip6_hdr *ip6;
+       struct ip6asfrag *af6, *down6;
+       struct mbuf *m;
+
+       IP6QB_LOCK_ASSERT(bucket);
+
+       for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
+            af6 = down6) {
+
+               m = IP6_REASS_MBUF(af6);
+               down6 = af6->ip6af_down;
+               frag6_deq(af6, bucket);
+
+               /*
+                * Return ICMP time exceeded error for the 1st fragment.
+                * Just free other fragments.
+                */
+               if (af6->ip6af_off == 0) {
+
+                       /* Adjust pointer. */
+                       ip6 = mtod(m, struct ip6_hdr *);
+
+                       /* Restore source and destination addresses. */
+                       ip6->ip6_src = q6->ip6q_src;
+                       ip6->ip6_dst = q6->ip6q_dst;
+
+                       icmp6_error(m, ICMP6_TIME_EXCEEDED,
+                           ICMP6_TIME_EXCEED_REASSEMBLY, 0);
+               } else
+                       m_freem(m);
+
+               free(af6, M_FRAG6);
+       }
+       frag6_remque(q6, bucket);
+       atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
+#ifdef MAC
+       mac_ip6q_destroy(q6);
+#endif
+       free(q6, M_FRAG6);
+       atomic_subtract_int(&V_frag6_nfragpackets, 1);
+}
+
+/*
+ * Like in RFC2460, in RFC8200, fragment and reassembly rules do not agree with
+ * each other, in terms of next header field handling in fragment header.
  * While the sender will use the same value for all of the fragmented packets,
- * receiver is suggested not to check the consistency.
+ * receiver is suggested not to check for consistency.
  *
- * fragment rule (p20):
- *     (2) A Fragment header containing:
- *     The Next Header value that identifies the first header of
- *     the Fragmentable Part of the original packet.
+ * Fragment rules (p18,p19):
+ *     (2)  A Fragment header containing:
+ *     The Next Header value that identifies the first header
+ *     after the Per-Fragment headers of the original packet.
  *             -> next header field is same for all fragments
  *
- * reassembly rule (p21):
- *     The Next Header field of the last header of the Unfragmentable
- *     Part is obtained from the Next Header field of the first
+ * Reassembly rule (p20):
+ *     The Next Header field of the last header of the Per-Fragment
+ *     headers is obtained from the Next Header field of the first
  *     fragment's Fragment header.
  *             -> should grab it from the first fragment only
  *
  * The following note also contradicts with fragment rule - no one is going to
  * send different fragment with different next header field.
  *
- * additional note (p22):
+ * Additional note (p22) [not an error]:
  *     The Next Header values in the Fragment headers of different
  *     fragments of the same original packet may differ.  Only the value
  *     from the Offset zero fragment packet is used for reassembly.
@@ -204,33 +289,32 @@ frag6_init(void)
  * There is no explicit reason given in the RFC.  Historical reason maybe?
  */
 /*
- * Fragment input
+ * Fragment input.
  */
 int
 frag6_input(struct mbuf **mp, int *offp, int proto)
 {
-       struct mbuf *m = *mp, *t;
+       struct ifnet *dstifp;
+       struct in6_ifaddr *ia6;
        struct ip6_hdr *ip6;
        struct ip6_frag *ip6f;
        struct ip6q *head, *q6;
-       struct ip6asfrag *af6, *ip6af, *af6dwn;
-       struct in6_ifaddr *ia;
-       int offset = *offp, nxt, i, next;
-       int first_frag = 0;
-       int fragoff, frgpartlen;        /* must be larger than u_int16_t */
+       struct ip6asfrag *af6, *af6dwn, *ip6af;
+       struct mbuf *m, *t;
        uint32_t hashkey[(sizeof(struct in6_addr) * 2 +
                    sizeof(ip6f->ip6f_ident)) / sizeof(uint32_t)];
-       uint32_t hash, *hashkeyp;
-       struct ifnet *dstifp;
-       u_int8_t ecn, ecn0;
+       uint32_t bucket, *hashkeyp;
+       int fragoff, frgpartlen;        /* Must be larger than uint16_t. */
+       int nxt, offset, plen;
+       uint8_t ecn, ecn0;
+       bool only_frag;
 #ifdef RSS
-       struct m_tag *mtag;
        struct ip6_direct_ctx *ip6dc;
+       struct m_tag *mtag;
 #endif
 
-#if 0
-       char ip6buf[INET6_ADDRSTRLEN];
-#endif
+       m = *mp;
+       offset = *offp;
 
        ip6 = mtod(m, struct ip6_hdr *);
 #ifndef PULLDOWN_TEST
@@ -243,22 +327,23 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
 #endif
 
        dstifp = NULL;
-       /* find the destination interface of the packet. */
-       ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
-       if (ia != NULL) {
-               dstifp = ia->ia_ifp;
-               ifa_free(&ia->ia_ifa);
+       /* Find the destination interface of the packet. */
+       ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
+       if (ia6 != NULL) {
+               dstifp = ia6->ia_ifp;
+               ifa_free(&ia6->ia_ifa);
        }
-       /* jumbo payload can't contain a fragment header */
+
+       /* Jumbo payload cannot contain a fragment header. */
        if (ip6->ip6_plen == 0) {
                icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
                in6_ifstat_inc(dstifp, ifs6_reass_fail);
-               return IPPROTO_DONE;
+               return (IPPROTO_DONE);
        }
 
        /*
-        * check whether fragment packet's fragment length is
-        * multiple of 8 octets.
+        * Check whether fragment packet's fragment length is a
+        * multiple of 8 octets (unless it is the last one).
         * sizeof(struct ip6_frag) == 8
         * sizeof(struct ip6_hdr) = 40
         */
@@ -267,22 +352,24 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
                icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
                    offsetof(struct ip6_hdr, ip6_plen));
                in6_ifstat_inc(dstifp, ifs6_reass_fail);
-               return IPPROTO_DONE;
+               return (IPPROTO_DONE);
        }
 
        IP6STAT_INC(ip6s_fragments);
        in6_ifstat_inc(dstifp, ifs6_reass_reqd);
 
-       /* offset now points to data portion */
+       /* Offset now points to data portion. */
        offset += sizeof(struct ip6_frag);
 
        /*
-        * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0)
-        * upfront, unrelated to any reassembly.  Just skip the fragment header.
+        * Handle "atomic" fragments (offset and m bit set to 0) upfront,
+        * unrelated to any reassembly.  Still need to remove the frag hdr.
+        * See RFC 6946 and section 4.5 of RFC 8200.
         */
        if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
                /* XXX-BZ we want dedicated counters for this. */
                IP6STAT_INC(ip6s_reassembled);
+               /* XXX-BZ handle correctly. */
                in6_ifstat_inc(dstifp, ifs6_reass_ok);
                *offp = offset;
                m->m_flags |= M_FRAGMENTED;
@@ -296,22 +383,23 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
                    offsetof(struct ip6_hdr, ip6_plen));
                in6_ifstat_inc(dstifp, ifs6_reass_fail);
                IP6STAT_INC(ip6s_fragdropped);
-               return IPPROTO_DONE;
+               return (IPPROTO_DONE);
        }
 
+       /* Generate a hash value for fragment bucket selection. */
        hashkeyp = hashkey;
        memcpy(hashkeyp, &ip6->ip6_src, sizeof(struct in6_addr));
        hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
        memcpy(hashkeyp, &ip6->ip6_dst, sizeof(struct in6_addr));
        hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
        *hashkeyp = ip6f->ip6f_ident;
-       hash = jenkins_hash32(hashkey, nitems(hashkey), V_ip6q_hashseed);
-       hash &= IP6REASS_HMASK;
-       head = IP6Q_HEAD(hash);
-       IP6Q_LOCK(hash);
+       bucket = jenkins_hash32(hashkey, nitems(hashkey), V_ip6qb_hashseed);
+       bucket &= IP6REASS_HMASK;
+       head = IP6QB_HEAD(bucket);
+       IP6QB_LOCK(bucket);
 
        /*
-        * Enforce upper bound on number of fragments.
+        * Enforce upper bound on number of fragments for the entire system.
         * If maxfrag is 0, never accept fragments.
         * If maxfrag is -1, accept all fragments without limitation.
         */
@@ -330,12 +418,12 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
                    )
                        break;
 
+       only_frag = false;
        if (q6 == head) {
-               /*
-                * the first fragment to arrive, create a reassembly queue.
-                */
-               first_frag = 1;
 
+               /* A first fragment to arrive creates a reassembly queue. */
+               only_frag = true;
+
                /*
                 * Enforce upper bound on number of fragmented packets
                 * for which we attempt reassembly;
@@ -345,26 +433,27 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
                 */
                if (V_ip6_maxfragpackets < 0)
                        ;
-               else if (V_ip6q[hash].count >= V_ip6_maxfragbucketsize ||
+               else if (V_ip6qb[bucket].count >= V_ip6_maxfragbucketsize ||
                    atomic_load_int(&V_frag6_nfragpackets) >=
                    (u_int)V_ip6_maxfragpackets)
                        goto dropfrag;
                atomic_add_int(&V_frag6_nfragpackets, 1);
-               q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE,
-                   M_NOWAIT);
+
+               /* Allocate IPv6 fragment packet queue entry. */
+               q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FRAG6,
+                   M_NOWAIT | M_ZERO);
                if (q6 == NULL)
                        goto dropfrag;
-               bzero(q6, sizeof(*q6));
 #ifdef MAC
                if (mac_ip6q_init(q6, M_NOWAIT) != 0) {
-                       free(q6, M_FTABLE);
+                       free(q6, M_FRAG6);
                        goto dropfrag;
                }
                mac_ip6q_create(m, q6);
 #endif
-               frag6_insque_head(q6, head, hash);
+               frag6_insque_head(q6, head, bucket);
 
-               /* ip6q_nxt will be filled afterwards, from 1st fragment */
+               /* ip6q_nxt will be filled afterwards, from 1st fragment. */
                q6->ip6q_down   = q6->ip6q_up = (struct ip6asfrag *)q6;
 #ifdef notyet
                q6->ip6q_nxtp   = (u_char *)nxtp;
@@ -381,7 +470,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
        }
 
        /*
-        * If it's the 1st fragment, record the length of the
+        * If it is the 1st fragment, record the length of the
         * unfragmentable part and the next header of the fragment header.
         */
        fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
@@ -402,18 +491,18 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
                        icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
                            offset - sizeof(struct ip6_frag) +
                            offsetof(struct ip6_frag, ip6f_offlg));
-                       IP6Q_UNLOCK(hash);
+                       IP6QB_UNLOCK(bucket);
                        return (IPPROTO_DONE);
                }
        } else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
                icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
                    offset - sizeof(struct ip6_frag) +
                    offsetof(struct ip6_frag, ip6f_offlg));
-               IP6Q_UNLOCK(hash);
+               IP6QB_UNLOCK(bucket);
                return (IPPROTO_DONE);
        }
        /*
-        * If it's the first fragment, do the above check for each
+        * If it is the first fragment, do the above check for each
         * fragment already stored in the reassembly queue.
         */
        if (fragoff == 0) {
@@ -423,15 +512,18 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
 
                        if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
                            IPV6_MAXPACKET) {
-                               struct mbuf *merr = IP6_REASS_MBUF(af6);
                                struct ip6_hdr *ip6err;
-                               int erroff = af6->ip6af_offset;
+                               struct mbuf *merr;
+                               int erroff;
 
-                               /* dequeue the fragment. */
-                               frag6_deq(af6, hash);
-                               free(af6, M_FTABLE);
+                               merr = IP6_REASS_MBUF(af6);
+                               erroff = af6->ip6af_offset;
 
-                               /* adjust pointer. */
+                               /* Dequeue the fragment. */
+                               frag6_deq(af6, bucket);
+                               free(af6, M_FRAG6);
+
+                               /* Adjust pointer. */
                                ip6err = mtod(merr, struct ip6_hdr *);
 
                                /*
@@ -449,174 +541,113 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
                }
        }
 
-       ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FTABLE,
-           M_NOWAIT);
+       /* Allocate an IPv6 fragment queue entry for this fragmented part. */
+       ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FRAG6,
+           M_NOWAIT | M_ZERO);
        if (ip6af == NULL)
                goto dropfrag;
-       bzero(ip6af, sizeof(*ip6af));
        ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
        ip6af->ip6af_off = fragoff;
        ip6af->ip6af_frglen = frgpartlen;
        ip6af->ip6af_offset = offset;
        IP6_REASS_MBUF(ip6af) = m;
 
-       if (first_frag) {
+       if (only_frag) {
                af6 = (struct ip6asfrag *)q6;
                goto insert;
        }
 
+       /* Do duplicate, condition, and boundary checks. */
        /*
         * Handle ECN by comparing this segment with the first one;
         * if CE is set, do not lose CE.
-        * drop if CE and not-ECT are mixed for the same packet.
+        * Drop if CE and not-ECT are mixed for the same packet.
         */
        ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
        ecn0 = q6->ip6q_ecn;
        if (ecn == IPTOS_ECN_CE) {
                if (ecn0 == IPTOS_ECN_NOTECT) {
-                       free(ip6af, M_FTABLE);
+                       free(ip6af, M_FRAG6);
                        goto dropfrag;
                }
                if (ecn0 != IPTOS_ECN_CE)
                        q6->ip6q_ecn = IPTOS_ECN_CE;
        }
        if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
-               free(ip6af, M_FTABLE);
+               free(ip6af, M_FRAG6);
                goto dropfrag;
        }
 
-       /*
-        * Find a segment which begins after this one does.
-        */
+       /* Find a fragmented part which begins after this one does. */
        for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
             af6 = af6->ip6af_down)
                if (af6->ip6af_off > ip6af->ip6af_off)
                        break;
 
-#if 0
        /*
-        * If there is a preceding segment, it may provide some of
-        * our data already.  If so, drop the data from the incoming
-        * segment.  If it provides all of our data, drop us.
-        */
-       if (af6->ip6af_up != (struct ip6asfrag *)q6) {
-               i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
-                       - ip6af->ip6af_off;
-               if (i > 0) {
-                       if (i >= ip6af->ip6af_frglen)
-                               goto dropfrag;
-                       m_adj(IP6_REASS_MBUF(ip6af), i);
-                       ip6af->ip6af_off += i;
-                       ip6af->ip6af_frglen -= i;
-               }
-       }
-
-       /*
-        * While we overlap succeeding segments trim them or,
-        * if they are completely covered, dequeue them.
-        */
-       while (af6 != (struct ip6asfrag *)q6 &&
-              ip6af->ip6af_off + ip6af->ip6af_frglen > af6->ip6af_off) {
-               i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
-               if (i < af6->ip6af_frglen) {
-                       af6->ip6af_frglen -= i;
-                       af6->ip6af_off += i;
-                       m_adj(IP6_REASS_MBUF(af6), i);
-                       break;
-               }
-               af6 = af6->ip6af_down;
-               m_freem(IP6_REASS_MBUF(af6->ip6af_up));
-               frag6_deq(af6->ip6af_up, hash);
-       }
-#else
-       /*
         * If the incoming framgent overlaps some existing fragments in
-        * the reassembly queue, drop it, since it is dangerous to override
-        * existing fragments from a security point of view.
-        * We don't know which fragment is the bad guy - here we trust
-        * fragment that came in earlier, with no real reason.
-        *
-        * Note: due to changes after disabling this part, mbuf passed to
-        * m_adj() below now does not meet the requirement.
+        * the reassembly queue, drop both the new fragment and the
+        * entire reassembly queue.  However, if the new fragment
+        * is an exact duplicate of an existing fragment, only silently
+        * drop the existing fragment and leave the fragmentation queue
+        * unchanged, as allowed by the RFC.  (RFC 8200, 4.5)
         */
        if (af6->ip6af_up != (struct ip6asfrag *)q6) {
-               i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
-                       - ip6af->ip6af_off;
-               if (i > 0) {
-#if 0                          /* suppress the noisy log */
-                       log(LOG_ERR, "%d bytes of a fragment from %s "
-                           "overlaps the previous fragment\n",
-                           i, ip6_sprintf(ip6buf, &q6->ip6q_src));
-#endif
-                       free(ip6af, M_FTABLE);
+               if (af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen -
+                   ip6af->ip6af_off > 0) {
+                       free(ip6af, M_FRAG6);
                        goto dropfrag;
                }
        }
        if (af6 != (struct ip6asfrag *)q6) {
-               i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
-               if (i > 0) {
-#if 0                          /* suppress the noisy log */
-                       log(LOG_ERR, "%d bytes of a fragment from %s "
-                           "overlaps the succeeding fragment",
-                           i, ip6_sprintf(ip6buf, &q6->ip6q_src));
-#endif
-                       free(ip6af, M_FTABLE);
+               if (ip6af->ip6af_off + ip6af->ip6af_frglen -
+                   af6->ip6af_off > 0) {
+                       free(ip6af, M_FRAG6);
                        goto dropfrag;
                }
        }
-#endif
 
 insert:
 #ifdef MAC
-       if (!first_frag)
+       if (!only_frag)
                mac_ip6q_update(m, q6);
 #endif
 
        /*
-        * Stick new segment in its place;
-        * check for complete reassembly.
-        * If not complete, check fragment limit.
-        * Move to front of packet queue, as we are
-        * the most recently active fragmented packet.
+        * Stick new segment in its place; check for complete reassembly.
+        * If not complete, check fragment limit.  Move to front of packet
+        * queue, as we are the most recently active fragmented packet.
         */
-       frag6_enq(ip6af, af6->ip6af_up, hash);
+       frag6_enq(ip6af, af6->ip6af_up, bucket);
        atomic_add_int(&frag6_nfrags, 1);
        q6->ip6q_nfrag++;
-#if 0 /* xxx */
-       if (q6 != head->ip6q_next) {
-               frag6_remque(q6, hash);
-               frag6_insque_head(q6, head, hash);
-       }
-#endif
-       next = 0;
+       plen = 0;
        for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
             af6 = af6->ip6af_down) {
-               if (af6->ip6af_off != next) {
+               if (af6->ip6af_off != plen) {
                        if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
                                IP6STAT_INC(ip6s_fragdropped);
-                               frag6_freef(q6, hash);
+                               frag6_freef(q6, bucket);
                        }
-                       IP6Q_UNLOCK(hash);
-                       return IPPROTO_DONE;
+                       IP6QB_UNLOCK(bucket);
+                       return (IPPROTO_DONE);
                }
-               next += af6->ip6af_frglen;
+               plen += af6->ip6af_frglen;
        }
        if (af6->ip6af_up->ip6af_mff) {
                if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
                        IP6STAT_INC(ip6s_fragdropped);
-                       frag6_freef(q6, hash);
+                       frag6_freef(q6, bucket);
                }
-               IP6Q_UNLOCK(hash);
-               return IPPROTO_DONE;
+               IP6QB_UNLOCK(bucket);
+               return (IPPROTO_DONE);
        }
 
-       /*
-        * Reassembly is complete; concatenate fragments.
-        */
+       /* Reassembly is complete; concatenate fragments. */
        ip6af = q6->ip6q_down;
        t = m = IP6_REASS_MBUF(ip6af);
        af6 = ip6af->ip6af_down;
-       frag6_deq(ip6af, hash);
+       frag6_deq(ip6af, bucket);
        while (af6 != (struct ip6asfrag *)q6) {
                m->m_pkthdr.csum_flags &=
                    IP6_REASS_MBUF(af6)->m_pkthdr.csum_flags;
@@ -624,13 +655,13 @@ insert:
                    IP6_REASS_MBUF(af6)->m_pkthdr.csum_data;
 
                af6dwn = af6->ip6af_down;
-               frag6_deq(af6, hash);
+               frag6_deq(af6, bucket);
                while (t->m_next)
                        t = t->m_next;
                m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset);
                m_demote_pkthdr(IP6_REASS_MBUF(af6));
                m_cat(t, IP6_REASS_MBUF(af6));
-               free(af6, M_FTABLE);
+               free(af6, M_FRAG6);
                af6 = af6dwn;
        }
 
@@ -638,47 +669,43 @@ insert:
                m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
                    (m->m_pkthdr.csum_data >> 16);
 
-       /* adjust offset to point where the original next header starts */
+       /* Adjust offset to point where the original next header starts. */
        offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
-       free(ip6af, M_FTABLE);
+       free(ip6af, M_FRAG6);
        ip6 = mtod(m, struct ip6_hdr *);
-       ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
+       ip6->ip6_plen = htons((u_short)plen + offset - sizeof(struct ip6_hdr));
        if (q6->ip6q_ecn == IPTOS_ECN_CE)
                ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
        nxt = q6->ip6q_nxt;
-#ifdef notyet
-       *q6->ip6q_nxtp = (u_char)(nxt & 0xff);
-#endif
 
        if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) {
-               frag6_remque(q6, hash);
+               frag6_remque(q6, bucket);
                atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
 #ifdef MAC
                mac_ip6q_destroy(q6);
 #endif
-               free(q6, M_FTABLE);
+               free(q6, M_FRAG6);
                atomic_subtract_int(&V_frag6_nfragpackets, 1);
 
                goto dropfrag;
        }
 
-       /*
-        * Store NXT to the original.
-        */
+       /* Set nxt(-hdr field value) to the original value. */
        m_copyback(m, ip6_get_prevhdr(m, offset), sizeof(uint8_t),
            (caddr_t)&nxt);
 
-       frag6_remque(q6, hash);
+       frag6_remque(q6, bucket);
        atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
 #ifdef MAC
        mac_ip6q_reassemble(q6, m);
        mac_ip6q_destroy(q6);
 #endif
-       free(q6, M_FTABLE);
+       free(q6, M_FRAG6);
        atomic_subtract_int(&V_frag6_nfragpackets, 1);
 
        if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */
-               int plen = 0;
+
+               plen = 0;
                for (t = m; t; t = t->m_next)
                        plen += t->m_len;
                m->m_pkthdr.len = plen;
@@ -697,164 +724,54 @@ insert:
        m_tag_prepend(m, mtag);
 #endif
 
-       IP6Q_UNLOCK(hash);
+       IP6QB_UNLOCK(bucket);
        IP6STAT_INC(ip6s_reassembled);
        in6_ifstat_inc(dstifp, ifs6_reass_ok);
 
 #ifdef RSS
-       /*
-        * Queue/dispatch for reprocessing.
-        */
+       /* Queue/dispatch for reprocessing. */
        netisr_dispatch(NETISR_IPV6_DIRECT, m);
-       return IPPROTO_DONE;
+       return (IPPROTO_DONE);
 #endif
 
-       /*
-        * Tell launch routine the next header
-        */
-
+       /* Tell launch routine the next header. */
        *mp = m;
        *offp = offset;
 
-       return nxt;
+       return (nxt);
 
- dropfrag:
-       IP6Q_UNLOCK(hash);
+dropfrag:
+       IP6QB_UNLOCK(bucket);
        in6_ifstat_inc(dstifp, ifs6_reass_fail);
        IP6STAT_INC(ip6s_fragdropped);
        m_freem(m);
-       return IPPROTO_DONE;
+       return (IPPROTO_DONE);
 }
 
 /*
- * Free a fragment reassembly header and all
- * associated datagrams.
- */
-static void
-frag6_freef(struct ip6q *q6, uint32_t bucket)
-{
-       struct ip6asfrag *af6, *down6;
-
-       IP6Q_LOCK_ASSERT(bucket);
-
-       for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
-            af6 = down6) {
-               struct mbuf *m = IP6_REASS_MBUF(af6);
-
-               down6 = af6->ip6af_down;
-               frag6_deq(af6, bucket);
-
-               /*
-                * Return ICMP time exceeded error for the 1st fragment.
-                * Just free other fragments.
-                */
-               if (af6->ip6af_off == 0) {
-                       struct ip6_hdr *ip6;
-
-                       /* adjust pointer */
-                       ip6 = mtod(m, struct ip6_hdr *);
-
-                       /* restore source and destination addresses */
-                       ip6->ip6_src = q6->ip6q_src;
-                       ip6->ip6_dst = q6->ip6q_dst;
-
-                       icmp6_error(m, ICMP6_TIME_EXCEEDED,
-                                   ICMP6_TIME_EXCEED_REASSEMBLY, 0);
-               } else
-                       m_freem(m);
-               free(af6, M_FTABLE);
-       }
-       frag6_remque(q6, bucket);
-       atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
-#ifdef MAC
-       mac_ip6q_destroy(q6);
-#endif
-       free(q6, M_FTABLE);
-       atomic_subtract_int(&V_frag6_nfragpackets, 1);
-}
-
-/*
- * Put an ip fragment on a reassembly chain.
- * Like insque, but pointers in middle of structure.
- */
-static void
-frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6,
-    uint32_t bucket __unused)
-{
-
-       IP6Q_LOCK_ASSERT(bucket);
-
-       af6->ip6af_up = up6;
-       af6->ip6af_down = up6->ip6af_down;
-       up6->ip6af_down->ip6af_up = af6;
-       up6->ip6af_down = af6;
-}
-
-/*
- * To frag6_enq as remque is to insque.
- */
-static void
-frag6_deq(struct ip6asfrag *af6, uint32_t bucket __unused)
-{
-
-       IP6Q_LOCK_ASSERT(bucket);
-
-       af6->ip6af_up->ip6af_down = af6->ip6af_down;
-       af6->ip6af_down->ip6af_up = af6->ip6af_up;
-}
-
-static void
-frag6_insque_head(struct ip6q *new, struct ip6q *old, uint32_t bucket)
-{
-
-       IP6Q_LOCK_ASSERT(bucket);
-       KASSERT(IP6Q_HEAD(bucket) == old,
-           ("%s: attempt to insert at head of wrong bucket"
-           " (bucket=%u, old=%p)", __func__, bucket, old));
-
-       new->ip6q_prev = old;
-       new->ip6q_next = old->ip6q_next;
-       old->ip6q_next->ip6q_prev= new;
-       old->ip6q_next = new;
-       V_ip6q[bucket].count++;
-}
-
-static void
-frag6_remque(struct ip6q *p6, uint32_t bucket)
-{
-
-       IP6Q_LOCK_ASSERT(bucket);
-
-       p6->ip6q_prev->ip6q_next = p6->ip6q_next;
-       p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
-       V_ip6q[bucket].count--;
-}
-
-/*
  * IPv6 reassembling timer processing;
- * if a timer expires on a reassembly
- * queue, discard it.
+ * if a timer expires on a reassembly queue, discard it.
  */
 void
 frag6_slowtimo(void)
 {
        VNET_ITERATOR_DECL(vnet_iter);
        struct ip6q *head, *q6;
-       int i;
+       uint32_t bucket;
 
        VNET_LIST_RLOCK_NOSLEEP();
        VNET_FOREACH(vnet_iter) {
                CURVNET_SET(vnet_iter);
-               for (i = 0; i < IP6REASS_NHASH; i++) {
-                       IP6Q_LOCK(i);
-                       head = IP6Q_HEAD(i);
+               for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) {
+                       IP6QB_LOCK(bucket);
+                       head = IP6QB_HEAD(bucket);
                        q6 = head->ip6q_next;
                        if (q6 == NULL) {
                                /*
                                 * XXXJTL: This should never happen. This

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"

Reply via email to