Here is a diff that reassembles IPv6 fragments in pf.  In the forward
case, it refragments the packets with the same size to allow Path-MTU
discovery.

With route-to and pfsync there are still some issues regarding
IPv6 fragments.  Everything else, including nat and redirect, should
work.

Please test sending IPv6 fragments through pf.  Much of the IPv4
code has been reused, so test that too.

bluhm


Index: net/pf.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
retrieving revision 1.726
diff -u -p -r1.726 pf.c
--- net/pf.c    14 Feb 2011 11:01:36 -0000      1.726
+++ net/pf.c    20 Feb 2011 21:47:34 -0000
@@ -297,8 +297,6 @@ enum { PF_ICMP_MULTI_NONE, PF_ICMP_MULTI
                        mrm->r->states_cur--;                   \
        } while (0)
 
-static __inline int pf_addr_compare(struct pf_addr *, struct pf_addr *,
-       sa_family_t);
 static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
 static __inline int pf_state_compare_key(struct pf_state_key *,
        struct pf_state_key *);
@@ -315,7 +313,7 @@ RB_GENERATE(pf_state_tree, pf_state_key,
 RB_GENERATE(pf_state_tree_id, pf_state,
     entry_id, pf_state_compare_id);
 
-static __inline int
+__inline int
 pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
 {
        switch (af) {
@@ -6066,12 +6064,13 @@ done:
 
 #ifdef INET6
 int
-pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
+pf_test6(int fwdir, struct ifnet *ifp, struct mbuf **m0,
     struct ether_header *eh)
 {
        struct pfi_kif          *kif;
        u_short                  action, reason = 0;
        struct mbuf             *m = *m0;
+       struct m_tag            *mtag;
        struct ip6_hdr          *h;
        struct pf_rule          *a = NULL, *r = &pf_default_rule;
        struct pf_state         *s = NULL;
@@ -6079,6 +6078,7 @@ pf_test6(int dir, struct ifnet *ifp, str
        struct pf_pdesc          pd;
        union pf_headers         hdrs;
        int                      off, hdrlen;
+       int                      dir = (fwdir == PF_FWD) ? PF_OUT : fwdir;
 
        if (!pf_status.running)
                return (PF_PASS);
@@ -6116,8 +6116,14 @@ pf_test6(int dir, struct ifnet *ifp, str
        if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET)
                return (PF_PASS);
 
+       if (m->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) {
+               m->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED;
+               return (PF_PASS);
+       }
+
        /* packet reassembly */
-       if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
+       if (pf_status.reass &&
+           pf_normalize_ip6(m0, fwdir, kif, &reason, &pd) != PF_PASS) {
                action = PF_DROP;
                goto done;
        }
@@ -6322,6 +6328,11 @@ done:
                        pf_route6(m0, r, dir, kif->pfik_ifp, s);
                break;
        }
+
+       /* if reassembled packet passed, create new fragments */
+       if (pf_status.reass && action == PF_PASS && *m0 && fwdir == PF_FWD &&
+           (mtag = m_tag_find(m, PACKET_TAG_PF_REASSEMBLED, NULL)) != NULL)
+               action = pf_refragment6(m0, mtag, fwdir);
 
        return (action);
 }
Index: net/pf_norm.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf_norm.c,v
retrieving revision 1.128
diff -u -p -r1.128 pf_norm.c
--- net/pf_norm.c       1 Feb 2011 16:10:31 -0000       1.128
+++ net/pf_norm.c       1 Feb 2011 16:15:20 -0000
@@ -58,29 +58,51 @@
 
 #ifdef INET6
 #include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
 #endif /* INET6 */
 
 #include <net/pfvar.h>
 
 struct pf_frent {
-       LIST_ENTRY(pf_frent) fr_next;
-       struct ip *fr_ip;
-       struct mbuf *fr_m;
+       TAILQ_ENTRY(pf_frent) fr_next;
+       struct mbuf     *fe_m;
+       u_int16_t        fe_hdrlen;     /* ipv4 header length with ip options
+                                          ipv6, extension, fragment header */
+       u_int16_t        fe_extoff;     /* last extension header offset or 0 */
+       u_int16_t        fe_len;        /* fragment length */
+       u_int16_t        fe_off;        /* fragment offset */
+       u_int16_t        fe_mff;        /* more fragment flag */
 };
 
-#define PFFRAG_SEENLAST        0x0001          /* Seen the last fragment for this */
+/* keep synced with struct pf_fragment, used in RB_FIND */
+struct pf_fragment_cmp {
+       struct pf_addr  fr_src;
+       struct pf_addr  fr_dst;
+       u_int32_t       fr_id;
+       sa_family_t     fr_af;
+       u_int8_t        fr_proto;
+       u_int8_t        fr_direction;
+};
 
 struct pf_fragment {
+       struct pf_addr  fr_src;         /* ip source address */
+       struct pf_addr  fr_dst;         /* ip destination address */
+       u_int32_t       fr_id;          /* fragment id for reassemble */
+       sa_family_t     fr_af;          /* address family */
+       u_int8_t        fr_proto;       /* protocol of this fragment */
+       u_int8_t        fr_direction;   /* pf packet direction */
+
        RB_ENTRY(pf_fragment) fr_entry;
        TAILQ_ENTRY(pf_fragment) frag_next;
-       struct in_addr  fr_src;
-       struct in_addr  fr_dst;
-       u_int8_t        fr_p;           /* protocol of this fragment */
-       u_int8_t        fr_flags;       /* status flags */
-       u_int16_t       fr_id;          /* fragment id for reassemble */
-       u_int16_t       fr_max;         /* fragment data max */
        u_int32_t       fr_timeout;
-       LIST_HEAD(pf_fragq, pf_frent) fr_queue;
+       u_int16_t       fr_maxlen;      /* maximum length of single fragment */
+       TAILQ_HEAD(pf_fragq, pf_frent) fr_queue;
+};
+
+struct pf_fragment_tag {
+       u_int16_t        ft_hdrlen;     /* header length of reassembled pkt */
+       u_int16_t        ft_extoff;     /* last extension header offset or 0 */
+       u_int16_t        ft_maxlen;     /* maximum fragment payload length */
 };
 
 TAILQ_HEAD(pf_fragqueue, pf_fragment)  pf_fragqueue;
@@ -92,13 +114,21 @@ RB_PROTOTYPE(pf_frag_tree, pf_fragment, 
 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
 
 /* Private prototypes */
-void                    pf_ip2key(struct pf_fragment *, struct ip *);
 void                    pf_remove_fragment(struct pf_fragment *);
 void                    pf_flush_fragments(void);
 void                    pf_free_fragment(struct pf_fragment *);
-struct pf_fragment     *pf_find_fragment(struct ip *, struct pf_frag_tree *);
-int                     pf_reassemble(struct mbuf **, struct pf_fragment **,
-                           struct pf_frent *, int, u_short *);
+struct pf_fragment     *pf_find_fragment(struct pf_fragment_cmp *,
+                           struct pf_frag_tree *);
+struct pf_frent                *pf_create_fragment(u_short *);
+struct pf_fragment     *pf_fillup_fragment(struct pf_fragment_cmp *,
+                           struct pf_frent *, u_short *);
+int                     pf_isfull_fragment(struct pf_fragment *);
+struct mbuf            *pf_join_fragment(struct pf_fragment *);
+int                     pf_reassemble(struct mbuf **, struct ip *, int,
+                           u_short *);
+int                     pf_reassemble6(struct mbuf **, struct ip6_hdr *,
+                           struct ip6_frag *, u_int16_t, u_int16_t, int,
+                           u_short *);
 
 /* Globals */
 struct pool             pf_frent_pl, pf_frag_pl;
@@ -126,18 +156,16 @@ pf_frag_compare(struct pf_fragment *a, s
 {
        int     diff;
 
-       if ((diff = a->fr_id - b->fr_id))
+       if ((diff = a->fr_id - b->fr_id) != 0)
+               return (diff);
+       if ((diff = a->fr_proto - b->fr_proto) != 0)
+               return (diff);
+       if ((diff = a->fr_af - b->fr_af) != 0)
+               return (diff);
+       if ((diff = pf_addr_compare(&a->fr_src, &b->fr_src, a->fr_af)) != 0)
                return (diff);
-       else if ((diff = a->fr_p - b->fr_p))
+       if ((diff = pf_addr_compare(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0)
                return (diff);
-       else if (a->fr_src.s_addr < b->fr_src.s_addr)
-               return (-1);
-       else if (a->fr_src.s_addr > b->fr_src.s_addr)
-               return (1);
-       else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
-               return (-1);
-       else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
-               return (1);
        return (0);
 }
 
@@ -186,11 +214,11 @@ pf_free_fragment(struct pf_fragment *fra
        struct pf_frent         *frent;
 
        /* Free all fragments */
-       for (frent = LIST_FIRST(&frag->fr_queue); frent;
-           frent = LIST_FIRST(&frag->fr_queue)) {
-               LIST_REMOVE(frent, fr_next);
+       for (frent = TAILQ_FIRST(&frag->fr_queue); frent;
+           frent = TAILQ_FIRST(&frag->fr_queue)) {
+               TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
 
-               m_freem(frent->fr_m);
+               m_freem(frent->fe_m);
                pool_put(&pf_frent_pl, frent);
                pf_nfrents--;
        }
@@ -198,24 +226,12 @@ pf_free_fragment(struct pf_fragment *fra
        pf_remove_fragment(frag);
 }
 
-void
-pf_ip2key(struct pf_fragment *key, struct ip *ip)
-{
-       key->fr_p = ip->ip_p;
-       key->fr_id = ip->ip_id;
-       key->fr_src.s_addr = ip->ip_src.s_addr;
-       key->fr_dst.s_addr = ip->ip_dst.s_addr;
-}
-
 struct pf_fragment *
-pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
+pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree)
 {
-       struct pf_fragment       key;
        struct pf_fragment      *frag;
 
-       pf_ip2key(&key, ip);
-
-       frag = RB_FIND(pf_frag_tree, tree, &key);
+       frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key);
        if (frag != NULL) {
                /* XXX Are we sure we want to update the timeout? */
                frag->fr_timeout = time_second;
@@ -236,196 +252,295 @@ pf_remove_fragment(struct pf_fragment *f
        pool_put(&pf_frag_pl, frag);
 }
 
-#define FR_IP_OFF(fr)  ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
-int
-pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
-    struct pf_frent *frent, int mff, u_short *reason)
+struct pf_frent *
+pf_create_fragment(u_short *reason)
 {
-       struct mbuf     *m = *m0, *m2;
-       struct pf_frent *frea, *next;
-       struct pf_frent *frep = NULL;
-       struct ip       *ip = frent->fr_ip;
-       int              hlen = ip->ip_hl << 2;
-       u_int16_t        off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
-       u_int16_t        ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
-       u_int16_t        max = ip_len + off;
-
-       /* Strip off ip header */
-       m->m_data += hlen;
-       m->m_len -= hlen;
+       struct pf_frent *frent;
+
+       frent = pool_get(&pf_frent_pl, PR_NOWAIT);
+       if (frent == NULL) {
+               pf_flush_fragments();
+               frent = pool_get(&pf_frent_pl, PR_NOWAIT);
+               if (frent == NULL) {
+                       REASON_SET(reason, PFRES_MEMORY);
+                       return (NULL);
+               }
+       }
+       pf_nfrents++;
+
+       return (frent);
+}
+
+struct pf_fragment *
+pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
+    u_short *reason)
+{
+       struct pf_frent         *after, *next, *prev;
+       struct pf_fragment      *frag;
+       u_int16_t                total;
+
+       /* No empty fragments */
+       if (frent->fe_len == 0) {
+               DPFPRINTF(LOG_NOTICE, "bad fragment: len 0");
+               goto bad_fragment;
+       }
+
+       /* All fragments are 8 byte aligned */
+       if (frent->fe_mff && (frent->fe_len & 0x7)) {
+               DPFPRINTF(LOG_NOTICE, "bad fragment: mff and len %d",
+                   frent->fe_len);
+               goto bad_fragment;
+       }
+
+       /* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET */
+       if (frent->fe_off + frent->fe_len > IP_MAXPACKET) {
+               DPFPRINTF(LOG_NOTICE, "bad fragment: max packet %d",
+                   frent->fe_off + frent->fe_len);
+               goto bad_fragment;
+       }
+
+       DPFPRINTF(LOG_NOTICE, key->fr_af == AF_INET ?
+           "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d",
+           key->fr_id, frent->fe_off, frent->fe_off + frent->fe_len);
+
+       /* Fully buffer all of the fragments in this fragment queue */
+       frag = pf_find_fragment(key, &pf_frag_tree);
 
        /* Create a new reassembly queue for this packet */
-       if (*frag == NULL) {
-               *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
-               if (*frag == NULL) {
+       if (frag == NULL) {
+               frag = pool_get(&pf_frag_pl, PR_NOWAIT);
+               if (frag == NULL) {
                        pf_flush_fragments();
-                       *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
-                       if (*frag == NULL) {
+                       frag = pool_get(&pf_frag_pl, PR_NOWAIT);
+                       if (frag == NULL) {
                                REASON_SET(reason, PFRES_MEMORY);
                                goto drop_fragment;
                        }
                }
 
-               (*frag)->fr_flags = 0;
-               (*frag)->fr_max = 0;
-               (*frag)->fr_src = frent->fr_ip->ip_src;
-               (*frag)->fr_dst = frent->fr_ip->ip_dst;
-               (*frag)->fr_p = frent->fr_ip->ip_p;
-               (*frag)->fr_id = frent->fr_ip->ip_id;
-               (*frag)->fr_timeout = time_second;
-               LIST_INIT(&(*frag)->fr_queue);
+               *(struct pf_fragment_cmp *)frag = *key;
+               frag->fr_timeout = time_second;
+               frag->fr_maxlen = frent->fe_len;
+               TAILQ_INIT(&frag->fr_queue);
 
-               RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
-               TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
+               RB_INSERT(pf_frag_tree, &pf_frag_tree, frag);
+               TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
 
                /* We do not have a previous fragment */
-               frep = NULL;
-               goto insert;
+               TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
+
+               return (frag);
        }
 
-       /*
-        * Find a fragment after the current one:
-        *  - off contains the real shifted offset.
-        */
-       LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
-               if (FR_IP_OFF(frea) > off)
+       KASSERT(!TAILQ_EMPTY(&frag->fr_queue));
+
+       /* Remember maximum fragment len for refragmentation */
+       if (frent->fe_len > frag->fr_maxlen)
+               frag->fr_maxlen = frent->fe_len;
+
+       /* Maximum data we have seen already */
+       total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
+           TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
+
+       /* Non terminal fragments must have more fragments flag */
+       if (frent->fe_off + frent->fe_len < total && !frent->fe_mff)
+               goto bad_fragment;
+
+       /* Check if we saw the last fragment already */
+       if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) {
+               if (frent->fe_off + frent->fe_len > total ||
+                   (frent->fe_off + frent->fe_len == total && frent->fe_mff))
+                       goto bad_fragment;
+       } else {
+               if (frent->fe_off + frent->fe_len == total && !frent->fe_mff)
+                       goto bad_fragment;
+       }
+
+       /* Find a fragment after the current one */
+       prev = NULL;
+       TAILQ_FOREACH(after, &frag->fr_queue, fr_next) {
+               if (after->fe_off > frent->fe_off)
                        break;
-               frep = frea;
+               prev = after;
        }
 
-       KASSERT(frep != NULL || frea != NULL);
+       KASSERT(prev != NULL || after != NULL);
 
-       if (frep != NULL &&
-           FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
-           4 > off)
-       {
-               u_int16_t       precut;
+       if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) {
+               int     precut;
 
-               precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
-                   frep->fr_ip->ip_hl * 4 - off;
-               if (precut >= ip_len)
+               precut = prev->fe_off + prev->fe_len - frent->fe_off;
+               if (precut >= frent->fe_len)
                        goto bad_fragment;
-               m_adj(frent->fr_m, precut);
                DPFPRINTF(LOG_NOTICE, "overlap -%d", precut);
-               /* Enforce 8 byte boundaries */
-               ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
-               off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
-               ip_len -= precut;
-               ip->ip_len = htons(ip_len);
+               m_adj(prev->fe_m, -precut);
+               prev->fe_len -= precut;
        }
 
-       for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
-           frea = next)
+       for (; after != NULL && frent->fe_off + frent->fe_len > after->fe_off;
+           after = next)
        {
-               u_int16_t       aftercut;
+               int     aftercut;
 
-               aftercut = ip_len + off - FR_IP_OFF(frea);
+               aftercut = frent->fe_off + frent->fe_len - after->fe_off;
                DPFPRINTF(LOG_NOTICE, "adjust overlap %d", aftercut);
-               if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
-                   * 4)
-               {
-                       frea->fr_ip->ip_len =
-                           htons(ntohs(frea->fr_ip->ip_len) - aftercut);
-                       frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
-                           (aftercut >> 3));
-                       m_adj(frea->fr_m, aftercut);
+               if (aftercut < after->fe_len) {
+                       m_adj(frent->fe_m, -aftercut);
+                       frent->fe_len -= aftercut;
                        break;
                }
 
                /* This fragment is completely overlapped, lose it */
-               next = LIST_NEXT(frea, fr_next);
-               m_freem(frea->fr_m);
-               LIST_REMOVE(frea, fr_next);
-               pool_put(&pf_frent_pl, frea);
+               next = TAILQ_NEXT(after, fr_next);
+               m_freem(after->fe_m);
+               TAILQ_REMOVE(&frag->fr_queue, after, fr_next);
+               pool_put(&pf_frent_pl, after);
                pf_nfrents--;
        }
 
- insert:
-       /* Update maximum data size */
-       if ((*frag)->fr_max < max)
-               (*frag)->fr_max = max;
-       /* This is the last segment */
-       if (!mff)
-               (*frag)->fr_flags |= PFFRAG_SEENLAST;
-
-       if (frep == NULL)
-               LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
+       if (prev == NULL)
+               TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
        else
-               LIST_INSERT_AFTER(frep, frent, fr_next);
+               TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
 
-       /* The mbuf is part of the fragment entry, no direct free or access */
-       m = *m0 = NULL;
+       return (frag);
+
+ bad_fragment:
+       REASON_SET(reason, PFRES_FRAG);
+ drop_fragment:
+       pool_put(&pf_frent_pl, frent);
+       pf_nfrents--;
+       return (NULL);
+}
+
+int
+pf_isfull_fragment(struct pf_fragment *frag)
+{
+       struct pf_frent         *frent, *next;
+       u_int16_t                off, total;
 
        /* Check if we are completely reassembled */
-       if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
-               return (PF_PASS);
+       if (TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff)
+               return (0);
+
+       /* Maximum data we have seen already */
+       total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
+           TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
 
        /* Check if we have all the data */
        off = 0;
-       for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
-               next = LIST_NEXT(frep, fr_next);
+       for (frent = TAILQ_FIRST(&frag->fr_queue); frent; frent = next) {
+               next = TAILQ_NEXT(frent, fr_next);
 
-               off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
-               if (off < (*frag)->fr_max &&
-                   (next == NULL || FR_IP_OFF(next) != off))
-               {
+               off += frent->fe_len;
+               if (off < total && (next == NULL || next->fe_off != off)) {
                        DPFPRINTF(LOG_NOTICE,
-                           "missing fragment at %d, next %d, max %d",
-                           off, next == NULL ? -1 : FR_IP_OFF(next),
-                           (*frag)->fr_max);
-                       return (PF_PASS);
+                           "missing fragment at %d, next %d, total %d",
+                           off, next == NULL ? -1 : next->fe_off, total);
+                       return (0);
                }
        }
-       DPFPRINTF(LOG_NOTICE, "%d < %d?", off, (*frag)->fr_max);
-       if (off < (*frag)->fr_max)
-               return (PF_PASS);
+       DPFPRINTF(LOG_NOTICE, "%d < %d?", off, total);
+       if (off < total)
+               return (0);
+       KASSERT(off == total);
 
-       /* We have all the data */
-       frent = LIST_FIRST(&(*frag)->fr_queue);
-       KASSERT(frent != NULL);
-       next = LIST_NEXT(frent, fr_next);
+       return (1);
+}
+
+struct mbuf *
+pf_join_fragment(struct pf_fragment *frag)
+{
+       struct mbuf             *m, *m2;
+       struct pf_frent         *frent, *next;
+
+       frent = TAILQ_FIRST(&frag->fr_queue);
+       next = TAILQ_NEXT(frent, fr_next);
 
        /* Magic from ip_input */
-       ip = frent->fr_ip;
-       m = frent->fr_m;
+       m = frent->fe_m;
        m2 = m->m_next;
        m->m_next = NULL;
        m_cat(m, m2);
        pool_put(&pf_frent_pl, frent);
        pf_nfrents--;
        for (frent = next; frent != NULL; frent = next) {
-               next = LIST_NEXT(frent, fr_next);
+               next = TAILQ_NEXT(frent, fr_next);
 
-               m2 = frent->fr_m;
+               m2 = frent->fe_m;
+               /* Strip off ip header */
+               m_adj(m2, frent->fe_hdrlen);
                pool_put(&pf_frent_pl, frent);
                pf_nfrents--;
                m_cat(m, m2);
        }
 
-       ip->ip_src = (*frag)->fr_src;
-       ip->ip_dst = (*frag)->fr_dst;
-
        /* Remove from fragment queue */
-       pf_remove_fragment(*frag);
-       *frag = NULL;
-       *m0 = m;
-
-       hlen = ip->ip_hl << 2;
-       ip->ip_len = htons(off + hlen);
-       m->m_len += hlen;
-       m->m_data -= hlen;
+       pf_remove_fragment(frag);
+
+       return (m);
+}
+
+int
+pf_reassemble(struct mbuf **m0, struct ip *ip, int dir, u_short *reason)
+{
+       struct mbuf             *m = *m0;
+       struct pf_frent         *frent;
+       struct pf_fragment      *frag;
+       struct pf_fragment_cmp   key;
+       u_int16_t                total, hdrlen;
+
+       /* Get an entry for the fragment queue */
+       if ((frent = pf_create_fragment(reason)) == NULL)
+               return (PF_DROP);
+
+       frent->fe_m = m;
+       frent->fe_hdrlen = ip->ip_hl << 2;
+       frent->fe_extoff = 0;
+       frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2);
+       frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
+       frent->fe_mff = ntohs(ip->ip_off) & IP_MF;
+
+       key.fr_src.v4 = ip->ip_src;
+       key.fr_dst.v4 = ip->ip_dst;
+       key.fr_af = AF_INET;
+       key.fr_proto = ip->ip_p;
+       key.fr_id = ip->ip_id;
+       key.fr_direction = dir;
+
+       if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
+               return (PF_DROP);
+
+       /* The mbuf is part of the fragment entry, no direct free or access */
+       m = *m0 = NULL;
+
+       if (!pf_isfull_fragment(frag))
+               return (PF_PASS);  /* drop because *m0 is NULL, no error */
+
+       /* We have all the data */
+       frent = TAILQ_FIRST(&frag->fr_queue);
+       KASSERT(frent != NULL);
+       total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
+           TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
+       hdrlen = frent->fe_hdrlen;
+
+       m = *m0 = pf_join_fragment(frag);
+       frag = NULL;
 
-       /* some debugging cruft by sklower, below, will go away soon */
-       /* XXX this should be done elsewhere */
        if (m->m_flags & M_PKTHDR) {
                int plen = 0;
-               for (m2 = m; m2; m2 = m2->m_next)
-                       plen += m2->m_len;
+               for (m = *m0; m; m = m->m_next)
+                       plen += m->m_len;
+               m = *m0;
                m->m_pkthdr.len = plen;
        }
 
-       if (hlen + off > IP_MAXPACKET) {
-               DPFPRINTF(LOG_NOTICE, "drop: too big: %d", off);
+       ip = mtod(m, struct ip *);
+       ip->ip_len = htons(hdrlen + total);
+       ip->ip_off &= ~(IP_MF|IP_OFFMASK);
+
+       if (hdrlen + total > IP_MAXPACKET) {
+               DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total);
                ip->ip_len = 0;
                REASON_SET(reason, PFRES_SHORT);
                /* PF_DROP requires a valid mbuf *m0 in pf_test() */
@@ -434,31 +549,201 @@ pf_reassemble(struct mbuf **m0, struct p
 
        DPFPRINTF(LOG_NOTICE, "complete: %p(%d)", m, ntohs(ip->ip_len));
        return (PF_PASS);
+}
+
+#ifdef INET6
+int
+pf_reassemble6(struct mbuf **m0, struct ip6_hdr *ip6, struct ip6_frag *fraghdr,
+    u_int16_t hdrlen, u_int16_t extoff, int dir, u_short *reason)
+{
+       struct mbuf             *m = *m0;
+       struct m_tag            *mtag;
+       struct pf_fragment_tag  *ftag;
+       struct pf_frent         *frent;
+       struct pf_fragment      *frag;
+       struct pf_fragment_cmp   key;
+       int                      off;
+       u_int16_t                total, maxlen;
+       u_int8_t                 proto;
+
+       /* Get an entry for the fragment queue */
+       if ((frent = pf_create_fragment(reason)) == NULL)
+               return (PF_DROP);
+
+       frent->fe_m = m;
+       frent->fe_hdrlen = hdrlen;
+       frent->fe_extoff = extoff;
+       frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen;
+       frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
+       frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG;
+
+       key.fr_src.v6 = ip6->ip6_src;
+       key.fr_dst.v6 = ip6->ip6_dst;
+       key.fr_af = AF_INET6;
+       /* Only the first fragment's protocol is relevant */
+       key.fr_proto = 0;
+       key.fr_id = fraghdr->ip6f_ident;
+       key.fr_direction = dir;
+
+       if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
+               return (PF_DROP);
+
+       /* The mbuf is part of the fragment entry, no direct free or access */
+       m = *m0 = NULL;
+
+       if (!pf_isfull_fragment(frag))
+               return (PF_PASS);  /* drop because *m0 is NULL, no error */
+
+       /* We have all the data */
+       extoff = frent->fe_extoff;
+       maxlen = frag->fr_maxlen;
+       frent = TAILQ_FIRST(&frag->fr_queue);
+       KASSERT(frent != NULL);
+       total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
+           TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
+       hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag);
+
+       m = *m0 = pf_join_fragment(frag);
+       frag = NULL;
+
+       /* Take protocol from first fragment header */
+       if ((m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt),
+           &off)) == NULL)
+               panic("pf_reassemble6: short mbuf chain");
+       proto = *(mtod(m, caddr_t) + off);
+       m = *m0;
+
+       /* Delete frag6 header */
+       if (frag6_deletefraghdr(m, hdrlen) != 0)
+               goto fail;
+
+       if (m->m_flags & M_PKTHDR) {
+               int plen = 0;
+               for (m = *m0; m; m = m->m_next)
+                       plen += m->m_len;
+               m = *m0;
+               m->m_pkthdr.len = plen;
+       }
+
+       if ((mtag = m_tag_get(PACKET_TAG_PF_REASSEMBLED, sizeof(struct
+           pf_fragment_tag), M_NOWAIT)) == NULL)
+               goto fail;
+       ftag = (struct pf_fragment_tag *)(mtag + 1);
+       ftag->ft_hdrlen = hdrlen;
+       ftag->ft_extoff = extoff;
+       ftag->ft_maxlen = maxlen;
+       m_tag_prepend(m, mtag);
+
+       ip6 = mtod(m, struct ip6_hdr *);
+       ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total);
+       if (extoff) {
+               /* Write protocol into next field of last extension header */
+               if ((m = m_getptr(m, extoff + offsetof(struct ip6_ext,
+                   ip6e_nxt), &off)) == NULL)
+                       panic("pf_reassemble6: short mbuf chain");
+               *(mtod(m, caddr_t) + off) = proto;
+               m = *m0;
+       } else
+               ip6->ip6_nxt = proto;
+
+       if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) {
+               DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total);
+               ip6->ip6_plen = 0;
+               REASON_SET(reason, PFRES_SHORT);
+               /* PF_DROP requires a valid mbuf *m0 in pf_test6() */
+               return (PF_DROP);
+       }
+
+       DPFPRINTF(LOG_NOTICE, "complete: %p(%d)", m, ntohs(ip6->ip6_plen));
+       return (PF_PASS);
 
- bad_fragment:
-       REASON_SET(reason, PFRES_FRAG);
- drop_fragment:
-       /* Oops - fail safe - drop packet */
-       pool_put(&pf_frent_pl, frent);
-       pf_nfrents--;
-       /* PF_DROP requires a valid mbuf *m0 in pf_test(), will free later */
+ fail:
+       REASON_SET(reason, PFRES_MEMORY);
+       /* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later */
        return (PF_DROP);
 }
 
 int
-pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
-    struct pf_pdesc *pd)
+pf_refragment6(struct mbuf **m0, struct m_tag *mtag, int dir)
+{
+       struct mbuf             *m = *m0, *t;
+       struct pf_fragment_tag  *ftag = (struct pf_fragment_tag *)(mtag + 1);
+       u_int32_t                mtu;
+       u_int16_t                hdrlen, extoff, maxlen;
+       u_int8_t                 proto;
+       int                      error, action;
+
+       hdrlen = ftag->ft_hdrlen;
+       extoff = ftag->ft_extoff;
+       maxlen = ftag->ft_maxlen;
+       m_tag_delete(m, mtag);
+       mtag = NULL;
+       ftag = NULL;
+
+       if (extoff) {
+               int off;
+
+               /* Use protocol from next field of last extension header */
+               if ((m = m_getptr(m, extoff + offsetof(struct ip6_ext,
+                   ip6e_nxt), &off)) == NULL)
+                       panic("pf_refragment6: short mbuf chain");
+               proto = *(mtod(m, caddr_t) + off);
+               *(mtod(m, caddr_t) + off) = IPPROTO_FRAGMENT;
+               m = *m0;
+       } else {
+               struct ip6_hdr *hdr;
+
+               hdr = mtod(m, struct ip6_hdr *);
+               proto = hdr->ip6_nxt;
+               hdr->ip6_nxt = IPPROTO_FRAGMENT;
+       }
+
+       /*
+        * Maxlen may be less than 8 iff there was only a single
+        * fragment.  As it was fragmented before, add a fragment
+        * header also for a single fragment.  If total or maxlen
+        * is less than 8, ip6_fragment() will return EMSGSIZE and
+        * we drop the packet.
+        */
+
+       mtu = hdrlen + sizeof(struct ip6_frag) + maxlen;
+       error = ip6_fragment(m, hdrlen, proto, mtu);
+
+       m = (*m0)->m_nextpkt;
+       (*m0)->m_nextpkt = NULL;
+       if (error == 0) {
+               /* The first mbuf contains the unfragmented packet */
+               m_freem(*m0);
+               *m0 = NULL;
+               action = PF_PASS;
+       } else {
+               /* Drop expects an mbuf to free */
+               DPFPRINTF(LOG_NOTICE, "refragment error %d", error);
+               action = PF_DROP;
+       }
+       for (t = m; m; m = t) {
+               t = m->m_nextpkt;
+               m->m_nextpkt = NULL;
+               m->m_pkthdr.pf.flags |= PF_TAG_REFRAGMENTED;
+               if (error == 0)
+                       ip6_forward(m, 0);
+               else
+                       m_freem(m);
+       }
+
+       return (action);
+}
+#endif /* INET6 */
+
+int
+pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif,
+    u_short *reason, struct pf_pdesc *pd)
 {
        struct mbuf             *m = *m0;
-       struct pf_frent         *frent;
-       struct pf_fragment      *frag = NULL;
        struct ip               *h = mtod(m, struct ip *);
-       int                      mff = (ntohs(h->ip_off) & IP_MF);
        int                      hlen = h->ip_hl << 2;
        u_int16_t                fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
-       u_int16_t                max;
-       int                      ip_len;
-       int                      ip_off;
+       u_int16_t                mff = (ntohs(h->ip_off) & IP_MF);
 
        /* Check for illegal packets */
        if (hlen < (int)sizeof(struct ip))
@@ -484,48 +769,13 @@ pf_normalize_ip(struct mbuf **m0, int di
         * no-df above, fine. Otherwise drop it.
         */
        if (h->ip_off & htons(IP_DF)) {
-               DPFPRINTF(LOG_NOTICE, "IP_DF");
-               goto bad;
-       }
-
-       ip_len = ntohs(h->ip_len) - hlen;
-       ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
-
-       /* All fragments are 8 byte aligned */
-       if (mff && (ip_len & 0x7)) {
-               DPFPRINTF(LOG_NOTICE, "mff and %d", ip_len);
-               goto bad;
-       }
-
-       /* Respect maximum length */
-       if (fragoff + ip_len > IP_MAXPACKET) {
-               DPFPRINTF(LOG_NOTICE, "max packet %d", fragoff + ip_len);
-               goto bad;
-       }
-       max = fragoff + ip_len;
-
-       /* Fully buffer all of the fragments */
-       frag = pf_find_fragment(h, &pf_frag_tree);
-
-       /* Check if we saw the last fragment already */
-       if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
-           max > frag->fr_max)
-               goto bad;
-
-       /* Get an entry for the fragment queue */
-       frent = pool_get(&pf_frent_pl, PR_NOWAIT);
-       if (frent == NULL) {
-               REASON_SET(reason, PFRES_MEMORY);
+               DPFPRINTF(LOG_NOTICE, "bad fragment: IP_DF");
+               REASON_SET(reason, PFRES_FRAG);
                return (PF_DROP);
        }
-       pf_nfrents++;
-       frent->fr_ip = h;
-       frent->fr_m = m;
 
        /* Returns PF_DROP or *m0 is NULL or completely reassembled mbuf */
-       DPFPRINTF(LOG_NOTICE,
-           "reass frag %d @ %d-%d", h->ip_id, fragoff, max);
-       if (pf_reassemble(m0, &frag, frent, mff, reason) != PF_PASS)
+       if (pf_reassemble(m0, h, dir, reason) != PF_PASS)
                return (PF_DROP);
        m = *m0;
        if (m == NULL)
@@ -548,17 +798,6 @@ pf_normalize_ip(struct mbuf **m0, int di
  drop:
        REASON_SET(reason, PFRES_NORM);
        return (PF_DROP);
-
- bad:
-       DPFPRINTF(LOG_NOTICE, "dropping bad fragment");
-
-       /* Free associated fragments */
-       if (frag != NULL)
-               pf_free_fragment(frag);
-
-       REASON_SET(reason, PFRES_FRAG);
-
-       return (PF_DROP);
 }
 
 #ifdef INET6
@@ -568,13 +807,13 @@ pf_normalize_ip6(struct mbuf **m0, int d
 {
        struct mbuf             *m = *m0;
        struct ip6_hdr          *h = mtod(m, struct ip6_hdr *);
-       int                      off;
        struct ip6_ext           ext;
        struct ip6_opt           opt;
        struct ip6_opt_jumbo     jumbo;
        struct ip6_frag          frag;
        u_int32_t                jumbolen = 0, plen;
-       u_int16_t                fragoff = 0;
+       int                      extoff;
+       int                      off;
        int                      optend;
        int                      ooff;
        u_int8_t                 proto;
@@ -584,6 +823,7 @@ pf_normalize_ip6(struct mbuf **m0, int d
        if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
                goto drop;
 
+       extoff = 0;
        off = sizeof(struct ip6_hdr);
        proto = h->ip6_nxt;
        terminal = 0;
@@ -598,6 +838,7 @@ pf_normalize_ip6(struct mbuf **m0, int d
                        if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
                            NULL, AF_INET6))
                                goto shortpkt;
+                       extoff = off;
                        if (proto == IPPROTO_AH)
                                off += (ext.ip6e_len + 2) * 4;
                        else
@@ -608,6 +849,7 @@ pf_normalize_ip6(struct mbuf **m0, int d
                        if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
                            NULL, AF_INET6))
                                goto shortpkt;
+                       extoff = off;
                        optend = off + (ext.ip6e_len + 1) * 8;
                        ooff = off + sizeof(ext);
                        do {
@@ -657,10 +899,9 @@ pf_normalize_ip6(struct mbuf **m0, int d
        } while (!terminal);
 
        /* jumbo payload option must be present, or plen > 0 */
-       if (ntohs(h->ip6_plen) == 0)
+       plen = ntohs(h->ip6_plen);
+       if (plen == 0)
                plen = jumbolen;
-       else
-               plen = ntohs(h->ip6_plen);
        if (plen == 0)
                goto drop;
        if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
@@ -669,19 +910,26 @@ pf_normalize_ip6(struct mbuf **m0, int d
        return (PF_PASS);
 
  fragment:
-       if (ntohs(h->ip6_plen) == 0 || jumbolen)
-               goto drop;
+       /* jumbo payload packets cannot be fragmented */
        plen = ntohs(h->ip6_plen);
+       if (plen == 0 || jumbolen)
+               goto drop;
+       if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
+               goto shortpkt;
 
        if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
                goto shortpkt;
-       fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
-       if (fragoff + (sizeof(struct ip6_hdr) + plen - off - sizeof(frag)) >
-           IPV6_MAXPACKET)
-               goto badfrag;
+       /* offset now points to data portion */
+       off += sizeof(frag);
+
+       /* Returns PF_DROP or *m0 is NULL or completely reassembled mbuf */
+       if (pf_reassemble6(m0, h, &frag, off, extoff, dir, reason) != PF_PASS)
+               return (PF_DROP);
+       m = *m0;
+       if (m == NULL)
+               return (PF_PASS);
 
-       /* do something about it */
-       /* remember to set pd->flags |= PFDESC_IP_REAS */
+       pd->flags |= PFDESC_IP_REAS;
        return (PF_PASS);
 
  shortpkt:
@@ -690,10 +938,6 @@ pf_normalize_ip6(struct mbuf **m0, int d
 
  drop:
        REASON_SET(reason, PFRES_NORM);
-       return (PF_DROP);
-
- badfrag:
-       REASON_SET(reason, PFRES_FRAG);
        return (PF_DROP);
 }
 #endif /* INET6 */
Index: net/pfvar.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfvar.h,v
retrieving revision 1.320
diff -u -p -r1.320 pfvar.h
--- net/pfvar.h 11 Jan 2011 13:35:58 -0000      1.320
+++ net/pfvar.h 14 Jan 2011 21:30:38 -0000
@@ -58,7 +58,7 @@ struct ip6_hdr;
 #endif
 #endif
 
-enum   { PF_INOUT, PF_IN, PF_OUT };
+enum   { PF_INOUT, PF_IN, PF_OUT, PF_FWD };
 enum   { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT,
          PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_DEFER,
          PF_MATCH, PF_DIVERT, PF_RT };
@@ -1771,6 +1771,7 @@ int       pf_match_port(u_int8_t, u_int16_t, u
 int    pf_match_uid(u_int8_t, uid_t, uid_t, uid_t);
 int    pf_match_gid(u_int8_t, gid_t, gid_t, gid_t);
 
+int    pf_refragment6(struct mbuf **, struct m_tag *mtag, int);
 void   pf_normalize_init(void);
 int    pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *,
            struct pf_pdesc *);
@@ -1871,6 +1872,8 @@ int                pf_tag_packet(struct mbuf *, int, 
 u_int32_t       pf_qname2qid(char *);
 void            pf_qid2qname(u_int32_t, char *);
 void            pf_qid_unref(u_int32_t);
+__inline int    pf_addr_compare(struct pf_addr *, struct pf_addr *,
+                   sa_family_t);
 
 extern struct pf_status        pf_status;
 extern struct pool     pf_frent_pl, pf_frag_pl;
Index: netinet6/frag6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/frag6.c,v
retrieving revision 1.31
diff -u -p -r1.31 frag6.c
--- netinet6/frag6.c    13 Jan 2011 23:36:53 -0000      1.31
+++ netinet6/frag6.c    25 Jan 2011 19:40:14 -0000
@@ -546,23 +546,12 @@ insert:
 #endif
 
        /* Delete frag6 header */
-       if (m->m_len >= offset + sizeof(struct ip6_frag)) {
-               /* This is the only possible case with !PULLDOWN_TEST */
-               ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
-                   offset);
-               m->m_data += sizeof(struct ip6_frag);
-               m->m_len -= sizeof(struct ip6_frag);
-       } else {
-               /* this comes with no copy if the boundary is on cluster */
-               if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) {
-                       frag6_remque(q6);
-                       frag6_nfrags -= q6->ip6q_nfrag;
-                       free(q6, M_FTABLE);
-                       frag6_nfragpackets--;
-                       goto dropfrag;
-               }
-               m_adj(t, sizeof(struct ip6_frag));
-               m_cat(m, t);
+       if (frag6_deletefraghdr(m, offset) != 0) {
+               frag6_remque(q6);
+               frag6_nfrags -= q6->ip6q_nfrag;
+               free(q6, M_FTABLE);
+               frag6_nfragpackets--;
+               goto dropfrag;
        }
 
        /*
@@ -604,6 +593,30 @@ insert:
        m_freem(m);
        IP6Q_UNLOCK();
        return IPPROTO_DONE;
+}
+
+/*
+ * Delete fragment header after the unfragmentable header portions.
+ */
+int
+frag6_deletefraghdr(struct mbuf *m, int offset)
+{
+       struct mbuf *t;
+
+       if (m->m_len >= offset + sizeof(struct ip6_frag)) {
+               ovbcopy(mtod(m, caddr_t), mtod(m, caddr_t) +
+                   sizeof(struct ip6_frag), offset);
+               m->m_data += sizeof(struct ip6_frag);
+               m->m_len -= sizeof(struct ip6_frag);
+       } else {
+               /* this comes with no copy if the boundary is on cluster */
+               if ((t = m_split(m, offset, M_DONTWAIT)) == NULL)
+                       return (ENOBUFS);
+               m_adj(t, sizeof(struct ip6_frag));
+               m_cat(m, t);
+       }
+
+       return (0);
 }
 
 /*
Index: netinet6/ip6_forward.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v
retrieving revision 1.50
diff -u -p -r1.50 ip6_forward.c
--- netinet6/ip6_forward.c      9 Jan 2011 20:25:46 -0000       1.50
+++ netinet6/ip6_forward.c      21 Jan 2011 09:21:29 -0000
@@ -351,7 +351,7 @@ reroute:
 #if NPF > 0
                if ((encif = enc_getif(tdb->tdb_rdomain,
                    tdb->tdb_tap)) == NULL ||
-                   pf_test6(PF_OUT, encif, &m, NULL) != PF_PASS) {
+                   pf_test6(PF_FWD, encif, &m, NULL) != PF_PASS) {
                        splx(s);
                        error = EHOSTUNREACH;
                        m_freem(m);
@@ -381,19 +381,6 @@ reroute:
        }
 #endif /* IPSEC */
 
-       if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) {
-               in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig);
-               if (mcopy) {
-                       u_long mtu;
-
-                       mtu = IN6_LINKMTU(rt->rt_ifp);
-
-                       icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu);
-               }
-               m_freem(m);
-               goto freert;
-       }
-
        if (rt->rt_flags & RTF_GATEWAY)
                dst = (struct sockaddr_in6 *)rt->rt_gateway;
 
@@ -477,7 +464,7 @@ reroute:
                ip6->ip6_dst.s6_addr16[1] = 0;
 
 #if NPF > 0 
-       if (pf_test6(PF_OUT, rt->rt_ifp, &m, NULL) != PF_PASS) {
+       if (pf_test6(PF_FWD, rt->rt_ifp, &m, NULL) != PF_PASS) {
                m_freem(m);
                goto senderr;
        }
@@ -496,6 +483,20 @@ reroute:
                goto reroute;
        }
 #endif 
+
+       /* Check the size after pf_test6 to give pf a chance to refragment. */
+       if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) {
+               in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig);
+               if (mcopy) {
+                       u_long mtu;
+
+                       mtu = IN6_LINKMTU(rt->rt_ifp);
+
+                       icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu);
+               }
+               m_freem(m);
+               goto freert;
+       }
 
        error = nd6_output(rt->rt_ifp, origifp, m, dst, rt);
        if (error) {
Index: netinet6/ip6_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v
retrieving revision 1.118
diff -u -p -r1.118 ip6_output.c
--- netinet6/ip6_output.c       23 Sep 2010 04:45:15 -0000      1.118
+++ netinet6/ip6_output.c       28 Jan 2011 16:10:17 -0000
@@ -156,10 +156,10 @@ ip6_output(struct mbuf *m0, struct ip6_p
     int flags, struct ip6_moptions *im6o, struct ifnet **ifpp, 
     struct inpcb *inp)
 {
-       struct ip6_hdr *ip6, *mhip6;
+       struct ip6_hdr *ip6;
        struct ifnet *ifp, *origifp = NULL;
        struct mbuf *m = m0;
-       int hlen, tlen, len, off;
+       int hlen, tlen;
        struct route_in6 ip6route;
        struct rtentry *rt = NULL;
        struct sockaddr_in6 *dst, dstsock;
@@ -893,9 +893,6 @@ reroute:
                in6_ifstat_inc(ifp, ifs6_out_fragfail);
                goto bad;
        } else {
-               struct mbuf **mnext, *m_frgpart;
-               struct ip6_frag *ip6f;
-               u_int32_t id = htonl(ip6_randomid());
                u_char nextproto;
 #if 0
                struct ip6ctlparam ip6cp;
@@ -920,15 +917,6 @@ reroute:
                    (void *)&ip6cp);
 #endif
 
-               len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
-               if (len < 8) {
-                       error = EMSGSIZE;
-                       in6_ifstat_inc(ifp, ifs6_out_fragfail);
-                       goto bad;
-               }
-
-               mnext = &m->m_nextpkt;
-
                /*
                 * Change the next header field of the last header in the
                 * unfragmentable part.
@@ -947,65 +935,25 @@ reroute:
                        ip6->ip6_nxt = IPPROTO_FRAGMENT;
                }
 
-               /*
-                * Loop through length of segment after first fragment,
-                * make new header and copy data of each part and link onto
-                * chain.
-                */
                m0 = m;
-               for (off = hlen; off < tlen; off += len) {
-                       struct mbuf *mlast;
+               error = ip6_fragment(m0, hlen, nextproto, mtu);
 
-                       MGETHDR(m, M_DONTWAIT, MT_HEADER);
-                       if (!m) {
-                               error = ENOBUFS;
-                               ip6stat.ip6s_odropped++;
-                               goto sendorfree;
-                       }
-                       m->m_pkthdr.rcvif = NULL;
-                       m->m_flags = m0->m_flags & M_COPYFLAGS;
-                       *mnext = m;
-                       mnext = &m->m_nextpkt;
-                       m->m_data += max_linkhdr;
-                       mhip6 = mtod(m, struct ip6_hdr *);
-                       *mhip6 = *ip6;
-                       m->m_len = sizeof(*mhip6);
-                       error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
-                       if (error) {
-                               ip6stat.ip6s_odropped++;
-                               goto sendorfree;
-                       }
-                       ip6f->ip6f_offlg = htons((u_int16_t)((off - hlen) & ~7));
-                       if (off + len >= tlen)
-                               len = tlen - off;
-                       else
-                               ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
-                       mhip6->ip6_plen = htons((u_int16_t)(len + hlen +
-                           sizeof(*ip6f) - sizeof(struct ip6_hdr)));
-                       if ((m_frgpart = m_copy(m0, off, len)) == 0) {
-                               error = ENOBUFS;
-                               ip6stat.ip6s_odropped++;
-                               goto sendorfree;
-                       }
-                       for (mlast = m; mlast->m_next; mlast = mlast->m_next)
-                               ;
-                       mlast->m_next = m_frgpart;
-                       m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
-                       m->m_pkthdr.rcvif = (struct ifnet *)0;
-                       ip6f->ip6f_reserved = 0;
-                       ip6f->ip6f_ident = id;
-                       ip6f->ip6f_nxt = nextproto;
-                       ip6stat.ip6s_ofragments++;
-                       in6_ifstat_inc(ifp, ifs6_out_fragcreat);
+               switch (error) {
+               case 0:
+                       in6_ifstat_inc(ifp, ifs6_out_fragok);
+                       break;
+               case EMSGSIZE:
+                       in6_ifstat_inc(ifp, ifs6_out_fragfail);
+                       break;
+               default:
+                       ip6stat.ip6s_odropped++;
+                       break;
                }
-
-               in6_ifstat_inc(ifp, ifs6_out_fragok);
        }
 
        /*
         * Remove leading garbages.
         */
-sendorfree:
        m = m0->m_nextpkt;
        m0->m_nextpkt = 0;
        m_freem(m0);
@@ -1013,6 +961,8 @@ sendorfree:
                m0 = m->m_nextpkt;
                m->m_nextpkt = 0;
                if (error == 0) {
+                       ip6stat.ip6s_ofragments++;
+                       in6_ifstat_inc(ifp, ifs6_out_fragcreat);
                        error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
                } else
                        m_freem(m);
@@ -1039,6 +989,67 @@ freehdrs:
 bad:
        m_freem(m);
        goto done;
+}
+
+int
+ip6_fragment(struct mbuf *m0, int hlen, u_char nextproto, u_long mtu)
+{
+       struct mbuf     *m, **mnext, *m_frgpart;
+       struct ip6_hdr  *mhip6;
+       struct ip6_frag *ip6f;
+       u_int32_t        id;
+       int              tlen, len, off;
+       int              error;
+
+       id = htonl(ip6_randomid());
+
+       mnext = &m0->m_nextpkt;
+       *mnext = NULL;
+
+       tlen = m0->m_pkthdr.len;
+       len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
+       if (len < 8)
+               return (EMSGSIZE);
+
+       /*
+        * Loop through length of segment after first fragment,
+        * make new header and copy data of each part and link onto
+        * chain.
+        */
+       for (off = hlen; off < tlen; off += len) {
+               struct mbuf *mlast;
+
+               if ((m = m_gethdr(M_DONTWAIT, MT_HEADER)) == NULL)
+                       return (ENOBUFS);
+               *mnext = m;
+               mnext = &m->m_nextpkt;
+               if ((error = m_dup_pkthdr(m, m0)) != 0)
+                       return (error);
+               m->m_data += max_linkhdr;
+               mhip6 = mtod(m, struct ip6_hdr *);
+               *mhip6 = *mtod(m0, struct ip6_hdr *);
+               m->m_len = sizeof(*mhip6);
+               if ((error = ip6_insertfraghdr(m0, m, hlen, &ip6f)) != 0)
+                       return (error);
+               ip6f->ip6f_offlg = htons((u_int16_t)((off - hlen) & ~7));
+               if (off + len >= tlen)
+                       len = tlen - off;
+               else
+                       ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
+               mhip6->ip6_plen = htons((u_int16_t)(len + hlen +
+                   sizeof(*ip6f) - sizeof(struct ip6_hdr)));
+               if ((m_frgpart = m_copym(m0, off, len, M_DONTWAIT)) == NULL)
+                       return (ENOBUFS);
+               for (mlast = m; mlast->m_next; mlast = mlast->m_next)
+                       ;
+               mlast->m_next = m_frgpart;
+               m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
+               ip6f->ip6f_reserved = 0;
+               ip6f->ip6f_ident = id;
+               ip6f->ip6f_nxt = nextproto;
+       }
+
+       return (0);
 }
 
 int
Index: netinet6/ip6_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_var.h,v
retrieving revision 1.38
diff -u -p -r1.38 ip6_var.h
--- netinet6/ip6_var.h  21 Dec 2010 13:12:59 -0000      1.38
+++ netinet6/ip6_var.h  25 Jan 2011 19:40:36 -0000
@@ -290,6 +290,7 @@ void        ip6_forward(struct mbuf *, int);
 void   ip6_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in6 *);
 int    ip6_output(struct mbuf *, struct ip6_pktopts *, struct route_in6 *, int,
            struct ip6_moptions *, struct ifnet **, struct inpcb *);
+int    ip6_fragment(struct mbuf *, int, u_char, u_long);
 int    ip6_ctloutput(int, struct socket *, int, int, struct mbuf **);
 int    ip6_raw_ctloutput(int, struct socket *, int, int, struct mbuf **);
 void   ip6_initpktopts(struct ip6_pktopts *);
@@ -303,6 +304,7 @@ int route6_input(struct mbuf **, int *, 
 
 void   frag6_init(void);
 int    frag6_input(struct mbuf **, int *, int);
+int    frag6_deletefraghdr(struct mbuf *, int);
 void   frag6_slowtimo(void);
 void   frag6_drain(void);
 
Index: sys/mbuf.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.144
diff -u -p -r1.144 mbuf.h
--- sys/mbuf.h  5 Nov 2010 15:17:50 -0000       1.144
+++ sys/mbuf.h  6 Jan 2011 13:46:33 -0000
@@ -91,6 +91,7 @@ struct pkthdr_pf {
 #define        PF_TAG_DIVERTED                 0x08
 #define        PF_TAG_DIVERTED_PACKET          0x10
 #define        PF_TAG_REROUTE                  0x20
+#define        PF_TAG_REFRAGMENTED             0x40    /* refragmented ipv6 packet */
 
 /* record/packet header in first mbuf of chain; valid if M_PKTHDR set */
 struct pkthdr {
@@ -445,5 +446,6 @@ struct m_tag *m_tag_next(struct mbuf *, 
 #define PACKET_TAG_DLT                 0x0100 /* data link layer type */
 #define PACKET_TAG_PF_DIVERT           0x0200 /* pf(4) diverted packet */
 #define PACKET_TAG_PIPEX               0x0400 /* pipex context XXX */
+#define PACKET_TAG_PF_REASSEMBLED      0x0800 /* pf reassembled ipv6 packet */
 
 #endif

Reply via email to