Hi, 

On 16/06/2015, at 1:09 PM, Richard Procter wrote:
>> - I was unable to test af-to, which does a lot of packet fiddling.
> I've now tested this without obvious issue. 

I neglected checksum regeneration within icmp af-to, which masked a 
couple of icmp af-to errata in my last patch.

I've re-included the entire patch refreshed against HEAD below. 
(Thanks to whoever mentioned 'quilt' the other day!) 

Two further diffs then 0) fix the errata and 1) reintroduce checksum 
modification for icmp af-to. 

I see no remaining regeneration cases in PF. 

Note: Checksumless IPv4 UDP packets, illegal under IPv6, are now 
checksummed on af-to IPv6. This improves on HEAD. 

Note: pf_translate_af() flushes pd->pcksum to mbuf by flushing the  
entire transport header. Simple but possibly suboptimal; you may
wish to do it another way.  

testing: 

$4 IPv4 -> $6 IPv6 
    TCP:                ssh $4 --> af-to $6 [good]
    ICMPv4->v6:         ping $4 --> af-to $6 [good]
    UDP, ICMPv6->v4 quoting UDP: traceroute $4 --> af-to $6 [good] 
    Checksumless UDP:   traceroute -x $4 --> af-to $6 [good] 

$6 IPv6 -> $4 IPv4
    TCP:                ssh $6 --> af-to $4 [good]
    ICMPv6:             ping6 $6 --> af-to $4 [good]
    UDP, ICMPv4->v6 quoting UDP: traceroute6 $6 --> af-to $4 [good]

best, 
Richard. 

To apply: 
# cd /src/sys/net
# cat - | patch 

---> Rename pf_change_a() -> pf_change_32_unaligned() to 
prepare for address-specific pf_change_a()

Index: net/pf.c
===================================================================
--- net.orig/pf.c
+++ net/pf.c
@@ -1664,7 +1664,7 @@ pf_change_ap(struct pf_pdesc *pd, struct
 
 /* Changes a u_int32_t.  Uses a void * so there are no align restrictions */
 void
-pf_change_a(struct pf_pdesc *pd, void *a, u_int32_t an)
+pf_change_32_unaligned(struct pf_pdesc *pd, void *a, u_int32_t an)
 {
        if (pd->csum_status == PF_CSUM_UNKNOWN)
                pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off,
@@ -2273,10 +2273,10 @@ pf_modulate_sack(struct pf_pdesc *pd, st
                                for (i = 2; i + TCPOLEN_SACK <= olen;
                                    i += TCPOLEN_SACK) {
                                        memcpy(&sack, &opt[i], sizeof(sack));
-                                       pf_change_a(pd, &sack.start,
+                                       pf_change_32_unaligned(pd, &sack.start,
                                            htonl(ntohl(sack.start) -
                                            dst->seqdiff));
-                                       pf_change_a(pd, &sack.end,
+                                       pf_change_32_unaligned(pd, &sack.end,
                                            htonl(ntohl(sack.end) -
                                            dst->seqdiff));
                                        memcpy(&opt[i], &sack, sizeof(sack));
@@ -3484,7 +3484,7 @@ pf_create_state(struct pf_pdesc *pd, str
                        if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
                            0)
                                s->src.seqdiff = 1;
-                       pf_change_a(pd, &th->th_seq,
+                       pf_change_32_unaligned(pd, &th->th_seq,
                            htonl(s->src.seqlo + s->src.seqdiff));
                        *rewrite = 1;
                } else
@@ -3680,12 +3680,12 @@ pf_translate(struct pf_pdesc *pd, struct
 #endif /* INET6 */
                } else {
                        if (PF_ANEQ(saddr, pd->src, pd->af)) {
-                               pf_change_a(pd, &pd->src->v4.s_addr,
+                               pf_change_32_unaligned(pd, &pd->src->v4.s_addr,
                                    saddr->v4.s_addr);
                                rewrite = 1;
                        }
                        if (PF_ANEQ(daddr, pd->dst, pd->af)) {
-                               pf_change_a(pd, &pd->dst->v4.s_addr,
+                               pf_change_32_unaligned(pd, &pd->dst->v4.s_addr,
                                    daddr->v4.s_addr);
                                rewrite = 1;
                        }
@@ -3745,12 +3745,12 @@ pf_translate(struct pf_pdesc *pd, struct
                switch (pd->af) {
                case AF_INET:
                        if (!afto && PF_ANEQ(saddr, pd->src, pd->af)) {
-                               pf_change_a(pd, &pd->src->v4.s_addr,
+                               pf_change_32_unaligned(pd, &pd->src->v4.s_addr,
                                    saddr->v4.s_addr);
                                rewrite = 1;
                        }
                        if (!afto && PF_ANEQ(daddr, pd->dst, pd->af)) {
-                               pf_change_a(pd, &pd->dst->v4.s_addr,
+                               pf_change_32_unaligned(pd, &pd->dst->v4.s_addr,
                                    daddr->v4.s_addr);
                                rewrite = 1;
                        }
@@ -3813,8 +3813,8 @@ pf_tcp_track_full(struct pf_pdesc *pd, s
                        while ((src->seqdiff = arc4random() - seq) == 0)
                                ;
                        ack = ntohl(th->th_ack) - dst->seqdiff;
-                       pf_change_a(pd, &th->th_seq, htonl(seq + src->seqdiff));
-                       pf_change_a(pd, &th->th_ack, htonl(ack));
+                       pf_change_32_unaligned(pd, &th->th_seq, htonl(seq + src->seqdiff));
+                       pf_change_32_unaligned(pd, &th->th_ack, htonl(ack));
                        *copyback = 1;
                } else {
                        ack = ntohl(th->th_ack);
@@ -3865,8 +3865,8 @@ pf_tcp_track_full(struct pf_pdesc *pd, s
                ack = ntohl(th->th_ack) - dst->seqdiff;
                if (src->seqdiff) {
                        /* Modulate sequence numbers */
-                       pf_change_a(pd, &th->th_seq, htonl(seq + src->seqdiff));
-                       pf_change_a(pd, &th->th_ack, htonl(ack));
+                       pf_change_32_unaligned(pd, &th->th_seq, htonl(seq + src->seqdiff));
+                       pf_change_32_unaligned(pd, &th->th_ack, htonl(ack));
                        *copyback = 1;
                }
                end = seq + pd->p_len;
@@ -4532,12 +4532,12 @@ pf_test_state_icmp(struct pf_pdesc *pd,
 #endif /* INET6 */
                                if (!afto && PF_ANEQ(pd->src,
                                    &nk->addr[sidx], AF_INET))
-                                       pf_change_a(pd, &saddr->v4.s_addr,
+                                       pf_change_32_unaligned(pd, &saddr->v4.s_addr,
                                            nk->addr[sidx].v4.s_addr);
 
                                if (!afto && PF_ANEQ(pd->dst,
                                    &nk->addr[didx], AF_INET)) {
-                                       pf_change_a(pd, &daddr->v4.s_addr,
+                                       pf_change_32_unaligned(pd, &daddr->v4.s_addr,
                                            nk->addr[didx].v4.s_addr);
                                        pd->destchg = 1;
                                }
@@ -4729,7 +4729,7 @@ pf_test_state_icmp(struct pf_pdesc *pd,
                        /* Demodulate sequence number */
                        seq = ntohl(th.th_seq) - src->seqdiff;
                        if (src->seqdiff) {
-                               pf_change_a(pd, &th.th_seq, htonl(seq));
+                               pf_change_32_unaligned(pd, &th.th_seq, htonl(seq));
                                copyback = 1;
                        }
 
Index: net/pf_norm.c
===================================================================
--- net.orig/pf_norm.c
+++ net/pf_norm.c
@@ -1062,7 +1062,7 @@ pf_normalize_tcp_stateful(struct pf_pdes
                                            (src->scrub->pfss_flags &
                                            PFSS_TIMESTAMP)) {
                                                tsval = ntohl(tsval);
-                                               pf_change_a(pd, &opt[2],
+                                               pf_change_32_unaligned(pd, &opt[2],
                                                    htonl(tsval +
                                                    src->scrub->pfss_ts_mod));
                                                copyback = 1;
@@ -1076,7 +1076,7 @@ pf_normalize_tcp_stateful(struct pf_pdes
                                            PFSS_TIMESTAMP)) {
                                                tsecr = ntohl(tsecr)
                                                    - dst->scrub->pfss_ts_mod;
-                                               pf_change_a(pd, &opt[6],
+                                               pf_change_32_unaligned(pd, &opt[6],
                                                    htonl(tsecr));
                                                copyback = 1;
                                        }
Index: net/pfvar.h
===================================================================
--- net.orig/pfvar.h
+++ net/pfvar.h
@@ -1810,7 +1810,7 @@ void      pf_addr_inc(struct pf_addr *, sa_fa
 
 void   *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *,
            sa_family_t);
-void   pf_change_a(struct pf_pdesc *, void *, u_int32_t);
+void   pf_change_32_unaligned(struct pf_pdesc *, void *, u_int32_t);
 int    pf_check_proto_cksum(struct pf_pdesc *, int, int, u_int8_t,
            sa_family_t);
 int    pflog_packet(struct pf_pdesc *, u_int8_t, struct pf_rule *,

---> Restore checksum modification of modified packets.

Index: net/pf.c
===================================================================
--- net.orig/pf.c
+++ net/pf.c
@@ -145,7 +145,14 @@ void                        pf_init_threshold(struct pf_thre
                            u_int32_t);
 void                    pf_add_threshold(struct pf_threshold *);
 int                     pf_check_threshold(struct pf_threshold *);
-
+void                    pf_cksum_fixup(u_int16_t *, u_int16_t, u_int16_t,
+                           u_int8_t);
+void                    pf_cksum_fixup_a(u_int16_t *, const struct pf_addr *,
+                           const struct pf_addr *, sa_family_t, u_int8_t);
+void                    pf_change_32(struct pf_pdesc *, u_int32_t *,
+                           u_int32_t);
+void                    pf_change_a(struct pf_pdesc *, struct pf_addr *,
+                           struct pf_addr *, sa_family_t, sa_family_t);
 void                    pf_change_ap(struct pf_pdesc *, struct pf_addr *,
                            u_int16_t *, struct pf_addr *, u_int16_t,
                            sa_family_t);
@@ -162,7 +169,7 @@ int                  pf_change_icmp_af(struct mbuf *,
                            struct pf_pdesc *, struct pf_pdesc *,
                            struct pf_addr *, struct pf_addr *, sa_family_t,
                            sa_family_t);
-int                     pf_translate_icmp_af(int, void *);
+int                     pf_translate_icmp_af(struct pf_pdesc*, int, void *);
 void                    pf_send_tcp(const struct pf_rule *, sa_family_t,
                            const struct pf_addr *, const struct pf_addr *,
                            u_int16_t, u_int16_t, u_int32_t, u_int32_t,
@@ -286,6 +293,8 @@ static __inline int pf_state_compare_key
        struct pf_state_key *);
 static __inline int pf_state_compare_id(struct pf_state *,
        struct pf_state *);
+static __inline void pf_cksum_uncover(u_int16_t *, u_int16_t, u_int8_t);
+static __inline void pf_cksum_cover(u_int16_t *, u_int16_t, u_int8_t);
 
 struct pf_src_tree tree_src_tracking;
 
@@ -1649,37 +1658,248 @@ pf_addr_wrap_neq(struct pf_addr_wrap *aw
        }
 }
 
+/* This function, given arguments of one endian, is invariant over the
+ * endian of the host. Why?
+ *
+ * Define the unary transpose operator ~ on a bitstring via python slice
+ * notation as lambda m: m[X:] + m[:X] , for some constant X
+ *
+ * Th. ~ distributes over ones-complement addition, denoted by +_1, i.e.
+ *
+ *     ~m +_1 ~n  =  ~(m +_1 n)    (for all bitstrings m,n of equal length)
+ *
+ * Proof. Regard the bitstrings in m +_1 n as split at X, forming at
+ * most two 'half-adds'. Under ones-complement addition, each half-add
+ * carries to the other, so the sum of each half-add is unaffected by
+ * their relative order. Therefore:
+ *
+ *     ~m +_1 ~n
+ *   =    { half-adds invariant under transposition }
+ *     ~s
+ *   =    { substitute }
+ *     ~(m +_1 n)                   [end of proof]
+ *
+ * Th. Summing two in-memory ones-complement 16-bit variables m,n
+ * on a machine with the converse endian does not alter the result.
+ *
+ * Proof.
+ *        { converse machine endian: load/store transposes, X := 8 }
+ *     ~(~m +_1 ~n)
+ *   =    { ~ over +_1 }
+ *     ~~m +_1 ~~n
+ *   =    { ~ is an involution }
+ *      m +_1 n                     [end of proof]
+ */
+void
+pf_cksum_fixup(u_int16_t *cksum, u_int16_t was, u_int16_t now,
+    u_int8_t proto)
+{
+       u_int32_t l;
+       const int udp = proto == IPPROTO_UDP;
+
+       if (udp && *cksum == 0x0000)
+               return;
+
+       l = *cksum + was - now;
+       l = ((l >> 16) + (l & 0xffff))  &  0xffff;
+
+       if (udp && l == 0x0000)
+               l = 0xffff;
+
+        *cksum = (u_int16_t)(l);
+}
+
+/* pre: coverage(cksum) covers coverage(cksum_covered) */
+static __inline void
+pf_cksum_uncover(u_int16_t *cksum, u_int16_t cksum_covered, u_int8_t proto)
+{
+       pf_cksum_fixup(cksum, ~cksum_covered, 0x0, proto);
+}
+
+/* pre: disjoint(coverage(cksum), coverage(cksum_uncovered)) */
+static __inline void
+pf_cksum_cover(u_int16_t *cksum, u_int16_t cksum_uncovered, u_int8_t proto)
+{
+       pf_cksum_fixup(cksum, 0x0, ~cksum_uncovered, proto);
+}
+
+/* pre: changes are 16-bit aligned within the packet
+ *
+ * We emulate 16-bit ones-complement arithmetic by conserving its carries,
+ * which twos-complement otherwise discards, in the upper 16 bits of l.
+ * These accumulated carries when added to the lower 16-bits then
+ * complete the ones-complement sum.
+ *
+ * Note, the accumulator, despite l being unsigned, supports net-negative
+ * carries:
+ *
+ * Arithmetic or assignment on n unsigned bits is modulo 2^n.
+ * Def. x mod y  =  x - (x//y)*y   for integer x,y
+ *
+ * Th. (x + (y mod z)) mod z
+ *    =  { def mod }
+ *     (x + y - (y//z)*z) mod z
+ *    =  { (x + y*z) mod z = x mod z }
+ *     (x + y) mod z   (0)
+ *
+ * Now, the value of the unsigned m-bit accumulator having assigned
+ * integer x to it is (x mod 2^m). Added to the sum, we have:
+ *
+ *   (sum + (x mod 2^m)) mod 2^n
+ * =     { accumulator same width as sum; m = n }
+ *   (sum + (x mod 2^n)) mod 2^n
+ * =     { (0) }
+ *   (sum + x) mod 2^n
+ *
+ * ... and when x < 0 this equals (sum - |x|) mod 2^n
+ *
+ * The scheme is therefore correct over a range of at least plus or
+ * minus 2^16 - 1 accumulated carries, after which the accumulator
+ * wraps. This far exceeds the worst case below of plus or minus 8.
+ */
+void
+pf_cksum_fixup_a(u_int16_t *cksum, const struct pf_addr *a,
+    const struct pf_addr *an, sa_family_t af, u_int8_t proto)
+{
+       u_int32_t        l;
+       const u_int16_t *n = an->addr16;
+       const u_int16_t *o = a->addr16;
+       const int        udp = proto == IPPROTO_UDP;
+
+       switch (af) {
+       case AF_INET:
+               l = *cksum + o[0] - n[0] + o[1] - n[1];
+               break;
+#ifdef INET6
+       case AF_INET6:
+               l = *cksum + o[0] - n[0] + o[1] - n[1] + o[2] - n[2] +
+                   o[3] - n[3] + o[4] - n[4] + o[5] - n[5] + o[6] -
+                   n[6] + o[7] - n[7];
+               break;
+#endif /* INET6 */
+       }
+
+       if (udp && *cksum == 0x0000)
+               return;
+
+       l = ((l >> 16) + (l & 0xffff))  &  0xffff;
+
+       if (udp && l == 0x0000)
+               l = 0xffff;
+
+       *cksum = (u_int16_t)(l);
+}
+
+void
+pf_change_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi)
+{
+       u_int16_t new = hi ? ( v << 8) :  v;
+       u_int16_t old = hi ? (*f << 8) : *f;
+
+       pf_cksum_fixup(pd->pcksum, htons(old), htons(new), pd->proto);
+       *f = v;
+}
+
+/* pre: *f is 16-bit aligned within its packet */
+void
+pf_change_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v)
+{
+       pf_cksum_fixup(pd->pcksum, *f, v, pd->proto);
+       *f = v;
+}
+
+void
+pf_change_16_unaligned(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi)
+{
+       u_int8_t *fb = (u_int8_t*)f;
+       u_int8_t *vb = (u_int8_t*)&v;
+
+       if (hi && ALIGNED_POINTER(f, u_int16_t)) {
+               pf_change_16(pd, f, v); /* optimise */
+               return;
+       }
+
+       pf_change_8(pd, fb++, *vb++, hi);
+       pf_change_8(pd, fb++, *vb++,!hi);
+}
+
+/* pre: *f is 16-bit aligned within its packet */
+void
+pf_change_32(struct pf_pdesc *pd, u_int32_t *f, u_int32_t v)
+{
+       u_int16_t *pc = pd->pcksum;
+
+       pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), pd->proto);
+       pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), pd->proto);
+       *f = v;
+}
+
+/* pre: *a is 16-bit aligned within its packet */
+void
+pf_change_a(struct pf_pdesc *pd, struct pf_addr *a, struct pf_addr *an,
+           sa_family_t af, sa_family_t naf)
+{
+       static struct pf_addr    zero;
+       u_int16_t               *pc = pd->pcksum;
+
+       switch (pd->proto) {
+       case IPPROTO_TCP:       /* FALLTHROUGH */
+       case IPPROTO_UDP:
+       case IPPROTO_ICMPV6:
+               if (af == naf) {
+                       pf_cksum_fixup_a(pc, a, an, af, pd->proto);
+               }
+               else {
+                       /* "cksum_uncover_a(pc, a)" */
+                       /* "cksum_cover_a(pc, an)" */
+                       pf_cksum_fixup_a(pc, a, &zero, af, pd->proto);
+                       pf_cksum_fixup_a(pc, &zero, an, naf, pd->proto);
+               }
+               break;
+
+       case IPPROTO_ICMP:      /* ICMPv4 has no pseudo-header */
+       default:
+               break;
+       }
+
+       if (af == naf)
+               PF_ACPY(a, an, naf);
+}
+
 void
 pf_change_ap(struct pf_pdesc *pd, struct pf_addr *a, u_int16_t *p,
     struct pf_addr *an, u_int16_t pn, sa_family_t naf)
 {
-       if (pd->csum_status == PF_CSUM_UNKNOWN)
-               pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off,
-                   pd->proto, pd->af);
-       if (pd->af == naf)
-               PF_ACPY(a, an, naf);
-       if (p != NULL)
+       if (p != NULL) {
+               pf_cksum_fixup(pd->pcksum, *p, pn, pd->proto);
                *p = pn;
+       }
+
+       pf_change_a(pd, a, an, pd->af, naf);
 }
 
-/* Changes a u_int32_t.  Uses a void * so there are no align restrictions */
 void
-pf_change_32_unaligned(struct pf_pdesc *pd, void *a, u_int32_t an)
+pf_change_32_unaligned(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi)
 {
-       if (pd->csum_status == PF_CSUM_UNKNOWN)
-               pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off,
-                   pd->proto, pd->af);
-       memcpy(a, &an, sizeof(u_int32_t));
+       u_int8_t *fb = (u_int8_t*)f;
+       u_int8_t *vb = (u_int8_t*)&v;
+
+       if (hi && ALIGNED_POINTER(f, u_int32_t)) {
+               pf_change_32(pd, f, v); /* optimise */
+               return;
+       }
+
+       pf_change_8(pd, fb++, *vb++, hi);
+       pf_change_8(pd, fb++, *vb++,!hi);
+       pf_change_8(pd, fb++, *vb++, hi);
+       pf_change_8(pd, fb++, *vb++,!hi);
 }
 
 #ifdef INET6
 void
 pf_change_a6(struct pf_pdesc *pd, struct pf_addr *a, struct pf_addr *an)
 {
-       if (pd->csum_status == PF_CSUM_UNKNOWN)
-               pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off,
-                   pd->proto, pd->af);
-       PF_ACPY(a, an, AF_INET6);
+       pf_change_a(pd, a, an, AF_INET6, AF_INET6);
 }
 #endif /* INET6 */
 
@@ -1861,24 +2081,27 @@ pf_icmp_mapping(struct pf_pdesc *pd, u_i
        return (0);  /* These types match to their own state */
 }
 
+/* pf_change_ap_icmp, allow for af/ ? */
 void
 pf_change_icmp(struct pf_pdesc *pd, struct pf_addr *ia, u_int16_t *ip,
     struct pf_addr *oa, struct pf_addr *na, u_int16_t np, sa_family_t af)
 {
-       if (pd->csum_status == PF_CSUM_UNKNOWN)
-               pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off,
-                   pd->proto, pd->af);
+        /* note: doesn't trouble to fixup quoted checksums, if any */
 
-       /* Change inner protocol port */
-       if (ip != NULL)
+       /* change quoted protocol port */
+       if (ip != NULL) {
+               pf_cksum_fixup(pd->pcksum, *ip, np, pd->proto);
                *ip = np;
+       }
 
-       /* Change inner ip address */
+       /* change quoted ip address */
+       pf_cksum_fixup_a(pd->pcksum, ia, na, af, pd->proto);
        PF_ACPY(ia, na, af);
 
-       /* Outer ip address, fix outer icmpv6 checksum, if necessary. */
-       if (oa)
-               PF_ACPY(oa, na, af);
+       /* change outer ip address */
+       if (oa) {
+               pf_change_a(pd, oa, na, af, af);
+       }
 }
 
 #if INET6
@@ -1891,10 +2114,6 @@ pf_translate_af(struct pf_pdesc *pd)
        struct icmp6_hdr        *icmp;
        int                      hlen;
 
-       if (pd->csum_status == PF_CSUM_UNKNOWN)
-               pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off,
-                   pd->proto, pd->af);
-
        hlen = pd->naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
 
        /* trim the old header */
@@ -1966,31 +2185,27 @@ pf_change_icmp_af(struct mbuf *m, int of
        struct ip6_hdr          *ip6;
        int                      hlen, olen, mlen;
 
-       if (pd->csum_status == PF_CSUM_UNKNOWN)
-               pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off,
-                   pd->proto, pd->af);
-
        if (af == naf || (af != AF_INET && af != AF_INET6) ||
            (naf != AF_INET && naf != AF_INET6))
                return (-1);
 
-       /* split the mbuf chain on the inner ip/ip6 header boundary */
+       /* split the mbuf chain on the quoted ip/ip6 header boundary */
        if ((n = m_split(m, off, M_DONTWAIT)) == NULL)
                return (-1);
 
-       /* old header */
+       /* old quoted header */
        olen = pd2->off - off;
-       /* new header */
+       /* new quoted header */
        hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
 
-       /* trim old header */
+       /* trim old quoted header */
+       pf_cksum_uncover(pd->pcksum, in_cksum(n, olen << 2), pd->proto);
        m_adj(n, olen);
 
-       /* prepend a new one */
+       /* prepend a new, translated, quoted header */
        if ((M_PREPEND(n, hlen, M_DONTWAIT)) == NULL)
                return (-1);
 
-       /* translate inner ip/ip6 header */
        switch (naf) {
        case AF_INET:
                ip4 = mtod(n, struct ip *);
@@ -2031,11 +2246,16 @@ pf_change_icmp_af(struct mbuf *m, int of
        pd2->off += hlen - olen;
        pd->tot_len += hlen - olen;
 
-       /* merge modified inner packet with the original header */
+       /* reattach modified quoted packet to outer header */
        mlen = n->m_pkthdr.len;
        m_cat(m, n);
        m->m_pkthdr.len += mlen;
 
+       /* optimise: any new AF_INET header of ours sums to zero */
+       if (naf != AF_INET) {
+               pf_cksum_cover(pd->pcksum, in_cksum(n, hlen << 2), pd->proto);
+       }
+
        return (0);
 }
 
@@ -2044,7 +2264,7 @@ pf_change_icmp_af(struct mbuf *m, int of
 #define PTR_IP6(field) (offsetof(struct ip6_hdr, field))
 
 int
-pf_translate_icmp_af(int af, void *arg)
+pf_translate_icmp_af(struct pf_pdesc *pd, int af, void *arg)
 {
        struct icmp             *icmp4;
        struct icmp6_hdr        *icmp6;
@@ -2132,13 +2352,16 @@ pf_translate_icmp_af(int af, void *arg)
                default:
                        return (-1);
                }
-               icmp6->icmp6_type = type;
-               icmp6->icmp6_code = code;
+
+               pf_change_8(pd, &icmp6->icmp6_type, type, PF_HI);
+               pf_change_8(pd, &icmp6->icmp6_code, code, PF_LO);
+
                /* aligns well with a icmpv4 nextmtu */
-               icmp6->icmp6_mtu = htonl(mtu);
+               pf_change_32(pd, &icmp6->icmp6_mtu, htonl(mtu));
+
                /* icmpv4 pptr is a one most significant byte */
                if (ptr >= 0)
-                       icmp6->icmp6_pptr = htonl(ptr << 24);
+                       pf_change_32(pd, &icmp6->icmp6_pptr, htonl(ptr << 24));
                break;
        case AF_INET6:
                icmp4 = arg;
@@ -2227,11 +2450,12 @@ pf_translate_icmp_af(int af, void *arg)
                default:
                        return (-1);
                }
-               icmp4->icmp_type = type;
-               icmp4->icmp_code = code;
-               icmp4->icmp_nextmtu = htons(mtu);
-               if (ptr >= 0)
-                       icmp4->icmp_void = htonl(ptr);
+
+               pf_change_8(pd, &icmp4->icmp_type, type, PF_HI);
+               pf_change_8(pd, &icmp4->icmp_code, code, PF_LO);
+               pf_change_16(pd, &icmp4->icmp_nextmtu, htons(mtu));
+               if (ptr >= 0) /* => ICMP_PARAMPROB */
+                       pf_change_32(pd, &icmp4->icmp_void, htonl(ptr));
                break;
        }
 
@@ -2272,13 +2496,17 @@ pf_modulate_sack(struct pf_pdesc *pd, st
                        if (olen >= TCPOLEN_SACKLEN) {
                                for (i = 2; i + TCPOLEN_SACK <= olen;
                                    i += TCPOLEN_SACK) {
+                                       size_t startoff = (opt + i) - opts;
                                        memcpy(&sack, &opt[i], sizeof(sack));
                                        pf_change_32_unaligned(pd, &sack.start,
                                            htonl(ntohl(sack.start) -
-                                           dst->seqdiff));
+                                               dst->seqdiff),
+                                           PF_ALGNMNT(startoff));
                                        pf_change_32_unaligned(pd, &sack.end,
                                            htonl(ntohl(sack.end) -
-                                           dst->seqdiff));
+                                               dst->seqdiff),
+                                           PF_ALGNMNT(startoff +
+                                               sizeof(sack.start)));
                                        memcpy(&opt[i], &sack, sizeof(sack));
                                }
                                copyback = 1;
@@ -3392,7 +3620,6 @@ pf_test_rule(struct pf_pdesc *pd, struct
 
        /* copy back packet headers if needed */
        if (rewrite && pd->hdrlen) {
-               pf_cksum(pd, pd->m);
                m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any, M_NOWAIT);
        }
 
@@ -3485,7 +3712,7 @@ pf_create_state(struct pf_pdesc *pd, str
                            0)
                                s->src.seqdiff = 1;
                        pf_change_32_unaligned(pd, &th->th_seq,
-                           htonl(s->src.seqlo + s->src.seqdiff));
+                           htonl(s->src.seqlo + s->src.seqdiff), PF_HI);
                        *rewrite = 1;
                } else
                        s->src.seqdiff = 0;
@@ -3673,20 +3900,20 @@ pf_translate(struct pf_pdesc *pd, struct
 
                if (afto) {
 #ifdef INET6
-                       if (pf_translate_icmp_af(AF_INET6, pd->hdr.icmp))
+                       if (pf_translate_icmp_af(pd, AF_INET6, pd->hdr.icmp))
                                return (0);
                        pd->proto = IPPROTO_ICMPV6;
                        rewrite = 1;
 #endif /* INET6 */
                } else {
                        if (PF_ANEQ(saddr, pd->src, pd->af)) {
-                               pf_change_32_unaligned(pd, &pd->src->v4.s_addr,
-                                   saddr->v4.s_addr);
+                               pf_change_a(pd, pd->src, saddr,
+                                   pd->af, pd->af);
                                rewrite = 1;
                        }
                        if (PF_ANEQ(daddr, pd->dst, pd->af)) {
-                               pf_change_32_unaligned(pd, &pd->dst->v4.s_addr,
-                                   daddr->v4.s_addr);
+                               pf_change_a(pd, pd->dst, daddr,
+                                   pd->af, pd->af);
                                rewrite = 1;
                        }
                }
@@ -3694,11 +3921,8 @@ pf_translate(struct pf_pdesc *pd, struct
                        u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;
 
                        if (icmpid != pd->hdr.icmp->icmp_id) {
-                               if (pd->csum_status == PF_CSUM_UNKNOWN)
-                                       pf_check_proto_cksum(pd, pd->off,
-                                           pd->tot_len - pd->off, pd->proto,
-                                           pd->af);
-                               pd->hdr.icmp->icmp_id = icmpid;
+                               pf_change_16(pd,
+                                   &pd->hdr.icmp->icmp_id, icmpid);
                                rewrite = 1;
                        }
                }
@@ -3712,7 +3936,7 @@ pf_translate(struct pf_pdesc *pd, struct
 
                if (afto) {
                        /* ip_sum will be recalculated in pf_translate_af */
-                       if (pf_translate_icmp_af(AF_INET, pd->hdr.icmp6))
+                       if (pf_translate_icmp_af(pd, AF_INET, pd->hdr.icmp6))
                                return (0);
                        pd->proto = IPPROTO_ICMP;
                        rewrite = 1;
@@ -3730,11 +3954,8 @@ pf_translate(struct pf_pdesc *pd, struct
                        u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;
 
                        if (icmpid != pd->hdr.icmp6->icmp6_id) {
-                               if (pd->csum_status == PF_CSUM_UNKNOWN)
-                                       pf_check_proto_cksum(pd, pd->off,
-                                           pd->tot_len - pd->off, pd->proto,
-                                           pd->af);
-                               pd->hdr.icmp6->icmp6_id = icmpid;
+                               pf_change_16(pd,
+                                   &pd->hdr.icmp6->icmp6_id, icmpid);
                                rewrite = 1;
                        }
                }
@@ -3745,13 +3966,13 @@ pf_translate(struct pf_pdesc *pd, struct
                switch (pd->af) {
                case AF_INET:
                        if (!afto && PF_ANEQ(saddr, pd->src, pd->af)) {
-                               pf_change_32_unaligned(pd, &pd->src->v4.s_addr,
-                                   saddr->v4.s_addr);
+                               pf_change_a(pd, pd->src, saddr,
+                                   pd->af, pd->af);
                                rewrite = 1;
                        }
                        if (!afto && PF_ANEQ(daddr, pd->dst, pd->af)) {
-                               pf_change_32_unaligned(pd, &pd->dst->v4.s_addr,
-                                   daddr->v4.s_addr);
+                               pf_change_a(pd, pd->dst, daddr,
+                                   pd->af, pd->af);
                                rewrite = 1;
                        }
                        break;
@@ -3813,8 +4034,9 @@ pf_tcp_track_full(struct pf_pdesc *pd, s
                        while ((src->seqdiff = arc4random() - seq) == 0)
                                ;
                        ack = ntohl(th->th_ack) - dst->seqdiff;
-                       pf_change_32_unaligned(pd, &th->th_seq, htonl(seq + src->seqdiff));
-                       pf_change_32_unaligned(pd, &th->th_ack, htonl(ack));
+                       pf_change_32_unaligned(pd, &th->th_seq, htonl(seq +
+                                                  src->seqdiff), PF_HI);
+                       pf_change_32_unaligned(pd, &th->th_ack, htonl(ack), PF_HI);
                        *copyback = 1;
                } else {
                        ack = ntohl(th->th_ack);
@@ -3865,8 +4087,9 @@ pf_tcp_track_full(struct pf_pdesc *pd, s
                ack = ntohl(th->th_ack) - dst->seqdiff;
                if (src->seqdiff) {
                        /* Modulate sequence numbers */
-                       pf_change_32_unaligned(pd, &th->th_seq, htonl(seq + src->seqdiff));
-                       pf_change_32_unaligned(pd, &th->th_ack, htonl(ack));
+                       pf_change_32_unaligned(pd, &th->th_seq, htonl(seq +
+                                                  src->seqdiff), PF_HI);
+                       pf_change_32_unaligned(pd, &th->th_ack, htonl(ack), PF_HI);
                        *copyback = 1;
                }
                end = seq + pd->p_len;
@@ -4401,7 +4624,6 @@ pf_test_state(struct pf_pdesc *pd, struc
        }
 
        if (copyback && pd->hdrlen > 0) {
-               pf_cksum(pd, pd->m);
                m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any, M_NOWAIT);
        }
 
@@ -4524,7 +4746,7 @@ pf_test_state_icmp(struct pf_pdesc *pd,
                        case AF_INET:
 #ifdef INET6
                                if (afto) {
-                                       if (pf_translate_icmp_af(AF_INET6,
+                                       if (pf_translate_icmp_af(pd, AF_INET6,
                                            pd->hdr.icmp))
                                                return (PF_DROP);
                                        pd->proto = IPPROTO_ICMPV6;
@@ -4532,22 +4754,19 @@ pf_test_state_icmp(struct pf_pdesc *pd,
 #endif /* INET6 */
                                if (!afto && PF_ANEQ(pd->src,
                                    &nk->addr[sidx], AF_INET))
-                                       pf_change_32_unaligned(pd, &saddr->v4.s_addr,
-                                           nk->addr[sidx].v4.s_addr);
+                                       pf_change_a(pd, saddr, &nk->addr[sidx],
+                                           pd->af, pd->af);
 
                                if (!afto && PF_ANEQ(pd->dst,
                                    &nk->addr[didx], AF_INET)) {
-                                       pf_change_32_unaligned(pd, &daddr->v4.s_addr,
-                                           nk->addr[didx].v4.s_addr);
+                                       pf_change_a(pd, daddr, &nk->addr[didx],
+                                           pd->af, pd->af);
                                        pd->destchg = 1;
                                }
 
                                if (nk->port[iidx] !=  pd->hdr.icmp->icmp_id) {
-                                       if (pd->csum_status == PF_CSUM_UNKNOWN)
-                                               pf_check_proto_cksum(pd,
-                                                   pd->off, pd->tot_len -
-                                                   pd->off, pd->proto, pd->af);
-                                       pd->hdr.icmp->icmp_id = nk->port[iidx];
+                                       pf_change_16(pd, &pd->hdr.icmp->icmp_id,
+                                           nk->port[iidx]);
                                }
 
                                m_copyback(pd->m, pd->off, ICMP_MINLEN,
@@ -4557,7 +4776,7 @@ pf_test_state_icmp(struct pf_pdesc *pd,
 #ifdef INET6
                        case AF_INET6:
                                if (afto) {
-                                       if (pf_translate_icmp_af(AF_INET,
+                                       if (pf_translate_icmp_af(pd, AF_INET,
                                            pd->hdr.icmp6))
                                                return (PF_DROP);
                                        pd->proto = IPPROTO_ICMP;
@@ -4575,12 +4794,9 @@ pf_test_state_icmp(struct pf_pdesc *pd,
                                }
 
                                if (nk->port[iidx] != pd->hdr.icmp6->icmp6_id) {
-                                       if (pd->csum_status == PF_CSUM_UNKNOWN)
-                                               pf_check_proto_cksum(pd,
-                                                   pd->off, pd->tot_len -
-                                                   pd->off, pd->proto, pd->af);
-                                       pd->hdr.icmp6->icmp6_id =
-                                           nk->port[iidx];
+                                       pf_change_16(pd,
+                                           &pd->hdr.icmp6->icmp6_id,
+                                           nk->port[iidx]);
                                }
 
                                m_copyback(pd->m, pd->off,
@@ -4729,7 +4945,8 @@ pf_test_state_icmp(struct pf_pdesc *pd,
                        /* Demodulate sequence number */
                        seq = ntohl(th.th_seq) - src->seqdiff;
                        if (src->seqdiff) {
-                               pf_change_32_unaligned(pd, &th.th_seq, htonl(seq));
+                               pf_change_32_unaligned(pd, &th.th_seq,
+                                   htonl(seq), PF_HI);
                                copyback = 1;
                        }
 
@@ -4780,7 +4997,7 @@ pf_test_state_icmp(struct pf_pdesc *pd,
 
 #ifdef INET6
                                if (afto) {
-                                       if (pf_translate_icmp_af(nk->af,
+                                       if (pf_translate_icmp_af(pd, nk->af,
                                            pd->hdr.icmp))
                                                return (PF_DROP);
                                        m_copyback(pd->m, pd->off,
@@ -4895,7 +5112,7 @@ pf_test_state_icmp(struct pf_pdesc *pd,
 
 #ifdef INET6
                                if (afto) {
-                                       if (pf_translate_icmp_af(nk->af,
+                                       if (pf_translate_icmp_af(pd, nk->af,
                                            pd->hdr.icmp))
                                                return (PF_DROP);
                                        m_copyback(pd->m, pd->off,
@@ -4967,7 +5184,10 @@ pf_test_state_icmp(struct pf_pdesc *pd,
                                        break;
 #endif /* INET6 */
                                }
-                               uh.uh_sum = 0;
+                               /* Avoid recomputing quoted UDP checksum.
+                                * note: udp6 0 csum invalid per rfc2460 p27.
+                                * but presumed nothing cares in this context */
+                               pf_change_16(pd, &uh.uh_sum, 0);
                                m_copyback(pd2.m, pd2.off, sizeof(uh), &uh,
                                    M_NOWAIT);
                                copyback = 1;
@@ -5018,7 +5238,7 @@ pf_test_state_icmp(struct pf_pdesc *pd,
                                if (afto) {
                                        if (nk->af != AF_INET6)
                                                return (PF_DROP);
-                                       if (pf_translate_icmp_af(nk->af,
+                                       if (pf_translate_icmp_af(pd, nk->af,
                                            pd->hdr.icmp))
                                                return (PF_DROP);
                                        m_copyback(pd->m, pd->off,
@@ -5029,7 +5249,8 @@ pf_test_state_icmp(struct pf_pdesc *pd,
                                            &nk->addr[didx], pd->af, nk->af))
                                                return (PF_DROP);
                                        pd->proto = IPPROTO_ICMPV6;
-                                       if (pf_translate_icmp_af(nk->af, &iih))
+                                       if (pf_translate_icmp_af(pd,
+                                               nk->af, &iih))
                                                return (PF_DROP);
                                        if (virtual_type == htons(ICMP_ECHO) &&
                                            nk->port[iidx] != iih.icmp_id)
@@ -5129,7 +5350,7 @@ pf_test_state_icmp(struct pf_pdesc *pd,
                                if (afto) {
                                        if (nk->af != AF_INET)
                                                return (PF_DROP);
-                                       if (pf_translate_icmp_af(nk->af,
+                                       if (pf_translate_icmp_af(pd, nk->af,
                                            pd->hdr.icmp))
                                                return (PF_DROP);
                                        m_copyback(pd->m, pd->off,
@@ -5140,7 +5361,7 @@ pf_test_state_icmp(struct pf_pdesc *pd,
                                            &nk->addr[didx], pd->af, nk->af))
                                                return (PF_DROP);
                                        pd->proto = IPPROTO_ICMP;
-                                       if (pf_translate_icmp_af(nk->af, &iih))
+                                       if (pf_translate_icmp_af(pd, nk->af, &iih))
                                                return (PF_DROP);
                                        if (virtual_type ==
                                            htons(ICMP6_ECHO_REQUEST) &&
@@ -5254,7 +5475,6 @@ pf_test_state_icmp(struct pf_pdesc *pd,
                }
        }
        if (copyback) {
-               pf_cksum(pd, pd->m);
                m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any, M_NOWAIT);
        }
 
@@ -6201,6 +6421,7 @@ pf_setup_pdesc(struct pf_pdesc *pd, void
                        REASON_SET(reason, PFRES_SHORT);
                        return (PF_DROP);
                }
+               pd->pcksum = &pd->hdr.icmp6->icmp6_cksum;
                break;
        }
 #endif /* INET6 */
@@ -6629,37 +6850,6 @@ done:
        return (action);
 }
 
-void
-pf_cksum(struct pf_pdesc *pd, struct mbuf *m)
-{
-       if (pd->csum_status != PF_CSUM_OK)
-               return; /* don't fix broken cksums */
-
-       switch (pd->proto) {
-       case IPPROTO_TCP:
-               pd->hdr.tcp->th_sum = 0;
-               m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT;
-               break;
-       case IPPROTO_UDP:
-               pd->hdr.udp->uh_sum = 0;
-               m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
-               break;
-       case IPPROTO_ICMP:
-               pd->hdr.icmp->icmp_cksum = 0;
-               m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT;
-               break;
-#ifdef INET6
-       case IPPROTO_ICMPV6:
-               pd->hdr.icmp6->icmp6_cksum = 0;
-               m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT;
-               break;
-#endif /* INET6 */
-       default:
-               /* nothing */
-               break;
-       }
-}
-
 /*
  * must be called whenever any addressing information such as
  * address, port, protocol has changed
Index: net/pf_norm.c
===================================================================
--- net.orig/pf_norm.c
+++ net/pf_norm.c
@@ -832,10 +832,6 @@ pf_normalize_tcp(struct pf_pdesc *pd)
        u_int8_t         flags;
        u_int            rewrite = 0;
 
-       if (pd->csum_status == PF_CSUM_UNKNOWN)
-               pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off,
-                   pd->proto, pd->af);
-
        flags = th->th_flags;
        if (flags & TH_SYN) {
                /* Illegal packet */
@@ -857,21 +853,23 @@ pf_normalize_tcp(struct pf_pdesc *pd)
        }
 
        /* If flags changed, or reserved data set, then adjust */
-       if (flags != th->th_flags || th->th_x2 != 0) {
-               th->th_flags = flags;
-               th->th_x2 = 0;
-               rewrite = 1;
-       }
+       if (flags != th->th_flags || th->th_x2 != 0) {
+               /* hack: set 4-bit th_x2 = 0 */
+               u_int8_t *th_off = (u_int8_t*)(&th->th_ack+1);
+               pf_change_8(pd, th_off, th->th_off << 4, PF_HI);
+
+               pf_change_8(pd, &th->th_flags, flags, PF_LO);
+               rewrite = 1;
+       }
 
        /* Remove urgent pointer, if TH_URG is not set */
        if (!(flags & TH_URG) && th->th_urp) {
-               th->th_urp = 0;
+               pf_change_16(pd, &th->th_urp, 0);
                rewrite = 1;
        }
 
        /* copy back packet headers if we sanitized */
        if (rewrite) {
-               pf_cksum(pd, pd->m);
                m_copyback(pd->m, pd->off, sizeof(*th), th, M_NOWAIT);
        }
 
@@ -987,7 +985,7 @@ pf_normalize_tcp_stateful(struct pf_pdes
        u_int32_t        tsval, tsecr;
        u_int            tsval_from_last;
        u_int8_t         hdr[60];
-       u_int8_t        *opt;
+       u_int8_t        *opts, *opt;
        int              copyback = 0;
        int              got_ts = 0;
 
@@ -1028,7 +1026,7 @@ pf_normalize_tcp_stateful(struct pf_pdes
            pd->af)) {
                /* Diddle with TCP options */
                int hlen;
-               opt = hdr + sizeof(struct tcphdr);
+               opt = opts = hdr + sizeof(struct tcphdr);
                hlen = (th->th_off << 2) - sizeof(struct tcphdr);
                while (hlen >= TCPOLEN_TIMESTAMP) {
                        switch (*opt) {
@@ -1056,28 +1054,37 @@ pf_normalize_tcp_stateful(struct pf_pdes
                                        return (PF_DROP);
                                }
                                if (opt[1] >= TCPOLEN_TIMESTAMP) {
-                                       memcpy(&tsval, &opt[2],
-                                           sizeof(u_int32_t));
+                                       u_int8_t *ts, *tsr;
+                                       ts = opt + 2;
+                                       memcpy(&tsval, ts, sizeof(u_int32_t));
+
+                                       /* modulate TS */
                                        if (tsval && src->scrub &&
                                            (src->scrub->pfss_flags &
                                            PFSS_TIMESTAMP)) {
+                                               /* note: tsval used later on */
                                                tsval = ntohl(tsval);
-                                               pf_change_32_unaligned(pd, &opt[2],
+                                               pf_change_32_unaligned(pd, ts,
                                                    htonl(tsval +
-                                                   src->scrub->pfss_ts_mod));
+                                                       src->scrub->pfss_ts_mod),
+                                                   PF_ALGNMNT(ts - opts));
                                                copyback = 1;
                                        }
 
-                                       /* Modulate TS reply iff valid (!0) */
-                                       memcpy(&tsecr, &opt[6],
+                                       /* modulate TS reply if any */
+                                       tsr = opt + 6;
+                                       memcpy(&tsecr, tsr,
                                            sizeof(u_int32_t));
+
                                        if (tsecr && dst->scrub &&
                                            (dst->scrub->pfss_flags &
                                            PFSS_TIMESTAMP)) {
+                                               /* note: tsecr used later on */
                                                tsecr = ntohl(tsecr)
                                                    - dst->scrub->pfss_ts_mod;
-                                               pf_change_32_unaligned(pd, &opt[6],
-                                                   htonl(tsecr));
+                                               pf_change_32_unaligned(pd, tsr,
+                                                   htonl(tsecr),
+                                                   PF_ALGNMNT(tsr - opts));
                                                copyback = 1;
                                        }
                                        got_ts = 1;
@@ -1370,12 +1377,8 @@ pf_normalize_mss(struct pf_pdesc *pd, u_
        u_int16_t        mss;
        int              thoff;
        int              opt, cnt, optlen = 0;
-       u_char           opts[MAX_TCPOPTLEN];
-       u_char          *optp = opts;
-
-       if (pd->csum_status == PF_CSUM_UNKNOWN)
-               pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off,
-                   pd->proto, pd->af);
+       u_int8_t         opts[MAX_TCPOPTLEN];
+       u_int8_t        *optp = opts;
 
        thoff = th->th_off << 2;
        cnt = thoff - sizeof(struct tcphdr);
@@ -1398,15 +1401,18 @@ pf_normalize_mss(struct pf_pdesc *pd, u_
                                break;
                }
                if (opt == TCPOPT_MAXSEG) {
-                       memcpy(&mss, (optp + 2), 2);
+                       u_int8_t *mssp = optp + 2;
+                       memcpy(&mss, mssp, sizeof(mss));
                        if (ntohs(mss) > maxmss) {
-                               mss = htons(maxmss);
+                               size_t mssoptsoff = mssp - opts;
+                               pf_change_16_unaligned(pd, &mss,
+                                   htons(maxmss), PF_ALGNMNT(mssoptsoff));
+                               m_copyback(pd->m,
+                                   pd->off + sizeof(*th) + mssoptsoff,
+                                   sizeof(mss), &mss, M_NOWAIT);
                                m_copyback(pd->m,
-                                   pd->off + sizeof(*th) + optp + 2 - opts,
-                                   2, &mss, M_NOWAIT);
-                               pf_cksum(pd, pd->m);
-                               m_copyback(pd->m, pd->off, sizeof(*th), th,
-                                   M_NOWAIT);
+                                   pd->off,
+                                   sizeof(*th), th, M_NOWAIT);
                        }
                }
        }
Index: net/pfvar.h
===================================================================
--- net.orig/pfvar.h
+++ net/pfvar.h
@@ -1810,7 +1810,13 @@ void     pf_addr_inc(struct pf_addr *, sa_fa
 
 void   *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *,
            sa_family_t);
-void   pf_change_32_unaligned(struct pf_pdesc *, void *, u_int32_t);
+#define PF_HI (true)
+#define PF_LO (!PF_HI)
+#define PF_ALGNMNT(off) (((off) % 2) == 0 ? PF_HI : PF_LO)
+void   pf_change_8(struct pf_pdesc *, u_int8_t *, u_int8_t, bool);
+void   pf_change_16(struct pf_pdesc *, u_int16_t *, u_int16_t);
+void   pf_change_16_unaligned(struct pf_pdesc *, void *, u_int16_t, bool);
+void   pf_change_32_unaligned(struct pf_pdesc *, void *, u_int32_t, bool);
 int    pf_check_proto_cksum(struct pf_pdesc *, int, int, u_int8_t,
            sa_family_t);
 int    pflog_packet(struct pf_pdesc *, u_int8_t, struct pf_rule *,
@@ -1996,8 +2002,6 @@ int                        pf_map_addr(sa_family_t, struct p
 
 int                     pf_postprocess_addr(struct pf_state *);
 
-void                    pf_cksum(struct pf_pdesc *, struct mbuf *);
-
 #endif /* _KERNEL */
 
 

---> Remove unnecessary pf_change_32_unaligned() calls

These calls are unnecessary because their arguments
- are addresses of aligned structure members, and
- are 16-bit aligned within their packet (PF_HI).

Index: net/pf.c
===================================================================
--- net.orig/pf.c
+++ net/pf.c
@@ -3711,8 +3711,8 @@ pf_create_state(struct pf_pdesc *pd, str
                        if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
                            0)
                                s->src.seqdiff = 1;
-                       pf_change_32_unaligned(pd, &th->th_seq,
-                           htonl(s->src.seqlo + s->src.seqdiff), PF_HI);
+                       pf_change_32(pd, &th->th_seq,
+                           htonl(s->src.seqlo + s->src.seqdiff));
                        *rewrite = 1;
                } else
                        s->src.seqdiff = 0;
@@ -4034,9 +4034,8 @@ pf_tcp_track_full(struct pf_pdesc *pd, s
                        while ((src->seqdiff = arc4random() - seq) == 0)
                                ;
                        ack = ntohl(th->th_ack) - dst->seqdiff;
-                       pf_change_32_unaligned(pd, &th->th_seq, htonl(seq +
-                                                  src->seqdiff), PF_HI);
-                       pf_change_32_unaligned(pd, &th->th_ack, htonl(ack), PF_HI);
+                       pf_change_32(pd, &th->th_seq, htonl(seq + src->seqdiff));
+                       pf_change_32(pd, &th->th_ack, htonl(ack));
                        *copyback = 1;
                } else {
                        ack = ntohl(th->th_ack);
@@ -4087,9 +4086,10 @@ pf_tcp_track_full(struct pf_pdesc *pd, s
                ack = ntohl(th->th_ack) - dst->seqdiff;
                if (src->seqdiff) {
                        /* Modulate sequence numbers */
-                       pf_change_32_unaligned(pd, &th->th_seq, htonl(seq +
-                                                  src->seqdiff), PF_HI);
-                       pf_change_32_unaligned(pd, &th->th_ack, htonl(ack), PF_HI);
+                       pf_change_32(pd, &th->th_seq,
+                           htonl(seq + src->seqdiff));
+                       pf_change_32(pd, &th->th_ack,
+                           htonl(ack));
                        *copyback = 1;
                }
                end = seq + pd->p_len;
@@ -4945,8 +4945,7 @@ pf_test_state_icmp(struct pf_pdesc *pd,
                        /* Demodulate sequence number */
                        seq = ntohl(th.th_seq) - src->seqdiff;
                        if (src->seqdiff) {
-                               pf_change_32_unaligned(pd, &th.th_seq,
-                                   htonl(seq), PF_HI);
+                               pf_change_32(pd, &th.th_seq, htonl(seq));
                                copyback = 1;
                        }
 

---> Errata for checksum modification in af-to
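Note: m_cat() may free n, so the checksum over n's new quoted header
is now taken before m_cat() is called rather than after it.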

Index: net/pf.c
===================================================================
--- net.orig/pf.c
+++ net/pf.c
@@ -2172,6 +2172,12 @@ pf_translate_af(struct pf_pdesc *pd)
                    ntohs(ip6->ip6_plen));
        }
 
+       /* UDP over IPv6 must be checksummed */
+       if (naf_proto == IPPROTO_UDP && *pc == 0x0000 &&
+           pd->naf == AF_INET6) {
+               pd->m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
+       }
+
        return (0);
 }
 
@@ -2199,7 +2205,7 @@ pf_change_icmp_af(struct mbuf *m, int of
        hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
 
        /* trim old quoted header */
-       pf_cksum_uncover(pd->pcksum, in_cksum(n, olen << 2), pd->proto);
+       pf_cksum_uncover(pd->pcksum, in_cksum(n, olen), pd->proto);
        m_adj(n, olen);
 
        /* prepend a new, translated, quoted header */
@@ -2246,16 +2252,16 @@ pf_change_icmp_af(struct mbuf *m, int of
        pd2->off += hlen - olen;
        pd->tot_len += hlen - olen;
 
+       /* optimise: any new AF_INET header of ours sums to zero */
+       if (naf != AF_INET) {
+               pf_cksum_cover(pd->pcksum, in_cksum(n, hlen), pd->proto);
+       }
+
        /* reattach modified quoted packet to outer header */
        mlen = n->m_pkthdr.len;
        m_cat(m, n);
        m->m_pkthdr.len += mlen;
 
-       /* optimise: any new AF_INET header of ours sums to zero */
-       if (naf != AF_INET) {
-               pf_cksum_cover(pd->pcksum, in_cksum(n, hlen << 2), pd->proto);
-       }
-
        return (0);
 }
 

---> Reintroduce checksum modification for af-to between 
       icmp/icmpv6.
---> Shift af-to address checksum modifications to where
       af-to alters the address.

Note: pf_translate_af() flushes pd->pcksum by copying back the
entire transport header. Simple but possibly suboptimal.

Index: net/pf.c
===================================================================
--- net.orig/pf.c
+++ net/pf.c
@@ -1839,22 +1839,14 @@ void
 pf_change_a(struct pf_pdesc *pd, struct pf_addr *a, struct pf_addr *an,
            sa_family_t af, sa_family_t naf)
 {
-       static struct pf_addr    zero;
-       u_int16_t               *pc = pd->pcksum;
+       if (af != naf)
+               return; /* defer to pf_translate_af() and co. */
 
        switch (pd->proto) {
        case IPPROTO_TCP:       /* FALLTHROUGH */
        case IPPROTO_UDP:
-       case IPPROTO_ICMPV6:
-               if (af == naf) {
-                       pf_cksum_fixup_a(pc, a, an, af, pd->proto);
-               }
-               else {
-                       /* "cksum_uncover_a(pc, a)" */
-                       /* "cksum_cover_a(pc, an)" */
-                       pf_cksum_fixup_a(pc, a, &zero, af, pd->proto);
-                       pf_cksum_fixup_a(pc, &zero, an, naf, pd->proto);
-               }
+        case IPPROTO_ICMPV6:
+               pf_cksum_fixup_a(pd->pcksum, a, an, af, pd->proto);
                break;
 
        case IPPROTO_ICMP:      /* ICMPv4 has no pseudo-header */
@@ -1862,8 +1854,7 @@ pf_change_a(struct pf_pdesc *pd, struct
                break;
        }
 
-       if (af == naf)
-               PF_ACPY(a, an, naf);
+       PF_ACPY(a, an, naf);
 }
 
 void
@@ -2108,21 +2099,48 @@ pf_change_icmp(struct pf_pdesc *pd, stru
 int
 pf_translate_af(struct pf_pdesc *pd)
 {
-       struct mbuf             *mp;
-       struct ip               *ip4;
-       struct ip6_hdr          *ip6;
-       struct icmp6_hdr        *icmp;
-       int                      hlen;
+       static const struct pf_addr     zero;
+       struct ip                      *ip4;
+       struct ip6_hdr                 *ip6;
+       int                             copyback = 0;
+       unsigned int                    dlen, hlen;
+       u_int16_t                      *pc = pd->pcksum;
+       u_int8_t                        af_proto, naf_proto;
 
+       dlen = pd->tot_len - pd->off;
        hlen = pd->naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
+       af_proto = naf_proto = pd->proto;
 
-       /* trim the old header */
-       m_adj(pd->m, pd->off);
+       if (naf_proto == IPPROTO_ICMP)
+               af_proto = IPPROTO_ICMPV6;
+       if (naf_proto == IPPROTO_ICMPV6)
+               af_proto = IPPROTO_ICMP;
 
-       /* prepend a new one */
+       /* uncover stale pseudo-header */
+       switch (af_proto) {
+       case IPPROTO_ICMPV6:
+               /* optimise: unchanged for TCP/UDP */
+               pf_cksum_fixup(pc, htons(af_proto), 0x0, af_proto);
+               pf_cksum_fixup(pc, htons(dlen),     0x0, af_proto);
+                               /* FALLTHROUGH */
+       case IPPROTO_UDP:       /* FALLTHROUGH */
+       case IPPROTO_TCP:
+               pf_cksum_fixup_a(pc, pd->src, &zero, pd->af, af_proto);
+               pf_cksum_fixup_a(pc, pd->dst, &zero, pd->af, af_proto);
+                copyback = 1;
+               break;
+       default:
+               break;  /* assume no pseudo-header */
+       }
+
+       /* replace the network header */
+       m_adj(pd->m, pd->off);
        if ((M_PREPEND(pd->m, hlen, M_DONTWAIT)) == NULL)
                return (-1);
 
+       pd->off = hlen;
+       pd->tot_len += hlen - pd->off;
+
        switch (pd->naf) {
        case AF_INET:
                ip4 = mtod(pd->m, struct ip *);
@@ -2130,7 +2148,7 @@ pf_translate_af(struct pf_pdesc *pd)
                ip4->ip_v   = IPVERSION;
                ip4->ip_hl  = hlen >> 2;
                ip4->ip_tos = pd->tos;
-               ip4->ip_len = htons(hlen + (pd->tot_len - pd->off));
+               ip4->ip_len = htons(hlen + dlen);
                ip4->ip_id  = htons(ip_randomid());
                ip4->ip_off = htons(IP_DF);
                ip4->ip_ttl = pd->ttl;
@@ -2143,7 +2161,7 @@ pf_translate_af(struct pf_pdesc *pd)
                bzero(ip6, hlen);
                ip6->ip6_vfc  = IPV6_VERSION;
                ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20);
-               ip6->ip6_plen = htons(pd->tot_len - pd->off);
+               ip6->ip6_plen = htons(dlen);
                ip6->ip6_nxt  = pd->proto;
                if (!pd->ttl || pd->ttl > IPV6_DEFHLIM)
                        ip6->ip6_hlim = IPV6_DEFHLIM;
@@ -2156,51 +2174,57 @@ pf_translate_af(struct pf_pdesc *pd)
                return (-1);
        }
 
-       /* recalculate icmp/icmp6 checksums */
-       if (pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) {
-               int off;
-               if ((mp = m_pulldown(pd->m, hlen, sizeof(*icmp), &off)) ==
-                   NULL) {
-                       pd->m = NULL;
-                       return (-1);
-               }
-               icmp = (struct icmp6_hdr *)(mp->m_data + off);
-               icmp->icmp6_cksum = 0;
-               icmp->icmp6_cksum = pd->naf == AF_INET ?
-                   in4_cksum(pd->m, 0, hlen, ntohs(ip4->ip_len) - hlen) :
-                   in6_cksum(pd->m, IPPROTO_ICMPV6, hlen,
-                   ntohs(ip6->ip6_plen));
-       }
-
        /* UDP over IPv6 must be checksummed */
        if (naf_proto == IPPROTO_UDP && *pc == 0x0000 &&
            pd->naf == AF_INET6) {
                pd->m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
        }
 
+       /* cover fresh pseudo-header */
+       switch (naf_proto) {
+       case IPPROTO_ICMPV6:
+               /* optimise: unchanged for TCP/UDP */
+               pf_cksum_fixup(pc, 0x0, htons(naf_proto), naf_proto);
+               pf_cksum_fixup(pc, 0x0, htons(dlen),      naf_proto);
+                               /* FALLTHROUGH */
+       case IPPROTO_UDP:       /* FALLTHROUGH */
+       case IPPROTO_TCP:
+               pf_cksum_fixup_a(pc, &zero, &pd->nsaddr, pd->naf, naf_proto);
+               pf_cksum_fixup_a(pc, &zero, &pd->ndaddr, pd->naf, naf_proto);
+               copyback = 1;
+               break;
+       default:
+               break;  /* assume no pseudo-header */
+       }
+
+       /* flush pd->pcksum to the mbuf's transport header */
+       if (copyback) /* copyback => pd->hdrlen > 0 */
+               m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any, M_NOWAIT);
+
        return (0);
 }
 
 int
-pf_change_icmp_af(struct mbuf *m, int off, struct pf_pdesc *pd,
+pf_change_icmp_af(struct mbuf *m, int ipoff2, struct pf_pdesc *pd,
     struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst,
     sa_family_t af, sa_family_t naf)
 {
        struct mbuf             *n = NULL;
        struct ip               *ip4;
        struct ip6_hdr          *ip6;
-       int                      hlen, olen, mlen;
+       int                      hlen, olen;
+       int                      d;
 
        if (af == naf || (af != AF_INET && af != AF_INET6) ||
            (naf != AF_INET && naf != AF_INET6))
                return (-1);
 
        /* split the mbuf chain on the quoted ip/ip6 header boundary */
-       if ((n = m_split(m, off, M_DONTWAIT)) == NULL)
+       if ((n = m_split(m, ipoff2, M_DONTWAIT)) == NULL)
                return (-1);
 
        /* old quoted header */
-       olen = pd2->off - off;
+       olen = pd2->off - ipoff2;
        /* new quoted header */
        hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
 
@@ -2248,19 +2272,35 @@ pf_change_icmp_af(struct mbuf *m, int of
                break;
        }
 
-       /* adjust payload offset and total packet length */
-       pd2->off += hlen - olen;
-       pd->tot_len += hlen - olen;
-
-       /* optimise: any new AF_INET header of ours sums to zero */
+       /* cover new quoted header */
+       /* optimise: our fresh AF_INET headers sum to zero */
        if (naf != AF_INET) {
                pf_cksum_cover(pd->pcksum, in_cksum(n, hlen), pd->proto);
        }
 
        /* reattach modified quoted packet to outer header */
-       mlen = n->m_pkthdr.len;
-       m_cat(m, n);
-       m->m_pkthdr.len += mlen;
+       {
+               int nlen = n->m_pkthdr.len;
+               m_cat(m, n);
+               m->m_pkthdr.len += nlen;
+       }
+
+       /* account for altered length */
+       d = hlen - olen;
+
+       if (pd->proto == IPPROTO_ICMPV6) {
+               /* fixup pseudo-header */
+               int dlen = pd->tot_len - pd->off;
+               pf_cksum_fixup(pd->pcksum,
+                   htons(dlen), htons(dlen + d), pd->proto);
+       }
+
+       pd->tot_len  += d;
+       pd2->tot_len += d;
+       pd2->off     += d;
+
+       /* note: not bothering to update network headers as
+          these due for rewrite by pf_translate_af() */
 
        return (0);
 }
@@ -3941,7 +3981,6 @@ pf_translate(struct pf_pdesc *pd, struct
                        return (0);
 
                if (afto) {
-                       /* ip_sum will be recalculated in pf_translate_af */
                        if (pf_translate_icmp_af(pd, AF_INET, pd->hdr.icmp6))
                                return (0);
                        pd->proto = IPPROTO_ICMP;

Reply via email to