On 3/09/2015, at 10:41 AM, Richard Procter wrote:
> [...] or for that matter just unit-test the pf_change() calls. I
> think I'll rustle up some of the later.
That was a useful exercise. The pf_change_*() code is fine.
However, pf_cksum_fixup_a() reliably faulted a couple of times
over 2E7 random address, so it was back to the drawing board.
It turns out that 5.3's checksum modification algorithm is much
hairer than it appears and I was fooling myself I understood it;
the argument I provided it correct, it just doesn't establish the
conclusion! The lesson is I guess not to re-purpose highly
optimised code; instead refine something simple.
I've trialed four alternatives on three architectures and will
cover this in a later patch. In the meantime, unittest code
can be found here[0].
The attached against HEAD is the same as my last but does
not include pf_cksum_fixup_a() and adds some comments to
pf_cksum_fixup().
0) initialise pd->pcksum = icmp6 header cksum
3) re-introduce pf_cksum_fixup() (but not pf_cksum_fixup_a()
4) introduce pf_change_*()
22) misc. transitions to pf_change_*() interface
I will be offline for a couple of days.
best,
Richard.
[0] https://203.79.107.124/unittest/unittest.tar.gz
Index: net/pf.c
===================================================================
--- net.orig/pf.c
+++ net/pf.c
@@ -145,7 +145,8 @@ void pf_init_threshold(struct
pf_thre
u_int32_t);
void pf_add_threshold(struct pf_threshold *);
int pf_check_threshold(struct pf_threshold *);
-
+void pf_cksum_fixup(u_int16_t *, u_int16_t, u_int16_t,
+ u_int8_t);
void pf_change_ap(struct pf_pdesc *, struct pf_addr *,
u_int16_t *, struct pf_addr *, u_int16_t,
sa_family_t);
@@ -1651,6 +1652,105 @@ pf_addr_wrap_neq(struct pf_addr_wrap *aw
}
}
+/* This function is deceptively simple, but is in fact an incredibly hairy
+ * optimised special case of an algorithm to emulates 16-bit ones-complement
+ * arithmetic by conserving its carries, which twos-complement otherwise
+ * discards, in the upper 16 bits of x. These accumulated carries when added
+ * to the lower 16-bits over a number of 'reductions' then complete the
+ * ones-complement sum.
+ *
+ * It uses a trick to eliminate a reduction step by emulating in
+ * twos-complement ones-complement subtraction for a single u_int16_t,
+ * thereby reducing the number of accumulated carries to at most one, and
+ * saving one each of +, >>, and ~.
+ *
+ * The emulation works as follows: subtracting exactly one u_int16_t from x
+ * incurs at most one net underflow, wrapping the accumulator to 2^16 - 1,
+ * which, when folded back into the 16-bit sum preserves the ones-complement
+ * borrow:
+ *
+ * ((x >> 16) + (x & 0xffff)) & 0xffff
+ * = { x mod 2^n = x & (2^n - 1) }
+ * ((x >> 16) + (x mod 2^16)) mod 2^16
+ * = { def sum, def accumulator }
+ * (accumulator + sum) mod 2^16
+ * = { one underflow, accumulator := 2^16 - 1 }
+ * ((2^16 - 1) + sum) mod 2^16
+ * = { mod }
+ * (sum - 1) mod 2^16
+ *
+ * The last step accounts for the reduction's trailing '& 0xffff' (which
+ * would, in general, destroy accumulated carries, but not here as a single
+ * add requires at most one reduction).
+ *
+ * And although this breaks for sum = 0, giving 0xffff, which is
+ * ones-complement's other zero, not -1, that cannot occur: underflowing the
+ * sum to zero requires subtracting at least 2^16, which exceeds a single
+ * u_int16_t.
+ */
+void
+pf_cksum_fixup(u_int16_t *cksum, u_int16_t was, u_int16_t now,
+ u_int8_t proto)
+{
+ u_int32_t x;
+ const int udp = proto == IPPROTO_UDP;
+
+ if (udp && *cksum == 0x0000)
+ return;
+
+ x = *cksum + was - now;
+ x = ((x >> 16) + (x & 0xffff)) & 0xffff;
+
+ if (udp && x == 0x0000)
+ x = 0xffff;
+
+ *cksum = (u_int16_t)(x);
+}
+
+void
+pf_change_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi)
+{
+ u_int16_t new = htons(hi ? ( v << 8) : v);
+ u_int16_t old = htons(hi ? (*f << 8) : *f);
+
+ pf_cksum_fixup(pd->pcksum, old, new, pd->proto);
+ *f = v;
+}
+
+/* pre: *f is 16-bit aligned within its packet */
+void
+pf_change_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v)
+{
+ pf_cksum_fixup(pd->pcksum, *f, v, pd->proto);
+ *f = v;
+}
+
+void
+pf_change_16_unaligned(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi)
+{
+ u_int8_t *fb = (u_int8_t*)f;
+ u_int8_t *vb = (u_int8_t*)&v;
+
+ if (hi && ALIGNED_POINTER(f, u_int16_t)) {
+ pf_change_16(pd, f, v); /* optimise */
+ return;
+ }
+
+ pf_change_8(pd, fb++, *vb++, hi);
+ pf_change_8(pd, fb++, *vb++,!hi);
+}
+
+/* pre: *f is 16-bit aligned within its packet */
+void
+pf_change_32(struct pf_pdesc *pd, u_int32_t *f, u_int32_t v)
+{
+ u_int16_t *pc = pd->pcksum;
+
+ pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), pd->proto);
+ pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), pd->proto);
+ *f = v;
+}
+
void
pf_change_ap(struct pf_pdesc *pd, struct pf_addr *a, u_int16_t *p,
struct pf_addr *an, u_int16_t pn, sa_family_t naf)
@@ -3724,11 +3824,8 @@ pf_translate(struct pf_pdesc *pd, struct
u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;
if (icmpid != pd->hdr.icmp->icmp_id) {
- if (pd->csum_status == PF_CSUM_UNKNOWN)
- pf_check_proto_cksum(pd, pd->off,
- pd->tot_len - pd->off, pd->proto,
- pd->af);
- pd->hdr.icmp->icmp_id = icmpid;
+ pf_change_16(pd,
+ &pd->hdr.icmp->icmp_id, icmpid);
rewrite = 1;
}
}
@@ -3760,11 +3857,8 @@ pf_translate(struct pf_pdesc *pd, struct
u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;
if (icmpid != pd->hdr.icmp6->icmp6_id) {
- if (pd->csum_status == PF_CSUM_UNKNOWN)
- pf_check_proto_cksum(pd, pd->off,
- pd->tot_len - pd->off, pd->proto,
- pd->af);
- pd->hdr.icmp6->icmp6_id = icmpid;
+ pf_change_16(pd,
+ &pd->hdr.icmp6->icmp6_id, icmpid);
rewrite = 1;
}
}
@@ -4572,11 +4666,8 @@ pf_test_state_icmp(struct pf_pdesc *pd,
}
if (nk->port[iidx] != pd->hdr.icmp->icmp_id) {
- if (pd->csum_status == PF_CSUM_UNKNOWN)
- pf_check_proto_cksum(pd,
- pd->off, pd->tot_len -
- pd->off, pd->proto, pd->af);
- pd->hdr.icmp->icmp_id = nk->port[iidx];
+ pf_change_16(pd, &pd->hdr.icmp->icmp_id,
+ nk->port[iidx]);
}
m_copyback(pd->m, pd->off, ICMP_MINLEN,
@@ -4604,12 +4695,9 @@ pf_test_state_icmp(struct pf_pdesc *pd,
}
if (nk->port[iidx] != pd->hdr.icmp6->icmp6_id) {
- if (pd->csum_status == PF_CSUM_UNKNOWN)
- pf_check_proto_cksum(pd,
- pd->off, pd->tot_len -
- pd->off, pd->proto, pd->af);
- pd->hdr.icmp6->icmp6_id =
- nk->port[iidx];
+ pf_change_16(pd,
+ &pd->hdr.icmp6->icmp6_id,
+ nk->port[iidx]);
}
m_copyback(pd->m, pd->off,
@@ -6230,6 +6318,7 @@ pf_setup_pdesc(struct pf_pdesc *pd, void
REASON_SET(reason, PFRES_SHORT);
return (PF_DROP);
}
+ pd->pcksum = &pd->hdr.icmp6->icmp6_cksum;
break;
}
#endif /* INET6 */
Index: net/pfvar.h
===================================================================
--- net.orig/pfvar.h
+++ net/pfvar.h
@@ -1719,6 +1719,13 @@ void pf_addr_inc(struct pf_addr *, sa_fa
void *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *,
sa_family_t);
+#define PF_HI (true)
+#define PF_LO (!PF_HI)
+#define PF_ALGNMNT(off) (((off) % 2) == 0 ? PF_HI : PF_LO)
+void pf_change_8(struct pf_pdesc *, u_int8_t *, u_int8_t, bool);
+void pf_change_16(struct pf_pdesc *, u_int16_t *, u_int16_t);
+void pf_change_16_unaligned(struct pf_pdesc *, void *, u_int16_t, bool);
+void pf_change_32(struct pf_pdesc *, u_int32_t *, u_int32_t);
void pf_change_a(struct pf_pdesc *, void *, u_int32_t);
int pf_check_proto_cksum(struct pf_pdesc *, int, int, u_int8_t,
sa_family_t);
Index: net/pf_norm.c
===================================================================
--- net.orig/pf_norm.c
+++ net/pf_norm.c
@@ -844,10 +844,6 @@ pf_normalize_tcp(struct pf_pdesc *pd)
u_int8_t flags;
u_int rewrite = 0;
- if (pd->csum_status == PF_CSUM_UNKNOWN)
- pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off,
- pd->proto, pd->af);
-
flags = th->th_flags;
if (flags & TH_SYN) {
/* Illegal packet */
@@ -869,15 +865,18 @@ pf_normalize_tcp(struct pf_pdesc *pd)
}
/* If flags changed, or reserved data set, then adjust */
- if (flags != th->th_flags || th->th_x2 != 0) {
- th->th_flags = flags;
- th->th_x2 = 0;
- rewrite = 1;
- }
+ if (flags != th->th_flags || th->th_x2 != 0) {
+ /* hack: set 4-bit th_x2 = 0 */
+ u_int8_t *th_off = (u_int8_t*)(&th->th_ack+1);
+ pf_change_8(pd, th_off, th->th_off << 4, PF_HI);
+
+ pf_change_8(pd, &th->th_flags, flags, PF_LO);
+ rewrite = 1;
+ }
/* Remove urgent pointer, if TH_URG is not set */
if (!(flags & TH_URG) && th->th_urp) {
- th->th_urp = 0;
+ pf_change_16(pd, &th->th_urp, 0);
rewrite = 1;
}
@@ -1381,12 +1380,8 @@ pf_normalize_mss(struct pf_pdesc *pd, u_
u_int16_t mss;
int thoff;
int opt, cnt, optlen = 0;
- u_char opts[MAX_TCPOPTLEN];
- u_char *optp = opts;
-
- if (pd->csum_status == PF_CSUM_UNKNOWN)
- pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off,
- pd->proto, pd->af);
+ u_int8_t opts[MAX_TCPOPTLEN];
+ u_int8_t *optp = opts;
thoff = th->th_off << 2;
cnt = thoff - sizeof(struct tcphdr);
@@ -1409,12 +1404,15 @@ pf_normalize_mss(struct pf_pdesc *pd, u_
break;
}
if (opt == TCPOPT_MAXSEG) {
- memcpy(&mss, (optp + 2), 2);
+ u_int8_t *mssp = optp + 2;
+ memcpy(&mss, mssp, sizeof(mss));
if (ntohs(mss) > maxmss) {
- mss = htons(maxmss);
+ size_t mssoffopts = mssp - opts;
+ pf_change_16_unaligned(pd, &mss,
+ htons(maxmss), PF_ALGNMNT(mssoffopts));
m_copyback(pd->m,
- pd->off + sizeof(*th) + optp + 2 - opts,
- 2, &mss, M_NOWAIT);
+ pd->off + sizeof(*th) + mssoffopts,
+ sizeof(mss), &mss, M_NOWAIT);
pf_cksum(pd, pd->m);
m_copyback(pd->m, pd->off, sizeof(*th), th,
M_NOWAIT);