This is the final patch in the pf series. I will commit it together with the previous one in around 24 hours unless there are objections.
- pushes the 'field changed' guards into the 'change field' functions. This lets us normalise many of the existing guards and eliminate lots of code from pf_translate(). - while here, optimise pf_patch_32() a little. Relatively clean and gains 25% speedup for a function that can be called four times per TCP segment. I don't know if this matters in the big picture. - simplify pf_match_addr(). Compiler achieves the same optimisation; I saw no difference in the assembly. The 'working' for these appears at http://203.79.107.124/ under the phase two heading. best, Richard. Index: net/pf.c =================================================================== --- net.orig/pf.c +++ net/pf.c @@ -160,15 +160,15 @@ int pf_modulate_sack(struct pf_pdesc struct pf_state_peer *); int pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *, u_int16_t *, u_int16_t *); int pf_change_icmp_af(struct mbuf *, int, struct pf_pdesc *, struct pf_pdesc *, struct pf_addr *, struct pf_addr *, sa_family_t, sa_family_t); -void pf_translate_a(struct pf_pdesc *, struct pf_addr *, +int pf_translate_a(struct pf_pdesc *, struct pf_addr *, struct pf_addr *); void pf_translate_icmp(struct pf_pdesc *, struct pf_addr *, u_int16_t *, struct pf_addr *, struct pf_addr *, u_int16_t); int pf_translate_icmp_af(struct pf_pdesc*, int, void *); void pf_send_tcp(const struct pf_rule *, sa_family_t, const struct pf_addr *, const struct pf_addr *, @@ -1859,73 +1859,103 @@ pf_cksum_fixup_a(u_int16_t *cksum, const return; if (udp && x == 0x0000) x = 0xffff; *cksum = (u_int16_t)(x); } -void +int pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi) { - u_int16_t new = htons(hi ? ( v << 8) : v); - u_int16_t old = htons(hi ? (*f << 8) : *f); + int rewrite = 0; - pf_cksum_fixup(pd->pcksum, old, new, pd->proto); - *f = v; + if (*f != v) { + u_int16_t old = htons(hi ? (*f << 8) : *f); + u_int16_t new = htons(hi ? 
( v << 8) : v); + + pf_cksum_fixup(pd->pcksum, old, new, pd->proto); + *f = v; + rewrite = 1; + } + + return (rewrite); } /* pre: *f is 16-bit aligned within its packet */ -void +int pf_patch_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v) { - pf_cksum_fixup(pd->pcksum, *f, v, pd->proto); - *f = v; + int rewrite = 0; + + if (*f != v) { + pf_cksum_fixup(pd->pcksum, *f, v, pd->proto); + *f = v; + rewrite = 1; + } + + return (rewrite); } -void +int pf_patch_16_unaligned(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi) { - u_int8_t *fb = (u_int8_t*)f; - u_int8_t *vb = (u_int8_t*)&v; + int rewrite = 0; + u_int8_t *fb = (u_int8_t*)f; + u_int8_t *vb = (u_int8_t*)&v; if (hi && ALIGNED_POINTER(f, u_int16_t)) { - pf_patch_16(pd, f, v); /* optimise */ - return; + return (pf_patch_16(pd, f, v)); /* optimise */ } - pf_patch_8(pd, fb++, *vb++, hi); - pf_patch_8(pd, fb++, *vb++,!hi); + rewrite += pf_patch_8(pd, fb++, *vb++, hi); + rewrite += pf_patch_8(pd, fb++, *vb++,!hi); + + return (rewrite); } /* pre: *f is 16-bit aligned within its packet */ -void +/* pre: pd->proto != IPPROTO_UDP */ +int pf_patch_32(struct pf_pdesc *pd, u_int32_t *f, u_int32_t v) { - u_int16_t *pc = pd->pcksum; + int rewrite = 0; + u_int16_t *pc = pd->pcksum; + u_int8_t proto = pd->proto; + + /* optimise: inline udp fixup code is unused; let compiler scrub it */ + if (proto == IPPROTO_UDP) + panic("pf_patch_32: udp"); + + /* optimise: skip *f != v guard; true for all use-cases */ + pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), proto); + pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), proto); - pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), pd->proto); - pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), pd->proto); *f = v; + rewrite = 1; + + return (rewrite); } -void +int pf_patch_32_unaligned(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi) { - u_int8_t *fb = (u_int8_t*)f; - u_int8_t *vb = (u_int8_t*)&v; + int rewrite = 0; + u_int8_t *fb = (u_int8_t*)f; + u_int8_t *vb = (u_int8_t*)&v; if 
(hi && ALIGNED_POINTER(f, u_int32_t)) { - pf_patch_32(pd, f, v); /* optimise */ - return; + return (pf_patch_32(pd, f, v)); /* optimise */ } - pf_patch_8(pd, fb++, *vb++, hi); - pf_patch_8(pd, fb++, *vb++,!hi); - pf_patch_8(pd, fb++, *vb++, hi); - pf_patch_8(pd, fb++, *vb++,!hi); + rewrite += pf_patch_8(pd, fb++, *vb++, hi); + rewrite += pf_patch_8(pd, fb++, *vb++,!hi); + rewrite += pf_patch_8(pd, fb++, *vb++, hi); + rewrite += pf_patch_8(pd, fb++, *vb++,!hi); + + return (rewrite); } int pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type, int *icmp_dir, u_int16_t *virtual_id, u_int16_t *virtual_type) { /* @@ -2131,29 +2161,38 @@ pf_translate_icmp(struct pf_pdesc *pd, s /* change network-header's ip address */ if (oa) pf_translate_a(pd, oa, na); } /* pre: *a is 16-bit aligned within its packet */ /* *a is a network header src/dst address */ -void +int pf_translate_a(struct pf_pdesc *pd, struct pf_addr *a, struct pf_addr *an) { + int rewrite = 0; + + /* warning: !PF_ANEQ != PF_AEQ */ + if (!PF_ANEQ(a, an, pd->af)) + return (0); + /* fixup transport pseudo-header, if any */ switch (pd->proto) { case IPPROTO_TCP: /* FALLTHROUGH */ case IPPROTO_UDP: /* FALLTHROUGH */ case IPPROTO_ICMPV6: pf_cksum_fixup_a(pd->pcksum, a, an, pd->af, pd->proto); break; default: break; /* assume no pseudo-header */ } PF_ACPY(a, an, pd->af); + rewrite = 1; + + return (rewrite); } #if INET6 int pf_translate_af(struct pf_pdesc *pd) { static const struct pf_addr zero; @@ -2774,55 +2813,43 @@ pf_send_icmp(struct mbuf *m, u_int8_t ty icmp6_error(m0, type, code, 0); break; #endif /* INET6 */ } } /* - * Return 1 if the addresses a and b match (with mask m), otherwise return 0. - * If n is 0, they match if they are equal. If n is != 0, they match if they - * are different. 
+ * Return ((n = 0) == (a = b [with mask m])) + * Note: n != 0 => returns (a != b [with mask m]) */ int pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, struct pf_addr *b, sa_family_t af) { - int match = 0; - switch (af) { case AF_INET: if ((a->addr32[0] & m->addr32[0]) == (b->addr32[0] & m->addr32[0])) - match++; + return (n == 0); break; #ifdef INET6 case AF_INET6: if (((a->addr32[0] & m->addr32[0]) == (b->addr32[0] & m->addr32[0])) && ((a->addr32[1] & m->addr32[1]) == (b->addr32[1] & m->addr32[1])) && ((a->addr32[2] & m->addr32[2]) == (b->addr32[2] & m->addr32[2])) && ((a->addr32[3] & m->addr32[3]) == (b->addr32[3] & m->addr32[3]))) - match++; + return (n == 0); break; #endif /* INET6 */ } - if (match) { - if (n) - return (0); - else - return (1); - } else { - if (n) - return (1); - else - return (0); - } + + return (n != 0); } /* * Return 1 if b <= a <= e, otherwise return 0. */ int pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, @@ -3990,140 +4017,66 @@ pf_translate(struct pf_pdesc *pd, struct int rewrite = 0; int afto = pd->af != pd->naf; if (afto || PF_ANEQ(daddr, pd->dst, pd->af)) pd->destchg = 1; switch (pd->proto) { - case IPPROTO_TCP: - if (afto || PF_ANEQ(saddr, pd->src, pd->af) || - *pd->sport != sport) { - if (pd->af == pd->naf) - pf_translate_a(pd, pd->src, saddr); - pf_patch_16(pd, pd->sport, sport); - rewrite = 1; - } - if (afto || PF_ANEQ(daddr, pd->dst, pd->af) || - *pd->dport != dport) { - if (pd->af == pd->naf) - pf_translate_a(pd, pd->dst, daddr); - pf_patch_16(pd, pd->dport, dport); - rewrite = 1; - } - break; - + case IPPROTO_TCP: /* FALLTHROUGH */ case IPPROTO_UDP: - if (afto || PF_ANEQ(saddr, pd->src, pd->af) || - *pd->sport != sport) { - if (pd->af == pd->naf) - pf_translate_a(pd, pd->src, saddr); - pf_patch_16(pd, pd->sport, sport); - rewrite = 1; - } - if (afto || PF_ANEQ(daddr, pd->dst, pd->af) || - *pd->dport != dport) { - if (pd->af == pd->naf) - pf_translate_a(pd, pd->dst, daddr); - pf_patch_16(pd, 
pd->dport, dport); - rewrite = 1; - } + rewrite += pf_patch_16(pd, pd->sport, sport); + rewrite += pf_patch_16(pd, pd->dport, dport); break; case IPPROTO_ICMP: /* pf_translate() is also used when logging invalid packets */ if (pd->af != AF_INET) return (0); if (afto) { #ifdef INET6 if (pf_translate_icmp_af(pd, AF_INET6, pd->hdr.icmp)) return (0); pd->proto = IPPROTO_ICMPV6; rewrite = 1; #endif /* INET6 */ - } else { - if (PF_ANEQ(saddr, pd->src, pd->af)) { - pf_translate_a(pd, pd->src, saddr); - rewrite = 1; - } - if (PF_ANEQ(daddr, pd->dst, pd->af)) { - pf_translate_a(pd, pd->dst, daddr); - rewrite = 1; - } } if (virtual_type == htons(ICMP_ECHO)) { u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport; - - if (icmpid != pd->hdr.icmp->icmp_id) { - pf_patch_16(pd, - &pd->hdr.icmp->icmp_id, icmpid); - rewrite = 1; - } + rewrite += pf_patch_16(pd, + &pd->hdr.icmp->icmp_id, icmpid); } break; #ifdef INET6 case IPPROTO_ICMPV6: /* pf_translate() is also used when logging invalid packets */ if (pd->af != AF_INET6) return (0); if (afto) { if (pf_translate_icmp_af(pd, AF_INET, pd->hdr.icmp6)) return (0); pd->proto = IPPROTO_ICMP; rewrite = 1; - } else { - if (PF_ANEQ(saddr, pd->src, pd->af)) { - pf_translate_a(pd, pd->src, saddr); - rewrite = 1; - } - if (PF_ANEQ(daddr, pd->dst, pd->af)) { - pf_translate_a(pd, pd->dst, daddr); - rewrite = 1; - } } if (virtual_type == htons(ICMP6_ECHO_REQUEST)) { u_int16_t icmpid = (icmp_dir == PF_IN) ? 
sport : dport; - - if (icmpid != pd->hdr.icmp6->icmp6_id) { - pf_patch_16(pd, - &pd->hdr.icmp6->icmp6_id, icmpid); - rewrite = 1; - } + rewrite += pf_patch_16(pd, + &pd->hdr.icmp6->icmp6_id, icmpid); } break; #endif /* INET6 */ + } - default: - switch (pd->af) { - case AF_INET: - if (!afto && PF_ANEQ(saddr, pd->src, pd->af)) { - pf_translate_a(pd, pd->src, saddr); - rewrite = 1; - } - if (!afto && PF_ANEQ(daddr, pd->dst, pd->af)) { - pf_translate_a(pd, pd->dst, daddr); - rewrite = 1; - } - break; -#ifdef INET6 - case AF_INET6: - if (!afto && PF_ANEQ(saddr, pd->src, pd->af)) { - pf_translate_a(pd, pd->src, saddr); - rewrite = 1; - } - if (!afto && PF_ANEQ(daddr, pd->dst, pd->af)) { - pf_translate_a(pd, pd->dst, daddr); - rewrite = 1; - } - break; -#endif /* INET6 */ - } + if (!afto) { + rewrite += pf_translate_a(pd, pd->src, saddr); + rewrite += pf_translate_a(pd, pd->dst, daddr); } + return (rewrite); } int pf_tcp_track_full(struct pf_pdesc *pd, struct pf_state_peer *src, struct pf_state_peer *dst, struct pf_state **state, u_short *reason, int *copyback) @@ -4729,33 +4682,29 @@ pf_test_state(struct pf_pdesc *pd, struc PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af); PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af); pd->naf = nk->af; action = PF_AFRT; } #endif /* INET6 */ - if (afto || PF_ANEQ(pd->src, &nk->addr[sidx], pd->af) || - nk->port[sidx] != pd->osport) { - if (pd->af == pd->naf) - pf_translate_a(pd, pd->src, &nk->addr[sidx]); - if (pd->sport != NULL) - pf_patch_16(pd, pd->sport, nk->port[sidx]); - } + if (!afto) + pf_translate_a(pd, pd->src, &nk->addr[sidx]); + + if (pd->sport != NULL) + pf_patch_16(pd, pd->sport, nk->port[sidx]); if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) || pd->rdomain != nk->rdomain) pd->destchg = 1; - if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) || - nk->port[didx] != pd->odport) { - if (pd->af == pd->naf) - pf_translate_a(pd, pd->dst, &nk->addr[didx]); - if (pd->dport != NULL) - pf_patch_16(pd, pd->dport, 
nk->port[didx]); - } + if (!afto) + pf_translate_a(pd, pd->dst, &nk->addr[didx]); + + if (pd->dport != NULL) + pf_patch_16(pd, pd->dport, nk->port[didx]); pd->m->m_pkthdr.ph_rtableid = nk->rdomain; copyback = 1; } if (copyback && pd->hdrlen > 0) { m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any, M_NOWAIT); @@ -4873,75 +4822,54 @@ pf_test_state_icmp(struct pf_pdesc *pd, #ifdef INET6 if (afto) { PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af); PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af); pd->naf = nk->af; } #endif /* INET6 */ + if (!afto) { + pf_translate_a(pd, pd->src, &nk->addr[sidx]); + pf_translate_a(pd, pd->dst, &nk->addr[didx]); + } + if (pd->rdomain != nk->rdomain) pd->destchg = 1; + if (!afto && PF_ANEQ(pd->dst, + &nk->addr[didx], pd->af)) + pd->destchg = 1; pd->m->m_pkthdr.ph_rtableid = nk->rdomain; switch (pd->af) { case AF_INET: #ifdef INET6 if (afto) { if (pf_translate_icmp_af(pd, AF_INET6, pd->hdr.icmp)) return (PF_DROP); pd->proto = IPPROTO_ICMPV6; } #endif /* INET6 */ - if (!afto && PF_ANEQ(pd->src, - &nk->addr[sidx], AF_INET)) - pf_translate_a(pd, - pd->src, &nk->addr[sidx]); - - if (!afto && PF_ANEQ(pd->dst, - &nk->addr[didx], AF_INET)) { - pf_translate_a(pd, - pd->dst, &nk->addr[didx]); - pd->destchg = 1; - } - - if (nk->port[iidx] != pd->hdr.icmp->icmp_id) { - pf_patch_16(pd, &pd->hdr.icmp->icmp_id, - nk->port[iidx]); - } + pf_patch_16(pd, + &pd->hdr.icmp->icmp_id, nk->port[iidx]); m_copyback(pd->m, pd->off, ICMP_MINLEN, pd->hdr.icmp, M_NOWAIT); copyback = 1; break; #ifdef INET6 case AF_INET6: if (afto) { if (pf_translate_icmp_af(pd, AF_INET, pd->hdr.icmp6)) return (PF_DROP); pd->proto = IPPROTO_ICMP; } - if (!afto && PF_ANEQ(pd->src, - &nk->addr[sidx], AF_INET6)) { - pf_translate_a(pd, - pd->src, &nk->addr[sidx]); - } - if (!afto && PF_ANEQ(pd->dst, - &nk->addr[didx], AF_INET6)) { - pf_translate_a(pd, - pd->dst, &nk->addr[didx]); - pd->destchg = 1; - } - - if (nk->port[iidx] != pd->hdr.icmp6->icmp6_id) { - pf_patch_16(pd, - 
&pd->hdr.icmp6->icmp6_id, - nk->port[iidx]); - } + pf_patch_16(pd, + &pd->hdr.icmp6->icmp6_id, nk->port[iidx]); m_copyback(pd->m, pd->off, sizeof(struct icmp6_hdr), pd->hdr.icmp6, M_NOWAIT); copyback = 1; break; #endif /* INET6 */ @@ -5385,16 +5313,15 @@ pf_test_state_icmp(struct pf_pdesc *pd, pd, &pd2, &nk->addr[sidx], &nk->addr[didx], pd->af, nk->af)) return (PF_DROP); pd->proto = IPPROTO_ICMPV6; if (pf_translate_icmp_af(pd, nk->af, &iih)) return (PF_DROP); - if (virtual_type == htons(ICMP_ECHO) && - nk->port[iidx] != iih.icmp_id) + if (virtual_type == htons(ICMP_ECHO)) pf_patch_16(pd, &iih.icmp_id, nk->port[iidx]); m_copyback(pd2.m, pd2.off, ICMP_MINLEN, &iih, M_NOWAIT); pd->m->m_pkthdr.ph_rtableid = nk->rdomain; pd->destchg = 1; @@ -5498,16 +5425,15 @@ pf_test_state_icmp(struct pf_pdesc *pd, &nk->addr[didx], pd->af, nk->af)) return (PF_DROP); pd->proto = IPPROTO_ICMP; if (pf_translate_icmp_af(pd, nk->af, &iih)) return (PF_DROP); if (virtual_type == - htons(ICMP6_ECHO_REQUEST) && - nk->port[iidx] != iih.icmp6_id) + htons(ICMP6_ECHO_REQUEST)) pf_patch_16(pd, &iih.icmp6_id, nk->port[iidx]); m_copyback(pd2.m, pd2.off, sizeof(struct icmp6_hdr), &iih, M_NOWAIT); pd->m->m_pkthdr.ph_rtableid = nk->rdomain; Index: net/pfvar.h =================================================================== --- net.orig/pfvar.h +++ net/pfvar.h @@ -1710,19 +1710,19 @@ void pf_poolmask(struct pf_addr *, struc void pf_addr_inc(struct pf_addr *, sa_family_t); void *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *, sa_family_t); #define PF_HI (true) #define PF_LO (!PF_HI) #define PF_ALGNMNT(off) (((off) % 2) == 0 ? 
PF_HI : PF_LO) -void pf_patch_8(struct pf_pdesc *, u_int8_t *, u_int8_t, bool); -void pf_patch_16(struct pf_pdesc *, u_int16_t *, u_int16_t); -void pf_patch_16_unaligned(struct pf_pdesc *, void *, u_int16_t, bool); -void pf_patch_32(struct pf_pdesc *, u_int32_t *, u_int32_t); -void pf_patch_32_unaligned(struct pf_pdesc *, void *, u_int32_t, bool); +int pf_patch_8(struct pf_pdesc *, u_int8_t *, u_int8_t, bool); +int pf_patch_16(struct pf_pdesc *, u_int16_t *, u_int16_t); +int pf_patch_16_unaligned(struct pf_pdesc *, void *, u_int16_t, bool); +int pf_patch_32(struct pf_pdesc *, u_int32_t *, u_int32_t); +int pf_patch_32_unaligned(struct pf_pdesc *, void *, u_int32_t, bool); int pflog_packet(struct pf_pdesc *, u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *, struct pf_rule *); void pf_send_deferred_syn(struct pf_state *); int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *, struct pf_addr *, sa_family_t); int pf_match_addr_range(struct pf_addr *, struct pf_addr *, struct pf_addr *, sa_family_t);