Hi, PF implements six distinct TCP option parsing loops. This patch converts these to one inline function in pfvar_priv.h, normalises their semantics, and strips ~100 lines.
I've laid out the existing semantics below. The new loop implements the timestamp parser's semantics of "(s-b) (v-3) (t-a) (t-b)". As a result, - MSS and WSCALE parsers are stricter about candidate option len. - MSS normalization is more tolerant of malformed option lists. These changes were immaterial to the live traffic I've examined (thanks to Richard Cziva for help in gathering the data). I'd like test reports before committing. comments, oks? best, Richard. * postcondition on identity (return correct option) "validity" - 'r[]' contains the returned option. r[0] = TYPE /\ MIN_TYPELEN >= 2 /\ MAX_TYPELEN = r[1] = MIN_TYPELEN (v-1) (fixed-len opt) or MAX_TYPELEN >= r[1] >= MIN_TYPELEN (v-2) (weaker) or r[1] >= MIN_TYPELEN (v-3) (weaker) or r[1] >= 2 (v-4) (weaker) or true (v-5) (weakest) * postcondition on (eoh - opt) (ensure buffer safety) "safety" - 'eoh' points to one past the end of option header - 'opt' points to returned option (eoh - opt) >= r[1] (s-a) (eoh - opt) >= MIN_TYPELEN (s-b) * parsing (tolerance of malformed headers) "tolerance" r[0] = EOL => continue with r[1] := 1 (t-b) r[1] < 2 => continue with r[1] := 2 (t-a) r[1] < 2 => break (t-d) (eoh - opt) < r[1] => break (t-c) * Analysis of existing parsers safety validity tolerance [TS:fixed:10] pf_norm.c:pf_normalize_tcp_init(): (s-b) (v-3) (t-a) (t-b) [TS:fixed:10] pf_norm.c:pf_normalize_tcp_stateful(): (s-b) (v-3) (t-a) (t-b) [SACK:var:2 + (8*n)] pf.c:pf_modulate_sack(): (s-a) (s-b) (v-3) (t-a) (t-b) [MSS:fixed:4] pf_norm.c:pf_normalize_mss(): (s-a) (s-b) (v-4) (t-c) (t-d) [MSS:fixed:4] pf.c:pf_get_mss(): (s-b) (v-5) (t-a) (t-b) [WSCL:fixed:3] pf.c:pf_get_wscale(): (s-b) (v-5) (t-a) (t-b) --- Index: net/pf_norm.c =================================================================== --- net.orig/pf_norm.c +++ net/pf_norm.c @@ -901,8 +901,9 @@ pf_normalize_tcp_init(struct pf_pdesc *p { struct tcphdr *th = &pd->hdr.tcp; u_int32_t tsval, tsecr; - u_int8_t hdr[60]; - u_int8_t *opt; + int olen; + u_int8_t opts[MAX_TCPOPTLEN], *opt = opts; + KASSERT(src->scrub == NULL); @@ -935,44 +936,28 @@ pf_normalize_tcp_init(struct pf_pdesc *p if ((th->th_flags & TH_SYN) == 0) return (0); - if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub && - pf_pull_hdr(pd->m, pd->off, hdr, th->th_off << 2, NULL, NULL, - pd->af)) { - /* Diddle with TCP options */ - int hlen; + if (src->scrub == NULL) + return (0); - opt = hdr + sizeof(struct tcphdr); - hlen = (th->th_off << 2) - sizeof(struct tcphdr); - while (hlen >= TCPOLEN_TIMESTAMP) { - switch (*opt) { - case TCPOPT_EOL: /* FALLTHROUGH */ - case TCPOPT_NOP: - opt++; - hlen--; - break; - case TCPOPT_TIMESTAMP: - if (opt[1] >= TCPOLEN_TIMESTAMP) { - src->scrub->pfss_flags |= - PFSS_TIMESTAMP; - src->scrub->pfss_ts_mod = arc4random(); - - /* note PFSS_PAWS not set yet */ - memcpy(&tsval, &opt[2], - sizeof(u_int32_t)); - memcpy(&tsecr, &opt[6], - sizeof(u_int32_t)); - src->scrub->pfss_tsval0 = ntohl(tsval); - src->scrub->pfss_tsval = ntohl(tsval); - src->scrub->pfss_tsecr = ntohl(tsecr); - getmicrouptime(&src->scrub->pfss_last); - } - /* FALLTHROUGH */ - default: - hlen -= MAX(opt[1], 2); - opt += MAX(opt[1], 2); - break; - } - } + olen = (th->th_off << 2) - sizeof(*th); + if (olen < TCPOLEN_TIMESTAMP || !pf_pull_hdr(pd->m, + pd->off + sizeof(*th), opts, olen, NULL, NULL, pd->af)) + return (0); + + while ((opt = pf_find_tcpopt(opt, opts, olen, + TCPOPT_TIMESTAMP, TCPOLEN_TIMESTAMP)) != NULL) { + + src->scrub->pfss_flags |= PFSS_TIMESTAMP; + src->scrub->pfss_ts_mod = arc4random(); + /* note PFSS_PAWS not set yet */ + memcpy(&tsval, &opt[2], sizeof(u_int32_t)); + memcpy(&tsecr, &opt[6], sizeof(u_int32_t)); + src->scrub->pfss_tsval0 = ntohl(tsval); + src->scrub->pfss_tsval = ntohl(tsval); + src->scrub->pfss_tsecr = ntohl(tsecr); + getmicrouptime(&src->scrub->pfss_last); + + opt += MAX(opt[1], 2); } return (0); @@ -996,12 +981,12 @@ pf_normalize_tcp_stateful(struct pf_pdes { struct tcphdr *th = &pd->hdr.tcp; struct timeval uptime; - u_int32_t tsval, tsecr; u_int tsval_from_last; - u_int8_t hdr[60]; - u_int8_t *opts, *opt; + u_int32_t tsval, tsecr; int copyback = 0; int got_ts = 0; + int olen; + u_int8_t opts[MAX_TCPOPTLEN], *opt = opts; KASSERT(src->scrub || dst->scrub); @@ -1033,87 +1018,61 @@ pf_normalize_tcp_stateful(struct pf_pdes unhandled_af(pd->af); } - if (th->th_off > (sizeof(struct tcphdr) >> 2) && + olen = (th->th_off << 2) - sizeof(*th); + + if (olen >= TCPOLEN_TIMESTAMP && ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && - pf_pull_hdr(pd->m, pd->off, hdr, th->th_off << 2, NULL, NULL, + pf_pull_hdr(pd->m, pd->off + sizeof(*th), opts, olen, NULL, NULL, pd->af)) { - /* Diddle with TCP options */ - int hlen; - opt = opts = hdr + sizeof(struct tcphdr); - hlen = (th->th_off << 2) - sizeof(struct tcphdr); - while (hlen >= TCPOLEN_TIMESTAMP) { - switch (*opt) { - case TCPOPT_EOL: /* FALLTHROUGH */ - case TCPOPT_NOP: - opt++; - hlen--; - break; - case TCPOPT_TIMESTAMP: - /* Modulate the timestamps. Can be used for - * NAT detection, OS uptime determination or - * reboot detection. - */ - - if (got_ts) { - /* Huh? Multiple timestamps!? */ - if (pf_status.debug >= LOG_NOTICE) { - log(LOG_NOTICE, - "pf: %s: multiple TS??", - __func__); - pf_print_state(state); - addlog("\n"); - } - REASON_SET(reason, PFRES_TS); - return (PF_DROP); - } - if (opt[1] >= TCPOLEN_TIMESTAMP) { - u_int8_t *ts = opt + 2; - u_int8_t *tsr = opt + 6; - - memcpy(&tsval, ts, sizeof(u_int32_t)); - memcpy(&tsecr, tsr, sizeof(u_int32_t)); - - /* modulate TS */ - if (tsval && src->scrub && - (src->scrub->pfss_flags & - PFSS_TIMESTAMP)) { - /* tsval used further on */ - tsval = ntohl(tsval); - pf_patch_32_unaligned(pd, ts, - htonl(tsval + - src->scrub->pfss_ts_mod), - PF_ALGNMNT(ts - opts)); - copyback = 1; - } - - /* modulate TS reply if any (!0) */ - if (tsecr && dst->scrub && - (dst->scrub->pfss_flags & - PFSS_TIMESTAMP)) { - /* tsecr used further on */ - tsecr = ntohl(tsecr) - - dst->scrub->pfss_ts_mod; - pf_patch_32_unaligned(pd, tsr, - htonl(tsecr), - PF_ALGNMNT(tsr - opts)); - copyback = 1; - } - got_ts = 1; - } - /* FALLTHROUGH */ - default: - hlen -= MAX(opt[1], 2); - opt += MAX(opt[1], 2); - break; + + /* Modulate the timestamps. Can be used for NAT detection, OS + * uptime determination or reboot detection. + */ + while ((opt = pf_find_tcpopt(opt, opts, olen, + TCPOPT_TIMESTAMP, TCPOLEN_TIMESTAMP)) != NULL) { + + u_int8_t *ts = opt + 2; + u_int8_t *tsr = opt + 6; + + if (got_ts) { + /* Huh? Multiple timestamps!? */ + REASON_SET(reason, PFRES_TS); + return (PF_DROP); + } + + memcpy(&tsval, ts, sizeof(u_int32_t)); + memcpy(&tsecr, tsr, sizeof(u_int32_t)); + + /* modulate TS */ + if (tsval && src->scrub && + (src->scrub->pfss_flags & PFSS_TIMESTAMP)) { + /* tsval used further on */ + tsval = ntohl(tsval); + pf_patch_32_unaligned(pd, + ts, htonl(tsval + src->scrub->pfss_ts_mod), + PF_ALGNMNT(ts - opts)); + copyback = 1; } + + /* modulate TS reply if any (!0) */ + if (tsecr && dst->scrub && + (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { + /* tsecr used further on */ + tsecr = ntohl(tsecr) - dst->scrub->pfss_ts_mod; + pf_patch_32_unaligned(pd, tsr, htonl(tsecr), + PF_ALGNMNT(tsr - opts)); + copyback = 1; + } + got_ts = 1; + + opt += MAX(opt[1], 2); } + if (copyback) { /* Copyback the options, caller copys back header */ *writeback = 1; - m_copyback(pd->m, pd->off + sizeof(struct tcphdr), - (th->th_off << 2) - sizeof(struct tcphdr), hdr + - sizeof(struct tcphdr), M_NOWAIT); + m_copyback(pd->m, pd->off + sizeof(*th), olen, opts, M_NOWAIT); } } @@ -1385,47 +1344,33 @@ pf_normalize_tcp_stateful(struct pf_pdes int pf_normalize_mss(struct pf_pdesc *pd, u_int16_t maxmss) { - struct tcphdr *th = &pd->hdr.tcp; - u_int16_t mss; - int thoff; - int opt, cnt, optlen = 0; - u_int8_t opts[MAX_TCPOPTLEN]; - u_int8_t *optp = opts; + int olen, optsoff; + u_int8_t opts[MAX_TCPOPTLEN], *opt; - thoff = th->th_off << 2; - cnt = thoff - sizeof(struct tcphdr); + olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); + optsoff = pd->off + sizeof(struct tcphdr); - if (cnt <= 0 || cnt > MAX_TCPOPTLEN || !pf_pull_hdr(pd->m, - pd->off + sizeof(*th), opts, cnt, NULL, NULL, pd->af)) + if (olen < TCPOLEN_MAXSEG || + !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af)) return (0); - for (; cnt > 0; cnt -= optlen, optp += optlen) { - opt = optp[0]; - if (opt == TCPOPT_EOL) - break; - if (opt == TCPOPT_NOP) - optlen = 1; - else { - if (cnt < 2) - break; - optlen = optp[1]; - if (optlen < 2 || optlen > cnt) - break; - } - if (opt == TCPOPT_MAXSEG) { - u_int8_t *mssp = optp + 2; - memcpy(&mss, mssp, sizeof(mss)); - if (ntohs(mss) > maxmss) { - size_t mssoffopts = mssp - opts; - pf_patch_16_unaligned(pd, &mss, - htons(maxmss), PF_ALGNMNT(mssoffopts)); - m_copyback(pd->m, - pd->off + sizeof(*th) + mssoffopts, - sizeof(mss), &mss, M_NOWAIT); - m_copyback(pd->m, pd->off, sizeof(*th), th, - M_NOWAIT); - } + opt = opts; + while ((opt = pf_find_tcpopt(opt, opts, olen, + TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) { + u_int16_t mss; + u_int8_t *mssp = opt + 2; + memcpy(&mss, mssp, sizeof(mss)); + if (ntohs(mss) > maxmss) { + size_t mssoffopts = mssp - opts; + pf_patch_16_unaligned(pd, &mss, + htons(maxmss), PF_ALGNMNT(mssoffopts)); + m_copyback(pd->m, optsoff + mssoffopts, + sizeof(mss), &mss, M_NOWAIT); + m_copyback(pd->m, pd->off, + sizeof(struct tcphdr), &pd->hdr.tcp, M_NOWAIT); } + + opt += MAX(opt[1], 2); } return (0); Index: net/pf.c =================================================================== --- net.orig/pf.c +++ net/pf.c @@ -2622,59 +2622,44 @@ pf_translate_icmp_af(struct pf_pdesc *pd int pf_modulate_sack(struct pf_pdesc *pd, struct pf_state_peer *dst) { - struct tcphdr *th = &pd->hdr.tcp; - int hlen = (th->th_off << 2) - sizeof(*th); - int thoptlen = hlen; - u_int8_t opts[MAX_TCPOPTLEN], *opt = opts; - int copyback = 0, i, olen; struct sackblk sack; + int copyback = 0, i; + int olen, optsoff; + u_int8_t opts[MAX_TCPOPTLEN], *opt = opts, *eoh; + olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); + optsoff = pd->off + sizeof(struct tcphdr); #define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) - if (hlen < TCPOLEN_SACKLEN || hlen > MAX_TCPOPTLEN || !pf_pull_hdr( - pd->m, pd->off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) - return 0; - - while (hlen >= TCPOLEN_SACKLEN) { - olen = opt[1]; - switch (*opt) { - case TCPOPT_EOL: /* FALLTHROUGH */ - case TCPOPT_NOP: - opt++; - hlen--; - break; - case TCPOPT_SACK: - if (olen > hlen) - olen = hlen; - if (olen >= TCPOLEN_SACKLEN) { - for (i = 2; i + TCPOLEN_SACK <= olen; - i += TCPOLEN_SACK) { - size_t startoff = (opt + i) - opts; - memcpy(&sack, &opt[i], sizeof(sack)); - pf_patch_32_unaligned(pd, &sack.start, - htonl(ntohl(sack.start) - - dst->seqdiff), - PF_ALGNMNT(startoff)); - pf_patch_32_unaligned(pd, &sack.end, - htonl(ntohl(sack.end) - - dst->seqdiff), - PF_ALGNMNT(startoff + - sizeof(sack.start))); - memcpy(&opt[i], &sack, sizeof(sack)); - } - copyback = 1; - } - /* FALLTHROUGH */ - default: - if (olen < 2) - olen = 2; - hlen -= olen; - opt += olen; + if (olen < TCPOLEN_SACKLEN || olen > MAX_TCPOPTLEN || + !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af)) + return (0); + + eoh = opts + olen; + while ((opt = pf_find_tcpopt(opt, opts, olen, + TCPOPT_SACK, TCPOLEN_SACKLEN)) != NULL) + { + size_t olen = MIN((eoh - opt), opt[1]); + if (olen >= TCPOLEN_SACKLEN) { + for (i = 2; i + TCPOLEN_SACK <= olen; i += TCPOLEN_SACK) + { + size_t startoff = (opt + i) - opts; + memcpy(&sack, &opt[i], sizeof(sack)); + pf_patch_32_unaligned(pd, &sack.start, + htonl(ntohl(sack.start) - dst->seqdiff), + PF_ALGNMNT(startoff)); + pf_patch_32_unaligned(pd, &sack.end, + htonl(ntohl(sack.end) - dst->seqdiff), + PF_ALGNMNT(startoff + sizeof(sack.start))); + memcpy(&opt[i], &sack, sizeof(sack)); + } + copyback = 1; } + + opt += MAX(opt[1], 2); } if (copyback) - m_copyback(pd->m, pd->off + sizeof(*th), thoptlen, opts, - M_NOWAIT); + m_copyback(pd->m, optsoff, olen, opts, M_NOWAIT); return (copyback); } @@ -3224,79 +3209,45 @@ pf_socket_lookup(struct pf_pdesc *pd) u_int8_t pf_get_wscale(struct pf_pdesc *pd) { - struct tcphdr *th = &pd->hdr.tcp; - int hlen; - u_int8_t hdr[60]; - u_int8_t *opt, optlen; + int olen; + u_int8_t opts[MAX_TCPOPTLEN], *opt = opts; u_int8_t wscale = 0; - hlen = th->th_off << 2; /* hlen <= sizeof(hdr) */ - if (hlen <= sizeof(struct tcphdr)) - return (0); - if (!pf_pull_hdr(pd->m, pd->off, hdr, hlen, NULL, NULL, pd->af)) + olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); + if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m, + pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) return (0); - opt = hdr + sizeof(struct tcphdr); - hlen -= sizeof(struct tcphdr); - while (hlen >= 3) { - switch (*opt) { - case TCPOPT_EOL: - case TCPOPT_NOP: - ++opt; - --hlen; - break; - case TCPOPT_WINDOW: - wscale = opt[2]; - if (wscale > TCP_MAX_WINSHIFT) - wscale = TCP_MAX_WINSHIFT; - wscale |= PF_WSCALE_FLAG; - /* FALLTHROUGH */ - default: - optlen = opt[1]; - if (optlen < 2) - optlen = 2; - hlen -= optlen; - opt += optlen; - break; - } + + while ((opt = pf_find_tcpopt(opt, opts, olen, + TCPOPT_WINDOW, TCPOLEN_WINDOW)) != NULL) { + wscale = opt[2]; + wscale = MIN(wscale, TCP_MAX_WINSHIFT); + wscale |= PF_WSCALE_FLAG; + + opt += MAX(opt[1], 2); } + return (wscale); } u_int16_t pf_get_mss(struct pf_pdesc *pd) { - struct tcphdr *th = &pd->hdr.tcp; - int hlen; - u_int8_t hdr[60]; - u_int8_t *opt, optlen; + int olen; + u_int8_t opts[MAX_TCPOPTLEN], *opt = opts; u_int16_t mss = tcp_mssdflt; - hlen = th->th_off << 2; /* hlen <= sizeof(hdr) */ - if (hlen <= sizeof(struct tcphdr)) - return (0); - if (!pf_pull_hdr(pd->m, pd->off, hdr, hlen, NULL, NULL, pd->af)) + olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); + if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m, + pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) return (0); - opt = hdr + sizeof(struct tcphdr); - hlen -= sizeof(struct tcphdr); - while (hlen >= TCPOLEN_MAXSEG) { - switch (*opt) { - case TCPOPT_EOL: - case TCPOPT_NOP: - ++opt; - --hlen; - break; - case TCPOPT_MAXSEG: + + while ((opt = pf_find_tcpopt(opt, opts, olen, + TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) { memcpy(&mss, (opt + 2), 2); mss = ntohs(mss); - /* FALLTHROUGH */ - default: - optlen = opt[1]; - if (optlen < 2) - optlen = 2; - hlen -= optlen; - opt += optlen; - break; - } + + opt += MAX(opt[1], 2); } return (mss); } Index: net/pfvar_priv.h =================================================================== --- net.orig/pfvar_priv.h +++ net/pfvar_priv.h @@ -94,6 +94,42 @@ struct pf_pdesc { } hdr; }; +/* post: r => (r[0] == type /\ r[1] >= min_typelen >= 2 "validity" + * /\ (eoh - r) >= min_typelen >= 2 "safety" ) + * + * warning: r + r[1] may exceed opts bounds for r[1] > min_typelen + */ +static __inline u_int8_t* +pf_find_tcpopt(u_int8_t *opt, u_int8_t *opts, size_t hlen, u_int8_t type, + u_int8_t min_typelen) +{ + u_int8_t *eoh = opts + hlen; + + if (min_typelen < 2) + return (NULL); + + while ((eoh - opt) >= min_typelen) { + switch (*opt) { + case TCPOPT_EOL: + /* FALLTHROUGH - Workaround the failure of some + systems to NOP-pad their bzero'd option buffers, + producing spurious EOLs */ + case TCPOPT_NOP: + opt++; + continue; + default: + if (opt[0] == type && + opt[1] >= min_typelen) + return (opt); + } + + /* evade infinite loops and illegal options */ + opt += MAX(opt[1], 2); + } + return (NULL); +} + + #endif /* _KERNEL */ #endif /* _NET_PFVAR_PRIV_H_ */