There is one rather important change we'd like to commit so it makes the
3.9 release (which is rapidly approaching tree lock), and to do so we
need your help testing.

TCP SACK is enabled by default on OpenBSD (and most other peers you'll
connect to), yet pf's modulate state (and, therefore, synproxy state,
which by definition includes sequence number modulation) does NOT
translate sequence numbers found in SACK options.

This means that, basically, modulate/synproxy state cannot be reliably
used for SACK connections, as it causes connections to randomly stall
after a while (annoying to debug, too, because the problem doesn't occur
during or immediately after the connection establishment, but later).

Let me put it this way: modulate state and synproxy state are BROKEN and
INCOMPATIBLE with SACK right now. Since we (and most everyone else
you'll be filtering for) is enabling SACK by default, we NEED the patch
below. Otherwise we could just as well remove the modulate/synproxy
features, as they break connections in most setups.

So, please help test the patch from Mike below. I'm running with it on
production machines, so I personally trust it to be fine. But testing by
one person is not sufficient for inclusion in the source tree right now.

Apply the patch, rebuild the kernel, reboot. Watch connections using
SACK matching modulate state rules, especially long-lifed connections
and connections transferring large amounts of data. Check output of
netstat -sp tcp on the connection endpoints (the SACK related counters).

The full history of the issue is detailed on

  http://marc.theaimsgroup.com/?t=113396771300009&r=1&w=2

Daniel


Index: net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.508
diff -u -r1.508 pf.c
--- net/pf.c    14 Nov 2005 09:18:55 -0000      1.508
+++ net/pf.c    28 Jan 2006 01:04:27 -0000
@@ -127,6 +127,8 @@
 void                    pf_change_ap(struct pf_addr *, u_int16_t *,
                            u_int16_t *, u_int16_t *, struct pf_addr *,
                            u_int16_t, u_int8_t, sa_family_t);
+int                     pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
+                           struct tcphdr *, struct pf_state_peer *);
 #ifdef INET6
 void                    pf_change_a6(struct pf_addr *, u_int16_t *,
                            struct pf_addr *, u_int8_t);
@@ -1492,6 +1494,63 @@
        }
 }
 
+
+/*
+ * Need to modulate the sequence numbers in the TCP SACK option
+ */
+int
+pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
+    struct tcphdr *th, struct pf_state_peer *dst)
+{
+       int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
+       u_int8_t opts[MAX_TCPOPTLEN], *opt = opts;
+       int copyback = 0, i, olen;
+       struct sackblk sack;
+
+#define TCPOLEN_SACKLEN        (TCPOLEN_SACK + 2)
+       if (hlen < TCPOLEN_SACKLEN ||
+           !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
+               return 0;
+
+       while (hlen >= TCPOLEN_SACKLEN) {
+               olen = opt[1];
+               switch (*opt) {
+               case TCPOPT_EOL:        /* FALLTHROUGH */
+               case TCPOPT_NOP:
+                       opt++;
+                       hlen--;
+                       break;
+               case TCPOPT_SACK:
+                       if (olen > hlen)
+                               olen = hlen;
+                       if (olen >= TCPOLEN_SACKLEN) {
+                               for (i = 2; i + TCPOLEN_SACK <= olen;
+                                   i += TCPOLEN_SACK) {
+                                       memcpy(&sack, &opt[i], sizeof(sack));
+                                       pf_change_a(&sack.start, &th->th_sum,
+                                           htonl(ntohl(sack.start) -
+                                           dst->seqdiff), 0);
+                                       pf_change_a(&sack.end, &th->th_sum,
+                                           htonl(ntohl(sack.end) -
+                                           dst->seqdiff), 0);
+                                       memcpy(&opt[i], &sack, sizeof(sack));
+                               }
+                               copyback = 1;
+                       }
+                       /* FALLTHROUGH */
+               default:
+                       if (olen < 2)
+                               olen = 2;
+                       hlen -= olen;
+                       opt += olen;
+               }
+       }
+
+       if (copyback)
+               m_copyback(m, off + sizeof(*th), thoptlen, opts);
+       return (copyback);
+}
+
 void
 pf_send_tcp(const struct pf_rule *r, sa_family_t af,
     const struct pf_addr *saddr, const struct pf_addr *daddr,
@@ -4279,6 +4338,25 @@
        }
 
        ackskew = dst->seqlo - ack;
+
+
+       /*
+        * Need to demodulate the sequence numbers in any TCP SACK options
+        * (Selective ACK). We could optionally validate the SACK values
+        * against the current ACK window, either forwards or backwards, but
+        * I'm not confident that SACK has been implemented properly
+        * everywhere. It wouldn't surprise me if several stacks accidently
+        * SACK too far backwards of previously ACKed data. There really aren't
+        * any security implications of bad SACKing unless the target stack
+        * doesn't validate the option length correctly. Someone trying to
+        * spoof into a TCP connection won't bother blindly sending SACK
+        * options anyway.
+        */
+       if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
+               if (pf_modulate_sack(m, off, pd, th, dst))
+                       copyback = 1;
+       }
+
 
 #define MAXACKWINDOW (0xffff + 1500)   /* 1500 is an arbitrary fudge factor */
        if (SEQ_GEQ(src->seqhi, end) &&

Reply via email to