Author: pkelsey
Date: Thu Dec 24 19:09:48 2015
New Revision: 292706
URL: https://svnweb.freebsd.org/changeset/base/292706

Log:
  Implementation of server-side TCP Fast Open (TFO) [RFC7413].
  
  TFO is disabled by default in the kernel build.  See the top comment
  in sys/netinet/tcp_fastopen.c for implementation particulars.
  
  Reviewed by:  gnn, jch, stas
  MFC after:    3 days
  Sponsored by: Verisign, Inc.
  Differential Revision:        https://reviews.freebsd.org/D4350

Added:
  head/sys/netinet/tcp_fastopen.c   (contents, props changed)
  head/sys/netinet/tcp_fastopen.h   (contents, props changed)
Modified:
  head/sys/conf/files
  head/sys/conf/options
  head/sys/netinet/tcp.h
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_output.c
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/tcp_syncache.c
  head/sys/netinet/tcp_syncache.h
  head/sys/netinet/tcp_timer.c
  head/sys/netinet/tcp_usrreq.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files Thu Dec 24 18:53:17 2015        (r292705)
+++ head/sys/conf/files Thu Dec 24 19:09:48 2015        (r292706)
@@ -3688,6 +3688,7 @@ netinet/sctp_usrreq.c             optional inet sct
 netinet/sctputil.c             optional inet sctp | inet6 sctp
 netinet/siftr.c                        optional inet siftr alq | inet6 siftr 
alq
 netinet/tcp_debug.c            optional tcpdebug
+netinet/tcp_fastopen.c         optional inet tcp_rfc7413 | inet6 tcp_rfc7413
 netinet/tcp_hostcache.c                optional inet | inet6
 netinet/tcp_input.c            optional inet | inet6
 netinet/tcp_lro.c              optional inet | inet6

Modified: head/sys/conf/options
==============================================================================
--- head/sys/conf/options       Thu Dec 24 18:53:17 2015        (r292705)
+++ head/sys/conf/options       Thu Dec 24 19:09:48 2015        (r292706)
@@ -440,6 +440,8 @@ TCPDEBUG
 TCPPCAP                opt_global.h
 SIFTR
 TCP_OFFLOAD            opt_inet.h # Enable code to dispatch TCP offloading
+TCP_RFC7413            opt_inet.h
+TCP_RFC7413_MAX_KEYS   opt_inet.h
 TCP_SIGNATURE          opt_inet.h
 VLAN_ARRAY             opt_vlan.h
 XBONEHACK

Modified: head/sys/netinet/tcp.h
==============================================================================
--- head/sys/netinet/tcp.h      Thu Dec 24 18:53:17 2015        (r292705)
+++ head/sys/netinet/tcp.h      Thu Dec 24 19:09:48 2015        (r292706)
@@ -97,6 +97,10 @@ struct tcphdr {
 #define    TCPOLEN_TSTAMP_APPA         (TCPOLEN_TIMESTAMP+2) /* appendix A */
 #define        TCPOPT_SIGNATURE        19              /* Keyed MD5: RFC 2385 
*/
 #define           TCPOLEN_SIGNATURE            18
+#define        TCPOPT_FAST_OPEN        34
+#define           TCPOLEN_FAST_OPEN_EMPTY      2
+#define           TCPOLEN_FAST_OPEN_MIN        6
+#define           TCPOLEN_FAST_OPEN_MAX        18
 
 /* Miscellaneous constants */
 #define        MAX_SACK_BLKS   6       /* Max # SACK blocks stored at receiver 
side */
@@ -165,6 +169,7 @@ struct tcphdr {
 #define        TCP_KEEPIDLE    256     /* L,N,X start keeplives after this 
period */
 #define        TCP_KEEPINTVL   512     /* L,N interval between keepalives */
 #define        TCP_KEEPCNT     1024    /* L,N number of keepalives before 
close */
+#define        TCP_FASTOPEN    1025    /* enable TFO / was created via TFO */
 #define        TCP_PCAP_OUT    2048    /* number of output packets to keep */
 #define        TCP_PCAP_IN     4096    /* number of input packets to keep */
 #define TCP_FUNCTION_BLK 8192  /* Set the tcp function pointers to the 
specified stack */

Added: head/sys/netinet/tcp_fastopen.c
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/netinet/tcp_fastopen.c     Thu Dec 24 19:09:48 2015        
(r292706)
@@ -0,0 +1,442 @@
+/*-
+ * Copyright (c) 2015 Patrick Kelsey
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This is a server-side implementation of TCP Fast Open (TFO) [RFC7413].
+ *
+ * This implementation is currently considered to be experimental and is not
+ * included in kernel builds by default.  To include this code, add the
+ * following line to your kernel config:
+ *
+ * options TCP_RFC7413
+ *
+ * The generated TFO cookies are the 64-bit output of
+ * SipHash24(<16-byte-key><client-ip>).  Multiple concurrent valid keys are
+ * supported so that time-based rolling cookie invalidation policies can be
+ * implemented in the system.  The default number of concurrent keys is 2.
+ * This can be adjusted in the kernel config as follows:
+ *
+ * options TCP_RFC7413_MAX_KEYS=<num-keys>
+ *
+ *
+ * The following TFO-specific sysctls are defined:
+ *
+ * net.inet.tcp.fastopen.acceptany (RW, default 0)
+ *     When non-zero, all client-supplied TFO cookies will be considered to
+ *     be valid.
+ *
+ * net.inet.tcp.fastopen.autokey (RW, default 120)
+ *     When this and net.inet.tcp.fastopen.enabled are non-zero, a new key
+ *     will be automatically generated after this many seconds.
+ *
+ * net.inet.tcp.fastopen.enabled (RW, default 0)
+ *     When zero, no new TFO connections can be created.  On the transition
+ *     from enabled to disabled, all installed keys are removed.  On the 
+ *     transition from disabled to enabled, if net.inet.tcp.fastopen.autokey
+ *     is non-zero and there are no keys installed, a new key will be 
+ *     generated immediately.  The transition from enabled to disabled does
+ *     not affect any TFO connections in progress; it only prevents new ones
+ *     from being made.
+ *
+ * net.inet.tcp.fastopen.keylen (RO)
+ *     The key length in bytes.
+ *
+ * net.inet.tcp.fastopen.maxkeys (RO)
+ *     The maximum number of keys supported.
+ *
+ * net.inet.tcp.fastopen.numkeys (RO)
+ *     The current number of keys installed.
+ *
+ * net.inet.tcp.fastopen.setkey (WO)
+ *     Install a new key by writing net.inet.tcp.fastopen.keylen bytes to this
+ *     sysctl.
+ *
+ *
+ * In order for TFO connections to be created via a listen socket, that
+ * socket must have the TCP_FASTOPEN socket option set on it.  This option
+ * can be set on the socket either before or after the listen() is invoked.
+ * Clearing this option on a listen socket after it has been set has no
+ * effect on existing TFO connections or TFO connections in progress; it
+ * only prevents new TFO connections from being made.
+ *
+ * For passively-created sockets, the TCP_FASTOPEN socket option can be
+ * queried to determine whether the connection was established using TFO.
+ * Note that connections that are established via a TFO SYN, but that fall
+ * back to using a non-TFO SYN|ACK will have the TCP_FASTOPEN socket option
+ * set.
+ *
+ * Per the RFC, this implementation limits the number of TFO connections
+ * that can be in the SYN_RECEIVED state on a per listen-socket basis.
+ * Whenever this limit is exceeded, requests for new TFO connections are
+ * serviced as non-TFO requests.  Without such a limit, given a valid TFO
+ * cookie, an attacker could keep the listen queue in an overflow condition
+ * using a TFO SYN flood.  This implementation sets the limit at half the
+ * configured listen backlog.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <crypto/siphash/siphash.h>
+
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/tcp_fastopen.h>
+#include <netinet/tcp_var.h>
+
+
+#define        TCP_FASTOPEN_KEY_LEN    SIPHASH_KEY_LENGTH
+
+#if !defined(TCP_RFC7413_MAX_KEYS) || (TCP_RFC7413_MAX_KEYS < 1)
+#define        TCP_FASTOPEN_MAX_KEYS   2
+#else
+#define        TCP_FASTOPEN_MAX_KEYS   TCP_RFC7413_MAX_KEYS
+#endif
+
+/*
+ * Fixed-size ring of installed keys.  'newest' is the index of the most
+ * recently installed key: tcp_fastopen_addkey_locked() advances it with
+ * wraparound before storing a key, and tcp_fastopen_check_cookie() walks
+ * backwards from it.  How many slots currently hold a key is tracked
+ * separately in tcp_fastopen_numkeys.
+ */
+struct tcp_fastopen_keylist {
+       unsigned int newest;
+       uint8_t key[TCP_FASTOPEN_MAX_KEYS][TCP_FASTOPEN_KEY_LEN];
+};
+
+/*
+ * Context passed as the argument of tcp_fastopen_autokey_callout().
+ */
+struct tcp_fastopen_callout {
+       /* Callout that periodically generates a new key. */
+       struct callout c;
+       /* vnet the callout handler enters with CURVNET_SET(). */
+       struct vnet *v;
+};
+
+/* Parent node for the net.inet.tcp.fastopen.* sysctls defined below. */
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW, 0, "TCP Fast Open");
+
+/*
+ * acceptany: when non-zero, tcp_fastopen_check_cookie() reports every
+ * cookie as valid without consulting the installed keys.
+ */
+static VNET_DEFINE(int, tcp_fastopen_acceptany) = 0;
+#define        V_tcp_fastopen_acceptany        VNET(tcp_fastopen_acceptany)
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, acceptany,
+    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_acceptany), 0,
+    "Accept any non-empty cookie");
+
+/*
+ * autokey: interval in seconds between automatically generated keys.
+ * Writes go through a handler so the callout can be re-armed or stopped.
+ */
+static VNET_DEFINE(unsigned int, tcp_fastopen_autokey) = 120;
+#define        V_tcp_fastopen_autokey  VNET(tcp_fastopen_autokey)
+static int sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, autokey,
+    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
+    &sysctl_net_inet_tcp_fastopen_autokey, "IU",
+    "Number of seconds between auto-generation of a new key; zero disables");
+
+/*
+ * enabled: not static because tcp_fastopen.h declares it for use by other
+ * files.  Writes go through a handler that installs or removes keys on
+ * transitions.
+ */
+VNET_DEFINE(unsigned int, tcp_fastopen_enabled) = 0;
+static int sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, enabled,
+    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
+    &sysctl_net_inet_tcp_fastopen_enabled, "IU",
+    "Enable/disable TCP Fast Open processing");
+
+/* keylen and maxkeys export compile-time constants; they are read-only. */
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, keylen,
+    CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_KEY_LEN,
+    "Key length in bytes");
+
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxkeys,
+    CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_KEYS,
+    "Maximum number of keys supported");
+
+/*
+ * numkeys: number of valid entries in the key ring.  Incremented (up to
+ * TCP_FASTOPEN_MAX_KEYS) by tcp_fastopen_addkey_locked() and reset to zero
+ * when TFO is disabled.
+ */
+static VNET_DEFINE(unsigned int, tcp_fastopen_numkeys) = 0;
+#define        V_tcp_fastopen_numkeys  VNET(tcp_fastopen_numkeys)
+SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numkeys,
+    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numkeys), 0,
+    "Number of keys installed");
+
+/* setkey: write-only; the handler installs the written bytes as a key. */
+static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setkey,
+    CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0,
+    &sysctl_net_inet_tcp_fastopen_setkey, "",
+    "Install a new key");
+
+/*
+ * Read-mostly lock taken around accesses to the key ring and key count.
+ * It is also the lock associated with the autokey callout (see
+ * callout_init_rm() in tcp_fastopen_init()).
+ */
+static VNET_DEFINE(struct rmlock, tcp_fastopen_keylock);
+#define        V_tcp_fastopen_keylock  VNET(tcp_fastopen_keylock)
+
+/* 't' is the caller-provided struct rm_priotracker for the read side. */
+#define TCP_FASTOPEN_KEYS_RLOCK(t)     rm_rlock(&V_tcp_fastopen_keylock, (t))
+#define TCP_FASTOPEN_KEYS_RUNLOCK(t)   rm_runlock(&V_tcp_fastopen_keylock, (t))
+#define TCP_FASTOPEN_KEYS_WLOCK()      rm_wlock(&V_tcp_fastopen_keylock)
+#define TCP_FASTOPEN_KEYS_WUNLOCK()    rm_wunlock(&V_tcp_fastopen_keylock)
+
+/* The installed keys. */
+static VNET_DEFINE(struct tcp_fastopen_keylist, tcp_fastopen_keys);
+#define V_tcp_fastopen_keys    VNET(tcp_fastopen_keys)
+
+/* Callout and context used for automatic key generation. */
+static VNET_DEFINE(struct tcp_fastopen_callout, tcp_fastopen_autokey_ctx);
+#define V_tcp_fastopen_autokey_ctx     VNET(tcp_fastopen_autokey_ctx)
+
+/* UMA zone backing tcp_fastopen_alloc_counter(). */
+static VNET_DEFINE(uma_zone_t, counter_zone);
+#define        V_counter_zone                  VNET(counter_zone)
+
+/*
+ * Initialize the TFO state of the current vnet: the counter zone, the key
+ * lock, the autokey callout (tied to the key lock) and the key ring.
+ *
+ * 'newest' starts at the last slot so that the first key installed by
+ * tcp_fastopen_addkey_locked() wraps around and lands in slot 0.
+ */
+void
+tcp_fastopen_init(void)
+{
+       V_counter_zone = uma_zcreate("tfo", sizeof(unsigned int),
+           NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+       rm_init(&V_tcp_fastopen_keylock, "tfo_keylock");
+       callout_init_rm(&V_tcp_fastopen_autokey_ctx.c,
+           &V_tcp_fastopen_keylock, 0);
+       /*
+        * Record the owning vnet.  tcp_fastopen_autokey_callout() runs
+        * outside of any vnet context and relies on this pointer for its
+        * CURVNET_SET(); leaving it unset would hand it a NULL vnet.
+        */
+       V_tcp_fastopen_autokey_ctx.v = curvnet;
+       V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
+}
+
+/*
+ * Tear down what tcp_fastopen_init() set up for the current vnet.
+ *
+ * The callout is drained first because it was initialized against the key
+ * lock, which is destroyed next.
+ */
+void
+tcp_fastopen_destroy(void)
+{
+       callout_drain(&V_tcp_fastopen_autokey_ctx.c);
+       rm_destroy(&V_tcp_fastopen_keylock);
+       uma_zdestroy(V_counter_zone);
+}
+
+/*
+ * Allocate a counter from the TFO counter zone without sleeping.
+ *
+ * Returns a pointer to a counter initialized to 1, or NULL when the zone
+ * allocation fails.
+ */
+unsigned int *
+tcp_fastopen_alloc_counter(void)
+{
+       unsigned int *cnt;
+
+       cnt = uma_zalloc(V_counter_zone, M_NOWAIT);
+       if (cnt == NULL)
+               return (NULL);
+       *cnt = 1;
+       return (cnt);
+}
+
+/*
+ * Drop one count from a counter obtained from tcp_fastopen_alloc_counter().
+ *
+ * When the counter currently reads 1 its storage is returned to the zone
+ * instead of being decremented; the pointer must not be used afterwards.
+ *
+ * NOTE(review): the test of *counter and the decrement are two separate
+ * steps -- confirm that callers serialize access to a given counter.
+ */
+void
+tcp_fastopen_decrement_counter(unsigned int *counter)
+{
+       if (*counter != 1) {
+               atomic_subtract_int(counter, 1);
+               return;
+       }
+       uma_zfree(V_counter_zone, counter);
+}
+
+/*
+ * Install 'key' (TCP_FASTOPEN_KEY_LEN bytes) as the newest key.
+ *
+ * The ring index advances by one slot with wraparound, so once the ring is
+ * full the oldest key is overwritten.  The key count saturates at
+ * TCP_FASTOPEN_MAX_KEYS.  Expected to be called with the key lock
+ * write-held, as the callers in this file do.
+ */
+static void
+tcp_fastopen_addkey_locked(uint8_t *key)
+{
+       unsigned int slot;
+
+       slot = V_tcp_fastopen_keys.newest + 1;
+       if (slot == TCP_FASTOPEN_MAX_KEYS)
+               slot = 0;
+       V_tcp_fastopen_keys.newest = slot;
+       memcpy(V_tcp_fastopen_keys.key[slot], key, TCP_FASTOPEN_KEY_LEN);
+       if (V_tcp_fastopen_numkeys < TCP_FASTOPEN_MAX_KEYS)
+               V_tcp_fastopen_numkeys++;
+}
+
+/*
+ * Generate a random key with arc4rand() and install it as the newest key.
+ * Same locking expectation as tcp_fastopen_addkey_locked().
+ */
+static void
+tcp_fastopen_autokey_locked(void)
+{
+       uint8_t fresh[TCP_FASTOPEN_KEY_LEN];
+
+       arc4rand(fresh, sizeof(fresh), 0);
+       tcp_fastopen_addkey_locked(fresh);
+}
+
+/*
+ * Autokey timer handler: install a freshly generated key and re-arm the
+ * callout for another net.inet.tcp.fastopen.autokey seconds.
+ *
+ * 'arg' is the struct tcp_fastopen_callout the callout was armed with; its
+ * 'v' member selects the vnet whose keys are updated.  The callout was
+ * initialized with callout_init_rm() on the key lock, so per callout(9)
+ * this handler is expected to run with that lock held.
+ */
+static void
+tcp_fastopen_autokey_callout(void *arg)
+{
+       struct tcp_fastopen_callout *ctx = arg;
+
+       CURVNET_SET(ctx->v);
+       tcp_fastopen_autokey_locked();
+       callout_reset(&ctx->c, V_tcp_fastopen_autokey * hz,
+                     tcp_fastopen_autokey_callout, ctx);
+       CURVNET_RESTORE();
+}
+
+
+/*
+ * Compute the TFO cookie for a connection: the 64-bit SipHash24 output,
+ * keyed with 'key', over the foreign (client) address found in 'inc'.
+ * The IPv4 or IPv6 address is hashed depending on INC_ISIPV6 in
+ * inc->inc_flags.
+ */
+static uint64_t
+tcp_fastopen_make_cookie(uint8_t key[SIPHASH_KEY_LENGTH],
+    struct in_conninfo *inc)
+{
+       SIPHASH_CTX sip;
+       uint64_t cookie;
+
+       SipHash24_Init(&sip);
+       SipHash_SetKey(&sip, key);
+#ifdef INET
+       if ((inc->inc_flags & INC_ISIPV6) == 0)
+               SipHash_Update(&sip, &inc->inc_faddr, sizeof(inc->inc_faddr));
+#endif
+#ifdef INET6
+       if ((inc->inc_flags & INC_ISIPV6) == INC_ISIPV6)
+               SipHash_Update(&sip, &inc->inc6_faddr,
+                   sizeof(inc->inc6_faddr));
+#endif
+       SipHash_Final((u_int8_t *)&cookie, &sip);
+
+       return (cookie);
+}
+
+
+/*
+ * Validate a client-supplied TFO cookie against the installed keys.
+ *
+ *     inc             connection info; its foreign address feeds the cookie
+ *     cookie          cookie bytes (only read when len matches
+ *                     TCP_FASTOPEN_COOKIE_LEN)
+ *     len             length of the supplied cookie in bytes
+ *     latest_cookie   out: cookie derived from the newest installed key
+ *
+ * Return values:
+ *     -1      the cookie is invalid and no valid cookie is available
+ *      0      the cookie is invalid and the latest cookie has been returned
+ *      1      the cookie is valid and the latest cookie has been returned
+ *
+ * When net.inet.tcp.fastopen.acceptany is non-zero, 1 is returned without
+ * consulting the keys and *latest_cookie is set to 0.
+ */
+int
+tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
+    unsigned int len, uint64_t *latest_cookie)
+{
+       struct rm_priotracker tracker;
+       unsigned int i, key_index;
+       uint64_t cur_cookie;
+       int rv;
+
+       if (V_tcp_fastopen_acceptany) {
+               *latest_cookie = 0;
+               return (1);
+       }
+
+       /*
+        * The key count and the key ring may be changed by writers holding
+        * the key lock, so every path that looks at them takes the read
+        * lock first.
+        */
+       TCP_FASTOPEN_KEYS_RLOCK(&tracker);
+
+       /*
+        * With no key installed nothing is written to *latest_cookie, so
+        * report -1 regardless of the length of the supplied cookie.
+        */
+       rv = (V_tcp_fastopen_numkeys > 0) ? 0 : -1;
+
+       if (len != TCP_FASTOPEN_COOKIE_LEN) {
+               /* Cannot match; just hand back the newest cookie, if any. */
+               if (rv == 0)
+                       *latest_cookie = tcp_fastopen_make_cookie(
+                           V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest],
+                           inc);
+               goto out;
+       }
+
+       /*
+        * Check against each available key, from newest to oldest.
+        */
+       key_index = V_tcp_fastopen_keys.newest;
+       for (i = 0; i < V_tcp_fastopen_numkeys; i++) {
+               cur_cookie =
+                   tcp_fastopen_make_cookie(V_tcp_fastopen_keys.key[key_index],
+                       inc);
+               if (i == 0)
+                       *latest_cookie = cur_cookie;
+               if (memcmp(cookie, &cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0) {
+                       rv = 1;
+                       goto out;
+               }
+               if (key_index == 0)
+                       key_index = TCP_FASTOPEN_MAX_KEYS - 1;
+               else
+                       key_index--;
+       }
+out:
+       TCP_FASTOPEN_KEYS_RUNLOCK(&tracker);
+
+       return (rv);
+}
+
+/*
+ * Handler for net.inet.tcp.fastopen.autokey.
+ *
+ * Reads return the current interval.  A written value is rejected with
+ * EINVAL when it exceeds INT_MAX / hz.  While TFO is enabled, a non-zero
+ * value re-arms the autokey callout with the new interval and a change
+ * from non-zero to zero stops it.
+ */
+static int
+sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS)
+{
+       unsigned int interval;
+       int error;
+
+       interval = V_tcp_fastopen_autokey;
+       error = sysctl_handle_int(oidp, &interval, 0, req);
+       if (error != 0 || req->newptr == NULL)
+               return (error);
+
+       /* The callout is armed with interval * hz ticks. */
+       if (interval > (INT_MAX / hz))
+               return (EINVAL);
+
+       TCP_FASTOPEN_KEYS_WLOCK();
+       if (V_tcp_fastopen_enabled) {
+               if (interval != 0)
+                       callout_reset(&V_tcp_fastopen_autokey_ctx.c,
+                           interval * hz, tcp_fastopen_autokey_callout,
+                           &V_tcp_fastopen_autokey_ctx);
+               else if (V_tcp_fastopen_autokey)
+                       callout_stop(&V_tcp_fastopen_autokey_ctx.c);
+       }
+       V_tcp_fastopen_autokey = interval;
+       TCP_FASTOPEN_KEYS_WUNLOCK();
+
+       return (0);
+}
+
+/*
+ * Handler for net.inet.tcp.fastopen.enabled.
+ *
+ * enabled -> disabled: forget all installed keys, stop the autokey callout
+ * if autokey is configured, and clear the enabled flag.
+ * disabled -> enabled: if autokey is configured and no key is installed,
+ * generate one right away and arm the callout; then set the enabled flag.
+ * A write that does not change the state does nothing.
+ */
+static int
+sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS)
+{
+       unsigned int val;
+       int error;
+
+       val = V_tcp_fastopen_enabled;
+       error = sysctl_handle_int(oidp, &val, 0, req);
+       if (error != 0 || req->newptr == NULL)
+               return (error);
+
+       if (V_tcp_fastopen_enabled && val == 0) {
+               TCP_FASTOPEN_KEYS_WLOCK();
+               /* Empty the ring; the next key installed goes to slot 0. */
+               V_tcp_fastopen_numkeys = 0;
+               V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
+               if (V_tcp_fastopen_autokey)
+                       callout_stop(&V_tcp_fastopen_autokey_ctx.c);
+               V_tcp_fastopen_enabled = 0;
+               TCP_FASTOPEN_KEYS_WUNLOCK();
+       } else if (!V_tcp_fastopen_enabled && val != 0) {
+               TCP_FASTOPEN_KEYS_WLOCK();
+               if (V_tcp_fastopen_autokey && V_tcp_fastopen_numkeys == 0) {
+                       tcp_fastopen_autokey_locked();
+                       callout_reset(&V_tcp_fastopen_autokey_ctx.c,
+                           V_tcp_fastopen_autokey * hz,
+                           tcp_fastopen_autokey_callout,
+                           &V_tcp_fastopen_autokey_ctx);
+               }
+               V_tcp_fastopen_enabled = 1;
+               TCP_FASTOPEN_KEYS_WUNLOCK();
+       }
+
+       return (0);
+}
+
+/*
+ * Handler for the write-only net.inet.tcp.fastopen.setkey.
+ *
+ * Returns EINVAL when the request tries to read the value or when the
+ * written data is not exactly TCP_FASTOPEN_KEY_LEN bytes, EPERM when no
+ * new value is supplied, and otherwise the result of copying the key in.
+ * A successfully copied key becomes the newest installed key.
+ */
+static int
+sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS)
+{
+       uint8_t key[TCP_FASTOPEN_KEY_LEN];
+       int err;
+
+       if (req->oldptr != NULL || req->oldlen != 0)
+               return (EINVAL);
+       if (req->newptr == NULL)
+               return (EPERM);
+       if (req->newlen != sizeof(key))
+               return (EINVAL);
+
+       err = SYSCTL_IN(req, key, sizeof(key));
+       if (err == 0) {
+               TCP_FASTOPEN_KEYS_WLOCK();
+               tcp_fastopen_addkey_locked(key);
+               TCP_FASTOPEN_KEYS_WUNLOCK();
+       }
+
+       return (err);
+}

Added: head/sys/netinet/tcp_fastopen.h
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/netinet/tcp_fastopen.h     Thu Dec 24 19:09:48 2015        
(r292706)
@@ -0,0 +1,47 @@
+/*-
+ * Copyright (c) 2015 Patrick Kelsey
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _TCP_FASTOPEN_H_
+#define _TCP_FASTOPEN_H_
+
+#ifdef _KERNEL
+
+#define        TCP_FASTOPEN_COOKIE_LEN 8       /* tied to SipHash24 64-bit output */
+
+/* Per-vnet on/off switch, exported as net.inet.tcp.fastopen.enabled. */
+VNET_DECLARE(unsigned int, tcp_fastopen_enabled);
+#define        V_tcp_fastopen_enabled  VNET(tcp_fastopen_enabled)
+
+/* Set up / tear down the TFO state of the current vnet. */
+void   tcp_fastopen_init(void);
+void   tcp_fastopen_destroy(void);
+/* Returns a counter initialized to 1, or NULL if allocation fails. */
+unsigned int *tcp_fastopen_alloc_counter(void);
+/* Decrements the counter, or frees it when its value is 1. */
+void   tcp_fastopen_decrement_counter(unsigned int *counter);
+/*
+ * Returns 1 if the cookie is valid, 0 if it is invalid and *latest_cookie
+ * has been filled in, -1 if it is invalid and no cookie is available.
+ */
+int    tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
+           unsigned int len, uint64_t *latest_cookie);
+#endif /* _KERNEL */
+
+#endif /* _TCP_FASTOPEN_H_ */

Modified: head/sys/netinet/tcp_input.c
==============================================================================
--- head/sys/netinet/tcp_input.c        Thu Dec 24 18:53:17 2015        
(r292705)
+++ head/sys/netinet/tcp_input.c        Thu Dec 24 19:09:48 2015        
(r292706)
@@ -98,6 +98,9 @@ __FBSDID("$FreeBSD$");
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
@@ -999,7 +1002,8 @@ relocked:
                INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 #endif
        if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
-           (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN)))) {
+             (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
+              !(tp->t_flags & TF_FASTOPEN)))) {
                if (ti_locked == TI_UNLOCKED) {
                        if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
                                in_pcbref(inp);
@@ -1091,6 +1095,9 @@ relocked:
                                rstreason = BANDLIM_RST_OPENPORT;
                                goto dropwithreset;
                        }
+#ifdef TCP_RFC7413
+new_tfo_socket:
+#endif
                        if (so == NULL) {
                                /*
                                 * We completed the 3-way handshake
@@ -1353,7 +1360,12 @@ relocked:
 #endif
                TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
                tcp_dooptions(&to, optp, optlen, TO_SYN);
+#ifdef TCP_RFC7413
+               if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL))
+                       goto new_tfo_socket;
+#else
                syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
+#endif
                /*
                 * Entry added to syncache and mbuf consumed.
                 * Only the listen socket is unlocked by syncache_add().
@@ -1468,7 +1480,8 @@ tcp_do_segment(struct mbuf *m, struct tc
        struct in_conninfo *inc;
        struct mbuf *mfree;
        struct tcpopt to;
-
+       int tfo_syn;
+       
 #ifdef TCPDEBUG
        /*
         * The size of tcp_saveipgen must be the size of the max ip header,
@@ -1921,6 +1934,28 @@ tcp_do_segment(struct mbuf *m, struct tc
                                rstreason = BANDLIM_RST_OPENPORT;
                                goto dropwithreset;
                }
+#ifdef TCP_RFC7413
+               if (tp->t_flags & TF_FASTOPEN) {
+                       /*
+                        * When a TFO connection is in SYN_RECEIVED, the
+                        * only valid packets are the initial SYN, a
+                        * retransmit/copy of the initial SYN (possibly with
+                        * a subset of the original data), a valid ACK, a
+                        * FIN, or a RST.
+                        */
+                       if ((thflags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) {
+                               rstreason = BANDLIM_RST_OPENPORT;
+                               goto dropwithreset;
+                       } else if (thflags & TH_SYN) {
+                               /* non-initial SYN is ignored */
+                               if ((tcp_timer_active(tp, TT_DELACK) || 
+                                    tcp_timer_active(tp, TT_REXMT)))
+                                       goto drop;
+                       } else if (!(thflags & (TH_ACK|TH_FIN|TH_RST))) {
+                               goto drop;
+                       }
+               }
+#endif
                break;
 
        /*
@@ -2136,7 +2171,8 @@ tcp_do_segment(struct mbuf *m, struct tc
         * RFC5961 Section 4.2
         * Send challenge ACK for any SYN in synchronized state.
         */
-       if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT) {
+       if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT &&
+           tp->t_state != TCPS_SYN_RECEIVED) {
                KASSERT(ti_locked == TI_RLOCKED,
                    ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
                INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
@@ -2330,9 +2366,16 @@ tcp_do_segment(struct mbuf *m, struct tc
         */
        if ((thflags & TH_ACK) == 0) {
                if (tp->t_state == TCPS_SYN_RECEIVED ||
-                   (tp->t_flags & TF_NEEDSYN))
+                   (tp->t_flags & TF_NEEDSYN)) {
+#ifdef TCP_RFC7413
+                       if (tp->t_state == TCPS_SYN_RECEIVED &&
+                           tp->t_flags & TF_FASTOPEN) {
+                               tp->snd_wnd = tiwin;
+                               cc_conn_init(tp);
+                       }
+#endif
                        goto step6;
-               else if (tp->t_flags & TF_ACKNOW)
+               } else if (tp->t_flags & TF_ACKNOW)
                        goto dropafterack;
                else
                        goto drop;
@@ -2371,7 +2414,27 @@ tcp_do_segment(struct mbuf *m, struct tc
                        tcp_state_change(tp, TCPS_ESTABLISHED);
                        TCP_PROBE5(accept__established, NULL, tp,
                            mtod(m, const char *), tp, th);
-                       cc_conn_init(tp);
+#ifdef TCP_RFC7413
+                       if (tp->t_tfo_pending) {
+                               
tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+                               tp->t_tfo_pending = NULL;
+
+                               /*
+                                * Account for the ACK of our SYN prior to
+                                * regular ACK processing below.
+                                */ 
+                               tp->snd_una++;
+                       }
+                       /*
+                        * TFO connections call cc_conn_init() during SYN
+                        * processing.  Calling it again here for such
+                        * connections is not harmless as it would undo the
+                        * snd_cwnd reduction that occurs when a TFO SYN|ACK
+                        * is retransmitted.
+                        */
+                       if (!(tp->t_flags & TF_FASTOPEN))
+#endif
+                               cc_conn_init(tp);
                        tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
                }
                /*
@@ -2919,7 +2982,9 @@ dodata:                                                   
/* XXX */
         * case PRU_RCVD).  If a FIN has already been received on this
         * connection then we just ignore the text.
         */
-       if ((tlen || (thflags & TH_FIN)) &&
+       tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) &&
+                  (tp->t_flags & TF_FASTOPEN));
+       if ((tlen || (thflags & TH_FIN) || tfo_syn) &&
            TCPS_HAVERCVDFIN(tp->t_state) == 0) {
                tcp_seq save_start = th->th_seq;
                m_adj(m, drop_hdrlen);  /* delayed header drop */
@@ -2937,8 +3002,9 @@ dodata:                                                   
/* XXX */
                 */
                if (th->th_seq == tp->rcv_nxt &&
                    LIST_EMPTY(&tp->t_segq) &&
-                   TCPS_HAVEESTABLISHED(tp->t_state)) {
-                       if (DELAY_ACK(tp, tlen))
+                   (TCPS_HAVEESTABLISHED(tp->t_state) ||
+                    tfo_syn)) {
+                       if (DELAY_ACK(tp, tlen) || tfo_syn)
                                tp->t_flags |= TF_DELACK;
                        else
                                tp->t_flags |= TF_ACKNOW;
@@ -3293,6 +3359,21 @@ tcp_dooptions(struct tcpopt *to, u_char 
                        to->to_sacks = cp + 2;
                        TCPSTAT_INC(tcps_sack_rcv_blocks);
                        break;
+#ifdef TCP_RFC7413
+               case TCPOPT_FAST_OPEN:
+                       /*
+                        * A TFO option is either empty (a cookie request)
+                        * or carries a cookie whose option length lies in
+                        * [TCPOLEN_FAST_OPEN_MIN, TCPOLEN_FAST_OPEN_MAX].
+                        * Ignore anything else.  The two range tests must
+                        * be OR-ed: no length is both below the minimum
+                        * and above the maximum.
+                        */
+                       if ((optlen != TCPOLEN_FAST_OPEN_EMPTY) &&
+                           ((optlen < TCPOLEN_FAST_OPEN_MIN) ||
+                            (optlen > TCPOLEN_FAST_OPEN_MAX)))
+                               continue;
+                       /* Only honored on SYN segments while TFO is on. */
+                       if (!(flags & TO_SYN))
+                               continue;
+                       if (!V_tcp_fastopen_enabled)
+                               continue;
+                       to->to_flags |= TOF_FASTOPEN;
+                       /* Cookie length excludes the kind and length bytes. */
+                       to->to_tfo_len = optlen - 2;
+                       to->to_tfo_cookie = to->to_tfo_len ? cp + 2 : NULL;
+                       break;
+#endif
                default:
                        continue;
                }

Modified: head/sys/netinet/tcp_output.c
==============================================================================
--- head/sys/netinet/tcp_output.c       Thu Dec 24 18:53:17 2015        
(r292705)
+++ head/sys/netinet/tcp_output.c       Thu Dec 24 19:09:48 2015        
(r292706)
@@ -68,6 +68,9 @@ __FBSDID("$FreeBSD$");
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
 #define        TCPOUTFLAGS
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
@@ -204,6 +207,17 @@ tcp_output(struct tcpcb *tp)
                return (tcp_offload_output(tp));
 #endif
 
+#ifdef TCP_RFC7413
+       /*
+        * For TFO connections in SYN_RECEIVED, only allow the initial
+        * SYN|ACK and those sent by the retransmit timer.
+        */
+       if ((tp->t_flags & TF_FASTOPEN) &&
+           (tp->t_state == TCPS_SYN_RECEIVED) &&
+           SEQ_GT(tp->snd_max, tp->snd_una) &&    /* initial SYN|ACK sent */
+           (tp->snd_nxt != tp->snd_una))          /* not a retransmit */
+               return (0);
+#endif
        /*
         * Determine length of data that should be transmitted,
         * and flags that will be used.
@@ -390,6 +404,15 @@ after_sack_rexmit:
        if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
                if (tp->t_state != TCPS_SYN_RECEIVED)
                        flags &= ~TH_SYN;
+#ifdef TCP_RFC7413
+               /*
+                * When sending additional segments following a TFO SYN|ACK,
+                * do not include the SYN bit.
+                */
+               if ((tp->t_flags & TF_FASTOPEN) &&
+                   (tp->t_state == TCPS_SYN_RECEIVED))
+                       flags &= ~TH_SYN;
+#endif
                off--, len++;
        }
 
@@ -403,6 +426,17 @@ after_sack_rexmit:
                flags &= ~TH_FIN;
        }
 
+#ifdef TCP_RFC7413
+       /*
+        * When retransmitting SYN|ACK on a passively-created TFO socket,
+        * don't include data, as the presence of data may have caused the
+        * original SYN|ACK to have been dropped by a middlebox.
+        */
+       if ((tp->t_flags & TF_FASTOPEN) &&
+           (((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift > 0)) ||
+            (flags & TH_RST)))
+               len = 0;
+#endif
        if (len <= 0) {
                /*
                 * If FIN has been sent but not acked,
@@ -725,6 +759,22 @@ send:
                        tp->snd_nxt = tp->iss;
                        to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
                        to.to_flags |= TOF_MSS;
+#ifdef TCP_RFC7413
+                       /*
+                        * Only include the TFO option on the first
+                        * transmission of the SYN|ACK on a
+                        * passively-created TFO socket, as the presence of
+                        * the TFO option may have caused the original
+                        * SYN|ACK to have been dropped by a middlebox.
+                        */
+                       if ((tp->t_flags & TF_FASTOPEN) &&
+                           (tp->t_state == TCPS_SYN_RECEIVED) &&
+                           (tp->t_rxtshift == 0)) {
+                               to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
+                               to.to_tfo_cookie = (u_char *)&tp->t_tfo_cookie;
+                               to.to_flags |= TOF_FASTOPEN;
+                       }
+#endif
                }
                /* Window scaling. */
                if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
@@ -1004,7 +1054,7 @@ send:
                 * give data to the user when a buffer fills or
                 * a PUSH comes in.)
                 */
-               if (off + len == sbused(&so->so_snd))
+               if ((off + len == sbused(&so->so_snd)) && !(flags & TH_SYN))
                        flags |= TH_PUSH;
                SOCKBUF_UNLOCK(&so->so_snd);
        } else {
@@ -1711,6 +1761,25 @@ tcp_addoptions(struct tcpopt *to, u_char
                        TCPSTAT_INC(tcps_sack_send_blocks);
                        break;
                        }
+#ifdef TCP_RFC7413
+               case TOF_FASTOPEN:
+                       {
+                       int total_len;
+
+                       /* XXX is there any point to aligning this option? */
+                       total_len = TCPOLEN_FAST_OPEN_EMPTY + to->to_tfo_len;
+                       if (TCP_MAXOLEN - optlen < total_len)
+                               continue;
+                       *optp++ = TCPOPT_FAST_OPEN;
+                       *optp++ = total_len;
+                       if (to->to_tfo_len > 0) {
+                               bcopy(to->to_tfo_cookie, optp, to->to_tfo_len);
+                               optp += to->to_tfo_len;
+                       }
+                       optlen += total_len;
+                       break;
+                       }
+#endif
                default:
                        panic("%s: unknown TCP option type", __func__);
                        break;

Modified: head/sys/netinet/tcp_subr.c
==============================================================================
--- head/sys/netinet/tcp_subr.c Thu Dec 24 18:53:17 2015        (r292705)
+++ head/sys/netinet/tcp_subr.c Thu Dec 24 19:09:48 2015        (r292706)
@@ -84,6 +84,9 @@ __FBSDID("$FreeBSD$");
 #include <netinet6/nd6.h>
 #endif
 
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
@@ -704,6 +707,10 @@ tcp_init(void)
 #ifdef TCPPCAP
        tcp_pcap_init();
 #endif
+
+#ifdef TCP_RFC7413
+       tcp_fastopen_init();
+#endif
 }
 
 #ifdef VIMAGE
@@ -712,6 +719,9 @@ tcp_destroy(void)
 {
        int error;
 
+#ifdef TCP_RFC7413
+       tcp_fastopen_destroy();
+#endif
        tcp_hc_destroy();
        syncache_destroy();
        tcp_tw_destroy();
@@ -1439,6 +1449,17 @@ tcp_close(struct tcpcb *tp)
        if (tp->t_state == TCPS_LISTEN)
                tcp_offload_listen_stop(tp);
 #endif
+#ifdef TCP_RFC7413
+       /*
+        * This releases the TFO pending counter resource for TFO listen
+        * sockets as well as passively-created TFO sockets that transition
+        * from SYN_RECEIVED to CLOSED.
+        */
+       if (tp->t_tfo_pending) {
+               tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+               tp->t_tfo_pending = NULL;
+       }
+#endif
        in_pcbdrop(inp);
        TCPSTAT_INC(tcps_closed);
        KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));

Modified: head/sys/netinet/tcp_syncache.c
==============================================================================
--- head/sys/netinet/tcp_syncache.c     Thu Dec 24 18:53:17 2015        
(r292705)
+++ head/sys/netinet/tcp_syncache.c     Thu Dec 24 19:09:48 2015        
(r292706)
@@ -81,6 +81,9 @@ __FBSDID("$FreeBSD$");
 #include <netinet6/in6_pcb.h>
 #endif
 #include <netinet/tcp.h>
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
@@ -1083,6 +1086,39 @@ failed:
        return (0);
 }
 
+#ifdef TCP_RFC7413
+static void
+syncache_tfo_expand(struct syncache *sc, struct socket **lsop, struct mbuf *m,
+    uint64_t response_cookie)
+{
+       struct inpcb *inp;
+       struct tcpcb *tp;
+       unsigned int *pending_counter;
+
+       /*
+        * Build a socket from syncache entry 'sc' by calling
+        * syncache_socket() on the listen socket in *lsop, and store the
+        * result back in *lsop (NULL on failure).
+        *
+        * The listen socket's t_tfo_pending pointer is saved first, since
+        * *lsop no longer refers to the listen socket after that call.
+        *
+        * On failure, tcps_sc_aborted is bumped and the saved counter is
+        * atomically decremented (presumably undoing the increment taken
+        * in syncache_add(); confirm against the caller).
+        *
+        * On success, the new tcpcb is flagged TF_FASTOPEN, records
+        * 'response_cookie' and the saved counter pointer, has snd_max and
+        * snd_nxt set to iss, and tcps_sc_completed is bumped.
+        *
+        * Global TCP locks are held because we manipulate the PCB lists
+        * and create a new socket.
+        */
+       INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+
+       pending_counter = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending;
+       *lsop = syncache_socket(sc, *lsop, m);
+       if (*lsop == NULL) {
+               TCPSTAT_INC(tcps_sc_aborted);
+               atomic_subtract_int(pending_counter, 1);
+       } else {
+               inp = sotoinpcb(*lsop);
+               tp = intotcpcb(inp);
+               tp->t_flags |= TF_FASTOPEN;
+               tp->t_tfo_cookie = response_cookie;
+               tp->snd_max = tp->iss;
+               tp->snd_nxt = tp->iss;
+               tp->t_tfo_pending = pending_counter;
+               TCPSTAT_INC(tcps_sc_completed);
+       }
+}
+#endif /* TCP_RFC7413 */
+
 /*
  * Given a LISTEN socket and an inbound SYN request, add
  * this to the syn cache, and send back a segment:
@@ -1095,8 +1131,15 @@ failed:
  * DoS attack, an attacker could send data which would eventually
  * consume all available buffer space if it were ACKed.  By not ACKing
  * the data, we avoid this DoS scenario.
+ *
+ * The exception to the above is when a SYN with a valid TCP Fast Open (TFO)
+ * cookie is processed, V_tcp_fastopen_enabled is set to true, and the
+ * TCP_FASTOPEN socket option is set.  In this case, a new socket is created
+ * and returned via lsop, the mbuf is not freed so that tcp_input() can
+ * queue its data to the socket, and 1 is returned to indicate the
+ * TFO-socket-creation path was taken.
  */
-void
+int
 syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
     struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
     void *todctx)
@@ -1109,6 +1152,7 @@ syncache_add(struct in_conninfo *inc, st
        u_int ltflags;
        int win, sb_hiwat, ip_ttl, ip_tos;
        char *s;
+       int rv = 0;
 #ifdef INET6
        int autoflowlabel = 0;
 #endif
@@ -1117,6 +1161,11 @@ syncache_add(struct in_conninfo *inc, st
 #endif
        struct syncache scs;
        struct ucred *cred;
+#ifdef TCP_RFC7413
+       uint64_t tfo_response_cookie;
+       int tfo_cookie_valid = 0;
+       int tfo_response_cookie_valid = 0;
+#endif
 
        INP_WLOCK_ASSERT(inp);                  /* listen socket */
        KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
@@ -1141,6 +1190,29 @@ syncache_add(struct in_conninfo *inc, st
        sb_hiwat = so->so_rcv.sb_hiwat;
        ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE));
 
+#ifdef TCP_RFC7413
+       if (V_tcp_fastopen_enabled && (tp->t_flags & TF_FASTOPEN) &&
+           (tp->t_tfo_pending != NULL) && (to->to_flags & TOF_FASTOPEN)) {
+               /*
+                * Limit the number of pending TFO connections to
+                * approximately half of the queue limit.  This prevents TFO
+                * SYN floods from starving the service by filling the
+                * listen queue with bogus TFO connections.
+                */
+               if (atomic_fetchadd_int(tp->t_tfo_pending, 1) <=

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to