The branch main has been updated by rscheff:

URL: https://cgit.FreeBSD.org/src/commit/?id=7994ef3c394d16e37af7a4848e58d01c28b81fbc

commit 7994ef3c394d16e37af7a4848e58d01c28b81fbc
Author:     Richard Scheffenegger <[email protected]>
AuthorDate: 2022-02-05 00:07:51 +0000
Commit:     Richard Scheffenegger <[email protected]>
CommitDate: 2022-02-05 00:07:51 +0000

    Revert "tcp: move ECN handling code to a common file"
    
    This reverts commit 0c424c90eaa6602e07bca7836b1d178b91f2a88a.
---
 sys/conf/files                |   1 -
 sys/netinet/tcp_ecn.c         | 296 ------------------------------------------
 sys/netinet/tcp_ecn.h         |  55 --------
 sys/netinet/tcp_input.c       |  46 ++++++-
 sys/netinet/tcp_output.c      |  63 ++++++---
 sys/netinet/tcp_stacks/rack.c | 192 ++++++++++++++++++---------
 sys/netinet/tcp_syncache.c    |  15 ++-
 7 files changed, 224 insertions(+), 444 deletions(-)

diff --git a/sys/conf/files b/sys/conf/files
index 148bd9f4f7b4..78921d2c9fa0 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4364,7 +4364,6 @@ netinet/sctp_usrreq.c             optional inet sctp | inet6 sctp
 netinet/sctputil.c             optional inet sctp | inet6 sctp
 netinet/siftr.c                        optional inet siftr alq | inet6 siftr alq
 netinet/tcp_debug.c            optional tcpdebug
-netinet/tcp_ecn.c              optional inet | inet6
 netinet/tcp_fastopen.c         optional inet tcp_rfc7413 | inet6 tcp_rfc7413
 netinet/tcp_hostcache.c                optional inet | inet6
 netinet/tcp_input.c            optional inet | inet6
diff --git a/sys/netinet/tcp_ecn.c b/sys/netinet/tcp_ecn.c
deleted file mode 100644
index cf29431ea5d2..000000000000
--- a/sys/netinet/tcp_ecn.c
+++ /dev/null
@@ -1,296 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-3-Clause
- *
- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
- *      The Regents of the University of California.  All rights reserved.
- * Copyright (c) 2007-2008,2010
- *      Swinburne University of Technology, Melbourne, Australia.
- * Copyright (c) 2009-2010 Lawrence Stewart <[email protected]>
- * Copyright (c) 2010 The FreeBSD Foundation
- * Copyright (c) 2010-2011 Juniper Networks, Inc.
- * Copyright (c) 2019 Richard Scheffenegger <[email protected]>
- * All rights reserved.
- *
- * Portions of this software were developed at the Centre for Advanced Internet
- * Architectures, Swinburne University of Technology, by Lawrence Stewart,
- * James Healy and David Hayes, made possible in part by a grant from the Cisco
- * University Research Program Fund at Community Foundation Silicon Valley.
- *
- * Portions of this software were developed at the Centre for Advanced
- * Internet Architectures, Swinburne University of Technology, Melbourne,
- * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
- *
- * Portions of this software were developed by Robert N. M. Watson under
- * contract to Juniper Networks, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *      @(#)tcp_ecn.c 8.12 (Berkeley) 5/24/95
- */
-
-/*
- * Utility functions to deal with Explicit Congestion Notification in TCP
- * implementing the essential parts of the Accurate ECN extension
- * https://tools.ietf.org/html/draft-ietf-tcpm-accurate-ecn-09
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include "opt_inet.h"
-#include "opt_inet6.h"
-#include "opt_tcpdebug.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-
-#include <machine/cpu.h>
-
-#include <vm/uma.h>
-
-#include <net/if.h>
-#include <net/if_var.h>
-#include <net/route.h>
-#include <net/vnet.h>
-
-#include <netinet/in.h>
-#include <netinet/in_systm.h>
-#include <netinet/ip.h>
-#include <netinet/in_var.h>
-#include <netinet/in_pcb.h>
-#include <netinet/ip_var.h>
-#include <netinet/ip6.h>
-#include <netinet/icmp6.h>
-#include <netinet6/nd6.h>
-#include <netinet6/ip6_var.h>
-#include <netinet6/in6_pcb.h>
-#include <netinet/tcp.h>
-#include <netinet/tcp_fsm.h>
-#include <netinet/tcp_seq.h>
-#include <netinet/tcp_timer.h>
-#include <netinet/tcp_var.h>
-#include <netinet6/tcp6_var.h>
-#include <netinet/tcpip.h>
-#include <netinet/tcp_ecn.h>
-
-
-/*
- * Process incoming SYN,ACK packet
- */
-void
-tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
-{
-       thflags &= (TH_CWR|TH_ECE);
-
-       if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
-           V_tcp_do_ecn) {
-               tp->t_flags2 |= TF2_ECN_PERMIT;
-               KMOD_TCPSTAT_INC(tcps_ecn_shs);
-       }
-}
-
-/*
- * Handle parallel SYN for ECN
- */
-void
-tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos)
-{
-       if (thflags & TH_ACK)
-               return;
-       if (V_tcp_do_ecn == 0)
-               return;
-       if ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2)) {
-               /* RFC3168 ECN handling */
-               if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) {
-                       tp->t_flags2 |= TF2_ECN_PERMIT;
-                       tp->t_flags2 |= TF2_ECN_SND_ECE;
-                       KMOD_TCPSTAT_INC(tcps_ecn_shs);
-               }
-       }
-}
-
-/*
- * TCP ECN processing.
- */
-int
-tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos)
-{
-       int delta_ace = 0;
-
-       if (tp->t_flags2 & TF2_ECN_PERMIT) {
-               switch (iptos & IPTOS_ECN_MASK) {
-               case IPTOS_ECN_CE:
-                       KMOD_TCPSTAT_INC(tcps_ecn_ce);
-                       break;
-               case IPTOS_ECN_ECT0:
-                       KMOD_TCPSTAT_INC(tcps_ecn_ect0);
-                       break;
-               case IPTOS_ECN_ECT1:
-                       KMOD_TCPSTAT_INC(tcps_ecn_ect1);
-                       break;
-               }
-
-               /* RFC3168 ECN handling */
-               if (thflags & TH_ECE)
-                       delta_ace = 1;
-               if (thflags & TH_CWR) {
-                       tp->t_flags2 &= ~TF2_ECN_SND_ECE;
-                       tp->t_flags |= TF_ACKNOW;
-               }
-               if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
-                       tp->t_flags2 |= TF2_ECN_SND_ECE;
-
-               /* Process a packet differently from RFC3168. */
-               cc_ecnpkt_handler_flags(tp, thflags, iptos);
-       }
-
-       return delta_ace;
-}
-
-/*
- * Send ECN setup <SYN> packet header flags
- */
-uint16_t
-tcp_ecn_output_syn_sent(struct tcpcb *tp)
-{
-       uint16_t thflags = 0;
-
-       if (V_tcp_do_ecn == 1) {
-               /* Send a RFC3168 ECN setup <SYN> packet */
-               if (tp->t_rxtshift >= 1) {
-                       if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
-                               thflags = TH_ECE|TH_CWR;
-               } else
-                       thflags = TH_ECE|TH_CWR;
-       }
-
-       return thflags;
-}
-
-/*
- * output processing of ECN feature
- * returning IP ECN header codepoint
- */
-int
-tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len)
-{
-       int ipecn = IPTOS_ECN_NOTECT;
-       bool newdata;
-
-       /*
-        * If the peer has ECN, mark data packets with
-        * ECN capable transmission (ECT).
-        * Ignore pure control packets, retransmissions
-        * and window probes.
-        */
-       newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
-                   !((tp->t_flags & TF_FORCEDATA) && len == 1));
-       if (newdata) {
-               ipecn = IPTOS_ECN_ECT0;
-               KMOD_TCPSTAT_INC(tcps_ecn_ect0);
-       }
-       /*
-        * Reply with proper ECN notifications.
-        */
-       if (newdata &&
-           (tp->t_flags2 & TF2_ECN_SND_CWR)) {
-               *thflags |= TH_CWR;
-               tp->t_flags2 &= ~TF2_ECN_SND_CWR;
-       }
-       if (tp->t_flags2 & TF2_ECN_SND_ECE)
-               *thflags |= TH_ECE;
-
-       return ipecn;
-}
-
-/*
- * Set up the ECN related tcpcb fields from
- * a syncache entry
- */
-void
-tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc)
-{
-       if (sc->sc_flags & SCF_ECN) {
-               switch (sc->sc_flags & SCF_ECN) {
-               case SCF_ECN:
-                       tp->t_flags2 |= TF2_ECN_PERMIT;
-                       break;
-               /* undefined SCF codepoint */
-               default:
-                       break;
-               }
-       }
-}
-
-/*
- * Process a <SYN> packets ECN information, and provide the
- * syncache with the relevant information.
- */
-int
-tcp_ecn_syncache_add(uint16_t thflags, int iptos)
-{
-       int scflags = 0;
-
-       switch (thflags & (TH_CWR|TH_ECE)) {
-       /* no ECN */
-       case (0|0):
-               break;
-       /* legacy ECN */
-       case (TH_CWR|TH_ECE):
-               scflags = SCF_ECN;
-               break;
-       default:
-               break;
-       }
-       return scflags;
-}
-
-/*
- * Set up the ECN information for the <SYN,ACK> from
- * syncache information.
- */
-uint16_t
-tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc)
-{
-       if ((thflags & TH_SYN) &&
-           (sc->sc_flags & SCF_ECN)) {
-               switch (sc->sc_flags & SCF_ECN) {
-               case SCF_ECN:
-                       thflags |= (0 | TH_ECE);
-                       KMOD_TCPSTAT_INC(tcps_ecn_shs);
-                       break;
-               /* undefined SCF codepoint */
-               default:
-                       break;
-               }
-       }
-       return thflags;
-}
diff --git a/sys/netinet/tcp_ecn.h b/sys/netinet/tcp_ecn.h
deleted file mode 100644
index 5ee49ce53a7a..000000000000
--- a/sys/netinet/tcp_ecn.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-3-Clause
- *
- * Copyright (c) 1982, 1986, 1993, 1994, 1995
- *     The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *     @(#)tcp_ecn.h   8.4 (Berkeley) 5/24/95
- * $FreeBSD$
- */
-
-#ifndef _NETINET_TCP_ECN_H_
-#define _NETINET_TCP_ECN_H_
-
-#include <netinet/tcp.h>
-#include <netinet/tcp_var.h>
-#include <netinet/tcp_syncache.h>
-
-#ifdef _KERNEL
-
-void    tcp_ecn_input_syn_sent(struct tcpcb *, uint16_t, int);
-void    tcp_ecn_input_parallel_syn(struct tcpcb *, uint16_t, int);
-int     tcp_ecn_input_segment(struct tcpcb *, uint16_t, int);
-uint16_t tcp_ecn_output_syn_sent(struct tcpcb *);
-int     tcp_ecn_output_established(struct tcpcb *, uint16_t *, int);
-void    tcp_ecn_syncache_socket(struct tcpcb *, struct syncache *);
-int     tcp_ecn_syncache_add(uint16_t, int);
-uint16_t tcp_ecn_syncache_respond(uint16_t, struct syncache *);
-
-#endif /* _KERNEL */
-
-#endif /* _NETINET_TCP_ECN_H_ */
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index d0b323723e6b..9a1f3ace2541 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -104,7 +104,6 @@ __FBSDID("$FreeBSD$");
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet/tcp.h>
-#include <netinet/tcp_ecn.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_log_buf.h>
 #include <netinet/tcp_seq.h>
@@ -1518,8 +1517,7 @@ void
 tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
     struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos)
 {
-       uint16_t thflags;
-       int acked, ourfinisacked, needoutput = 0, sack_changed;
+       int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
        int rstreason, todrop, win, incforsyn = 0;
        uint32_t tiwin;
        uint16_t nsegs;
@@ -1599,8 +1597,32 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
        /*
         * TCP ECN processing.
         */
-       if (tcp_ecn_input_segment(tp, thflags, iptos))
-               cc_cong_signal(tp, th, CC_ECN);
+       if (tp->t_flags2 & TF2_ECN_PERMIT) {
+               if (thflags & TH_CWR) {
+                       tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+                       tp->t_flags |= TF_ACKNOW;
+               }
+               switch (iptos & IPTOS_ECN_MASK) {
+               case IPTOS_ECN_CE:
+                       tp->t_flags2 |= TF2_ECN_SND_ECE;
+                       TCPSTAT_INC(tcps_ecn_ce);
+                       break;
+               case IPTOS_ECN_ECT0:
+                       TCPSTAT_INC(tcps_ecn_ect0);
+                       break;
+               case IPTOS_ECN_ECT1:
+                       TCPSTAT_INC(tcps_ecn_ect1);
+                       break;
+               }
+
+               /* Process a packet differently from RFC3168. */
+               cc_ecnpkt_handler(tp, th, iptos);
+
+               /* Congestion experienced. */
+               if (thflags & TH_ECE) {
+                       cc_cong_signal(tp, th, CC_ECN);
+               }
+       }
 
        /*
         * Parse options on any incoming segment.
@@ -1641,7 +1663,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
         */
        if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
                /* Handle parallel SYN for ECN */
-               tcp_ecn_input_parallel_syn(tp, thflags, iptos);
+               if (!(thflags & TH_ACK) &&
+                   ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) &&
+                   ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) {
+                       tp->t_flags2 |= TF2_ECN_PERMIT;
+                       tp->t_flags2 |= TF2_ECN_SND_ECE;
+                       TCPSTAT_INC(tcps_ecn_shs);
+               }
                if ((to.to_flags & TOF_SCALE) &&
                    (tp->t_flags & TF_REQ_SCALE) &&
                    !(tp->t_flags & TF_NOOPT)) {
@@ -2047,7 +2075,11 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
                        else
                                tp->t_flags |= TF_ACKNOW;
 
-                       tcp_ecn_input_syn_sent(tp, thflags, iptos);
+                       if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
+                           (V_tcp_do_ecn == 1)) {
+                               tp->t_flags2 |= TF2_ECN_PERMIT;
+                               TCPSTAT_INC(tcps_ecn_shs);
+                       }
 
                        /*
                         * Received <SYN,ACK> in SYN_SENT[*] state.
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index df9ce167b7d5..ce6d9b86e73f 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -98,7 +98,6 @@ __FBSDID("$FreeBSD$");
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
-#include <netinet/tcp_ecn.h>
 
 #include <netipsec/ipsec_support.h>
 
@@ -200,8 +199,7 @@ tcp_default_output(struct tcpcb *tp)
        struct socket *so = tp->t_inpcb->inp_socket;
        int32_t len;
        uint32_t recwin, sendwin;
-       uint16_t flags;
-       int off, error = 0;     /* Keep compiler happy */
+       int off, flags, error = 0;      /* Keep compiler happy */
        u_int if_hw_tsomaxsegcount = 0;
        u_int if_hw_tsomaxsegsize = 0;
        struct mbuf *m;
@@ -1199,27 +1197,54 @@ send:
         * resend those bits a number of times as per
         * RFC 3168.
         */
-       if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
-               flags |= tcp_ecn_output_syn_sent(tp);
+       if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) {
+               if (tp->t_rxtshift >= 1) {
+                       if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
+                               flags |= TH_ECE|TH_CWR;
+               } else
+                       flags |= TH_ECE|TH_CWR;
        }
-       /* Also handle parallel SYN for ECN */
-       if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
-           (tp->t_flags2 & TF2_ECN_PERMIT)) {
-               int ect = tcp_ecn_output_established(tp, &flags, len);
-               if ((tp->t_state == TCPS_SYN_RECEIVED) &&
-                   (tp->t_flags2 & TF2_ECN_SND_ECE))
+       /* Handle parallel SYN for ECN */
+       if ((tp->t_state == TCPS_SYN_RECEIVED) &&
+           (tp->t_flags2 & TF2_ECN_SND_ECE)) {
+                       flags |= TH_ECE;
                        tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+       }
+
+       if (TCPS_HAVEESTABLISHED(tp->t_state) &&
+           (tp->t_flags2 & TF2_ECN_PERMIT)) {
+               /*
+                * If the peer has ECN, mark data packets with
+                * ECN capable transmission (ECT).
+                * Ignore pure ack packets, retransmissions and window probes.
+                */
+               if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
+                   (sack_rxmit == 0) &&
+                   !((tp->t_flags & TF_FORCEDATA) && len == 1 &&
+                   SEQ_LT(tp->snd_una, tp->snd_max))) {
 #ifdef INET6
-               if (isipv6) {
-                       ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
-                       ip6->ip6_flow |= htonl(ect << 20);
-               }
-               else
+                       if (isipv6) {
+                               ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
+                               ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
+                       }
+                       else
 #endif
-               {
-                       ip->ip_tos &= ~IPTOS_ECN_MASK;
-                       ip->ip_tos |= ect;
+                       {
+                               ip->ip_tos &= ~IPTOS_ECN_MASK;
+                               ip->ip_tos |= IPTOS_ECN_ECT0;
+                       }
+                       TCPSTAT_INC(tcps_ecn_ect0);
+                       /*
+                        * Reply with proper ECN notifications.
+                        * Only set CWR on new data segments.
+                        */
+                       if (tp->t_flags2 & TF2_ECN_SND_CWR) {
+                               flags |= TH_CWR;
+                               tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+                       }
                }
+               if (tp->t_flags2 & TF2_ECN_SND_ECE)
+                       flags |= TH_ECE;
        }
 
        /*
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 7bc37a9552a7..6d5b3f2133a6 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -113,7 +113,6 @@ __FBSDID("$FreeBSD$");
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
-#include <netinet/tcp_ecn.h>
 
 #include <netipsec/ipsec_support.h>
 
@@ -11407,9 +11406,11 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so,
                        tp->t_flags |= TF_ACKNOW;
                        rack->rc_dack_toggle = 0;
                }
-
-               tcp_ecn_input_syn_sent(tp, thflags, iptos);
-
+               if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
+                   (V_tcp_do_ecn == 1)) {
+                       tp->t_flags2 |= TF2_ECN_PERMIT;
+                       KMOD_TCPSTAT_INC(tcps_ecn_shs);
+               }
                if (SEQ_GT(th->th_ack, tp->snd_una)) {
                        /*
                         * We advance snd_una for the
@@ -13682,8 +13683,31 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb
                }
                tp->t_rcvtime = ticks;
                /* Now what about ECN? */
-               if (tcp_ecn_input_segment(tp, ae->flags, ae->codepoint))
-                       rack_cong_signal(tp, CC_ECN, ae->ack);
+               if (tp->t_flags2 & TF2_ECN_PERMIT) {
+                       if (ae->flags & TH_CWR) {
+                               tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+                               tp->t_flags |= TF_ACKNOW;
+                       }
+                       switch (ae->codepoint & IPTOS_ECN_MASK) {
+                       case IPTOS_ECN_CE:
+                               tp->t_flags2 |= TF2_ECN_SND_ECE;
+                               KMOD_TCPSTAT_INC(tcps_ecn_ce);
+                               break;
+                       case IPTOS_ECN_ECT0:
+                               KMOD_TCPSTAT_INC(tcps_ecn_ect0);
+                               break;
+                       case IPTOS_ECN_ECT1:
+                               KMOD_TCPSTAT_INC(tcps_ecn_ect1);
+                               break;
+                       }
+
+                       /* Process a packet differently from RFC3168. */
+                       cc_ecnpkt_handler_flags(tp, ae->flags, ae->codepoint);
+                       /* Congestion experienced. */
+                       if (ae->flags & TH_ECE) {
+                               rack_cong_signal(tp,  CC_ECN, ae->ack);
+                       }
+               }
 #ifdef TCP_ACCOUNTING
                /* Count for the specific type of ack in */
                counter_u64_add(tcp_cnt_counters[ae->ack_val_set], 1);
@@ -14433,8 +14457,32 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
         * TCP ECN processing. XXXJTL: If we ever use ECN, we need to move
         * this to occur after we've validated the segment.
         */
-       if (tcp_ecn_input_segment(tp, thflags, iptos))
-               rack_cong_signal(tp, CC_ECN, th->th_ack);
+       if (tp->t_flags2 & TF2_ECN_PERMIT) {
+               if (thflags & TH_CWR) {
+                       tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+                       tp->t_flags |= TF_ACKNOW;
+               }
+               switch (iptos & IPTOS_ECN_MASK) {
+               case IPTOS_ECN_CE:
+                       tp->t_flags2 |= TF2_ECN_SND_ECE;
+                       KMOD_TCPSTAT_INC(tcps_ecn_ce);
+                       break;
+               case IPTOS_ECN_ECT0:
+                       KMOD_TCPSTAT_INC(tcps_ecn_ect0);
+                       break;
+               case IPTOS_ECN_ECT1:
+                       KMOD_TCPSTAT_INC(tcps_ecn_ect1);
+                       break;
+               }
+
+               /* Process a packet differently from RFC3168. */
+               cc_ecnpkt_handler(tp, th, iptos);
+
+               /* Congestion experienced. */
+               if (thflags & TH_ECE) {
+                       rack_cong_signal(tp, CC_ECN, th->th_ack);
+               }
+       }
 
        /*
         * If echoed timestamp is later than the current time, fall back to
@@ -14468,7 +14516,13 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
                 */
                if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
                        /* Handle parallel SYN for ECN */
-                       tcp_ecn_input_parallel_syn(tp, thflags, iptos);
+                       if (!(thflags & TH_ACK) &&
+                           ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) &&
+                           ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) {
+                               tp->t_flags2 |= TF2_ECN_PERMIT;
+                               tp->t_flags2 |= TF2_ECN_SND_ECE;
+                               TCPSTAT_INC(tcps_ecn_shs);
+                       }
                        if ((to.to_flags & TOF_SCALE) &&
                            (tp->t_flags & TF_REQ_SCALE)) {
                                tp->t_flags |= TF_RCVD_SCALE;
@@ -16002,24 +16056,6 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
                udp->uh_ulen = htons(ulen);
        }
        m->m_pkthdr.rcvif = (struct ifnet *)0;
-       if (TCPS_HAVERCVDSYN(tp->t_state) &&
-           (tp->t_flags2 & TF2_ECN_PERMIT)) {
-               int ect = tcp_ecn_output_established(tp, &flags, len);
-               if ((tp->t_state == TCPS_SYN_RECEIVED) &&
-                   (tp->t_flags2 & TF2_ECN_SND_ECE))
-                   tp->t_flags2 &= ~TF2_ECN_SND_ECE;
-#ifdef INET6
-               if (rack->r_is_v6) {
-                   ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
-                   ip6->ip6_flow |= htonl(ect << 20);
-               }
-               else
-#endif
-               {
-                   ip->ip_tos &= ~IPTOS_ECN_MASK;
-                   ip->ip_tos |= ect;
-               }
-       }
        m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
 #ifdef INET6
        if (rack->r_is_v6) {
@@ -16343,8 +16379,7 @@ rack_fast_output(struct tcpcb *tp, struct tcp_rack *rack, uint64_t ts_val,
        u_char opt[TCP_MAXOLEN];
        uint32_t hdrlen, optlen;
        int cnt_thru = 1;
-       int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, ulen = 0;
-       uint16_t flags;
+       int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, flags, ulen = 0;
        uint32_t s_soff;
        uint32_t if_hw_tsomaxsegcount = 0, startseq;
        uint32_t if_hw_tsomaxsegsize;
@@ -16493,23 +16528,37 @@ again:
                udp->uh_ulen = htons(ulen);
        }
        m->m_pkthdr.rcvif = (struct ifnet *)0;
-       if (TCPS_HAVERCVDSYN(tp->t_state) &&
+       if (tp->t_state == TCPS_ESTABLISHED &&
            (tp->t_flags2 & TF2_ECN_PERMIT)) {
-               int ect = tcp_ecn_output_established(tp, &flags, len);
-               if ((tp->t_state == TCPS_SYN_RECEIVED) &&
-                   (tp->t_flags2 & TF2_ECN_SND_ECE))
-                       tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+               /*
+                * If the peer has ECN, mark data packets with ECN capable
+                * transmission (ECT). Ignore pure ack packets,
+                * retransmissions.
+                */
+               if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max)) {
 #ifdef INET6
-               if (rack->r_is_v6) {
-                       ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
-                       ip6->ip6_flow |= htonl(ect << 20);
-               }
-               else
+                       if (rack->r_is_v6) {
+                               ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
+                               ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
+                       }
+                       else
 #endif
-               {
-                       ip->ip_tos &= ~IPTOS_ECN_MASK;
-                       ip->ip_tos |= ect;
+                       {
+                               ip->ip_tos &= ~IPTOS_ECN_MASK;
+                               ip->ip_tos |= IPTOS_ECN_ECT0;
+                       }
+                       KMOD_TCPSTAT_INC(tcps_ecn_ect0);
+                       /*
+                        * Reply with proper ECN notifications.
+                        * Only set CWR on new data segments.
+                        */
+                       if (tp->t_flags2 & TF2_ECN_SND_CWR) {
+                               flags |= TH_CWR;
+                               tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+                       }
                }
+               if (tp->t_flags2 & TF2_ECN_SND_ECE)
+                       flags |= TH_ECE;
        }
        m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
 #ifdef INET6
@@ -16737,8 +16786,7 @@ rack_output(struct tcpcb *tp)
        struct socket *so;
        uint32_t recwin;
        uint32_t sb_offset, s_moff = 0;
-       int32_t len, error = 0;
-       uint16_t flags;
+       int32_t len, flags, error = 0;
        struct mbuf *m, *s_mb = NULL;
        struct mbuf *mb;
        uint32_t if_hw_tsomaxsegcount = 0;
@@ -18548,27 +18596,51 @@ send:
         * are on a retransmit, we may resend those bits a number of times
         * as per RFC 3168.
         */
-       if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
-               flags |= tcp_ecn_output_syn_sent(tp);
+       if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) {
+               if (tp->t_rxtshift >= 1) {
+                       if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
+                               flags |= TH_ECE | TH_CWR;
+               } else
+                       flags |= TH_ECE | TH_CWR;
        }
-       /* Also handle parallel SYN for ECN */
-       if (TCPS_HAVERCVDSYN(tp->t_state) &&
+       /* Handle parallel SYN for ECN */
+       if ((tp->t_state == TCPS_SYN_RECEIVED) &&
+           (tp->t_flags2 & TF2_ECN_SND_ECE)) {
+               flags |= TH_ECE;
+               tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+       }
+       if (TCPS_HAVEESTABLISHED(tp->t_state) &&
            (tp->t_flags2 & TF2_ECN_PERMIT)) {
-               int ect = tcp_ecn_output_established(tp, &flags, len);
-               if ((tp->t_state == TCPS_SYN_RECEIVED) &&
-                   (tp->t_flags2 & TF2_ECN_SND_ECE))
-                       tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+               /*
+                * If the peer has ECN, mark data packets with ECN capable
+                * transmission (ECT). Ignore pure ack packets,
+                * retransmissions.
+                */
+               if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
+                   (sack_rxmit == 0)) {
 #ifdef INET6
-               if (isipv6) {
-                       ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
-                       ip6->ip6_flow |= htonl(ect << 20);
-               }
-               else
+                       if (isipv6) {
+                               ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
+                               ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
+                       }
+                       else
 #endif
-               {
-                       ip->ip_tos &= ~IPTOS_ECN_MASK;
-                       ip->ip_tos |= ect;
+                       {
+                               ip->ip_tos &= ~IPTOS_ECN_MASK;
+                               ip->ip_tos |= IPTOS_ECN_ECT0;
+                       }
+                       KMOD_TCPSTAT_INC(tcps_ecn_ect0);
+                       /*
+                        * Reply with proper ECN notifications.
+                        * Only set CWR on new data segments.
+                        */
+                       if (tp->t_flags2 & TF2_ECN_SND_CWR) {
+                               flags |= TH_CWR;
+                               tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+                       }
                }
+               if (tp->t_flags2 & TF2_ECN_SND_ECE)
+                       flags |= TH_ECE;
        }
        /*
         * If we are doing retransmissions, then snd_nxt will not reflect
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index ed4adda59c22..5fcafa44cc97 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -89,7 +89,6 @@ __FBSDID("$FreeBSD$");
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_syncache.h>
-#include <netinet/tcp_ecn.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
@@ -1028,7 +1027,8 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
                        tp->t_flags |= TF_SACK_PERMIT;
        }
 
-       tcp_ecn_syncache_socket(tp, sc);
+       if (sc->sc_flags & SCF_ECN)
+               tp->t_flags2 |= TF2_ECN_PERMIT;
 
        /*
         * Set up MSS and get cached values from tcp_hostcache.
@@ -1743,9 +1743,9 @@ skip_alloc:
                sc->sc_peer_mss = to->to_mss;   /* peer mss may be zero */
        if (ltflags & TF_NOOPT)
                sc->sc_flags |= SCF_NOOPT;
-       /* ECN Handshake */
-       if (V_tcp_do_ecn)
-               sc->sc_flags |= tcp_ecn_syncache_add(tcp_get_flags(th), iptos);
+       if (((tcp_get_flags(th) & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR)) &&
+           V_tcp_do_ecn)
+               sc->sc_flags |= SCF_ECN;
 
        if (V_tcp_syncookies)
                sc->sc_iss = syncookie_generate(sch, sc);
@@ -1938,7 +1938,10 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
        th->th_win = htons(sc->sc_wnd);
        th->th_urp = 0;
 
-       flags = tcp_ecn_syncache_respond(flags, sc);
+       if ((flags & TH_SYN) && (sc->sc_flags & SCF_ECN)) {
+               flags |= TH_ECE;
+               TCPSTAT_INC(tcps_ecn_shs);
+       }
        tcp_set_flags(th, flags);
 
        /* Tack on the TCP options. */

Reply via email to