On 24/10/17(Tue) 23:22, Job Snijders wrote:
> Dear all,
>
> This patch builds upon the work shared in the following email. Mike's
> patch is a prerequisite to apply this patch.
>
> Date: Tue, 24 Oct 2017 15:21:08 +0200
> From: Mike Belopuhov <m...@belopuhov.com>
> Subject: Re: Refactor TCP partial ACK handling
>
> TCP_FACK was disabled by provos@ in June 1999. This patch removes
> the TCP_FACK option and associated #if{,n}def code.
>
> TCP_FACK is an algorithm that decides that when something is lost, all
> not SACKed packets until the most forward SACK are lost. It may be a
> correct estimate, if network does not reorder packets.
>
> The algorithm described in RFC 6675 may be a better replacement. This
> culling patch can provide guidance how and where to implement 6675.
I'm happy to see fewer #ifdef in these spaghetti. Especially now that
some refactoring is welcome for future CC and MP works.
ok mpi@
> diff --git share/man/man4/options.4 share/man/man4/options.4
> index c28d4e27896..737dc29efea 100644
> --- share/man/man4/options.4
> +++ share/man/man4/options.4
> @@ -445,11 +445,6 @@ TCP to adjust the transmission rate using this signal.
> Both communication endpoints negotiate enabling
> .Em ECN
> functionality at the TCP connection establishment.
> -.It Cd option TCP_FACK
> -Turns on forward acknowledgements allowing a more precise estimate of
> -outstanding data during the fast recovery phase by using
> -.Em SACK
> -information.
> .It Cd option TCP_SIGNATURE
> Turns on support for the TCP MD5 Signature option (RFC 2385).
> This is used by
> diff --git sys/conf/GENERIC sys/conf/GENERIC
> index 6df800175ed..e385b45785c 100644
> --- sys/conf/GENERIC
> +++ sys/conf/GENERIC
> @@ -47,7 +47,6 @@ option FUSE # FUSE
> option SOCKET_SPLICE # Socket Splicing for TCP and UDP
> option TCP_ECN # Explicit Congestion Notification for
> TCP
> option TCP_SIGNATURE # TCP MD5 Signatures, for BGP routing
> sessions
> -#option TCP_FACK # Forward Acknowledgements for TCP
>
> option INET6 # IPv6
> option IPSEC # IPsec
> diff --git sys/netinet/tcp_input.c sys/netinet/tcp_input.c
> index 8d172e2905c..4321d85854c 100644
> --- sys/netinet/tcp_input.c
> +++ sys/netinet/tcp_input.c
> @@ -974,10 +974,6 @@ findpcb:
> if (SEQ_GT(tp->snd_una, tp->snd_last))
> #endif
> tp->snd_last = tp->snd_una;
> -#ifdef TCP_FACK
> - tp->snd_fack = tp->snd_una;
> - tp->retran_data = 0;
> -#endif
> m_freem(m);
>
> /*
> @@ -1566,18 +1562,7 @@ trimthenstep6:
> */
> if (TCP_TIMER_ISARMED(tp, TCPT_REXMT) == 0)
> tp->t_dupacks = 0;
> -#ifdef TCP_FACK
> - /*
> - * In FACK, can enter fast rec. if the receiver
> - * reports a reass. queue longer than 3 segs.
> - */
> - else if (++tp->t_dupacks == tcprexmtthresh ||
> - ((SEQ_GT(tp->snd_fack, tcprexmtthresh *
> - tp->t_maxseg + tp->snd_una)) &&
> - SEQ_GT(tp->snd_una, tp->snd_last))) {
> -#else
> else if (++tp->t_dupacks == tcprexmtthresh) {
> -#endif /* TCP_FACK */
> tcp_seq onxt = tp->snd_nxt;
> u_long win =
> ulmin(tp->snd_wnd, tp->snd_cwnd) /
> @@ -1603,15 +1588,6 @@ trimthenstep6:
> #endif
> tcpstat_inc(tcps_cwr_frecovery);
>
> tcpstat_inc(tcps_sack_recovery_episode);
> -#ifdef TCP_FACK
> - tp->t_dupacks = tcprexmtthresh;
> - (void) tcp_output(tp);
> - /*
> - * During FR, snd_cwnd is held
> - * constant for FACK.
> - */
> - tp->snd_cwnd = tp->snd_ssthresh;
> -#else
> /*
> * tcp_output() will send
> * oldest SACK-eligible rtx.
> @@ -1619,7 +1595,6 @@ trimthenstep6:
> (void) tcp_output(tp);
> tp->snd_cwnd = tp->snd_ssthresh+
> tp->t_maxseg * tp->t_dupacks;
> -#endif /* TCP_FACK */
> goto drop;
> }
> TCP_TIMER_DISARM(tp, TCPT_REXMT);
> @@ -1639,17 +1614,6 @@ trimthenstep6:
> tp->snd_nxt = onxt;
> goto drop;
> } else if (tp->t_dupacks > tcprexmtthresh) {
> -#ifdef TCP_FACK
> - /*
> - * while (awnd < cwnd)
> - * sendsomething();
> - */
> - if (tp->sack_enable) {
> - if (tp->snd_awnd < tp->snd_cwnd)
> - tcp_output(tp);
> - goto drop;
> - }
> -#endif /* TCP_FACK */
> tp->snd_cwnd += tp->t_maxseg;
> (void) tcp_output(tp);
> goto drop;
> @@ -1685,11 +1649,6 @@ trimthenstep6:
> tcp_seq_subtract(tp->snd_max,
> th->th_ack);
> tp->t_dupacks = 0;
> -#ifdef TCP_FACK
> - if (tp->sack_enable &&
> - SEQ_GT(th->th_ack, tp->snd_fack))
> - tp->snd_fack = th->th_ack;
> -#endif
> }
> } else {
> /*
> @@ -1789,16 +1748,6 @@ trimthenstep6:
> #endif
> if (SEQ_LT(tp->snd_nxt, tp->snd_una))
> tp->snd_nxt = tp->snd_una;
> -#ifdef TCP_FACK
> - if (SEQ_GT(tp->snd_una, tp->snd_fack)) {
> - tp->snd_fack = tp->snd_una;
> - /* Update snd_awnd for partial ACK
> - * without any SACK blocks.
> - */
> - tp->snd_awnd = tcp_seq_subtract(tp->snd_nxt,
> - tp->snd_fack) + tp->retran_data;
> - }
> -#endif
>
> switch (tp->t_state) {
>
> @@ -2467,11 +2416,6 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th,
> u_char *cp, int optlen)
> continue; /* bad SACK fields */
> if (SEQ_LEQ(sack.end, tp->snd_una))
> continue; /* old block */
> -#ifdef TCP_FACK
> - /* Updates snd_fack. */
> - if (SEQ_GT(sack.end, tp->snd_fack))
> - tp->snd_fack = sack.end;
> -#endif
> if (SEQ_GT(th->th_ack, tp->snd_una)) {
> if (SEQ_LT(sack.start, th->th_ack))
> continue;
> @@ -2520,16 +2464,6 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th,
> u_char *cp, int optlen)
> }
> if (SEQ_LEQ(sack.start, cur->start)) {
> /* Data acks at least the beginning of hole */
> -#ifdef TCP_FACK
> - if (SEQ_GT(sack.end, cur->rxmit))
> - tp->retran_data -=
> - tcp_seq_subtract(cur->rxmit,
> - cur->start);
> - else
> - tp->retran_data -=
> - tcp_seq_subtract(sack.end,
> - cur->start);
> -#endif
> if (SEQ_GEQ(sack.end, cur->end)) {
> /* Acks entire hole, so delete hole */
> if (p != cur) {
> @@ -2554,12 +2488,6 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th,
> u_char *cp, int optlen)
> }
> /* move end of hole backward */
> if (SEQ_GEQ(sack.end, cur->end)) {
> -#ifdef TCP_FACK
> - if (SEQ_GT(cur->rxmit, sack.start))
> - tp->retran_data -=
> - tcp_seq_subtract(cur->rxmit,
> - sack.start);
> -#endif
> cur->end = sack.start;
> cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
> cur->dups++;
> @@ -2580,16 +2508,6 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th,
> u_char *cp, int optlen)
> pool_get(&sackhl_pool, PR_NOWAIT);
> if (temp == NULL)
> goto done; /* ENOBUFS */
> -#ifdef TCP_FACK
> - if (SEQ_GT(cur->rxmit, sack.end))
> - tp->retran_data -=
> - tcp_seq_subtract(sack.end,
> - sack.start);
> - else if (SEQ_GT(cur->rxmit, sack.start))
> - tp->retran_data -=
> - tcp_seq_subtract(cur->rxmit,
> - sack.start);
> -#endif
> temp->next = cur->next;
> temp->start = sack.end;
> temp->end = cur->end;
> @@ -2631,21 +2549,6 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th,
> u_char *cp, int optlen)
> }
> }
> done:
> -#ifdef TCP_FACK
> - /*
> - * Update retran_data and snd_awnd. Go through the list of
> - * holes. Increment retran_data by (hole->rxmit - hole->start).
> - */
> - tp->retran_data = 0;
> - cur = tp->snd_holes;
> - while (cur) {
> - tp->retran_data += cur->rxmit - cur->start;
> - cur = cur->next;
> - }
> - tp->snd_awnd = tcp_seq_subtract(tp->snd_nxt, tp->snd_fack) +
> - tp->retran_data;
> -#endif
> -
> return;
> }
>
> @@ -2705,11 +2608,9 @@ tcp_sack_partialack(struct tcpcb *tp, struct tcphdr
> *th)
> /* Turn off retx. timer (will start again next segment) */
> TCP_TIMER_DISARM(tp, TCPT_REXMT);
> tp->t_rtttime = 0;
> -#ifndef TCP_FACK
> /*
> * Partial window deflation. This statement relies on the
> - * fact that tp->snd_una has not been updated yet. In FACK
> - * hold snd_cwnd constant during fast recovery.
> + * fact that tp->snd_una has not been updated yet.
> */
> if (tp->snd_cwnd > (th->th_ack - tp->snd_una)) {
> tp->snd_cwnd -= th->th_ack - tp->snd_una;
> @@ -2718,11 +2619,6 @@ tcp_sack_partialack(struct tcpcb *tp, struct tcphdr
> *th)
> tp->snd_cwnd = tp->t_maxseg;
> tp->snd_cwnd += tp->t_maxseg;
> tp->t_flags |= TF_NEEDOUTPUT;
> -#else
> - /* Force call to tcp_output */
> - if (tp->snd_awnd < tp->snd_cwnd)
> - tp->t_flags |= TF_NEEDOUTPUT;
> -#endif
> }
>
> /*
> @@ -3703,11 +3599,6 @@ syn_cache_get(struct sockaddr *src, struct sockaddr
> *dst, struct tcphdr *th,
> tp->irs = sc->sc_irs;
> tcp_sendseqinit(tp);
> tp->snd_last = tp->snd_una;
> -#ifdef TCP_FACK
> - tp->snd_fack = tp->snd_una;
> - tp->retran_data = 0;
> - tp->snd_awnd = 0;
> -#endif
> #ifdef TCP_ECN
> if (sc->sc_flags & SCF_ECN_PERMIT) {
> tp->t_flags |= TF_ECN_PERMIT;
> diff --git sys/netinet/tcp_output.c sys/netinet/tcp_output.c
> index a0a88b5ec3d..a652ad1d80d 100644
> --- sys/netinet/tcp_output.c
> +++ sys/netinet/tcp_output.c
> @@ -130,17 +130,7 @@ tcp_sack_output(struct tcpcb *tp)
> return (NULL);
> p = tp->snd_holes;
> while (p) {
> -#ifndef TCP_FACK
> if (p->dups >= tcprexmtthresh && SEQ_LT(p->rxmit, p->end)) {
> -#else
> - /* In FACK, if p->dups is less than tcprexmtthresh, but
> - * snd_fack advances more than tcprextmtthresh * tp->t_maxseg,
> - * tcp_input() will try fast retransmit. This forces output.
> - */
> - if ((p->dups >= tcprexmtthresh ||
> - tp->t_dupacks == tcprexmtthresh) &&
> - SEQ_LT(p->rxmit, p->end)) {
> -#endif /* TCP_FACK */
> if (SEQ_LT(p->rxmit, tp->snd_una)) {/* old SACK hole */
> p = p->next;
> continue;
> @@ -258,15 +248,6 @@ again:
> if (tp->sack_enable && SEQ_LT(tp->snd_nxt, tp->snd_max))
> tcp_sack_adjust(tp);
> off = tp->snd_nxt - tp->snd_una;
> -#ifdef TCP_FACK
> - /* Normally, sendable data is limited by off < tp->snd_cwnd.
> - * But in FACK, sendable data is limited by snd_awnd < snd_cwnd,
> - * regardless of offset.
> - */
> - if (tp->sack_enable && (tp->t_dupacks > tcprexmtthresh))
> - win = tp->snd_wnd;
> - else
> -#endif
> win = ulmin(tp->snd_wnd, tp->snd_cwnd);
>
> flags = tcp_outflags[tp->t_state];
> @@ -285,11 +266,8 @@ again:
> sack_rxmit = 1;
> /* Coalesce holes into a single retransmission */
> len = min(tp->t_maxseg, p->end - p->rxmit);
> -#ifndef TCP_FACK
> - /* in FACK, hold snd_cwnd constant during recovery */
> if (SEQ_LT(tp->snd_una, tp->snd_last))
> tp->snd_cwnd -= tp->t_maxseg;
> -#endif
> }
> }
>
> @@ -329,17 +307,6 @@ again:
>
> if (!sack_rxmit) {
> len = ulmin(so->so_snd.sb_cc, win) - off;
> -
> -#ifdef TCP_FACK
> - /*
> - * If we're in fast recovery (SEQ_GT(tp->snd_last,
> tp->snd_una)),
> - * and amount of outstanding data (snd_awnd) is >= snd_cwnd,
> then
> - * do not send data (like zero window conditions)
> - */
> - if (tp->sack_enable && SEQ_GT(tp->snd_last, tp->snd_una) &&
> - len && (tp->snd_awnd >= tp->snd_cwnd))
> - len = 0;
> -#endif /* TCP_FACK */
> }
>
> if (len < 0) {
> @@ -794,9 +761,6 @@ send:
> sendalot = 0;
> th->th_seq = htonl(p->rxmit);
> p->rxmit += len;
> -#ifdef TCP_FACK
> - tp->retran_data += len;
> -#endif
> tcpstat_pkt(tcps_sack_rexmits, tcps_sack_rexmit_bytes, len);
> }
>
> @@ -1096,12 +1060,6 @@ send:
> #endif /* INET6 */
> }
>
> -#ifdef TCP_FACK
> - /* Update snd_awnd to reflect the new data that was sent. */
> - tp->snd_awnd = tcp_seq_subtract(tp->snd_max, tp->snd_fack) +
> - tp->retran_data;
> -#endif
> -
> if (error) {
> out:
> if (error == ENOBUFS) {
> diff --git sys/netinet/tcp_timer.c sys/netinet/tcp_timer.c
> index 9d2e75942cd..3452121da6d 100644
> --- sys/netinet/tcp_timer.c
> +++ sys/netinet/tcp_timer.c
> @@ -172,11 +172,6 @@ tcp_timer_freesack(struct tcpcb *tp)
> pool_put(&sackhl_pool, p);
> }
> tp->snd_holes = 0;
> -#ifdef TCP_FACK
> - tp->snd_fack = tp->snd_una;
> - tp->retran_data = 0;
> - tp->snd_awnd = 0;
> -#endif
> }
>
> void
> diff --git sys/netinet/tcp_usrreq.c sys/netinet/tcp_usrreq.c
> index d7ee1ecc517..549149a0531 100644
> --- sys/netinet/tcp_usrreq.c
> +++ sys/netinet/tcp_usrreq.c
> @@ -269,11 +269,6 @@ tcp_usrreq(struct socket *so, int req, struct mbuf *m,
> struct mbuf *nam,
> tcp_set_iss_tsm(tp);
> tcp_sendseqinit(tp);
> tp->snd_last = tp->snd_una;
> -#ifdef TCP_FACK
> - tp->snd_fack = tp->snd_una;
> - tp->retran_data = 0;
> - tp->snd_awnd = 0;
> -#endif
> error = tcp_output(tp);
> break;
>
> diff --git sys/netinet/tcp_var.h sys/netinet/tcp_var.h
> index 4c1b37b2b68..4ca5fc0afe4 100644
> --- sys/netinet/tcp_var.h
> +++ sys/netinet/tcp_var.h
> @@ -119,12 +119,6 @@ struct tcpcb {
> int sack_enable; /* enable SACK for this connection */
> int snd_numholes; /* number of holes seen by sender */
> struct sackhole *snd_holes; /* linked list of holes (sorted) */
> -#if 1 /*defined(TCP_FACK)*/
> - tcp_seq snd_fack; /* for FACK congestion control */
> - u_long snd_awnd; /* snd_nxt - snd_fack + */
> - /* retransmitted data */
> - int retran_data; /* amount of outstanding retx. data */
> -#endif /* TCP_FACK */
> tcp_seq snd_last; /* for use in fast recovery */
> /* receive sequence variables */
> u_long rcv_wnd; /* receive window */
> diff --git usr.bin/netstat/inet.c usr.bin/netstat/inet.c
> index 468d514a22e..4cb82ea7379 100644
> --- usr.bin/netstat/inet.c
> +++ usr.bin/netstat/inet.c
> @@ -1496,9 +1496,6 @@ tcpcb_dump(u_long off)
> p("%lu", snd_wnd, "\n ");
> p("%d", sack_enable, ", ");
> p("%d", snd_numholes, ", ");
> - p("%u", snd_fack, ", ");
> - p("%lu",snd_awnd, "\n ");
> - p("%u", retran_data, ", ");
> p("%u", snd_last, "\n ");
> p("%u", irs, "\n ");
> p("%u", rcv_nxt, ", ");
>