The branch main has been updated by rscheff:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=43b117f88f3044d5f08e70b0daf0bb964f9ecb4b

commit 43b117f88f3044d5f08e70b0daf0bb964f9ecb4b
Author:     Richard Scheffenegger <[email protected]>
AuthorDate: 2023-06-06 20:56:44 +0000
Commit:     Richard Scheffenegger <[email protected]>
CommitDate: 2023-06-06 20:58:54 +0000

    tcp: make the maximum number of retransmissions tunable per VNET
    
    Both Windows (TcpMaxDataRetransmissions) and Linux (tcp_retries2)
    allow restricting the maximum number of consecutive timer-based
    retransmissions. Add the same capability to FreeBSD on a per-VNET
    basis.
    
    Reviewed By:            cc, tuexen, #transport
    Sponsored by:           NetApp, Inc.
    Differential Revision:  https://reviews.freebsd.org/D40424
---
 share/man/man4/tcp.4          |  5 ++++-
 sys/netinet/tcp_output.c      |  2 +-
 sys/netinet/tcp_stacks/bbr.c  |  8 ++++----
 sys/netinet/tcp_stacks/rack.c |  8 ++++----
 sys/netinet/tcp_timer.c       | 30 ++++++++++++++++++++++++++----
 sys/netinet/tcp_var.h         |  2 ++
 6 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4
index ce27705f7eda..382e39a4355d 100644
--- a/share/man/man4/tcp.4
+++ b/share/man/man4/tcp.4
@@ -34,7 +34,7 @@
 .\"     From: @(#)tcp.4        8.1 (Berkeley) 6/5/93
 .\" $FreeBSD$
 .\"
-.Dd February 3, 2023
+.Dd June 6, 2023
 .Dt TCP 4
 .Os
 .Sh NAME
@@ -843,6 +843,9 @@ Maximum size of automatic receive buffer.
 Initial
 .Tn TCP
 receive window (buffer size).
+.It Va retries
+Maximum number of consecutive timer-based retransmits sent after a data
+segment is lost (the default and the maximum are both 12).
 .It Va rexmit_drop_options
 Drop TCP options from third and later retransmitted SYN segments
 of a connection.
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index abfab1a62176..800480413586 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -1766,7 +1766,7 @@ tcp_setpersist(struct tcpcb *tp)
                        tt = maxunacktime;
        }
        tcp_timer_activate(tp, TT_PERSIST, tt);
-       if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
+       if (tp->t_rxtshift < V_tcp_retries)
                tp->t_rxtshift++;
 }
 
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 5ecb558dadb3..1e8053afc45c 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -4763,7 +4763,7 @@ bbr_timeout_persist(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
         * the idle time (no responses to probes) reaches the maximum
         * backoff that we would use if retransmitting.
         */
-       if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
+       if (tp->t_rxtshift >= V_tcp_retries &&
            (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
            ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
                KMOD_TCPSTAT_INC(tcps_persistdrop);
@@ -4796,7 +4796,7 @@ bbr_timeout_persist(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
                        tp->t_flags &= ~TF_DELACK;
                free(t_template, M_TEMP);
        }
-       if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
+       if (tp->t_rxtshift < V_tcp_retries)
                tp->t_rxtshift++;
        bbr_start_hpts_timer(bbr, tp, cts, 3, 0, 0);
 out:
@@ -4990,8 +4990,8 @@ bbr_timeout_rxt(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
                 */
                tp->t_rxtshift++;
        }
-       if (tp->t_rxtshift > TCP_MAXRXTSHIFT) {
-               tp->t_rxtshift = TCP_MAXRXTSHIFT;
+       if (tp->t_rxtshift > V_tcp_retries) {
+               tp->t_rxtshift = V_tcp_retries;
                KMOD_TCPSTAT_INC(tcps_timeoutdrop);
                tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN);
                /* XXXGL: previously t_softerror was casted to uint16_t */
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index c9b5b937cc46..36fd5daf07dd 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -7445,7 +7445,7 @@ rack_timeout_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
         * the idle time (no responses to probes) reaches the maximum
         * backoff that we would use if retransmitting.
         */
-       if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
+       if (tp->t_rxtshift >= V_tcp_retries &&
            (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
             TICKS_2_USEC(ticks - tp->t_rcvtime) >= RACK_REXMTVAL(tp) * tcp_totbackoff)) {
                KMOD_TCPSTAT_INC(tcps_persistdrop);
@@ -7491,7 +7491,7 @@ rack_timeout_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
                        tp->t_flags &= ~TF_DELACK;
                free(t_template, M_TEMP);
        }
-       if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
+       if (tp->t_rxtshift < V_tcp_retries)
                tp->t_rxtshift++;
 out:
        rack_log_to_event(rack, RACK_TO_FRM_PERSIST, NULL);
@@ -7783,10 +7783,10 @@ rack_timeout_rxt(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
                 */
                tp->t_rxtshift++;
        }
-       if (tp->t_rxtshift > TCP_MAXRXTSHIFT) {
+       if (tp->t_rxtshift > V_tcp_retries) {
                tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN);
 drop_it:
-               tp->t_rxtshift = TCP_MAXRXTSHIFT;
+               tp->t_rxtshift = V_tcp_retries;
                KMOD_TCPSTAT_INC(tcps_timeoutdrop);
                /* XXXGL: previously t_softerror was casted to uint16_t */
                MPASS(tp->t_softerror >= 0);
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index d1301c18d54f..6126d85c7565 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -201,6 +201,28 @@ static int per_cpu_timers = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
     &per_cpu_timers , 0, "run tcp timers on all cpus");
 
+static int     /* sysctl handler for the "retries" node: validates and stores V_tcp_retries */
+sysctl_net_inet_tcp_retries(SYSCTL_HANDLER_ARGS)
+{
+       int error, new;
+
+       new = V_tcp_retries;    /* work on a copy; V_tcp_retries is only written after validation */
+       error = sysctl_handle_int(oidp, &new, 0, req);
+       if (error == 0 && req->newptr) {        /* NOTE(review): req->newptr presumably set only when a new value is supplied - confirm against sysctl(9) */
+               if ((new < 1) || (new > TCP_MAXRXTSHIFT))       /* accept only 1..TCP_MAXRXTSHIFT */
+                       error = EINVAL;
+               else
+                       V_tcp_retries = new;
+       }
+       return (error); /* 0, the error from sysctl_handle_int(), or EINVAL for an out-of-range value */
+}
+
+VNET_DEFINE(int, tcp_retries) = TCP_MAXRXTSHIFT;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, retries,
+    CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW,
+    &VNET_NAME(tcp_retries), 0, sysctl_net_inet_tcp_retries, "I",
+    "maximum number of consecutive timer based retransmissions");
+
 /*
  * Map the given inp to a CPU id.
  *
@@ -492,7 +514,7 @@ tcp_timer_persist(struct tcpcb *tp)
         * progress.
         */
        progdrop = tcp_maxunacktime_check(tp);
-       if (progdrop || (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
+       if (progdrop || (tp->t_rxtshift >= V_tcp_retries &&
            (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
             ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff))) {
                if (!progdrop)
@@ -555,10 +577,10 @@ tcp_timer_rexmt(struct tcpcb *tp)
         * or we've gone long enough without making progress, then drop
         * the session.
         */
-       if (++tp->t_rxtshift > TCP_MAXRXTSHIFT || tcp_maxunacktime_check(tp)) {
-               if (tp->t_rxtshift > TCP_MAXRXTSHIFT)
+       if (++tp->t_rxtshift > V_tcp_retries || tcp_maxunacktime_check(tp)) {
+               if (tp->t_rxtshift > V_tcp_retries)
                        TCPSTAT_INC(tcps_timeoutdrop);
-               tp->t_rxtshift = TCP_MAXRXTSHIFT;
+               tp->t_rxtshift = V_tcp_retries;
                tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN);
                NET_EPOCH_ENTER(et);
                tp = tcp_drop(tp, ETIMEDOUT);
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 86345b2aa630..587998331fbf 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -1289,6 +1289,7 @@ VNET_DECLARE(int, tcp_perconn_stats_dflt_tpl);
 VNET_DECLARE(int, tcp_perconn_stats_enable);
 #endif /* STATS */
 VNET_DECLARE(int, tcp_recvspace);
+VNET_DECLARE(int, tcp_retries);
 VNET_DECLARE(int, tcp_sack_globalholes);
 VNET_DECLARE(int, tcp_sack_globalmaxholes);
 VNET_DECLARE(int, tcp_sack_maxholes);
@@ -1335,6 +1336,7 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
 #define        V_tcp_perconn_stats_enable      VNET(tcp_perconn_stats_enable)
 #endif /* STATS */
 #define        V_tcp_recvspace                 VNET(tcp_recvspace)
+#define        V_tcp_retries                   VNET(tcp_retries)
 #define        V_tcp_sack_globalholes          VNET(tcp_sack_globalholes)
 #define        V_tcp_sack_globalmaxholes       VNET(tcp_sack_globalmaxholes)
 #define        V_tcp_sack_maxholes             VNET(tcp_sack_maxholes)

Reply via email to